Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile3
-rw-r--r--tools/arch/parisc/include/uapi/asm/mman.h1
-rw-r--r--tools/arch/powerpc/include/uapi/asm/kvm.h5
-rw-r--r--tools/arch/powerpc/include/uapi/asm/perf_regs.h20
-rw-r--r--tools/arch/riscv/include/uapi/asm/unistd.h2
-rw-r--r--tools/arch/s390/include/uapi/asm/kvm.h7
-rw-r--r--tools/arch/sh/include/asm/barrier.h2
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h6
-rw-r--r--tools/arch/x86/include/asm/mcsafe_test.h13
-rw-r--r--tools/arch/x86/include/asm/msr-index.h33
-rw-r--r--tools/arch/x86/include/asm/orc_types.h35
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h21
-rw-r--r--tools/arch/x86/include/uapi/asm/unistd.h9
-rw-r--r--tools/arch/x86/include/uapi/asm/vmx.h3
-rw-r--r--tools/arch/x86/lib/memcpy_64.S117
-rw-r--r--tools/arch/x86/tools/gen-insn-attr-x86.awk50
-rw-r--r--tools/bootconfig/main.c159
-rw-r--r--tools/bootconfig/samples/bad-override.bconf3
-rw-r--r--tools/bootconfig/samples/bad-override2.bconf3
-rw-r--r--tools/bootconfig/samples/good-override.bconf6
-rwxr-xr-xtools/bootconfig/scripts/bconf2ftrace.sh199
-rw-r--r--tools/bootconfig/scripts/ftrace.sh109
-rwxr-xr-xtools/bootconfig/scripts/ftrace2bconf.sh244
-rw-r--r--tools/bootconfig/scripts/xbc.sh56
-rwxr-xr-xtools/bootconfig/test-bootconfig.sh55
-rw-r--r--tools/bpf/Makefile23
-rw-r--r--tools/bpf/bpf_asm.c2
-rw-r--r--tools/bpf/bpf_dbg.c2
-rw-r--r--tools/bpf/bpftool/.gitignore5
-rw-r--r--tools/bpf/bpftool/Documentation/Makefile15
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-btf.rst37
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst41
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-feature.rst43
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-gen.rst46
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-iter.rst70
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-link.rst108
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst79
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-net.rst32
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-perf.rst32
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst61
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst34
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst31
-rw-r--r--tools/bpf/bpftool/Documentation/common_options.rst22
-rw-r--r--tools/bpf/bpftool/Makefile92
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool112
-rw-r--r--tools/bpf/bpftool/btf.c73
-rw-r--r--tools/bpf/bpftool/btf_dumper.c6
-rw-r--r--tools/bpf/bpftool/cfg.c4
-rw-r--r--tools/bpf/bpftool/cgroup.c69
-rw-r--r--tools/bpf/bpftool/common.c485
-rw-r--r--tools/bpf/bpftool/feature.c326
-rw-r--r--tools/bpf/bpftool/gen.c70
-rw-r--r--tools/bpf/bpftool/iter.c116
-rw-r--r--tools/bpf/bpftool/jit_disasm.c1
-rw-r--r--tools/bpf/bpftool/json_writer.c6
-rw-r--r--tools/bpf/bpftool/json_writer.h3
-rw-r--r--tools/bpf/bpftool/link.c423
-rw-r--r--tools/bpf/bpftool/main.c52
-rw-r--r--tools/bpf/bpftool/main.h110
-rw-r--r--tools/bpf/bpftool/map.c360
-rw-r--r--tools/bpf/bpftool/map_perf_ring.c2
-rw-r--r--tools/bpf/bpftool/net.c311
-rw-r--r--tools/bpf/bpftool/perf.c2
-rw-r--r--tools/bpf/bpftool/pids.c233
-rw-r--r--tools/bpf/bpftool/prog.c431
-rw-r--r--tools/bpf/bpftool/skeleton/pid_iter.bpf.c81
-rw-r--r--tools/bpf/bpftool/skeleton/pid_iter.h12
-rw-r--r--tools/bpf/bpftool/skeleton/profiler.bpf.c7
-rw-r--r--tools/bpf/bpftool/skeleton/profiler.h46
-rw-r--r--tools/bpf/bpftool/struct_ops.c15
-rw-r--r--tools/bpf/resolve_btfids/.gitignore4
-rw-r--r--tools/bpf/resolve_btfids/Build10
-rw-r--r--tools/bpf/resolve_btfids/Makefile94
-rw-r--r--tools/bpf/resolve_btfids/main.c727
-rw-r--r--tools/bpf/runqslower/Makefile3
-rw-r--r--tools/build/Build.include3
-rw-r--r--tools/build/Makefile2
-rw-r--r--tools/build/Makefile.feature16
-rw-r--r--tools/build/feature/Makefile26
-rw-r--r--tools/build/feature/test-all.c24
-rw-r--r--tools/build/feature/test-clang-bpf-co-re.c9
-rw-r--r--tools/build/feature/test-clang-bpf-global-var.c4
-rw-r--r--tools/build/feature/test-libbfd-buildid.c8
-rw-r--r--tools/build/feature/test-libdebuginfod.c8
-rw-r--r--tools/build/feature/test-libelf-mmap.c9
-rw-r--r--tools/build/feature/test-libopencsd.c4
-rw-r--r--tools/build/feature/test-libpfm4.c9
-rw-r--r--tools/cgroup/iocost_monitor.py100
-rw-r--r--tools/cgroup/memcg_slabinfo.py226
-rw-r--r--tools/gpio/gpio-event-mon.c149
-rw-r--r--tools/gpio/gpio-hammer.c56
-rw-r--r--tools/gpio/gpio-utils.c180
-rw-r--r--tools/gpio/gpio-utils.h48
-rw-r--r--tools/gpio/gpio-watch.c16
-rw-r--r--tools/gpio/lsgpio.c69
-rw-r--r--tools/hv/hv_kvp_daemon.c2
-rw-r--r--tools/iio/iio_event_monitor.c2
-rw-r--r--tools/include/linux/bits.h3
-rw-r--r--tools/include/linux/btf_ids.h187
-rw-r--r--tools/include/linux/compiler-gcc.h12
-rw-r--r--tools/include/linux/compiler.h9
-rw-r--r--tools/include/linux/irqflags.h4
-rw-r--r--tools/include/linux/jhash.h2
-rw-r--r--tools/include/linux/kallsyms.h2
-rw-r--r--tools/include/linux/objtool.h129
-rw-r--r--tools/include/linux/rbtree.h2
-rw-r--r--tools/include/linux/rbtree_augmented.h2
-rw-r--r--tools/include/linux/static_call_types.h35
-rw-r--r--tools/include/uapi/asm-generic/unistd.h24
-rw-r--r--tools/include/uapi/drm/i915_drm.h28
-rw-r--r--tools/include/uapi/linux/bpf.h1425
-rw-r--r--tools/include/uapi/linux/fcntl.h10
-rw-r--r--tools/include/uapi/linux/filter.h90
-rw-r--r--tools/include/uapi/linux/fs.h1
-rw-r--r--tools/include/uapi/linux/fscrypt.h3
-rw-r--r--tools/include/uapi/linux/if_link.h2
-rw-r--r--tools/include/uapi/linux/if_xdp.h5
-rw-r--r--tools/include/uapi/linux/in.h3
-rw-r--r--tools/include/uapi/linux/kvm.h28
-rw-r--r--tools/include/uapi/linux/perf_event.h51
-rw-r--r--tools/include/uapi/linux/stat.h19
-rw-r--r--tools/include/uapi/linux/vhost.h6
-rw-r--r--tools/io_uring/io_uring-bench.c4
-rw-r--r--tools/io_uring/liburing.h6
-rwxr-xr-xtools/kvm/kvm_stat/kvm_stat84
-rw-r--r--tools/kvm/kvm_stat/kvm_stat.service16
-rw-r--r--tools/kvm/kvm_stat/kvm_stat.txt15
-rw-r--r--tools/lib/api/fd/array.c23
-rw-r--r--tools/lib/api/fd/array.h16
-rw-r--r--tools/lib/api/fs/fs.c17
-rw-r--r--tools/lib/api/fs/fs.h12
-rw-r--r--tools/lib/api/io.h115
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/Makefile38
-rw-r--r--tools/lib/bpf/bpf.c117
-rw-r--r--tools/lib/bpf/bpf.h54
-rw-r--r--tools/lib/bpf/bpf_core_read.h128
-rw-r--r--tools/lib/bpf/bpf_endian.h43
-rw-r--r--tools/lib/bpf/bpf_helpers.h73
-rw-r--r--tools/lib/bpf/bpf_prog_linfo.c3
-rw-r--r--tools/lib/bpf/bpf_tracing.h24
-rw-r--r--tools/lib/bpf/btf.c1831
-rw-r--r--tools/lib/bpf/btf.h122
-rw-r--r--tools/lib/bpf/btf_dump.c173
-rw-r--r--tools/lib/bpf/hashmap.c13
-rw-r--r--tools/lib/bpf/hashmap.h28
-rw-r--r--tools/lib/bpf/libbpf.c4902
-rw-r--r--tools/lib/bpf/libbpf.h84
-rw-r--r--tools/lib/bpf/libbpf.map83
-rw-r--r--tools/lib/bpf/libbpf_common.h2
-rw-r--r--tools/lib/bpf/libbpf_internal.h149
-rw-r--r--tools/lib/bpf/libbpf_probes.c16
-rw-r--r--tools/lib/bpf/netlink.c128
-rw-r--r--tools/lib/bpf/nlattr.c9
-rw-r--r--tools/lib/bpf/ringbuf.c284
-rw-r--r--tools/lib/bpf/xsk.c383
-rw-r--r--tools/lib/bpf/xsk.h9
-rw-r--r--tools/lib/perf/Documentation/libperf-counting.txt14
-rw-r--r--tools/lib/perf/Documentation/libperf-sampling.txt13
-rw-r--r--tools/lib/perf/Documentation/libperf.txt4
-rw-r--r--tools/lib/perf/cpumap.c2
-rw-r--r--tools/lib/perf/evlist.c13
-rw-r--r--tools/lib/perf/include/internal/evlist.h2
-rw-r--r--tools/lib/perf/include/perf/event.h25
-rw-r--r--tools/lib/rbtree.c2
-rw-r--r--tools/lib/subcmd/help.c10
-rw-r--r--tools/lib/subcmd/parse-options.c3
-rw-r--r--tools/lib/subcmd/parse-options.h2
-rw-r--r--tools/lib/symbol/kallsyms.c86
-rw-r--r--tools/lib/symbol/kallsyms.h2
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-plugins.txt25
-rw-r--r--tools/lib/traceevent/event-parse-api.c8
-rw-r--r--tools/lib/traceevent/event-parse-local.h42
-rw-r--r--tools/lib/traceevent/event-parse.c1296
-rw-r--r--tools/lib/traceevent/event-parse.h44
-rw-r--r--tools/lib/traceevent/event-plugin.c285
-rw-r--r--tools/lib/traceevent/kbuffer-parse.c45
-rw-r--r--tools/lib/traceevent/kbuffer.h19
-rw-r--r--tools/lib/traceevent/parse-filter.c52
-rw-r--r--tools/lib/traceevent/plugins/Build2
-rw-r--r--tools/lib/traceevent/plugins/Makefile4
-rw-r--r--tools/lib/traceevent/plugins/plugin_function.c123
-rw-r--r--tools/lib/traceevent/plugins/plugin_futex.c123
-rw-r--r--tools/lib/traceevent/plugins/plugin_hrtimer.c17
-rw-r--r--tools/lib/traceevent/plugins/plugin_jbd2.c17
-rw-r--r--tools/lib/traceevent/plugins/plugin_kmem.c17
-rw-r--r--tools/lib/traceevent/plugins/plugin_kvm.c42
-rw-r--r--tools/lib/traceevent/plugins/plugin_mac80211.c17
-rw-r--r--tools/lib/traceevent/plugins/plugin_sched_switch.c17
-rw-r--r--tools/lib/traceevent/plugins/plugin_tlb.c66
-rw-r--r--tools/memory-model/Documentation/cheatsheet.txt33
-rw-r--r--tools/memory-model/Documentation/explanation.txt109
-rw-r--r--tools/memory-model/Documentation/litmus-tests.txt1074
-rw-r--r--tools/memory-model/Documentation/recipes.txt6
-rw-r--r--tools/memory-model/Documentation/references.txt23
-rw-r--r--tools/memory-model/Documentation/simple.txt271
-rw-r--r--tools/memory-model/README186
-rwxr-xr-xtools/nfsd/inject_fault.sh50
-rw-r--r--tools/objtool/Build13
-rw-r--r--tools/objtool/Documentation/stack-validation.txt41
-rw-r--r--tools/objtool/Makefile21
-rw-r--r--tools/objtool/arch.h25
-rw-r--r--tools/objtool/arch/x86/Build1
-rw-r--r--tools/objtool/arch/x86/decode.c357
-rw-r--r--tools/objtool/arch/x86/include/arch_elf.h6
-rw-r--r--tools/objtool/arch/x86/include/arch_special.h20
-rw-r--r--tools/objtool/arch/x86/include/cfi_regs.h25
-rw-r--r--tools/objtool/arch/x86/special.c145
-rw-r--r--tools/objtool/builtin-check.c28
-rw-r--r--tools/objtool/builtin-orc.c30
-rw-r--r--tools/objtool/builtin.h2
-rw-r--r--tools/objtool/cfi.h35
-rw-r--r--tools/objtool/check.c1418
-rw-r--r--tools/objtool/check.h45
-rw-r--r--tools/objtool/elf.c417
-rw-r--r--tools/objtool/elf.h63
-rw-r--r--tools/objtool/objtool.c34
-rw-r--r--tools/objtool/objtool.h32
-rw-r--r--tools/objtool/orc.h18
-rw-r--r--tools/objtool/orc_dump.c12
-rw-r--r--tools/objtool/orc_gen.c68
-rw-r--r--tools/objtool/special.c76
-rw-r--r--tools/objtool/special.h10
-rwxr-xr-xtools/objtool/sync-check.sh33
-rw-r--r--tools/objtool/weak.c36
-rw-r--r--tools/perf/Documentation/Makefile4
-rw-r--r--tools/perf/Documentation/itrace.txt26
-rw-r--r--tools/perf/Documentation/perf-bench.txt19
-rw-r--r--tools/perf/Documentation/perf-c2c.txt47
-rw-r--r--tools/perf/Documentation/perf-config.txt15
-rw-r--r--tools/perf/Documentation/perf-data.txt3
-rw-r--r--tools/perf/Documentation/perf-diff.txt4
-rw-r--r--tools/perf/Documentation/perf-ftrace.txt76
-rw-r--r--tools/perf/Documentation/perf-inject.txt6
-rw-r--r--tools/perf/Documentation/perf-intel-pt.txt171
-rw-r--r--tools/perf/Documentation/perf-list.txt10
-rw-r--r--tools/perf/Documentation/perf-record.txt82
-rw-r--r--tools/perf/Documentation/perf-report.txt11
-rw-r--r--tools/perf/Documentation/perf-script.txt15
-rw-r--r--tools/perf/Documentation/perf-stat.txt100
-rw-r--r--tools/perf/Documentation/perf-top.txt20
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt29
-rw-r--r--tools/perf/Documentation/perf.txt69
-rw-r--r--tools/perf/Documentation/security.txt237
-rw-r--r--tools/perf/Documentation/topdown.txt256
-rw-r--r--tools/perf/Makefile.config94
-rw-r--r--tools/perf/Makefile.perf55
-rw-r--r--tools/perf/arch/arm/util/auxtrace.c17
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c20
-rw-r--r--tools/perf/arch/arm64/util/Build1
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c12
-rw-r--r--tools/perf/arch/arm64/util/tsc.c21
-rw-r--r--tools/perf/arch/arm64/util/unwind-libdw.c6
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl18
-rw-r--r--tools/perf/arch/powerpc/include/perf_regs.h8
-rw-r--r--tools/perf/arch/powerpc/util/Build1
-rw-r--r--tools/perf/arch/powerpc/util/book3s_hcalls.h2
-rw-r--r--tools/perf/arch/powerpc/util/header.c20
-rw-r--r--tools/perf/arch/powerpc/util/kvm-stat.c2
-rw-r--r--tools/perf/arch/powerpc/util/perf_regs.c55
-rw-r--r--tools/perf/arch/powerpc/util/unwind-libdw.c6
-rw-r--r--tools/perf/arch/powerpc/util/utils_header.h15
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl18
-rw-r--r--tools/perf/arch/s390/util/kvm-stat.c8
-rw-r--r--tools/perf/arch/x86/annotate/instructions.c1
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl18
-rw-r--r--tools/perf/arch/x86/tests/dwarf-unwind.c8
-rw-r--r--tools/perf/arch/x86/tests/perf-time-to-tsc.c6
-rw-r--r--tools/perf/arch/x86/util/Build2
-rw-r--r--tools/perf/arch/x86/util/group.c28
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c2
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c67
-rw-r--r--tools/perf/arch/x86/util/kvm-stat.c12
-rw-r--r--tools/perf/arch/x86/util/topdown.c63
-rw-r--r--tools/perf/arch/x86/util/tsc.c73
-rw-r--r--tools/perf/arch/x86/util/unwind-libdw.c6
-rw-r--r--tools/perf/bench/Build7
-rw-r--r--tools/perf/bench/bench.h6
-rw-r--r--tools/perf/bench/epoll-ctl.c4
-rw-r--r--tools/perf/bench/epoll-wait.c7
-rw-r--r--tools/perf/bench/find-bit-bench.c135
-rw-r--r--tools/perf/bench/futex-hash.c3
-rw-r--r--tools/perf/bench/futex-lock-pi.c3
-rw-r--r--tools/perf/bench/inject-buildid.c476
-rw-r--r--tools/perf/bench/kallsyms-parse.c75
-rw-r--r--tools/perf/bench/mem-functions.c21
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-lib.c24
-rw-r--r--tools/perf/bench/numa.c144
-rw-r--r--tools/perf/bench/sched-messaging.c6
-rw-r--r--tools/perf/bench/synthesize.c262
-rw-r--r--tools/perf/bench/syscall.c81
-rw-r--r--tools/perf/builtin-annotate.c17
-rw-r--r--tools/perf/builtin-bench.c23
-rw-r--r--tools/perf/builtin-buildid-cache.c25
-rw-r--r--tools/perf/builtin-c2c.c117
-rw-r--r--tools/perf/builtin-data.c1
-rw-r--r--tools/perf/builtin-diff.c127
-rw-r--r--tools/perf/builtin-evlist.c2
-rw-r--r--tools/perf/builtin-ftrace.c528
-rw-r--r--tools/perf/builtin-inject.c228
-rw-r--r--tools/perf/builtin-kmem.c68
-rw-r--r--tools/perf/builtin-kvm.c25
-rw-r--r--tools/perf/builtin-list.c9
-rw-r--r--tools/perf/builtin-lock.c42
-rw-r--r--tools/perf/builtin-mem.c26
-rw-r--r--tools/perf/builtin-probe.c3
-rw-r--r--tools/perf/builtin-record.c481
-rw-r--r--tools/perf/builtin-report.c84
-rw-r--r--tools/perf/builtin-sched.c140
-rw-r--r--tools/perf/builtin-script.c660
-rw-r--r--tools/perf/builtin-stat.c517
-rw-r--r--tools/perf/builtin-timechart.c54
-rw-r--r--tools/perf/builtin-top.c63
-rw-r--r--tools/perf/builtin-trace.c220
-rw-r--r--tools/perf/builtin-version.c1
-rwxr-xr-xtools/perf/check-headers.sh25
-rw-r--r--tools/perf/design.txt3
-rw-r--r--tools/perf/jvmti/libjvmti.c92
-rw-r--r--tools/perf/perf-sys.h22
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/cache.json10
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/frontend.json12
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/marked.json10
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/other.json16
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/translation.json2
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/metrics.json189
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json63
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z15/extended.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/branch.json5
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/cache.json41
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/core.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/data-fabric.json98
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/recommended.json178
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/cache.json23
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/core.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/data-fabric.json98
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/recommended.json178
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/cache.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json159
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json34
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/memory.json704
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/other.json1100
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json21
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv1
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/cache.json2342
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/floating-point.json96
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/frontend.json656
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/memory.json1973
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/other.json172
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/pipeline.json1200
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json141
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json26
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json730
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json358
-rw-r--r--tools/perf/pmu-events/jevents.c239
-rw-r--r--tools/perf/pmu-events/jevents.h23
-rw-r--r--tools/perf/pmu-events/jsmn.h2
-rw-r--r--tools/perf/pmu-events/pmu-events.h8
-rwxr-xr-xtools/perf/scripts/python/bin/flamegraph-record2
-rwxr-xr-xtools/perf/scripts/python/bin/flamegraph-report3
-rw-r--r--tools/perf/scripts/python/export-to-postgresql.py2
-rwxr-xr-xtools/perf/scripts/python/exported-sql-viewer.py11
-rwxr-xr-xtools/perf/scripts/python/flamegraph.py126
-rw-r--r--tools/perf/scripts/python/futex-contention.py51
-rw-r--r--tools/perf/tests/Build6
-rw-r--r--tools/perf/tests/api-io.c304
-rw-r--r--tools/perf/tests/attr.c2
-rw-r--r--tools/perf/tests/attr/README2
-rw-r--r--tools/perf/tests/attr/system-wide-dummy50
-rw-r--r--tools/perf/tests/attr/test-record-C012
-rw-r--r--tools/perf/tests/attr/test-record-group229
-rw-r--r--tools/perf/tests/attr/test-record-pfm-period9
-rw-r--r--tools/perf/tests/bp_signal.c5
-rw-r--r--tools/perf/tests/bpf.c2
-rw-r--r--tools/perf/tests/builtin-test.c81
-rw-r--r--tools/perf/tests/code-reading.c2
-rw-r--r--tools/perf/tests/demangle-java-test.c42
-rw-r--r--tools/perf/tests/dwarf-unwind.c11
-rw-r--r--tools/perf/tests/event-times.c8
-rw-r--r--tools/perf/tests/event_update.c2
-rw-r--r--tools/perf/tests/evsel-roundtrip-name.c25
-rw-r--r--tools/perf/tests/evsel-tp-sched.c10
-rw-r--r--tools/perf/tests/expand-cgroup.c241
-rw-r--r--tools/perf/tests/expr.c59
-rw-r--r--tools/perf/tests/fdarray.c22
-rw-r--r--tools/perf/tests/hists_cumulate.c10
-rw-r--r--tools/perf/tests/hists_filter.c2
-rw-r--r--tools/perf/tests/hists_output.c10
-rw-r--r--tools/perf/tests/make12
-rw-r--r--tools/perf/tests/mmap-basic.c8
-rw-r--r--tools/perf/tests/openat-syscall-all-cpus.c14
-rw-r--r--tools/perf/tests/openat-syscall-tp-fields.c10
-rw-r--r--tools/perf/tests/openat-syscall.c10
-rw-r--r--tools/perf/tests/parse-events.c259
-rw-r--r--tools/perf/tests/parse-metric.c357
-rw-r--r--tools/perf/tests/pe-file-parsing.c98
-rw-r--r--tools/perf/tests/pe-file.c14
-rw-r--r--tools/perf/tests/pe-file.exe bin 0 -> 75595 bytes
-rw-r--r--tools/perf/tests/pe-file.exe.debug bin 0 -> 141644 bytes
-rw-r--r--tools/perf/tests/perf-record.c10
-rw-r--r--tools/perf/tests/pfm.c203
-rw-r--r--tools/perf/tests/pmu-events.c297
-rw-r--r--tools/perf/tests/pmu.c5
-rw-r--r--tools/perf/tests/python-use.c1
-rw-r--r--tools/perf/tests/sample-parsing.c6
-rw-r--r--tools/perf/tests/sdt.c6
-rwxr-xr-xtools/perf/tests/shell/buildid.sh101
-rwxr-xr-xtools/perf/tests/shell/record+script_probe_vfs_getname.sh4
-rwxr-xr-xtools/perf/tests/shell/record+zstd_comp_decomp.sh3
-rwxr-xr-xtools/perf/tests/shell/test_arm_coresight.sh183
-rw-r--r--tools/perf/tests/sw-clock.c2
-rw-r--r--tools/perf/tests/switch-tracking.c14
-rw-r--r--tools/perf/tests/tests.h12
-rw-r--r--tools/perf/tests/topology.c12
-rwxr-xr-xtools/perf/trace/beauty/arch_errno_names.sh4
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h442
-rw-r--r--tools/perf/trace/beauty/mmap.c74
-rwxr-xr-xtools/perf/trace/beauty/mmap_flags.sh16
-rwxr-xr-xtools/perf/trace/beauty/mmap_prot.sh30
-rwxr-xr-xtools/perf/trace/beauty/mremap_flags.sh18
-rw-r--r--tools/perf/trace/beauty/sockaddr.c9
-rwxr-xr-xtools/perf/trace/beauty/socket.sh24
-rw-r--r--tools/perf/trace/beauty/statx.c1
-rw-r--r--tools/perf/ui/browsers/annotate.c2
-rw-r--r--tools/perf/ui/browsers/hists.c57
-rw-r--r--tools/perf/ui/gtk/annotate.c2
-rw-r--r--tools/perf/ui/gtk/hists.c6
-rw-r--r--tools/perf/ui/hist.c16
-rw-r--r--tools/perf/util/Build76
-rw-r--r--tools/perf/util/annotate.c61
-rw-r--r--tools/perf/util/annotate.h4
-rw-r--r--tools/perf/util/arm-spe-decoder/Build1
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.c219
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.h82
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c (renamed from tools/perf/util/arm-spe-pkt-decoder.c)0
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h (renamed from tools/perf/util/arm-spe-pkt-decoder.h)16
-rw-r--r--tools/perf/util/arm-spe.c832
-rw-r--r--tools/perf/util/auxtrace.c197
-rw-r--r--tools/perf/util/auxtrace.h62
-rw-r--r--tools/perf/util/bpf-event.c96
-rw-r--r--tools/perf/util/bpf-event.h7
-rw-r--r--tools/perf/util/bpf-loader.c16
-rw-r--r--tools/perf/util/bpf-prologue.c14
-rw-r--r--tools/perf/util/branch.h21
-rw-r--r--tools/perf/util/build-id.c67
-rw-r--r--tools/perf/util/build-id.h12
-rw-r--r--tools/perf/util/callchain.c113
-rw-r--r--tools/perf/util/callchain.h18
-rw-r--r--tools/perf/util/cap.h4
-rw-r--r--tools/perf/util/cgroup.c118
-rw-r--r--tools/perf/util/cgroup.h3
-rw-r--r--tools/perf/util/clockid.c119
-rw-r--r--tools/perf/util/clockid.h11
-rw-r--r--tools/perf/util/cloexec.c6
-rw-r--r--tools/perf/util/config.c16
-rw-r--r--tools/perf/util/config.h2
-rw-r--r--tools/perf/util/counts.c10
-rw-r--r--tools/perf/util/counts.h7
-rw-r--r--tools/perf/util/cputopo.h2
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c2
-rw-r--r--tools/perf/util/cs-etm.c23
-rw-r--r--tools/perf/util/cs-etm.h3
-rw-r--r--tools/perf/util/data-convert-bt.c63
-rw-r--r--tools/perf/util/data-convert.h1
-rw-r--r--tools/perf/util/debug.c61
-rw-r--r--tools/perf/util/demangle-java.c13
-rw-r--r--tools/perf/util/dso.c45
-rw-r--r--tools/perf/util/dso.h24
-rw-r--r--tools/perf/util/dsos.c9
-rw-r--r--tools/perf/util/env.h17
-rw-r--r--tools/perf/util/event.c64
-rw-r--r--tools/perf/util/event.h9
-rw-r--r--tools/perf/util/evlist.c435
-rw-r--r--tools/perf/util/evlist.h75
-rw-r--r--tools/perf/util/evsel.c627
-rw-r--r--tools/perf/util/evsel.h345
-rw-r--r--tools/perf/util/evsel_config.h45
-rw-r--r--tools/perf/util/evsel_fprintf.c11
-rw-r--r--tools/perf/util/evsel_fprintf.h3
-rw-r--r--tools/perf/util/expr.c257
-rw-r--r--tools/perf/util/expr.h62
-rw-r--r--tools/perf/util/expr.l54
-rw-r--r--tools/perf/util/expr.y72
-rw-r--r--tools/perf/util/genelf_debug.c4
-rw-r--r--tools/perf/util/group.h8
-rw-r--r--tools/perf/util/hashmap.c238
-rw-r--r--tools/perf/util/hashmap.h180
-rw-r--r--tools/perf/util/header.c304
-rw-r--r--tools/perf/util/header.h2
-rw-r--r--tools/perf/util/hist.c44
-rw-r--r--tools/perf/util/hist.h6
-rw-r--r--tools/perf/util/intel-bts.c16
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c214
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h1
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c2
-rw-r--r--tools/perf/util/intel-pt.c471
-rw-r--r--tools/perf/util/jitdump.c45
-rw-r--r--tools/perf/util/jitdump.h6
-rw-r--r--tools/perf/util/machine.c507
-rw-r--r--tools/perf/util/machine.h7
-rw-r--r--tools/perf/util/map.c46
-rw-r--r--tools/perf/util/map.h24
-rw-r--r--tools/perf/util/mem-events.c15
-rw-r--r--tools/perf/util/mem-events.h2
-rw-r--r--tools/perf/util/mem2node.c3
-rw-r--r--tools/perf/util/metricgroup.c893
-rw-r--r--tools/perf/util/metricgroup.h31
-rw-r--r--tools/perf/util/ordered-events.c2
-rw-r--r--tools/perf/util/ordered-events.h2
-rw-r--r--tools/perf/util/parse-events.c287
-rw-r--r--tools/perf/util/parse-events.h24
-rw-r--r--tools/perf/util/parse-events.l43
-rw-r--r--tools/perf/util/parse-events.y65
-rw-r--r--tools/perf/util/parse-sublevel-options.c70
-rw-r--r--tools/perf/util/parse-sublevel-options.h11
-rw-r--r--tools/perf/util/perf_api_probe.c174
-rw-r--r--tools/perf/util/perf_api_probe.h15
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c1
-rw-r--r--tools/perf/util/pfm.c281
-rw-r--r--tools/perf/util/pfm.h37
-rw-r--r--tools/perf/util/pmu.c170
-rw-r--r--tools/perf/util/pmu.h19
-rw-r--r--tools/perf/util/print_binary.c2
-rw-r--r--tools/perf/util/probe-event.c141
-rw-r--r--tools/perf/util/probe-file.c2
-rw-r--r--tools/perf/util/probe-finder.c69
-rw-r--r--tools/perf/util/probe-finder.h7
-rw-r--r--tools/perf/util/pstack.c2
-rw-r--r--tools/perf/util/python.c25
-rw-r--r--tools/perf/util/record.c196
-rw-r--r--tools/perf/util/record.h13
-rw-r--r--tools/perf/util/s390-cpumcf-kernel.h1
-rw-r--r--tools/perf/util/s390-cpumsf.c12
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c8
-rw-r--r--tools/perf/util/session.c62
-rw-r--r--tools/perf/util/sideband_evlist.c148
-rw-r--r--tools/perf/util/smt.c10
-rw-r--r--tools/perf/util/sort.c14
-rw-r--r--tools/perf/util/sort.h2
-rw-r--r--tools/perf/util/stat-display.c30
-rw-r--r--tools/perf/util/stat-shadow.c288
-rw-r--r--tools/perf/util/stat.c116
-rw-r--r--tools/perf/util/stat.h25
-rw-r--r--tools/perf/util/stream.c342
-rw-r--r--tools/perf/util/stream.h41
-rw-r--r--tools/perf/util/symbol-elf.c122
-rw-r--r--tools/perf/util/symbol-minimal.c31
-rw-r--r--tools/perf/util/symbol.c202
-rw-r--r--tools/perf/util/symbol.h13
-rw-r--r--tools/perf/util/synthetic-events.c189
-rw-r--r--tools/perf/util/syscalltbl.c4
-rw-r--r--tools/perf/util/syscalltbl.h14
-rw-r--r--tools/perf/util/thread-stack.c274
-rw-r--r--tools/perf/util/thread-stack.h11
-rw-r--r--tools/perf/util/thread.c24
-rw-r--r--tools/perf/util/thread.h15
-rw-r--r--tools/perf/util/tool.h3
-rw-r--r--tools/perf/util/top.c2
-rw-r--r--tools/perf/util/top.h3
-rw-r--r--tools/perf/util/topdown.c58
-rw-r--r--tools/perf/util/topdown.h12
-rw-r--r--tools/perf/util/trace-event-info.c2
-rw-r--r--tools/perf/util/trace-event-read.c2
-rw-r--r--tools/perf/util/tsc.c81
-rw-r--r--tools/perf/util/tsc.h5
-rw-r--r--tools/perf/util/unwind-libunwind-local.c2
-rw-r--r--tools/perf/util/util.c1
-rw-r--r--tools/perf/util/util.h6
-rw-r--r--tools/perf/util/zstd.c2
-rw-r--r--tools/power/acpi/Makefile2
-rw-r--r--tools/power/acpi/os_specific/service_layers/oslinuxtbl.c13
-rw-r--r--tools/power/cpupower/lib/cpufreq.c10
-rw-r--r--tools/power/cpupower/man/cpupower-idle-info.12
-rw-r--r--tools/power/cpupower/man/cpupower-monitor.14
-rw-r--r--tools/power/cpupower/utils/cpufreq-set.c14
-rw-r--r--tools/power/cpupower/utils/cpupower-info.c2
-rw-r--r--tools/power/cpupower/utils/cpupower-set.c2
-rw-r--r--tools/power/cpupower/utils/helpers/bitmask.c6
-rw-r--r--tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c6
-rw-r--r--tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/nhm_idle.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/snb_idle.c2
-rw-r--r--tools/power/pm-graph/README2
-rw-r--r--tools/power/pm-graph/config/custom-timeline-functions.cfg2
-rwxr-xr-xtools/power/pm-graph/sleepgraph.py249
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c149
-rw-r--r--tools/power/x86/intel-speed-select/isst-core.c14
-rw-r--r--tools/power/x86/intel-speed-select/isst-display.c28
-rw-r--r--tools/power/x86/intel-speed-select/isst.h3
-rw-r--r--tools/spi/Makefile4
-rw-r--r--tools/spi/spidev_test.c31
-rw-r--r--tools/testing/ktest/examples/README2
-rw-r--r--tools/testing/ktest/examples/crosstests.conf2
-rwxr-xr-xtools/testing/ktest/ktest.pl103
-rw-r--r--tools/testing/ktest/sample.conf18
-rw-r--r--tools/testing/kunit/configs/broken_on_uml.config1
-rwxr-xr-xtools/testing/kunit/kunit.py311
-rw-r--r--tools/testing/kunit/kunit_config.py2
-rw-r--r--tools/testing/kunit/kunit_json.py63
-rw-r--r--tools/testing/kunit/kunit_kernel.py33
-rw-r--r--tools/testing/kunit/kunit_parser.py80
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py113
-rw-r--r--tools/testing/kunit/test_data/test_insufficient_memory.log0
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-all_passed.log1
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-crash.log1
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-failure.log1
-rw-r--r--tools/testing/nvdimm/dax-dev.c22
-rw-r--r--tools/testing/nvdimm/test/iomap.c2
-rw-r--r--tools/testing/nvdimm/test/nfit.c416
-rw-r--r--tools/testing/nvdimm/test/nfit_test.h6
-rw-r--r--tools/testing/radix-tree/idr-test.c29
-rw-r--r--tools/testing/radix-tree/linux/kernel.h1
-rw-r--r--tools/testing/radix-tree/linux/local_lock.h8
-rw-r--r--tools/testing/radix-tree/test.h4
-rw-r--r--tools/testing/scatterlist/Makefile3
-rw-r--r--tools/testing/scatterlist/linux/mm.h35
-rw-r--r--tools/testing/scatterlist/main.c53
-rw-r--r--tools/testing/selftests/Makefile47
-rw-r--r--tools/testing/selftests/arm64/Makefile2
-rw-r--r--tools/testing/selftests/arm64/fp/.gitignore5
-rw-r--r--tools/testing/selftests/arm64/fp/Makefile17
-rw-r--r--tools/testing/selftests/arm64/fp/README100
-rw-r--r--tools/testing/selftests/arm64/fp/asm-offsets.h11
-rw-r--r--tools/testing/selftests/arm64/fp/assembler.h57
-rwxr-xr-xtools/testing/selftests/arm64/fp/fpsimd-stress60
-rw-r--r--tools/testing/selftests/arm64/fp/fpsimd-test.S482
-rw-r--r--tools/testing/selftests/arm64/fp/sve-probe-vls.c58
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace-asm.S33
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c336
-rwxr-xr-xtools/testing/selftests/arm64/fp/sve-stress59
-rw-r--r--tools/testing/selftests/arm64/fp/sve-test.S672
-rw-r--r--tools/testing/selftests/arm64/fp/vlset.c155
-rw-r--r--tools/testing/selftests/arm64/mte/.gitignore6
-rw-r--r--tools/testing/selftests/arm64/mte/Makefile29
-rw-r--r--tools/testing/selftests/arm64/mte/check_buffer_fill.c475
-rw-r--r--tools/testing/selftests/arm64/mte/check_child_memory.c195
-rw-r--r--tools/testing/selftests/arm64/mte/check_ksm_options.c159
-rw-r--r--tools/testing/selftests/arm64/mte/check_mmap_options.c262
-rw-r--r--tools/testing/selftests/arm64/mte/check_tags_inclusion.c185
-rw-r--r--tools/testing/selftests/arm64/mte/check_user_mem.c111
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.c341
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.h118
-rw-r--r--tools/testing/selftests/arm64/mte/mte_def.h60
-rw-r--r--tools/testing/selftests/arm64/mte/mte_helper.S128
-rw-r--r--tools/testing/selftests/arm64/pauth/.gitignore2
-rw-r--r--tools/testing/selftests/arm64/pauth/Makefile39
-rw-r--r--tools/testing/selftests/arm64/pauth/exec_target.c34
-rw-r--r--tools/testing/selftests/arm64/pauth/helper.c39
-rw-r--r--tools/testing/selftests/arm64/pauth/helper.h28
-rw-r--r--tools/testing/selftests/arm64/pauth/pac.c368
-rw-r--r--tools/testing/selftests/arm64/pauth/pac_corruptor.S19
-rw-r--r--tools/testing/selftests/arm64/signal/Makefile4
-rw-r--r--tools/testing/selftests/bpf/.gitignore8
-rw-r--r--tools/testing/selftests/bpf/Makefile121
-rw-r--r--tools/testing/selftests/bpf/README.rst104
-rw-r--r--tools/testing/selftests/bpf/bench.c464
-rw-r--r--tools/testing/selftests/bpf/bench.h81
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_count.c91
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_rename.c178
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c566
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c184
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_rename.sh9
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh75
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_trigger.sh9
-rw-r--r--tools/testing/selftests/bpf/bpf_legacy.h14
-rw-r--r--tools/testing/selftests/bpf/bpf_tcp_helpers.h13
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c23
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/config2
-rw-r--r--tools/testing/selftests/bpf/flow_dissector_load.h8
-rw-r--r--tools/testing/selftests/bpf/get_cgroup_id_user.c14
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c245
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h45
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c (renamed from tools/testing/selftests/bpf/test_align.c)117
-rw-r--r--tools/testing/selftests/bpf/prog_tests/autoload.c41
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c1074
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c110
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c (renamed from tools/testing/selftests/bpf/test_btf.c)418
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c132
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_endian.c101
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c197
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c234
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_write.c244
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c417
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_link.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c91
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cls_redirect.c498
-rw-r--r--tools/testing/selftests/bpf/prog_tests/connect_force_port.c166
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_autosize.c225
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_extern.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c372
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_retro.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/d_path.c157
-rw-r--r--tools/testing/selftests/bpf/prog_tests/enable_stats.c45
-rw-r--r--tools/testing/selftests/bpf/prog_tests/endian.c53
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c294
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c169
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c622
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c91
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_data.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_data_init.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/hashmap.c (renamed from tools/testing/selftests/bpf/test_hashmap.c)280
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfree_skb.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_btf.c109
-rw-r--r--tools/testing/selftests/bpf/prog_tests/l4lb_all.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_lock.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_ptr.c32
-rw-r--r--tools/testing/selftests/bpf/prog_tests/metadata.c141
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mmap.c19
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c66
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_buffer.c110
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c116
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning.c49
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pkt_access.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pkt_md_access.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/queue_stack_map.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c96
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reference_tracking.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/resolve_btfids.c172
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c239
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c102
-rw-r--r--tools/testing/selftests/bpf/prog_tests/section_names.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/signal_pending.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_assign.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_lookup.c1330
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_ctx.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_helpers.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skeleton.c45
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf_btf.c62
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_fields.c382
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c224
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_sk.c46
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spinlock.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/subprogs.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tailcalls.c332
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c610
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_rtt.c116
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpffs.c94
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_global_funcs.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_local_storage.c60
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_lsm.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_overhead.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_profiler.c72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_ext.c111
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_printk.c75
-rw-r--r--tools/testing/selftests/bpf/prog_tests/udp_limit.c75
-rw-r--r--tools/testing/selftests/bpf/prog_tests/varlen.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c124
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c70
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c89
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_link.c151
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_noinline.c52
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cubic.c7
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_flow.c32
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter.h130
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c40
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c115
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c28
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c46
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c50
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c34
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c57
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_netlink.c64
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c59
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c50
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_file.c34
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c37
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c234
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c250
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c52
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c35
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c21
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h22
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp4.c71
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp6.c79
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h51
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c4
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf_data.c50
-rw-r--r--tools/testing/selftests/bpf/progs/btf_ptr.h27
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi.h13
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c33
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c57
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c57
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c97
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_prog.c125
-rw-r--r--tools/testing/selftests/bpf/progs/connect_force_port4.c83
-rw-r--r--tools/testing/selftests/bpf/progs/connect_force_port6.c94
-rw-r--r--tools/testing/selftests/bpf/progs/core_reloc_types.h423
-rw-r--r--tools/testing/selftests/bpf/progs/fentry_test.c22
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c27
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_test.c22
-rw-r--r--tools/testing/selftests/bpf/progs/fmod_ret_freplace.c14
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_attach_probe.c40
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_cls_redirect.c34
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c19
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_get_constant.c15
-rw-r--r--tools/testing/selftests/bpf/progs/load_bytes_relative.c48
-rw-r--r--tools/testing/selftests/bpf/progs/local_storage.c140
-rw-r--r--tools/testing/selftests/bpf/progs/lsm.c64
-rw-r--r--tools/testing/selftests/bpf/progs/map_ptr_kern.c694
-rw-r--r--tools/testing/selftests/bpf/progs/metadata_unused.c15
-rw-r--r--tools/testing/selftests/bpf/progs/metadata_used.c15
-rw-r--r--tools/testing/selftests/bpf/progs/netif_receive_skb.c249
-rw-r--r--tools/testing/selftests/bpf/progs/perf_event_stackmap.c59
-rw-r--r--tools/testing/selftests/bpf/progs/perfbuf_bench.c33
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.h177
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h969
-rw-r--r--tools/testing/selftests/bpf/progs/profiler1.c6
-rw-r--r--tools/testing/selftests/bpf/progs/profiler2.c6
-rw-r--r--tools/testing/selftests/bpf/progs/profiler3.c6
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf.h11
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf_subprogs.c5
-rw-r--r--tools/testing/selftests/bpf/progs/ringbuf_bench.c60
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_sk.c54
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h30
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta_subprogs.c10
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall1.c28
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall2.c14
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall3.c4
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c38
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c41
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c61
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c61
-rw-r--r--tools/testing/selftests/bpf/progs/test_autoload.c40
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_map_in_map.c150
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c174
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c1068
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.h54
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_autosize.c172
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c72
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c110
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c115
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_retro.c43
-rw-r--r--tools/testing/selftests/bpf/progs/test_d_path.c65
-rw-r--r--tools/testing/selftests/bpf/progs/test_enable_stats.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_endian.c37
-rw-r--r--tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_btf.c55
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb_noinline.c41
-rw-r--r--tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c325
-rw-r--r--tools/testing/selftests/bpf/progs/test_obj_id.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c38
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_buffer.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_pkt_access.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c24
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf.c78
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_multi.c77
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_assign.c82
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup.c627
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_helpers.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_skeleton.c21
-rw-r--r--tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c47
-rw-r--r--tools/testing/selftests/bpf/progs/test_sock_fields.c (renamed from tools/testing/selftests/bpf/progs/test_sock_fields_kern.c)176
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_kern.h (renamed from tools/testing/selftests/bpf/test_sockmap_kern.h)226
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_update.c48
-rw-r--r--tools/testing/selftests/bpf/progs/test_subprogs.c103
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop1.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop2.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_prog.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_neigh.c149
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c155
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_peer.c45
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c626
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c41
-rw-r--r--tools/testing/selftests/bpf/progs/test_trace_ext.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_varlen.c158
-rw-r--r--tools/testing/selftests/bpf/progs/test_vmlinux.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c33
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c (renamed from tools/testing/selftests/bpf/progs/test_adjust_tail.c)12
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c22
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_link.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c43
-rw-r--r--tools/testing/selftests/bpf/progs/trace_printk.c21
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c54
-rw-r--r--tools/testing/selftests/bpf/progs/udp_limit.c61
-rw-r--r--tools/testing/selftests/bpf/settings1
-rwxr-xr-xtools/testing/selftests/bpf/tcp_client.py2
-rwxr-xr-xtools/testing/selftests/bpf/tcp_server.py2
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh21
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_metadata.sh82
-rw-r--r--tools/testing/selftests/bpf/test_cgroup_storage.c17
-rw-r--r--tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c1
-rw-r--r--tools/testing/selftests/bpf/test_dev_cgroup.c15
-rwxr-xr-xtools/testing/selftests/bpf/test_kmod.sh12
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_seg6local.sh2
-rw-r--r--tools/testing/selftests/bpf/test_maps.c66
-rw-r--r--tools/testing/selftests/bpf/test_netcnt.c21
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py3
-rw-r--r--tools/testing/selftests/bpf/test_progs.c188
-rw-r--r--tools/testing/selftests/bpf/test_progs.h99
-rw-r--r--tools/testing/selftests/bpf/test_skb_cgroup_id_user.c8
-rw-r--r--tools/testing/selftests/bpf/test_sock.c8
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c46
-rw-r--r--tools/testing/selftests/bpf/test_sock_fields.c490
-rw-r--r--tools/testing/selftests/bpf/test_socket_cookie.c10
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c1085
-rw-r--r--tools/testing/selftests/bpf/test_sysctl.c8
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_redirect.sh216
-rw-r--r--tools/testing/selftests/bpf/test_tcp_hdr_options.h152
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_user.c8
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c21
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c82
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect.sh84
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c80
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h8
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c27
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h4
-rw-r--r--tools/testing/selftests/bpf/verifier/and.c20
-rw-r--r--tools/testing/selftests/bpf/verifier/array_access.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/basic.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds.c152
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c24
-rw-r--r--tools/testing/selftests/bpf/verifier/const_or.c8
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c492
-rw-r--r--tools/testing/selftests/bpf/verifier/d_path.c37
-rw-r--r--tools/testing/selftests/bpf/verifier/dead_code.c10
-rw-r--r--tools/testing/selftests/bpf/verifier/direct_packet_access.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/direct_value_access.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/event_output.c25
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_access_var_len.c46
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_value_access.c42
-rw-r--r--tools/testing/selftests/bpf/verifier/ld_imm64.c8
-rw-r--r--tools/testing/selftests/bpf/verifier/map_ptr.c94
-rw-r--r--tools/testing/selftests/bpf/verifier/map_ptr_mixing.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c8
-rw-r--r--tools/testing/selftests/bpf/verifier/prevent_map_lookup.c30
-rw-r--r--tools/testing/selftests/bpf/verifier/ref_tracking.c80
-rw-r--r--tools/testing/selftests/bpf/verifier/regalloc.c269
-rw-r--r--tools/testing/selftests/bpf/verifier/sock.c142
-rw-r--r--tools/testing/selftests/bpf/verifier/value_or_null.c19
-rw-r--r--tools/testing/selftests/bpf/verifier/value_ptr_arith.c46
-rw-r--r--tools/testing/selftests/breakpoints/step_after_suspend_test.c53
-rw-r--r--tools/testing/selftests/cgroup/.gitignore1
-rw-r--r--tools/testing/selftests/cgroup/Makefile2
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c2
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c450
-rw-r--r--tools/testing/selftests/clone3/.gitignore1
-rw-r--r--tools/testing/selftests/clone3/Makefile4
-rw-r--r--tools/testing/selftests/clone3/clone3.c47
-rw-r--r--tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c182
-rw-r--r--tools/testing/selftests/clone3/clone3_clear_sighand.c5
-rw-r--r--tools/testing/selftests/clone3/clone3_selftests.h24
-rw-r--r--tools/testing/selftests/clone3/clone3_set_tid.c6
-rw-r--r--tools/testing/selftests/core/.gitignore1
-rw-r--r--tools/testing/selftests/core/Makefile7
-rw-r--r--tools/testing/selftests/core/close_range_test.c227
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh688
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh33
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh35
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh20
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh33
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh9
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh9
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh379
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh14
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh5
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh403
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_ets.sh6
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh130
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh22
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh8
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh2
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh2
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh186
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh108
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh100
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh394
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh49
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh108
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh953
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh316
-rw-r--r--tools/testing/selftests/exec/.gitignore3
-rw-r--r--tools/testing/selftests/exec/Makefile15
-rwxr-xr-xtools/testing/selftests/exec/binfmt_script171
-rw-r--r--tools/testing/selftests/exec/execveat.c8
-rw-r--r--tools/testing/selftests/exec/load_address.c68
-rw-r--r--tools/testing/selftests/exec/non-regular.c196
-rw-r--r--tools/testing/selftests/filesystems/binderfs/binderfs_test.c284
-rw-r--r--tools/testing/selftests/firmware/.gitignore2
-rwxr-xr-xtools/testing/selftests/firmware/fw_filesystem.sh91
-rw-r--r--tools/testing/selftests/firmware/settings8
-rw-r--r--tools/testing/selftests/fpu/.gitignore2
-rw-r--r--tools/testing/selftests/fpu/Makefile9
-rwxr-xr-xtools/testing/selftests/fpu/run_test_fpu.sh46
-rw-r--r--tools/testing/selftests/fpu/test_fpu.c61
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest11
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc6
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc8
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc13
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc10
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/event-enable.tc6
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc11
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/event-pid.tc11
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc6
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc6
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc14
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc8
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc8
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc13
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc13
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc6
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc12
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc6
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc8
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc6
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc10
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions36
-rw-r--r--tools/testing/selftests/ftrace/test.d/instances/instance-event.tc6
-rw-r--r--tools/testing/selftests/ftrace/test.d/instances/instance.tc6
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc13
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc20
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc25
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc21
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/profile.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc11
-rw-r--r--tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/template4
-rw-r--r--tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc6
-rw-r--r--tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc6
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc13
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc11
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc19
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc11
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc8
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc11
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc11
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc11
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc15
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc11
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc31
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc11
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc19
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc13
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc11
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc11
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc16
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc18
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc18
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc16
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc16
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc13
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc23
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc23
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc31
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc26
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc11
-rwxr-xr-xtools/testing/selftests/gen_kselftest_tar.sh5
-rwxr-xr-xtools/testing/selftests/kmod/kmod.sh42
-rw-r--r--tools/testing/selftests/kselftest.h108
-rw-r--r--tools/testing/selftests/kselftest/runner.sh8
-rw-r--r--tools/testing/selftests/kselftest_harness.h410
-rw-r--r--tools/testing/selftests/kvm/.gitignore5
-rw-r--r--tools/testing/selftests/kvm/Makefile11
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h30
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h11
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/svm_util.h11
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h26
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c165
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h8
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/processor.c5
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/svm.c10
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c9
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c408
-rw-r--r--tools/testing/selftests/kvm/x86_64/debug_regs.c6
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c104
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_memory_region_test.c141
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c30
-rw-r--r--tools/testing/selftests/kvm/x86_64/state_test.c69
-rw-r--r--tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c168
-rw-r--r--tools/testing/selftests/kvm/x86_64/user_msr_test.c248
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c259
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c42
-rw-r--r--tools/testing/selftests/lib.mk29
-rw-r--r--tools/testing/selftests/lib/config1
-rw-r--r--tools/testing/selftests/livepatch/README16
-rw-r--r--tools/testing/selftests/livepatch/functions.sh40
-rwxr-xr-xtools/testing/selftests/livepatch/test-callbacks.sh84
-rwxr-xr-xtools/testing/selftests/livepatch/test-ftrace.sh6
-rwxr-xr-xtools/testing/selftests/livepatch/test-livepatch.sh12
-rwxr-xr-xtools/testing/selftests/livepatch/test-shadow-vars.sh85
-rwxr-xr-xtools/testing/selftests/livepatch/test-state.sh21
-rwxr-xr-xtools/testing/selftests/lkdtm/run.sh8
-rw-r--r--tools/testing/selftests/lkdtm/tests.txt3
-rw-r--r--tools/testing/selftests/mincore/.gitignore2
-rw-r--r--tools/testing/selftests/mincore/Makefile6
-rw-r--r--tools/testing/selftests/mincore/mincore_selftest.c361
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile6
-rw-r--r--tools/testing/selftests/net/config7
-rwxr-xr-xtools/testing/selftests/net/devlink_port_split.py277
-rwxr-xr-xtools/testing/selftests/net/drop_monitor_tests.sh215
-rwxr-xr-xtools/testing/selftests/net/fib_nexthop_multiprefix.sh2
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh627
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh107
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool.sh19
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_extended_state.sh102
-rw-r--r--tools/testing/selftests/net/forwarding/ethtool_lib.sh17
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample3
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh43
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_lib.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_dsfield.sh73
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_l4port.sh198
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_red.sh492
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh26
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_police.sh333
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_asymmetric.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_symmetric.sh10
-rwxr-xr-xtools/testing/selftests/net/icmp_redirect.sh2
-rw-r--r--tools/testing/selftests/net/ip_defrag.c8
-rwxr-xr-xtools/testing/selftests/net/ip_defrag.sh2
-rw-r--r--tools/testing/selftests/net/ipsec.c2195
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile3
-rw-r--r--tools/testing/selftests/net/mptcp/config3
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh121
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c47
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh133
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh259
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh293
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c5
-rw-r--r--tools/testing/selftests/net/nettest.c2
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh468
-rw-r--r--tools/testing/selftests/net/psock_fanout.c3
-rwxr-xr-xtools/testing/selftests/net/psock_snd.sh16
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh120
-rw-r--r--tools/testing/selftests/net/rxtimestamp.c126
-rwxr-xr-xtools/testing/selftests/net/rxtimestamp.sh4
-rw-r--r--tools/testing/selftests/net/so_txtime.c35
-rw-r--r--tools/testing/selftests/net/tcp_mmap.c48
-rw-r--r--tools/testing/selftests/net/timestamping.c10
-rw-r--r--tools/testing/selftests/net/tls.c151
-rw-r--r--tools/testing/selftests/net/txtimestamp.c10
-rwxr-xr-xtools/testing/selftests/net/txtimestamp.sh2
-rwxr-xr-xtools/testing/selftests/net/vrf-xfrm-tests.sh436
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh626
-rwxr-xr-xtools/testing/selftests/net/vrf_strict_mode_test.sh396
-rw-r--r--tools/testing/selftests/netfilter/.gitignore2
-rw-r--r--tools/testing/selftests/netfilter/Makefile4
-rw-r--r--tools/testing/selftests/netfilter/nf-queue.c61
-rwxr-xr-xtools/testing/selftests/netfilter/nft_conntrack_helper.sh175
-rwxr-xr-xtools/testing/selftests/netfilter/nft_flowtable.sh130
-rwxr-xr-xtools/testing/selftests/netfilter/nft_meta.sh142
-rwxr-xr-xtools/testing/selftests/netfilter/nft_queue.sh70
-rwxr-xr-xtools/testing/selftests/ntb/ntb_test.sh2
-rw-r--r--tools/testing/selftests/pid_namespace/regression_enomem.c1
-rw-r--r--tools/testing/selftests/pidfd/.gitignore1
-rw-r--r--tools/testing/selftests/pidfd/Makefile3
-rw-r--r--tools/testing/selftests/pidfd/config6
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h13
-rw-r--r--tools/testing/selftests/pidfd/pidfd_getfd_test.c6
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c560
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c55
-rw-r--r--tools/testing/selftests/pidfd/pidfd_wait.c304
-rw-r--r--tools/testing/selftests/powerpc/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/alignment/alignment_handler.c162
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/context_switch.c27
-rw-r--r--tools/testing/selftests/powerpc/copyloops/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/copyloops/Makefile6
l---------tools/testing/selftests/powerpc/copyloops/copy_mc_64.S1
l---------tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S1
-rw-r--r--tools/testing/selftests/powerpc/dscr/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_default_test.c2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_user_test.c2
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-basic.sh9
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-functions.sh11
-rw-r--r--tools/testing/selftests/powerpc/include/instructions.h77
-rw-r--r--tools/testing/selftests/powerpc/include/pkeys.h136
-rw-r--r--tools/testing/selftests/powerpc/include/reg.h6
-rw-r--r--tools/testing/selftests/powerpc/include/utils.h31
-rw-r--r--tools/testing/selftests/powerpc/math/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/math/Makefile10
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_denormal.c38
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_preempt.c3
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_signal.c3
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_syscall.c7
-rw-r--r--tools/testing/selftests/powerpc/math/vsx_preempt.c2
-rw-r--r--tools/testing/selftests/powerpc/mm/.gitignore4
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile14
-rw-r--r--tools/testing/selftests/powerpc/mm/bad_accesses.c29
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_exec_prot.c294
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_siginfo.c333
-rw-r--r--tools/testing/selftests/powerpc/mm/prot_sao.c10
-rw-r--r--tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c202
-rw-r--r--tools/testing/selftests/powerpc/mm/stack_expansion_signal.c118
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules1
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/Makefile8
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/README45
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/gunz_test.c1028
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c433
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c316
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h56
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/include/crb.h155
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/include/nx.h38
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h95
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/include/nxu.h650
l---------tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h1
-rwxr-xr-xtools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh46
-rw-r--r--tools/testing/selftests/powerpc/pmu/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/pmu/Makefile8
-rw-r--r--tools/testing/selftests/powerpc/pmu/count_stcx_fail.c164
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c1
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c7
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/trace.h4
-rw-r--r--tools/testing/selftests/powerpc/pmu/l3_bank_test.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/lib.h1
-rw-r--r--tools/testing/selftests/powerpc/pmu/loop.S35
-rw-r--r--tools/testing/selftests/powerpc/pmu/per_event_excludes.c5
-rw-r--r--tools/testing/selftests/powerpc/ptrace/core-pkey.c2
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c48
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c57
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tar.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c2
-rw-r--r--tools/testing/selftests/powerpc/security/rfi_flush.c38
-rw-r--r--tools/testing/selftests/powerpc/security/spectre_v2.c13
-rw-r--r--tools/testing/selftests/powerpc/signal/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c174
-rw-r--r--tools/testing/selftests/powerpc/stringloops/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/stringloops/memcmp.c46
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S23
-rw-r--r--tools/testing/selftests/powerpc/syscalls/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/syscalls/rtas_filter.c285
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-poison.c11
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-tmspr.c10
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-trap.c10
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-unavailable.c9
-rw-r--r--tools/testing/selftests/powerpc/tm/tm.h3
-rw-r--r--tools/testing/selftests/powerpc/utils.c90
-rw-r--r--tools/testing/selftests/proc/.gitignore2
-rw-r--r--tools/testing/selftests/proc/Makefile2
-rw-r--r--tools/testing/selftests/proc/proc-fsconfig-hidepid.c50
-rw-r--r--tools/testing/selftests/proc/proc-multiple-procfs.c48
-rwxr-xr-xtools/testing/selftests/pstore/pstore_tests2
-rw-r--r--tools/testing/selftests/ptp/testptp.c57
-rw-r--r--tools/testing/selftests/ptrace/.gitignore1
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configinit.sh4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/console-badness.sh16
-rw-r--r--tools/testing/selftests/rcutorture/bin/functions.sh23
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh6
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kcsan-collapse.sh22
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-build.sh6
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-check-branches.sh108
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh16
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh (renamed from tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh)6
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh (renamed from tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh)14
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-refscale.sh71
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh38
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh27
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh110
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-transform.sh51
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh60
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh34
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFLIST3
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/RUDE0110
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE0111
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE0211
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE051
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE102
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST (renamed from tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TINY (renamed from tools/testing/selftests/rcutorture/configs/rcuperf/TINY)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TREE (renamed from tools/testing/selftests/rcutorture/configs/rcuperf/TREE)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 (renamed from tools/testing/selftests/rcutorture/configs/rcuperf/TREE54)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh (renamed from tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh)4
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/CFLIST2
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT18
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/PREEMPT18
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh16
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/CFLIST2
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT9
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/PREEMPT9
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh30
-rw-r--r--tools/testing/selftests/rcutorture/doc/initrd.txt36
-rw-r--r--tools/testing/selftests/rcutorture/doc/rcu-test-image.txt41
-rw-r--r--tools/testing/selftests/rseq/param_test.c223
-rw-r--r--tools/testing/selftests/rseq/rseq-x86.h57
-rwxr-xr-xtools/testing/selftests/rseq/run_param_test.sh2
-rwxr-xr-xtools/testing/selftests/run_kselftest.sh93
-rw-r--r--tools/testing/selftests/seccomp/config1
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c80
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c1133
-rw-r--r--tools/testing/selftests/seccomp/settings1
-rw-r--r--tools/testing/selftests/sigaltstack/sas.c4
-rw-r--r--tools/testing/selftests/splice/.gitignore1
-rw-r--r--tools/testing/selftests/splice/Makefile4
-rw-r--r--tools/testing/selftests/splice/config1
-rw-r--r--tools/testing/selftests/splice/settings1
-rwxr-xr-xtools/testing/selftests/splice/short_splice_read.sh56
-rw-r--r--tools/testing/selftests/splice/splice_read.c57
-rw-r--r--tools/testing/selftests/sync/sync_test.c2
-rw-r--r--tools/testing/selftests/sysctl/config2
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh57
-rw-r--r--tools/testing/selftests/tc-testing/Makefile (renamed from tools/testing/selftests/tc-testing/bpf/Makefile)9
-rw-r--r--tools/testing/selftests/tc-testing/action.c (renamed from tools/testing/selftests/tc-testing/bpf/action.c)0
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json4
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/csum.json4
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json20
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/tests.json44
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.sh5
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc_batch.py6
-rw-r--r--tools/testing/selftests/tc-testing/tdc_config.py2
-rw-r--r--tools/testing/selftests/timens/clock_nanosleep.c2
-rw-r--r--tools/testing/selftests/timens/timens.c2
-rw-r--r--tools/testing/selftests/timens/timens.h13
-rw-r--r--tools/testing/selftests/timens/timer.c5
-rw-r--r--tools/testing/selftests/timens/timerfd.c5
-rw-r--r--tools/testing/selftests/timers/Makefile1
-rw-r--r--tools/testing/selftests/timers/settings1
-rwxr-xr-xtools/testing/selftests/tpm2/test_smoke.sh14
-rwxr-xr-xtools/testing/selftests/tpm2/test_space.sh9
-rw-r--r--tools/testing/selftests/tpm2/tpm2.py56
-rw-r--r--tools/testing/selftests/tpm2/tpm2_tests.py39
-rw-r--r--tools/testing/selftests/uevent/uevent_filtering.c1
-rw-r--r--tools/testing/selftests/vDSO/.gitignore2
-rw-r--r--tools/testing/selftests/vDSO/Makefile5
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.c24
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.h31
-rw-r--r--tools/testing/selftests/vDSO/vdso_standalone_test_x86.c4
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getcpu.c54
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_gettimeofday.c (renamed from tools/testing/selftests/vDSO/vdso_test.c)10
-rw-r--r--tools/testing/selftests/vm/.gitignore3
-rw-r--r--tools/testing/selftests/vm/Makefile94
-rw-r--r--tools/testing/selftests/vm/compaction_test.c11
-rw-r--r--tools/testing/selftests/vm/config3
-rw-r--r--tools/testing/selftests/vm/gup_benchmark.c32
-rw-r--r--tools/testing/selftests/vm/hmm-tests.c1480
-rw-r--r--tools/testing/selftests/vm/khugepaged.c1035
-rw-r--r--tools/testing/selftests/vm/map_hugetlb.c2
-rw-r--r--tools/testing/selftests/vm/mremap_dontunmap.c1
-rw-r--r--tools/testing/selftests/vm/pkey-helpers.h225
-rw-r--r--tools/testing/selftests/vm/pkey-powerpc.h133
-rw-r--r--tools/testing/selftests/vm/pkey-x86.h181
-rw-r--r--tools/testing/selftests/vm/protection_keys.c (renamed from tools/testing/selftests/x86/protection_keys.c)696
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests16
-rwxr-xr-xtools/testing/selftests/vm/test_hmm.sh97
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c296
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh13
-rw-r--r--tools/testing/selftests/wireguard/qemu/debug.config1
-rw-r--r--tools/testing/selftests/x86/.gitignore1
-rw-r--r--tools/testing/selftests/x86/Makefile8
-rw-r--r--tools/testing/selftests/x86/fsgsbase.c96
-rw-r--r--tools/testing/selftests/x86/fsgsbase_restore.c245
-rw-r--r--tools/testing/selftests/x86/helpers.h41
-rw-r--r--tools/testing/selftests/x86/pkey-helpers.h219
-rw-r--r--tools/testing/selftests/x86/single_step_syscall.c17
-rw-r--r--tools/testing/selftests/x86/syscall_arg_fault.c47
-rw-r--r--tools/testing/selftests/x86/syscall_nt.c47
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c37
-rw-r--r--tools/testing/selftests/x86/unwind_vdso.c23
-rw-r--r--tools/usb/Build2
-rw-r--r--tools/usb/Makefile53
-rw-r--r--tools/usb/usbip/doc/usbip.84
-rw-r--r--tools/usb/usbip/doc/usbipd.82
-rw-r--r--tools/usb/usbip/libsrc/usbip_host_common.c2
-rwxr-xr-xtools/usb/usbip/vudc/vudc_server_example.sh2
-rw-r--r--tools/virtio/linux/kernel.h9
-rw-r--r--tools/virtio/linux/virtio.h5
-rw-r--r--tools/virtio/linux/virtio_config.h6
-rw-r--r--tools/virtio/virtio_test.c139
-rw-r--r--tools/virtio/vringh_test.c2
-rw-r--r--tools/vm/page-types.c2
-rw-r--r--tools/vm/page_owner_sort.c5
1452 files changed, 106353 insertions, 22151 deletions
diff --git a/tools/Makefile b/tools/Makefile
index bd778812e915..85af6ebbce91 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -67,6 +67,9 @@ cpupower: FORCE
cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging: FORCE
$(call descend,$@)
+bpf/%: FORCE
+ $(call descend,$@)
+
liblockdep: FORCE
$(call descend,lib/lockdep)
diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h
index f9fd1325f5bd..506c06a6536f 100644
--- a/tools/arch/parisc/include/uapi/asm/mman.h
+++ b/tools/arch/parisc/include/uapi/asm/mman.h
@@ -39,6 +39,5 @@
#define MADV_SOFT_OFFLINE 101
/* MAP_32BIT is undefined on parisc, fix it for perf */
#define MAP_32BIT 0
-/* MAP_UNINITIALIZED is undefined on parisc, fix it for perf */
#define MAP_UNINITIALIZED 0
#endif
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 264e266a85bf..c3af3f324c5a 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -640,6 +640,11 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
+/* POWER10 registers */
+#define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
+#define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
+#define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
+
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
*/
diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
index f599064dd8dc..bdf5f10f8b9f 100644
--- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -48,6 +48,24 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_DSISR,
PERF_REG_POWERPC_SIER,
PERF_REG_POWERPC_MMCRA,
- PERF_REG_POWERPC_MAX,
+ /* Extended registers */
+ PERF_REG_POWERPC_MMCR0,
+ PERF_REG_POWERPC_MMCR1,
+ PERF_REG_POWERPC_MMCR2,
+ PERF_REG_POWERPC_MMCR3,
+ PERF_REG_POWERPC_SIER2,
+ PERF_REG_POWERPC_SIER3,
+ /* Max regs without the extended regs */
+ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
};
+
+#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+
+/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */
+#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK)
+/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */
+#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK)
+
+#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1)
+#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1)
#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/tools/arch/riscv/include/uapi/asm/unistd.h b/tools/arch/riscv/include/uapi/asm/unistd.h
index 0e2eeeb1fd27..f506cca520b0 100644
--- a/tools/arch/riscv/include/uapi/asm/unistd.h
+++ b/tools/arch/riscv/include/uapi/asm/unistd.h
@@ -12,7 +12,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifdef __LP64__
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 436ec7636927..7a6b14874d65 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -231,11 +231,13 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_GSCB (1UL << 9)
#define KVM_SYNC_BPBC (1UL << 10)
#define KVM_SYNC_ETOKEN (1UL << 11)
+#define KVM_SYNC_DIAG318 (1UL << 12)
#define KVM_SYNC_S390_VALID_FIELDS \
(KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \
KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \
- KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN)
+ KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \
+ KVM_SYNC_DIAG318)
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
@@ -264,7 +266,8 @@ struct kvm_sync_regs {
__u8 reserved2 : 7;
__u8 padding1[51]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
- __u8 padding2[192]; /* sdnx needs to be 256byte aligned */
+ __u64 diag318; /* diagnose 0x318 info */
+ __u8 padding2[184]; /* sdnx needs to be 256byte aligned */
union {
__u8 sdnx[SDNXL]; /* state description annex */
struct {
diff --git a/tools/arch/sh/include/asm/barrier.h b/tools/arch/sh/include/asm/barrier.h
index bde5221af282..7eaea27cdd67 100644
--- a/tools/arch/sh/include/asm/barrier.h
+++ b/tools/arch/sh/include/asm/barrier.h
@@ -22,7 +22,7 @@
* Historically we have only done this type of barrier for the MMUCR, but
* it's also necessary for the CCR, so we make it generic here instead.
*/
-#if defined(__SH4A__) || defined(__SH5__)
+#if defined(__SH4A__)
#define mb() __asm__ __volatile__ ("synco": : :"memory")
#define rmb() mb()
#define wmb() mb()
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index db189945e9b0..2901d5df4366 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -96,6 +96,7 @@
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
+/* free ( 3*32+17) */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -107,6 +108,7 @@
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+/* free ( 3*32+29) */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
@@ -362,9 +364,12 @@
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */
#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
+#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
+#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
+#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
@@ -407,5 +412,6 @@
#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h
deleted file mode 100644
index 2ccd588fbad4..000000000000
--- a/tools/arch/x86/include/asm/mcsafe_test.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MCSAFE_TEST_H_
-#define _MCSAFE_TEST_H_
-
-.macro MCSAFE_TEST_CTL
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-.endm
-#endif /* _MCSAFE_TEST_H_ */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 12c9684d59ba..2859ee4f39a8 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -128,6 +128,10 @@
#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */
+/* SRBDS support */
+#define MSR_IA32_MCU_OPT_CTRL 0x00000123
+#define RNGDS_MITG_DIS BIT(0)
+
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
#define MSR_IA32_SYSENTER_EIP 0x00000176
@@ -145,6 +149,10 @@
#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
+
+#define MSR_IA32_POWER_CTL 0x000001fc
+#define MSR_IA32_POWER_CTL_BIT_EE 19
+
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
@@ -154,7 +162,23 @@
#define LBR_INFO_MISPRED BIT_ULL(63)
#define LBR_INFO_IN_TX BIT_ULL(62)
#define LBR_INFO_ABORT BIT_ULL(61)
+#define LBR_INFO_CYC_CNT_VALID BIT_ULL(60)
#define LBR_INFO_CYCLES 0xffff
+#define LBR_INFO_BR_TYPE_OFFSET 56
+#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET)
+
+#define MSR_ARCH_LBR_CTL 0x000014ce
+#define ARCH_LBR_CTL_LBREN BIT(0)
+#define ARCH_LBR_CTL_CPL_OFFSET 1
+#define ARCH_LBR_CTL_CPL (0x3ull << ARCH_LBR_CTL_CPL_OFFSET)
+#define ARCH_LBR_CTL_STACK_OFFSET 3
+#define ARCH_LBR_CTL_STACK (0x1ull << ARCH_LBR_CTL_STACK_OFFSET)
+#define ARCH_LBR_CTL_FILTER_OFFSET 16
+#define ARCH_LBR_CTL_FILTER (0x7full << ARCH_LBR_CTL_FILTER_OFFSET)
+#define MSR_ARCH_LBR_DEPTH 0x000014cf
+#define MSR_ARCH_LBR_FROM_0 0x00001500
+#define MSR_ARCH_LBR_TO_0 0x00001600
+#define MSR_ARCH_LBR_INFO_0 0x00001200
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_PEBS_DATA_CFG 0x000003f2
@@ -249,8 +273,6 @@
#define MSR_PEBS_FRONTEND 0x000003f7
-#define MSR_IA32_POWER_CTL 0x000001fc
-
#define MSR_IA32_MC0_CTL 0x00000400
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
@@ -301,6 +323,9 @@
#define MSR_PP1_ENERGY_STATUS 0x00000641
#define MSR_PP1_POLICY 0x00000642
+#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b
+#define MSR_AMD_RAPL_POWER_UNIT 0xc0010299
+
/* Config TDP MSRs */
#define MSR_CONFIG_TDP_NOMINAL 0x00000648
#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
@@ -411,7 +436,6 @@
#define MSR_AMD64_PATCH_LEVEL 0x0000008b
#define MSR_AMD64_TSC_RATIO 0xc0000104
#define MSR_AMD64_NB_CFG 0xc001001f
-#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_PATCH_LOADER 0xc0010020
#define MSR_AMD_PERF_CTL 0xc0010062
#define MSR_AMD_PERF_STATUS 0xc0010063
@@ -420,6 +444,7 @@
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
#define MSR_AMD_PPIN 0xc00102f1
+#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_BU_CFG2 0xc001102a
@@ -459,6 +484,8 @@
#define MSR_F16H_DR0_ADDR_MASK 0xc0011027
/* Fam 15h MSRs */
+#define MSR_F15H_CU_PWR_ACCUMULATOR 0xc001007a
+#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL
#define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2)
diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h
index 6e060907c163..fdbffec4cfde 100644
--- a/tools/arch/x86/include/asm/orc_types.h
+++ b/tools/arch/x86/include/asm/orc_types.h
@@ -39,28 +39,6 @@
#define ORC_REG_SP_INDIRECT 9
#define ORC_REG_MAX 15
-/*
- * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
- * caller's SP right before it made the call). Used for all callable
- * functions, i.e. all C code and all callable asm functions.
- *
- * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
- * to a fully populated pt_regs from a syscall, interrupt, or exception.
- *
- * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
- * points to the iret return frame.
- *
- * The UNWIND_HINT macros are used only for the unwind_hint struct. They
- * aren't used in struct orc_entry due to size and complexity constraints.
- * Objtool converts them to real types when it converts the hints to orc
- * entries.
- */
-#define ORC_TYPE_CALL 0
-#define ORC_TYPE_REGS 1
-#define ORC_TYPE_REGS_IRET 2
-#define UNWIND_HINT_TYPE_SAVE 3
-#define UNWIND_HINT_TYPE_RESTORE 4
-
#ifndef __ASSEMBLY__
/*
* This struct is more or less a vastly simplified version of the DWARF Call
@@ -79,19 +57,6 @@ struct orc_entry {
unsigned end:1;
} __packed;
-/*
- * This struct is used by asm and inline asm code to manually annotate the
- * location of registers on the stack for the ORC unwinder.
- *
- * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
- */
-struct unwind_hint {
- u32 ip;
- s16 sp_offset;
- u8 sp_reg;
- u8 type;
- u8 end;
-};
#endif /* __ASSEMBLY__ */
#endif /* _ORC_TYPES_H */
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 3f3f780c8c65..0780f97c1850 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -385,18 +385,23 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
#define KVM_STATE_NESTED_FORMAT_VMX 0
-#define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */
+#define KVM_STATE_NESTED_FORMAT_SVM 1
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
#define KVM_STATE_NESTED_MTF_PENDING 0x00000008
+#define KVM_STATE_NESTED_GIF_SET 0x00000100
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000
+#define KVM_STATE_NESTED_SVM_VMCB_SIZE 0x1000
+
+#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
+
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
@@ -409,6 +414,18 @@ struct kvm_vmx_nested_state_hdr {
struct {
__u16 flags;
} smm;
+
+ __u32 flags;
+ __u64 preemption_timer_deadline;
+};
+
+struct kvm_svm_nested_state_data {
+ /* Save area only used if KVM_STATE_NESTED_RUN_PENDING. */
+ __u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE];
+};
+
+struct kvm_svm_nested_state_hdr {
+ __u64 vmcb_pa;
};
/* for KVM_CAP_NESTED_STATE */
@@ -419,6 +436,7 @@ struct kvm_nested_state {
union {
struct kvm_vmx_nested_state_hdr vmx;
+ struct kvm_svm_nested_state_hdr svm;
/* Pad the header to 128 bytes. */
__u8 pad[120];
@@ -431,6 +449,7 @@ struct kvm_nested_state {
*/
union {
struct kvm_vmx_nested_state_data vmx[0];
+ struct kvm_svm_nested_state_data svm[0];
} data;
};
diff --git a/tools/arch/x86/include/uapi/asm/unistd.h b/tools/arch/x86/include/uapi/asm/unistd.h
index 30d7d04d72d6..be5e2e747f50 100644
--- a/tools/arch/x86/include/uapi/asm/unistd.h
+++ b/tools/arch/x86/include/uapi/asm/unistd.h
@@ -2,7 +2,14 @@
#ifndef _UAPI_ASM_X86_UNISTD_H
#define _UAPI_ASM_X86_UNISTD_H
-/* x32 syscall flag bit */
+/*
+ * x32 syscall flag bit. Some user programs expect syscall NR macros
+ * and __X32_SYSCALL_BIT to have type int, even though syscall numbers
+ * are, for practical purposes, unsigned long.
+ *
+ * Fortunately, expressions like (nr & ~__X32_SYSCALL_BIT) do the right
+ * thing regardless.
+ */
#define __X32_SYSCALL_BIT 0x40000000
#ifndef __KERNEL__
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index e95b72ec19bc..b8ff9e8ac0d5 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -150,6 +150,9 @@
{ EXIT_REASON_UMWAIT, "UMWAIT" }, \
{ EXIT_REASON_TPAUSE, "TPAUSE" }
+#define VMX_EXIT_REASON_FLAGS \
+ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
+
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index df767afc690f..0b5b8ae56bd9 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -4,10 +4,11 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
-#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>
+.pushsection .noinstr.text, "ax"
+
/*
* We build a jump to memcpy_orig by default which gets NOPped out on
* the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
@@ -184,116 +185,4 @@ SYM_FUNC_START(memcpy_orig)
retq
SYM_FUNC_END(memcpy_orig)
-#ifndef CONFIG_UML
-
-MCSAFE_TEST_CTL
-
-/*
- * __memcpy_mcsafe - memory copy with machine check exception handling
- * Note that we only catch machine checks when reading the source addresses.
- * Writes to target are posted and don't generate machine checks.
- */
-SYM_FUNC_START(__memcpy_mcsafe)
- cmpl $8, %edx
- /* Less than 8 bytes? Go to byte copy loop */
- jb .L_no_whole_words
-
- /* Check for bad alignment of source */
- testl $7, %esi
- /* Already aligned */
- jz .L_8byte_aligned
-
- /* Copy one byte at a time until source is 8-byte aligned */
- movl %esi, %ecx
- andl $7, %ecx
- subl $8, %ecx
- negl %ecx
- subl %ecx, %edx
-.L_read_leading_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
- MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
-.L_write_leading_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_leading_bytes
-
-.L_8byte_aligned:
- movl %edx, %ecx
- andl $7, %edx
- shrl $3, %ecx
- jz .L_no_whole_words
-
-.L_read_words:
- movq (%rsi), %r8
- MCSAFE_TEST_SRC %rsi 8 .E_read_words
- MCSAFE_TEST_DST %rdi 8 .E_write_words
-.L_write_words:
- movq %r8, (%rdi)
- addq $8, %rsi
- addq $8, %rdi
- decl %ecx
- jnz .L_read_words
-
- /* Any trailing bytes? */
-.L_no_whole_words:
- andl %edx, %edx
- jz .L_done_memcpy_trap
-
- /* Copy trailing bytes */
- movl %edx, %ecx
-.L_read_trailing_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
- MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
-.L_write_trailing_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_trailing_bytes
-
- /* Copy successful. Return zero */
-.L_done_memcpy_trap:
- xorl %eax, %eax
-.L_done:
- ret
-SYM_FUNC_END(__memcpy_mcsafe)
-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
-
- .section .fixup, "ax"
- /*
- * Return number of bytes not copied for any failure. Note that
- * there is no "tail" handling since the source buffer is 8-byte
- * aligned and poison is cacheline aligned.
- */
-.E_read_words:
- shll $3, %ecx
-.E_leading_bytes:
- addl %edx, %ecx
-.E_trailing_bytes:
- mov %ecx, %eax
- jmp .L_done
-
- /*
- * For write fault handling, given the destination is unaligned,
- * we handle faults on multi-byte writes with a byte-by-byte
- * copy up to the write-protected page.
- */
-.E_write_words:
- shll $3, %ecx
- addl %edx, %ecx
- movl %ecx, %edx
- jmp mcsafe_handle_tail
-
- .previous
-
- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
- _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE(.L_write_words, .E_write_words)
- _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
-#endif
+.popsection
diff --git a/tools/arch/x86/tools/gen-insn-attr-x86.awk b/tools/arch/x86/tools/gen-insn-attr-x86.awk
index a42015b305f4..af38469afd14 100644
--- a/tools/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/tools/arch/x86/tools/gen-insn-attr-x86.awk
@@ -362,6 +362,9 @@ function convert_operands(count,opnd, i,j,imm,mod)
END {
if (awkchecked != "")
exit 1
+
+ print "#ifndef __BOOT_COMPRESSED\n"
+
# print escape opcode map's array
print "/* Escape opcode map array */"
print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
@@ -388,6 +391,51 @@ END {
for (j = 0; j < max_lprefix; j++)
if (atable[i,j])
print " ["i"]["j"] = "atable[i,j]","
- print "};"
+ print "};\n"
+
+ print "#else /* !__BOOT_COMPRESSED */\n"
+
+ print "/* Escape opcode map array */"
+ print "static const insn_attr_t *inat_escape_tables[INAT_ESC_MAX + 1]" \
+ "[INAT_LSTPFX_MAX + 1];"
+ print ""
+
+ print "/* Group opcode map array */"
+ print "static const insn_attr_t *inat_group_tables[INAT_GRP_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1];"
+ print ""
+
+ print "/* AVX opcode map array */"
+ print "static const insn_attr_t *inat_avx_tables[X86_VEX_M_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1];"
+ print ""
+
+ print "static void inat_init_tables(void)"
+ print "{"
+
+ # print escape opcode map's array
+ print "\t/* Print Escape opcode map array */"
+ for (i = 0; i < geid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (etable[i,j])
+ print "\tinat_escape_tables["i"]["j"] = "etable[i,j]";"
+ print ""
+
+ # print group opcode map's array
+ print "\t/* Print Group opcode map array */"
+ for (i = 0; i < ggid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (gtable[i,j])
+ print "\tinat_group_tables["i"]["j"] = "gtable[i,j]";"
+ print ""
+ # print AVX opcode map's array
+ print "\t/* Print AVX opcode map array */"
+ for (i = 0; i < gaid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (atable[i,j])
+ print "\tinat_avx_tables["i"]["j"] = "atable[i,j]";"
+
+ print "}"
+ print "#endif"
}
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 0efaf45f7367..eb92027817a7 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -14,13 +14,19 @@
#include <linux/kernel.h>
#include <linux/bootconfig.h>
-static int xbc_show_array(struct xbc_node *node)
+static int xbc_show_value(struct xbc_node *node, bool semicolon)
{
- const char *val;
+ const char *val, *eol;
+ char q;
int i = 0;
+ eol = semicolon ? ";\n" : "\n";
xbc_array_for_each_value(node, val) {
- printf("\"%s\"%s", val, node->next ? ", " : ";\n");
+ if (strchr(val, '"'))
+ q = '\'';
+ else
+ q = '"';
+ printf("%c%s%c%s", q, val, q, node->next ? ", " : eol);
i++;
}
return i;
@@ -48,10 +54,7 @@ static void xbc_show_compact_tree(void)
continue;
} else if (cnode && xbc_node_is_value(cnode)) {
printf("%s = ", xbc_node_get_data(node));
- if (cnode->next)
- xbc_show_array(cnode);
- else
- printf("\"%s\";\n", xbc_node_get_data(cnode));
+ xbc_show_value(cnode, true);
} else {
printf("%s;\n", xbc_node_get_data(node));
}
@@ -75,8 +78,28 @@ static void xbc_show_compact_tree(void)
}
}
+static void xbc_show_list(void)
+{
+ char key[XBC_KEYLEN_MAX];
+ struct xbc_node *leaf;
+ const char *val;
+ int ret = 0;
+
+ xbc_for_each_key_value(leaf, val) {
+ ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX);
+ if (ret < 0)
+ break;
+ printf("%s = ", key);
+ if (!val || val[0] == '\0') {
+ printf("\"\"\n");
+ continue;
+ }
+ xbc_show_value(xbc_node_get_child(leaf), false);
+ }
+}
+
/* Simple real checksum */
-int checksum(unsigned char *buf, int len)
+static int checksum(unsigned char *buf, int len)
{
int i, sum = 0;
@@ -88,7 +111,7 @@ int checksum(unsigned char *buf, int len)
#define PAGE_SIZE 4096
-int load_xbc_fd(int fd, char **buf, int size)
+static int load_xbc_fd(int fd, char **buf, int size)
{
int ret;
@@ -105,7 +128,7 @@ int load_xbc_fd(int fd, char **buf, int size)
}
/* Return the read size or -errno */
-int load_xbc_file(const char *path, char **buf)
+static int load_xbc_file(const char *path, char **buf)
{
struct stat stat;
int fd, ret;
@@ -124,7 +147,7 @@ int load_xbc_file(const char *path, char **buf)
return ret;
}
-int load_xbc_from_initrd(int fd, char **buf)
+static int load_xbc_from_initrd(int fd, char **buf)
{
struct stat stat;
int ret;
@@ -193,10 +216,55 @@ int load_xbc_from_initrd(int fd, char **buf)
return size;
}
-int show_xbc(const char *path)
+static void show_xbc_error(const char *data, const char *msg, int pos)
+{
+ int lin = 1, col, i;
+
+ if (pos < 0) {
+ pr_err("Error: %s.\n", msg);
+ return;
+ }
+
+ /* Note that pos starts from 0 but lin and col should start from 1. */
+ col = pos + 1;
+ for (i = 0; i < pos; i++) {
+ if (data[i] == '\n') {
+ lin++;
+ col = pos - i;
+ }
+ }
+ pr_err("Parse Error: %s at %d:%d\n", msg, lin, col);
+
+}
+
+static int init_xbc_with_error(char *buf, int len)
+{
+ char *copy = strdup(buf);
+ const char *msg;
+ int ret, pos;
+
+ if (!copy)
+ return -ENOMEM;
+
+ ret = xbc_init(buf, &msg, &pos);
+ if (ret < 0)
+ show_xbc_error(copy, msg, pos);
+ free(copy);
+
+ return ret;
+}
+
+static int show_xbc(const char *path, bool list)
{
int ret, fd;
char *buf = NULL;
+ struct stat st;
+
+ ret = stat(path, &st);
+ if (ret < 0) {
+ pr_err("Failed to stat %s: %d\n", path, -errno);
+ return -errno;
+ }
fd = open(path, O_RDONLY);
if (fd < 0) {
@@ -205,18 +273,33 @@ int show_xbc(const char *path)
}
ret = load_xbc_from_initrd(fd, &buf);
- if (ret < 0)
+ close(fd);
+ if (ret < 0) {
pr_err("Failed to load a boot config from initrd: %d\n", ret);
+ goto out;
+ }
+ /* Assume a bootconfig file if it is enough small */
+ if (ret == 0 && st.st_size <= XBC_DATA_MAX) {
+ ret = load_xbc_file(path, &buf);
+ if (ret < 0) {
+ pr_err("Failed to load a boot config: %d\n", ret);
+ goto out;
+ }
+ if (init_xbc_with_error(buf, ret) < 0)
+ goto out;
+ }
+ if (list)
+ xbc_show_list();
else
xbc_show_compact_tree();
-
- close(fd);
+ ret = 0;
+out:
free(buf);
return ret;
}
-int delete_xbc(const char *path)
+static int delete_xbc(const char *path)
{
struct stat stat;
int ret = 0, fd, size;
@@ -247,28 +330,7 @@ int delete_xbc(const char *path)
return ret;
}
-static void show_xbc_error(const char *data, const char *msg, int pos)
-{
- int lin = 1, col, i;
-
- if (pos < 0) {
- pr_err("Error: %s.\n", msg);
- return;
- }
-
- /* Note that pos starts from 0 but lin and col should start from 1. */
- col = pos + 1;
- for (i = 0; i < pos; i++) {
- if (data[i] == '\n') {
- lin++;
- col = pos - i;
- }
- }
- pr_err("Parse Error: %s at %d:%d\n", msg, lin, col);
-
-}
-
-int apply_xbc(const char *path, const char *xbc_path)
+static int apply_xbc(const char *path, const char *xbc_path)
{
u32 size, csum;
char *buf, *data;
@@ -345,14 +407,16 @@ out:
return ret;
}
-int usage(void)
+static int usage(void)
{
printf("Usage: bootconfig [OPTIONS] <INITRD>\n"
+ "Or bootconfig <CONFIG>\n"
" Apply, delete or show boot config to initrd.\n"
" Options:\n"
" -a <config>: Apply boot config to initrd\n"
- " -d : Delete boot config file from initrd\n\n"
- " If no option is given, show current applied boot config.\n");
+ " -d : Delete boot config file from initrd\n"
+ " -l : list boot config in initrd or file\n\n"
+ " If no option is given, show the bootconfig in the given file.\n");
return -1;
}
@@ -360,10 +424,10 @@ int main(int argc, char **argv)
{
char *path = NULL;
char *apply = NULL;
- bool delete = false;
+ bool delete = false, list = false;
int opt;
- while ((opt = getopt(argc, argv, "hda:")) != -1) {
+ while ((opt = getopt(argc, argv, "hda:l")) != -1) {
switch (opt) {
case 'd':
delete = true;
@@ -371,14 +435,17 @@ int main(int argc, char **argv)
case 'a':
apply = optarg;
break;
+ case 'l':
+ list = true;
+ break;
case 'h':
default:
return usage();
}
}
- if (apply && delete) {
- pr_err("Error: You can not specify both -a and -d at once.\n");
+ if ((apply && delete) || (delete && list) || (apply && list)) {
+ pr_err("Error: You can give one of -a, -d or -l at once.\n");
return usage();
}
@@ -394,5 +461,5 @@ int main(int argc, char **argv)
else if (delete)
return delete_xbc(path);
- return show_xbc(path);
+ return show_xbc(path, list);
}
diff --git a/tools/bootconfig/samples/bad-override.bconf b/tools/bootconfig/samples/bad-override.bconf
new file mode 100644
index 000000000000..fde6c561512e
--- /dev/null
+++ b/tools/bootconfig/samples/bad-override.bconf
@@ -0,0 +1,3 @@
+key.subkey = value
+# We can not override pre-defined subkeys with value
+key := value
diff --git a/tools/bootconfig/samples/bad-override2.bconf b/tools/bootconfig/samples/bad-override2.bconf
new file mode 100644
index 000000000000..688587cb023c
--- /dev/null
+++ b/tools/bootconfig/samples/bad-override2.bconf
@@ -0,0 +1,3 @@
+key = value
+# We can not override pre-defined value with subkey
+key.subkey := value
diff --git a/tools/bootconfig/samples/good-override.bconf b/tools/bootconfig/samples/good-override.bconf
new file mode 100644
index 000000000000..7d31d5f8fbd8
--- /dev/null
+++ b/tools/bootconfig/samples/good-override.bconf
@@ -0,0 +1,6 @@
+# Override the value
+key.word = 1,2,4
+key.word := 2,3
+
+# No pre-defined key
+key.new.word := "new"
diff --git a/tools/bootconfig/scripts/bconf2ftrace.sh b/tools/bootconfig/scripts/bconf2ftrace.sh
new file mode 100755
index 000000000000..595e164dc352
--- /dev/null
+++ b/tools/bootconfig/scripts/bconf2ftrace.sh
@@ -0,0 +1,199 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+usage() {
+ echo "Ftrace boottime trace test tool"
+ echo "Usage: $0 [--apply|--init] [--debug] BOOTCONFIG-FILE"
+ echo " --apply: Test actual apply to tracefs (need sudo)"
+ echo " --init: Initialize ftrace before applying (imply --apply)"
+ exit 1
+}
+
+[ $# -eq 0 ] && usage
+
+BCONF=
+DEBUG=
+APPLY=
+INIT=
+while [ x"$1" != x ]; do
+ case "$1" in
+ "--debug")
+ DEBUG=$1;;
+ "--apply")
+ APPLY=$1;;
+ "--init")
+ APPLY=$1
+ INIT=$1;;
+ *)
+ [ ! -f $1 ] && usage
+ BCONF=$1;;
+ esac
+ shift 1
+done
+
+if [ x"$APPLY" != x ]; then
+ if [ `id -u` -ne 0 ]; then
+ echo "This must be run by root user. Try sudo." 1>&2
+ exec sudo $0 $DEBUG $APPLY $BCONF
+ fi
+fi
+
+run_cmd() { # command
+ echo "$*"
+ if [ x"$APPLY" != x ]; then # apply command
+ eval $*
+ fi
+}
+
+if [ x"$DEBUG" != x ]; then
+ set -x
+fi
+
+TRACEFS=`grep -m 1 -w tracefs /proc/mounts | cut -f 2 -d " "`
+if [ -z "$TRACEFS" ]; then
+ if ! grep -wq debugfs /proc/mounts; then
+ echo "Error: No tracefs/debugfs was mounted." 1>&2
+ exit 1
+ fi
+ TRACEFS=`grep -m 1 -w debugfs /proc/mounts | cut -f 2 -d " "`/tracing
+ if [ ! -d $TRACEFS ]; then
+ echo "Error: ftrace is not enabled on this kernel." 1>&2
+ exit 1
+ fi
+fi
+
+if [ x"$INIT" != x ]; then
+ . `dirname $0`/ftrace.sh
+ (cd $TRACEFS; initialize_ftrace)
+fi
+
+. `dirname $0`/xbc.sh
+
+######## main #########
+set -e
+
+xbc_init $BCONF
+
+set_value_of() { # key file
+ if xbc_has_key $1; then
+ val=`xbc_get_val $1 1`
+ run_cmd "echo '$val' >> $2"
+ fi
+}
+
+set_array_of() { # key file
+ if xbc_has_key $1; then
+ xbc_get_val $1 | while read line; do
+ run_cmd "echo '$line' >> $2"
+ done
+ fi
+}
+
+compose_synth() { # event_name branch
+ echo -n "$1 "
+ xbc_get_val $2 | while read field; do echo -n "$field; "; done
+}
+
+setup_event() { # prefix group event [instance]
+ branch=$1.$2.$3
+ if [ "$4" ]; then
+ eventdir="$TRACEFS/instances/$4/events/$2/$3"
+ else
+ eventdir="$TRACEFS/events/$2/$3"
+ fi
+ case $2 in
+ kprobes)
+ xbc_get_val ${branch}.probes | while read line; do
+ run_cmd "echo 'p:kprobes/$3 $line' >> $TRACEFS/kprobe_events"
+ done
+ ;;
+ synthetic)
+ run_cmd "echo '`compose_synth $3 ${branch}.fields`' >> $TRACEFS/synthetic_events"
+ ;;
+ esac
+
+ set_value_of ${branch}.filter ${eventdir}/filter
+ set_array_of ${branch}.actions ${eventdir}/trigger
+
+ if xbc_has_key ${branch}.enable; then
+ run_cmd "echo 1 > ${eventdir}/enable"
+ fi
+}
+
+setup_events() { # prefix("ftrace" or "ftrace.instance.INSTANCE") [instance]
+ prefix="${1}.event"
+ if xbc_has_branch ${1}.event; then
+ for grpev in `xbc_subkeys ${1}.event 2`; do
+ setup_event $prefix ${grpev%.*} ${grpev#*.} $2
+ done
+ fi
+}
+
+size2kb() { # size[KB|MB]
+ case $1 in
+ *KB)
+ echo ${1%KB};;
+ *MB)
+ expr ${1%MB} \* 1024;;
+ *)
+ expr $1 / 1024 ;;
+ esac
+}
+
+setup_instance() { # [instance]
+ if [ "$1" ]; then
+ instance="ftrace.instance.${1}"
+ instancedir=$TRACEFS/instances/$1
+ else
+ instance="ftrace"
+ instancedir=$TRACEFS
+ fi
+
+ set_array_of ${instance}.options ${instancedir}/trace_options
+ set_value_of ${instance}.trace_clock ${instancedir}/trace_clock
+ set_value_of ${instance}.cpumask ${instancedir}/tracing_cpumask
+ set_value_of ${instance}.tracer ${instancedir}/current_tracer
+ set_array_of ${instance}.ftrace.filters \
+ ${instancedir}/set_ftrace_filter
+ set_array_of ${instance}.ftrace.notrace \
+ ${instancedir}/set_ftrace_notrace
+
+ if xbc_has_key ${instance}.alloc_snapshot; then
+ run_cmd "echo 1 > ${instancedir}/snapshot"
+ fi
+
+ if xbc_has_key ${instance}.buffer_size; then
+ size=`xbc_get_val ${instance}.buffer_size 1`
+ size=`eval size2kb $size`
+ run_cmd "echo $size >> ${instancedir}/buffer_size_kb"
+ fi
+
+ setup_events ${instance} $1
+ set_array_of ${instance}.events ${instancedir}/set_event
+}
+
+# ftrace global configs (kernel.*)
+if xbc_has_key "kernel.dump_on_oops"; then
+ dump_mode=`xbc_get_val "kernel.dump_on_oops" 1`
+ [ "$dump_mode" ] && dump_mode=`eval echo $dump_mode` || dump_mode=1
+ run_cmd "echo \"$dump_mode\" > /proc/sys/kernel/ftrace_dump_on_oops"
+fi
+
+set_value_of kernel.fgraph_max_depth $TRACEFS/max_graph_depth
+set_array_of kernel.fgraph_filters $TRACEFS/set_graph_function
+set_array_of kernel.fgraph_notraces $TRACEFS/set_graph_notrace
+
+# Per-instance/per-event configs
+if ! xbc_has_branch "ftrace" ; then
+ exit 0
+fi
+
+setup_instance # root instance
+
+if xbc_has_branch "ftrace.instance"; then
+ for i in `xbc_subkeys "ftrace.instance" 1`; do
+ run_cmd "mkdir -p $TRACEFS/instances/$i"
+ setup_instance $i
+ done
+fi
+
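As a sketch of the translation the script performs, a bootconfig fragment such as the one below (keys chosen to exercise the setup_instance() handlers above; tracefs assumed at /sys/kernel/tracing) would be echoed and, with --apply, executed as:

    # input bootconfig:
    #   ftrace.tracer = function_graph
    #   ftrace.instance.foo.options = sym-addr
    #
    # generated commands:
    echo 'function_graph' >> /sys/kernel/tracing/current_tracer
    mkdir -p /sys/kernel/tracing/instances/foo
    echo 'sym-addr' >> /sys/kernel/tracing/instances/foo/trace_options
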
diff --git a/tools/bootconfig/scripts/ftrace.sh b/tools/bootconfig/scripts/ftrace.sh
new file mode 100644
index 000000000000..186eed923041
--- /dev/null
+++ b/tools/bootconfig/scripts/ftrace.sh
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+clear_trace() { # reset trace output
+ echo > trace
+}
+
+disable_tracing() { # stop trace recording
+ echo 0 > tracing_on
+}
+
+enable_tracing() { # start trace recording
+ echo 1 > tracing_on
+}
+
+reset_tracer() { # reset the current tracer
+ echo nop > current_tracer
+}
+
+reset_trigger_file() {
+ # remove action triggers first
+ grep -H ':on[^:]*(' $@ |
+ while read line; do
+ cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+ file=`echo $line | cut -f1 -d:`
+ echo "!$cmd" >> $file
+ done
+ grep -Hv ^# $@ |
+ while read line; do
+ cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+ file=`echo $line | cut -f1 -d:`
+ echo "!$cmd" > $file
+ done
+}
+
+reset_trigger() { # reset all current setting triggers
+ if [ -d events/synthetic ]; then
+ reset_trigger_file events/synthetic/*/trigger
+ fi
+ reset_trigger_file events/*/*/trigger
+}
+
+reset_events_filter() { # reset all current setting filters
+ grep -v ^none events/*/*/filter |
+ while read line; do
+ echo 0 > `echo $line | cut -f1 -d:`
+ done
+}
+
+reset_ftrace_filter() { # reset all triggers in set_ftrace_filter
+ if [ ! -f set_ftrace_filter ]; then
+ return 0
+ fi
+ echo > set_ftrace_filter
+ grep -v '^#' set_ftrace_filter | while read t; do
+ tr=`echo $t | cut -d: -f2`
+ if [ "$tr" = "" ]; then
+ continue
+ fi
+ if ! grep -q "$t" set_ftrace_filter; then
+ continue;
+ fi
+ name=`echo $t | cut -d: -f1 | cut -d' ' -f1`
+ if [ $tr = "enable_event" -o $tr = "disable_event" ]; then
+ tr=`echo $t | cut -d: -f2-4`
+ limit=`echo $t | cut -d: -f5`
+ else
+ tr=`echo $t | cut -d: -f2`
+ limit=`echo $t | cut -d: -f3`
+ fi
+ if [ "$limit" != "unlimited" ]; then
+ tr="$tr:$limit"
+ fi
+ echo "!$name:$tr" > set_ftrace_filter
+ done
+}
+
+disable_events() {
+ echo 0 > events/enable
+}
+
+clear_synthetic_events() { # reset all current synthetic events
+ grep -v ^# synthetic_events |
+ while read line; do
+ echo "!$line" >> synthetic_events
+ done
+}
+
+initialize_ftrace() { # Reset ftrace to initial-state
+# As the initial state, ftrace will be set to nop tracer,
+# no events, no triggers, no filters, no function filters,
+# no probes, and tracing on.
+ disable_tracing
+ reset_tracer
+ reset_trigger
+ reset_events_filter
+ reset_ftrace_filter
+ disable_events
+ [ -f set_event_pid ] && echo > set_event_pid
+ [ -f set_ftrace_pid ] && echo > set_ftrace_pid
+ [ -f set_ftrace_notrace ] && echo > set_ftrace_notrace
+ [ -f set_graph_function ] && echo | tee set_graph_*
+ [ -f stack_trace_filter ] && echo > stack_trace_filter
+ [ -f kprobe_events ] && echo > kprobe_events
+ [ -f uprobe_events ] && echo > uprobe_events
+ [ -f synthetic_events ] && echo > synthetic_events
+ [ -f snapshot ] && echo 0 > snapshot
+ clear_trace
+ enable_tracing
+}
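
ftrace.sh is meant to be sourced from inside the tracefs directory, as bconf2ftrace.sh does for its --init option; a minimal stand-alone use might look like this (mount point assumed):

    cd /sys/kernel/tracing
    . /path/to/tools/bootconfig/scripts/ftrace.sh
    initialize_ftrace   # nop tracer, no events/triggers/filters, tracing on
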
diff --git a/tools/bootconfig/scripts/ftrace2bconf.sh b/tools/bootconfig/scripts/ftrace2bconf.sh
new file mode 100755
index 000000000000..6c0d4b61e0c2
--- /dev/null
+++ b/tools/bootconfig/scripts/ftrace2bconf.sh
@@ -0,0 +1,244 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+usage() {
+ echo "Dump boot-time tracing bootconfig from ftrace"
+ echo "Usage: $0 [--debug] [ > BOOTCONFIG-FILE]"
+ exit 1
+}
+
+DEBUG=
+while [ x"$1" != x ]; do
+ case "$1" in
+ "--debug")
+ DEBUG=$1;;
+ -*)
+ usage
+ ;;
+ esac
+ shift 1
+done
+
+if [ x"$DEBUG" != x ]; then
+ set -x
+fi
+
+TRACEFS=`grep -m 1 -w tracefs /proc/mounts | cut -f 2 -d " "`
+if [ -z "$TRACEFS" ]; then
+ if ! grep -wq debugfs /proc/mounts; then
+ echo "Error: No tracefs/debugfs was mounted."
+ exit 1
+ fi
+ TRACEFS=`grep -m 1 -w debugfs /proc/mounts | cut -f 2 -d " "`/tracing
+ if [ ! -d $TRACEFS ]; then
+ echo "Error: ftrace is not enabled on this kernel." 1>&2
+ exit 1
+ fi
+fi
+
+######## main #########
+
+set -e
+
+emit_kv() { # key =|+= value
+ echo "$@"
+}
+
+global_options() {
+ val=`cat $TRACEFS/max_graph_depth`
+ [ $val != 0 ] && emit_kv kernel.fgraph_max_depth = $val
+ if grep -qv "^#" $TRACEFS/set_graph_function $TRACEFS/set_graph_notrace ; then
+ cat 1>&2 << EOF
+# WARN: kernel.fgraph_filters and kernel.fgraph_notraces are not supported, since the wildcard expressions were expanded and lost from memory.
+EOF
+ fi
+}
+
+kprobe_event_options() {
+ cat $TRACEFS/kprobe_events | while read p args; do
+ case $p in
+ r*)
+ cat 1>&2 << EOF
+# WARN: A return probe found but it is not supported by bootconfig. Skip it.
+EOF
+ continue;;
+ esac
+ p=${p#*:}
+ event=${p#*/}
+ group=${p%/*}
+ if [ $group != "kprobes" ]; then
+ cat 1>&2 << EOF
+# WARN: kprobes group name $group is changed to "kprobes" for bootconfig.
+EOF
+ fi
+ emit_kv $PREFIX.event.kprobes.$event.probes += $args
+ done
+}
+
+synth_event_options() {
+ cat $TRACEFS/synthetic_events | while read event fields; do
+ emit_kv $PREFIX.event.synthetic.$event.fields = `echo $fields | sed "s/;/,/g"`
+ done
+}
+
+# Variables resolver
+DEFINED_VARS=
+UNRESOLVED_EVENTS=
+
+defined_vars() { # event-dir
+ grep "^hist" $1/trigger | grep -o ':[a-zA-Z0-9]*='
+}
+referred_vars() { # event-dir
+ grep "^hist" $1/trigger | grep -o '$[a-zA-Z0-9]*'
+}
+
+per_event_options() { # event-dir
+ evdir=$1
+ # Check the special event which has no filter and no trigger
+ [ ! -f $evdir/filter ] && return
+
+ if grep -q "^hist:" $evdir/trigger; then
+ # hist action can refer the undefined variables
+ __vars=`defined_vars $evdir`
+ for v in `referred_vars $evdir`; do
+ if echo $DEFINED_VARS $__vars | grep -vqw ${v#$}; then
+ # $v is not defined yet, defer it
+ UNRESOLVED_EVENTS="$UNRESOLVED_EVENTS $evdir"
+ return;
+ fi
+ done
+ DEFINED_VARS="$DEFINED_VARS "`defined_vars $evdir`
+ fi
+ grep -v "^#" $evdir/trigger | while read action active; do
+ emit_kv $PREFIX.event.$group.$event.actions += \'$action\'
+ done
+
+ # enable is not checked; this is done by set_event in the instance.
+ val=`cat $evdir/filter`
+ if [ "$val" != "none" ]; then
+ emit_kv $PREFIX.event.$group.$event.filter = "$val"
+ fi
+}
+
+retry_unresolved() {
+ unresolved=$UNRESOLVED_EVENTS
+ UNRESOLVED_EVENTS=
+ for evdir in $unresolved; do
+ event=${evdir##*/}
+ group=${evdir%/*}; group=${group##*/}
+ per_event_options $evdir
+ done
+}
+
+event_options() {
+ # PREFIX and INSTANCE must be set
+ if [ $PREFIX = "ftrace" ]; then
+ # define the dynamic events
+ kprobe_event_options
+ synth_event_options
+ fi
+ for group in `ls $INSTANCE/events/` ; do
+ [ ! -d $INSTANCE/events/$group ] && continue
+ for event in `ls $INSTANCE/events/$group/` ;do
+ [ ! -d $INSTANCE/events/$group/$event ] && continue
+ per_event_options $INSTANCE/events/$group/$event
+ done
+ done
+ retry=0
+ while [ $retry -lt 3 ]; do
+ retry_unresolved
+ retry=$((retry + 1))
+ done
+ if [ "$UNRESOLVED_EVENTS" ]; then
+ cat 1>&2 << EOF
+! ERROR: hist triggers in $UNRESOLVED_EVENTS use some undefined variables.
+EOF
+ fi
+}
+
+is_default_trace_option() { # option
+grep -qw $1 << EOF
+print-parent
+nosym-offset
+nosym-addr
+noverbose
+noraw
+nohex
+nobin
+noblock
+trace_printk
+annotate
+nouserstacktrace
+nosym-userobj
+noprintk-msg-only
+context-info
+nolatency-format
+record-cmd
+norecord-tgid
+overwrite
+nodisable_on_free
+irq-info
+markers
+noevent-fork
+nopause-on-trace
+function-trace
+nofunction-fork
+nodisplay-graph
+nostacktrace
+notest_nop_accept
+notest_nop_refuse
+EOF
+}
+
+instance_options() { # [instance-name]
+ if [ $# -eq 0 ]; then
+ PREFIX="ftrace"
+ INSTANCE=$TRACEFS
+ else
+ PREFIX="ftrace.instance.$1"
+ INSTANCE=$TRACEFS/instances/$1
+ fi
+ val=
+ for i in `cat $INSTANCE/trace_options`; do
+ is_default_trace_option $i && continue
+ val="$val, $i"
+ done
+ [ "$val" ] && emit_kv $PREFIX.options = "${val#,}"
+ val="local"
+ for i in `cat $INSTANCE/trace_clock` ; do
+ [ "${i#*]}" ] && continue
+ i=${i%]}; val=${i#[}
+ done
+ [ $val != "local" ] && emit_kv $PREFIX.trace_clock = $val
+ val=`cat $INSTANCE/buffer_size_kb`
+ if echo $val | grep -vq "expanded" ; then
+ emit_kv $PREFIX.buffer_size = $val"KB"
+ fi
+ if grep -q "is allocated" $INSTANCE/snapshot ; then
+ emit_kv $PREFIX.alloc_snapshot
+ fi
+ val=`cat $INSTANCE/tracing_cpumask`
+ if [ `echo $val | sed -e s/f//g`x != x ]; then
+ emit_kv $PREFIX.cpumask = $val
+ fi
+
+ val=
+ for i in `cat $INSTANCE/set_event`; do
+ val="$val, $i"
+ done
+ [ "$val" ] && emit_kv $PREFIX.events = "${val#,}"
+ val=`cat $INSTANCE/current_tracer`
+ [ $val != nop ] && emit_kv $PREFIX.tracer = $val
+ if grep -qv "^#" $INSTANCE/set_ftrace_filter $INSTANCE/set_ftrace_notrace; then
+ cat 1>&2 << EOF
+# WARN: kernel.ftrace.filters and kernel.ftrace.notrace are not supported, since the wildcard expressions were expanded and lost from memory.
+EOF
+ fi
+ event_options
+}
+
+global_options
+instance_options
+for i in `ls $TRACEFS/instances` ; do
+ instance_options $i
+done
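
The intended workflow is a round trip with the companion script; a sketch (file name illustrative):

    # snapshot the currently configured ftrace state as a bootconfig:
    ./ftrace2bconf.sh > boottrace.bconf
    # reset ftrace and replay the snapshot through tracefs:
    ./bconf2ftrace.sh --init boottrace.bconf
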
diff --git a/tools/bootconfig/scripts/xbc.sh b/tools/bootconfig/scripts/xbc.sh
new file mode 100644
index 000000000000..b8c84e654556
--- /dev/null
+++ b/tools/bootconfig/scripts/xbc.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+# bootconfig utility functions
+
+XBC_TMPFILE=
+XBC_BASEDIR=`dirname $0`
+BOOTCONFIG=${BOOTCONFIG:=$XBC_BASEDIR/../bootconfig}
+if [ ! -x "$BOOTCONFIG" ]; then
+ BOOTCONFIG=`which bootconfig`
+ if [ -z "$BOOTCONFIG" ]; then
+ echo "Erorr: bootconfig command is not found" 1>&2
+ exit 1
+ fi
+fi
+
+xbc_cleanup() {
+ if [ "$XBC_TMPFILE" ]; then
+ rm -f "$XBC_TMPFILE"
+ fi
+}
+
+xbc_init() { # bootconfig-file
+ xbc_cleanup
+ XBC_TMPFILE=`mktemp bconf-XXXX`
+ trap xbc_cleanup EXIT TERM
+
+ $BOOTCONFIG -l $1 > $XBC_TMPFILE || exit 1
+}
+
+nr_args() { # args
+ echo $#
+}
+
+xbc_get_val() { # key [maxnum]
+ if [ "$2" ]; then
+ MAXOPT="-L $2"
+ fi
+ grep "^$1 =" $XBC_TMPFILE | cut -d= -f2- | \
+ sed -e 's/", /" /g' -e "s/',/' /g" | \
+ xargs $MAXOPT -n 1 echo
+}
+
+xbc_has_key() { # key
+ grep -q "^$1 =" $XBC_TMPFILE
+}
+
+xbc_has_branch() { # prefix-key
+ grep -q "^$1" $XBC_TMPFILE
+}
+
+xbc_subkeys() { # prefix-key depth
+ __keys=`echo $1 | sed "s/\./ /g"`
+ __s=`nr_args $__keys`
+ grep "^$1" $XBC_TMPFILE | cut -d= -f1| cut -d. -f$((__s + 1))-$((__s + $2)) | uniq
+}
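
For illustration, given a listed config that defines ftrace.instance.foo.tracer and ftrace.instance.bar.tracer, the helpers above behave roughly as follows (a sketch, not captured output):

    . scripts/xbc.sh
    xbc_init boottrace.bconf
    xbc_has_key ftrace.instance.foo.tracer   # exit status 0
    xbc_subkeys ftrace.instance 1            # prints "foo" and "bar", one per line
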
diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh
index 81b350ffd03f..d295e406a756 100755
--- a/tools/bootconfig/test-bootconfig.sh
+++ b/tools/bootconfig/test-bootconfig.sh
@@ -55,6 +55,9 @@ echo "Apply command test"
xpass $BOOTCONF -a $TEMPCONF $INITRD
new_size=$(stat -c %s $INITRD)
+echo "Show command test"
+xpass $BOOTCONF $INITRD
+
echo "File size check"
xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9 + 12)
@@ -114,6 +117,51 @@ xpass grep -q "bar" $OUTFILE
xpass grep -q "baz" $OUTFILE
xpass grep -q "qux" $OUTFILE
+echo "Override same-key values"
+cat > $TEMPCONF << EOF
+key = bar, baz
+key := qux
+EOF
+echo > $INITRD
+
+xpass $BOOTCONF -a $TEMPCONF $INITRD
+$BOOTCONF $INITRD > $OUTFILE
+xfail grep -q "bar" $OUTFILE
+xfail grep -q "baz" $OUTFILE
+xpass grep -q "qux" $OUTFILE
+
+echo "Double/single quotes test"
+echo "key = '\"string\"';" > $TEMPCONF
+$BOOTCONF -a $TEMPCONF $INITRD
+$BOOTCONF $INITRD > $TEMPCONF
+cat $TEMPCONF
+xpass grep \'\"string\"\' $TEMPCONF
+
+echo "Repeat same-key tree"
+cat > $TEMPCONF << EOF
+foo
+bar
+foo { buz }
+EOF
+echo > $INITRD
+
+xpass $BOOTCONF -a $TEMPCONF $INITRD
+$BOOTCONF $INITRD > $OUTFILE
+xpass grep -q "bar" $OUTFILE
+
+
+echo "Remove/keep tailing spaces"
+cat > $TEMPCONF << EOF
+foo = val # comment
+bar = "val2 " # comment
+EOF
+echo > $INITRD
+
+xpass $BOOTCONF -a $TEMPCONF $INITRD
+$BOOTCONF $INITRD > $OUTFILE
+xfail grep -q val[[:space:]] $OUTFILE
+xpass grep -q val2[[:space:]] $OUTFILE
+
echo "=== expected failure cases ==="
for i in samples/bad-* ; do
xfail $BOOTCONF -a $i $INITRD
@@ -124,9 +172,16 @@ for i in samples/good-* ; do
xpass $BOOTCONF -a $i $INITRD
done
+
+echo
+echo "=== Summary ==="
+echo "# of Passed: $(expr $NO - $NG - 1)"
+echo "# of Failed: $NG"
+
echo
if [ $NG -eq 0 ]; then
echo "All tests passed"
else
echo "$NG tests failed"
+ exit 1
fi
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index f897eeeb0b4f..39bb322707b4 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -3,14 +3,14 @@ include ../scripts/Makefile.include
prefix ?= /usr/local
-CC = gcc
LEX = flex
YACC = bison
MAKE = make
INSTALL ?= install
CFLAGS += -Wall -O2
-CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/include/uapi -I$(srctree)/include
+CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi \
+ -I$(srctree)/tools/include
# This will work when bpf is built in tools env. where srctree
# isn't set and when invoked from selftests build, where srctree
@@ -38,7 +38,7 @@ FEATURE_TESTS = libbfd disassembler-four-args
FEATURE_DISPLAY = libbfd disassembler-four-args
check_feat := 1
-NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean
+NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean resolve_btfids_clean
ifdef MAKECMDGOALS
ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
check_feat := 0
@@ -64,12 +64,12 @@ $(OUTPUT)%.lex.c: $(srctree)/tools/bpf/%.l
$(QUIET_FLEX)$(LEX) -o $@ $<
$(OUTPUT)%.o: $(srctree)/tools/bpf/%.c
- $(QUIET_CC)$(COMPILE.c) -o $@ $<
+ $(QUIET_CC)$(CC) $(CFLAGS) -c -o $@ $<
$(OUTPUT)%.yacc.o: $(OUTPUT)%.yacc.c
- $(QUIET_CC)$(COMPILE.c) -o $@ $<
+ $(QUIET_CC)$(CC) $(CFLAGS) -c -o $@ $<
$(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c
- $(QUIET_CC)$(COMPILE.c) -o $@ $<
+ $(QUIET_CC)$(CC) $(CFLAGS) -c -o $@ $<
PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm
@@ -89,7 +89,7 @@ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
$(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c
$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
-clean: bpftool_clean runqslower_clean
+clean: bpftool_clean runqslower_clean resolve_btfids_clean
$(call QUIET_CLEAN, bpf-progs)
$(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
$(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
@@ -124,5 +124,12 @@ runqslower_install:
runqslower_clean:
$(call descend,runqslower,clean)
+resolve_btfids:
+ $(call descend,resolve_btfids)
+
+resolve_btfids_clean:
+ $(call descend,resolve_btfids,clean)
+
.PHONY: all install clean bpftool bpftool_install bpftool_clean \
- runqslower runqslower_install runqslower_clean
+ runqslower runqslower_install runqslower_clean \
+ resolve_btfids resolve_btfids_clean
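
With the descend targets above, resolve_btfids can be built and cleaned from tools/bpf directly, e.g.:

    make -C tools/bpf resolve_btfids
    make -C tools/bpf resolve_btfids_clean
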
diff --git a/tools/bpf/bpf_asm.c b/tools/bpf/bpf_asm.c
index e5f95e3eede3..0063c3c029e7 100644
--- a/tools/bpf/bpf_asm.c
+++ b/tools/bpf/bpf_asm.c
@@ -11,7 +11,7 @@
*
* How to get into it:
*
- * 1) read Documentation/networking/filter.txt
+ * 1) read Documentation/networking/filter.rst
* 2) Run `bpf_asm [-c] <filter-prog file>` to translate into binary
* blob that is loadable with xt_bpf, cls_bpf et al. Note: -c will
* pretty print a C-like construct.
diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c
index 9d3766e653a9..a0ebcdf59c31 100644
--- a/tools/bpf/bpf_dbg.c
+++ b/tools/bpf/bpf_dbg.c
@@ -13,7 +13,7 @@
* for making a verdict when multiple simple BPF programs are combined
* into one in order to prevent parsing same headers multiple times.
*
- * More on how to debug BPF opcodes see Documentation/networking/filter.txt
+ * More on how to debug BPF opcodes see Documentation/networking/filter.rst
* which is the main document on BPF. Mini howto for getting started:
*
* 1) `./bpf_dbg` to enter the shell (shell cmds denoted with '>'):
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index 26cde83e1ca3..3e601bcfd461 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -1,10 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
*.d
-/_bpftool
+/bpftool-bootstrap
/bpftool
bpftool*.8
bpf-helpers.*
FEATURE-DUMP.bpftool
feature
libbpf
-profiler.skel.h
+/*.skel.h
+/vmlinux.h
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index 815ac9804aee..f33cb02de95c 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -19,7 +19,7 @@ man8dir = $(mandir)/man8
# Load targets for building eBPF helpers man page.
include ../../Makefile.helpers
-MAN8_RST = $(filter-out $(HELPERS_RST),$(wildcard *.rst))
+MAN8_RST = $(wildcard bpftool*.rst)
_DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST))
DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))
@@ -28,12 +28,23 @@ man: man8 helpers
man8: $(DOC_MAN8)
RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
+RST2MAN_OPTS += --verbose
+
+list_pages = $(sort $(basename $(filter-out $(1),$(MAN8_RST))))
+see_also = $(subst " ",, \
+ "\n" \
+ "SEE ALSO\n" \
+ "========\n" \
+ "\t**bpf**\ (2),\n" \
+ "\t**bpf-helpers**\\ (7)" \
+ $(foreach page,$(call list_pages,$(1)),",\n\t**$(page)**\\ (8)") \
+ "\n")
$(OUTPUT)%.8: %.rst
ifndef RST2MAN_DEP
$(error "rst2man not found, but required to generate man pages")
endif
- $(QUIET_GEN)rst2man $< > $@
+ $(QUIET_GEN)( cat $< ; printf "%b" $(call see_also,$<) ) | rst2man $(RST2MAN_OPTS) > $@
clean: helpers-clean
$(call QUIET_CLEAN, Documentation)
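
The see_also macro appends a generated SEE ALSO section to each page before rst2man runs, cross-referencing every other bpftool man page; for bpftool-btf.rst the appended text would look roughly like:

    SEE ALSO
    ========
        **bpf**\ (2),
        **bpf-helpers**\ (7),
        **bpftool**\ (8),
        **bpftool-cgroup**\ (8),
        ... (one entry per remaining bpftool-*.rst page)
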
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index 39615f8e145b..ff4d327a582e 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -36,6 +36,11 @@ DESCRIPTION
otherwise list all BTF objects currently loaded on the
system.
+ Since Linux 5.8 bpftool is able to discover information about
+ processes that hold open file descriptors (FDs) against BTF
+ objects. On such kernels bpftool will automatically emit this
+ information as well.
+
**bpftool btf dump** *BTF_SRC*
Dump BTF entries from a given *BTF_SRC*.
@@ -66,26 +71,12 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
+ .. include:: common_options.rst
EXAMPLES
========
**# bpftool btf dump id 1226**
+
::
[1] PTR '(anon)' type_id=2
@@ -99,6 +90,7 @@ EXAMPLES
This gives an example of default output for all supported BTF kinds.
**$ cat prog.c**
+
::
struct fwd_struct;
@@ -139,6 +131,7 @@ This gives an example of default output for all supported BTF kinds.
}
**$ bpftool btf dump file prog.o**
+
::
[1] PTR '(anon)' type_id=2
@@ -224,15 +217,3 @@ All the standard ways to specify map or program are supported:
**# bpftool btf dump prog tag b88e0a09b1d9759d**
**# bpftool btf dump prog pinned /sys/fs/bpf/prog_name**
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-map**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 06a28b07787d..790944c35602 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -20,7 +20,7 @@ SYNOPSIS
CGROUP COMMANDS
===============
-| **bpftool** **cgroup { show | list }** *CGROUP* [**effective**]
+| **bpftool** **cgroup** { **show** | **list** } *CGROUP* [**effective**]
| **bpftool** **cgroup tree** [*CGROUP_ROOT*] [**effective**]
| **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
| **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
@@ -29,8 +29,8 @@ CGROUP COMMANDS
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
-| **sendmsg4** | **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** |
-| **getsockopt** | **setsockopt** }
+| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** |
+| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** }
| *ATTACH_FLAGS* := { **multi** | **override** }
DESCRIPTION
@@ -101,7 +101,11 @@ DESCRIPTION
an unconnected udp6 socket (since 5.2);
**sysctl** sysctl access (since 5.2);
**getsockopt** call to getsockopt (since 5.3);
- **setsockopt** call to setsockopt (since 5.3).
+ **setsockopt** call to setsockopt (since 5.3);
+ **getpeername4** call to getpeername(2) for an inet4 socket (since 5.8);
+ **getpeername6** call to getpeername(2) for an inet6 socket (since 5.8);
+ **getsockname4** call to getsockname(2) for an inet4 socket (since 5.8);
+ **getsockname6** call to getsockname(2) for an inet6 socket (since 5.8).
**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
Detach *PROG* from the cgroup *CGROUP* and attach type
@@ -112,26 +116,11 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
+ .. include:: common_options.rst
-f, --bpffs
Show file names of pinned programs.
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
-
EXAMPLES
========
|
@@ -154,15 +143,3 @@ EXAMPLES
::
ID AttachType AttachFlags Name
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-map**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-btf**\ (8)
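
As an illustration of the new getpeername/getsockname attach types (cgroup path and program id hypothetical):

    # bpftool cgroup attach /sys/fs/cgroup/test getpeername4 id 42
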
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index b04156cfd7a3..dd3771bdbc57 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -19,7 +19,7 @@ SYNOPSIS
FEATURE COMMANDS
================
-| **bpftool** **feature probe** [*COMPONENT*] [**full**] [**macros** [**prefix** *PREFIX*]]
+| **bpftool** **feature probe** [*COMPONENT*] [**full**] [**unprivileged**] [**macros** [**prefix** *PREFIX*]]
| **bpftool** **feature help**
|
| *COMPONENT* := { **kernel** | **dev** *NAME* }
@@ -28,7 +28,7 @@ DESCRIPTION
===========
**bpftool feature probe** [**kernel**] [**full**] [**macros** [**prefix** *PREFIX*]]
Probe the running kernel and dump a number of eBPF-related
- parameters, such as availability of the **bpf()** system call,
+ parameters, such as availability of the **bpf**\ () system call,
JIT status, eBPF program types availability, eBPF helper
functions availability, and more.
@@ -49,6 +49,16 @@ DESCRIPTION
Keyword **kernel** can be omitted. If no probe target is
specified, probing the kernel is the default behaviour.
+ When the **unprivileged** keyword is used, bpftool will dump
+ only the features available to a user who does not have the
+ **CAP_SYS_ADMIN** capability set. The features available in
+ that case usually represent a small subset of the parameters
+ supported by the system. Unprivileged users MUST use the
+ **unprivileged** keyword: This is to avoid misdetection if
+ bpftool is inadvertently run as non-root, for example. This
+ keyword is unavailable if bpftool was compiled without
+ libcap.
+
**bpftool feature probe dev** *NAME* [**full**] [**macros** [**prefix** *PREFIX*]]
Probe network device for supported eBPF features and dump
results to the console.
@@ -61,31 +71,4 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-map**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-btf**\ (8)
+ .. include:: common_options.rst
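
A usage sketch for the new keyword, following the synopsis order above (the macro prefix is illustrative):

    $ bpftool feature probe unprivileged
    $ bpftool feature probe unprivileged macros prefix UNPRIV_
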
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index 94d91322895a..84cf0639696f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -14,7 +14,7 @@ SYNOPSIS
*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
- *COMMAND* := { **skeleton | **help** }
+ *COMMAND* := { **skeleton** | **help** }
GEN COMMANDS
=============
@@ -36,12 +36,12 @@ DESCRIPTION
etc. Skeleton eliminates the need to lookup mentioned
components by name. Instead, if skeleton instantiation
succeeds, they are populated in skeleton structure as valid
- libbpf types (e.g., struct bpf_map pointer) and can be
+ libbpf types (e.g., **struct bpf_map** pointer) and can be
passed to existing generic libbpf APIs.
In addition to simple and reliable access to maps and
- programs, skeleton provides a storage for BPF links (struct
- bpf_link) for each BPF program within BPF object. When
+ programs, skeleton provides a storage for BPF links (**struct
+ bpf_link**) for each BPF program within BPF object. When
requested, supported BPF programs will be automatically
attached and resulting BPF links stored for further use by
user in pre-allocated fields in skeleton struct. For BPF
@@ -82,14 +82,14 @@ DESCRIPTION
- **example__open** and **example__open_opts**.
These functions are used to instantiate skeleton. It
- corresponds to libbpf's **bpf_object__open()** API.
+ corresponds to libbpf's **bpf_object__open**\ () API.
**_opts** variants accepts extra **bpf_object_open_opts**
options.
- **example__load**.
This function creates maps, loads and verifies BPF
programs, initializes global data maps. It corresponds to
- libppf's **bpf_object__load** API.
+	  libbpf's **bpf_object__load**\ () API.
- **example__open_and_load** combines **example__open** and
**example__load** invocations in one commonly used
@@ -126,26 +126,12 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON,
- this option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
+ .. include:: common_options.rst
EXAMPLES
========
**$ cat example.c**
+
::
#include <stdbool.h>
@@ -187,6 +173,7 @@ This is example BPF application with two BPF programs and a mix of BPF maps
and global variables.
**$ bpftool gen skeleton example.o**
+
::
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
@@ -241,6 +228,7 @@ and global variables.
#endif /* __EXAMPLE_SKEL_H__ */
**$ cat example_user.c**
+
::
#include "example.skel.h"
@@ -283,6 +271,7 @@ and global variables.
}
**# ./example_user**
+
::
my_map name: my_map
@@ -290,16 +279,3 @@ and global variables.
my_static_var: 7
This is a stripped-out version of skeleton generated for above example code.
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-map**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
new file mode 100644
index 000000000000..51f49bead619
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
@@ -0,0 +1,70 @@
+============
+bpftool-iter
+============
+-------------------------------------------------------------------------------
+tool to create BPF iterators
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **iter** *COMMAND*
+
+ *COMMANDS* := { **pin** | **help** }
+
+ITER COMMANDS
+===================
+
+| **bpftool** **iter pin** *OBJ* *PATH* [**map** *MAP*]
+| **bpftool** **iter help**
+|
+| *OBJ* := /a/file/of/bpf_iter_target.o
+| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
+
+DESCRIPTION
+===========
+ **bpftool iter pin** *OBJ* *PATH* [**map** *MAP*]
+	  A bpf iterator combines kernel iteration over
+	  particular kernel data (e.g., tasks, bpf_maps, etc.)
+	  with a bpf program called for each kernel data object
+	  (e.g., one task, one bpf_map, etc.). User space can
+	  *read* the kernel iterator output through the *read()* syscall.
+
+	  The *pin* command creates a bpf iterator from *OBJ*
+	  and pins it to *PATH*. *PATH* should be located in
+	  a *bpffs* mount. It must not contain a dot
+	  character ('.'), which is reserved for future extensions
+	  of *bpffs*.
+
+	  A map element bpf iterator requires an additional parameter
+	  *MAP* so the bpf program can iterate over the elements of
+	  that map. The user can have a bpf program run in the kernel
+	  for each map element, doing checking, filtering, aggregation,
+	  etc. without copying data to user space.
+
+	  The user can then *cat PATH* to see the bpf iterator output.
+
+ **bpftool iter help**
+ Print short help message.
+
+OPTIONS
+=======
+ .. include:: common_options.rst
+
+EXAMPLES
+========
+**# bpftool iter pin bpf_iter_netlink.o /sys/fs/bpf/my_netlink**
+
+::
+
+ Create a file-based bpf iterator from bpf_iter_netlink.o and pin it
+ to /sys/fs/bpf/my_netlink
+
+**# bpftool iter pin bpf_iter_hashmap.o /sys/fs/bpf/my_hashmap map id 20**
+
+::
+
+ Create a file-based bpf iterator from bpf_iter_hashmap.o and map with
+ id 20, and pin it to /sys/fs/bpf/my_hashmap
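
Once pinned, reading the iterator is an ordinary read of the pinned file; for the first example above:

    # cat /sys/fs/bpf/my_netlink
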
diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
new file mode 100644
index 000000000000..5f7db2a837cc
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -0,0 +1,108 @@
+================
+bpftool-link
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF links
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **link** *COMMAND*
+
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+
+	*COMMANDS* := { **show** | **list** | **pin** | **detach** | **help** }
+
+LINK COMMANDS
+=============
+
+|	**bpftool** **link** { **show** | **list** } [*LINK*]
+| **bpftool** **link pin** *LINK* *FILE*
+| **bpftool** **link detach** *LINK*
+| **bpftool** **link help**
+|
+| *LINK* := { **id** *LINK_ID* | **pinned** *FILE* }
+
+
+DESCRIPTION
+===========
+ **bpftool link { show | list }** [*LINK*]
+ Show information about active links. If *LINK* is
+ specified show information only about given link,
+ otherwise list all links currently active on the system.
+
+ Output will start with link ID followed by link type and
+ zero or more named attributes, some of which depend on type
+ of link.
+
+ Since Linux 5.8 bpftool is able to discover information about
+ processes that hold open file descriptors (FDs) against BPF
+ links. On such kernels bpftool will automatically emit this
+ information as well.
+
+ **bpftool link pin** *LINK* *FILE*
+ Pin link *LINK* as *FILE*.
+
+ Note: *FILE* must be located in *bpffs* mount. It must not
+ contain a dot character ('.'), which is reserved for future
+ extensions of *bpffs*.
+
+ **bpftool link detach** *LINK*
+ Force-detach link *LINK*. BPF link and its underlying BPF
+ program will stay valid, but they will be detached from the
+ respective BPF hook and BPF link will transition into
+ a defunct state until last open file descriptor for that
+ link is closed.
+
+ **bpftool link help**
+ Print short help message.
+
+OPTIONS
+=======
+ .. include:: common_options.rst
+
+ -f, --bpffs
+ When showing BPF links, show file names of pinned
+ links.
+
+ -n, --nomount
+ Do not automatically attempt to mount any virtual file system
+ (such as tracefs or BPF virtual file system) when necessary.
+
+EXAMPLES
+========
+**# bpftool link show**
+
+::
+
+ 10: cgroup prog 25
+ cgroup_id 614 attach_type egress
+ pids test_progs(223)
+
+**# bpftool --json --pretty link show**
+
+::
+
+ [{
+ "type": "cgroup",
+ "prog_id": 25,
+ "cgroup_id": 614,
+ "attach_type": "egress",
+ "pids": [{
+ "pid": 223,
+ "comm": "test_progs"
+ }
+ ]
+ }
+ ]
+
+|
+| **# bpftool link pin id 10 /sys/fs/bpf/link**
+| **# ls -l /sys/fs/bpf/**
+
+::
+
+ -rw------- 1 root root 0 Apr 23 21:39 link
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index cdeae8ae90ba..dade10cdf295 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -21,9 +21,10 @@ SYNOPSIS
MAP COMMANDS
=============
-| **bpftool** **map { show | list }** [*MAP*]
+| **bpftool** **map** { **show** | **list** } [*MAP*]
| **bpftool** **map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \
-| **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
+| **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] \
+| [**dev** *NAME*]
| **bpftool** **map dump** *MAP*
| **bpftool** **map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*]
| **bpftool** **map lookup** *MAP* [**key** *DATA*]
@@ -49,7 +50,7 @@ MAP COMMANDS
| | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
-| | **queue** | **stack** }
+| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** }
DESCRIPTION
===========
@@ -62,10 +63,27 @@ DESCRIPTION
Output will start with map ID followed by map type and
zero or more named attributes (depending on kernel version).
- **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
+ Since Linux 5.8 bpftool is able to discover information about
+ processes that hold open file descriptors (FDs) against BPF
+ maps. On such kernels bpftool will automatically emit this
+ information as well.
+
+ **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**dev** *NAME*]
Create a new map with given parameters and pin it to *bpffs*
as *FILE*.
+ *FLAGS* should be an integer which is the combination of
+ desired flags, e.g. 1024 for **BPF_F_MMAPABLE** (see bpf.h
+ UAPI header for existing flags).
+
+ To create maps of type array-of-maps or hash-of-maps, the
+ **inner_map** keyword must be used to pass an inner map. The
+ kernel needs it to collect metadata related to the inner maps
+ that the new map will work with.
+
+ Keyword **dev** expects a network interface name, and is used
+ to request hardware offload for the map.
+
**bpftool map dump** *MAP*
Dump all entries in a given *MAP*. In case of **name**,
*MAP* may match several maps which will all be dumped.
@@ -78,7 +96,7 @@ DESCRIPTION
exists; **noexist** update only if entry doesn't exist.
If the **hex** keyword is provided in front of the bytes
- sequence, the bytes are parsed as hexadeximal values, even if
+ sequence, the bytes are parsed as hexadecimal values, even if
no "0x" prefix is added. If the keyword is not provided, then
the bytes are parsed as decimal values, unless a "0x" prefix
(for hexadecimal) or a "0" prefix (for octal) is provided.
@@ -100,10 +118,10 @@ DESCRIPTION
extensions of *bpffs*.
**bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
- Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
+ Read events from a **BPF_MAP_TYPE_PERF_EVENT_ARRAY** map.
Install perf rings into a perf event array map and dump
- output of any bpf_perf_event_output() call in the kernel.
+ output of any **bpf_perf_event_output**\ () call in the kernel.
By default read the number of CPUs on the system and
install perf ring for each CPU in the corresponding index
in the array.
@@ -116,24 +134,24 @@ DESCRIPTION
receiving events if it installed its rings earlier.
**bpftool map peek** *MAP*
- Peek next **value** in the queue or stack.
+ Peek next value in the queue or stack.
**bpftool map push** *MAP* **value** *VALUE*
- Push **value** onto the stack.
+ Push *VALUE* onto the stack.
**bpftool map pop** *MAP*
- Pop and print **value** from the stack.
+ Pop and print value from the stack.
**bpftool map enqueue** *MAP* **value** *VALUE*
- Enqueue **value** into the queue.
+ Enqueue *VALUE* into the queue.
**bpftool map dequeue** *MAP*
- Dequeue and print **value** from the queue.
+ Dequeue and print value from the queue.
**bpftool map freeze** *MAP*
Freeze the map as read-only from user space. Entries from a
frozen map can not longer be updated or deleted with the
- **bpf\ ()** system call. This operation is not reversible,
+ **bpf**\ () system call. This operation is not reversible,
and the map remains immutable from user space until its
destruction. However, read and write permissions for BPF
programs to the map remain unchanged.
@@ -143,18 +161,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
+ .. include:: common_options.rst
-f, --bpffs
Show file names of pinned maps.
@@ -163,17 +170,15 @@ OPTIONS
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
-
EXAMPLES
========
**# bpftool map show**
+
::
10: hash name some_map flags 0x0
- key 4B value 8B max_entries 2048 memlock 167936B
+ key 4B value 8B max_entries 2048 memlock 167936B
+ pids systemd(1)
The following three commands are equivalent:
@@ -190,6 +195,7 @@ The following three commands are equivalent:
**# bpftool map dump id 10**
+
::
key: 00 01 02 03 value: 00 01 02 03 04 05 06 07
@@ -197,6 +203,7 @@ The following three commands are equivalent:
Found 2 elements
**# bpftool map getnext id 10 key 0 1 2 3**
+
::
key:
@@ -263,15 +270,3 @@ would be lost as soon as bpftool exits).
key: 00 00 00 00 value: 22 02 00 00
Found 1 element
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-btf**\ (8)
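
A sketch of the new **inner_map** keyword in action (sizes, names and paths illustrative; the value size of a map-of-maps holds the 4-byte inner map id):

    # bpftool map create /sys/fs/bpf/inner type hash key 4 value 4 \
            entries 64 name inner
    # bpftool map create /sys/fs/bpf/outer type hash_of_maps key 4 value 4 \
            entries 16 name outer inner_map pinned /sys/fs/bpf/inner
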
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index 8651b00b81ea..d8165d530937 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -20,7 +20,7 @@ SYNOPSIS
NET COMMANDS
============
-| **bpftool** **net { show | list }** [ **dev** *NAME* ]
+| **bpftool** **net** { **show** | **list** } [ **dev** *NAME* ]
| **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ]
| **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME*
| **bpftool** **net help**
@@ -75,22 +75,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
+ .. include:: common_options.rst
EXAMPLES
========
@@ -187,16 +172,3 @@ EXAMPLES
::
xdp:
-
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-map**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
index e252bd0bc434..e958ce91de72 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -20,7 +20,7 @@ SYNOPSIS
PERF COMMANDS
=============
-| **bpftool** **perf { show | list }**
+| **bpftool** **perf** { **show** | **list** }
| **bpftool** **perf help**
DESCRIPTION
@@ -40,22 +40,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
+ .. include:: common_options.rst
EXAMPLES
========
@@ -78,16 +63,3 @@ EXAMPLES
{"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
{"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
{"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]
-
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-map**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-net**\ (8),
- **bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 9f19404f470e..358c7309d419 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -21,11 +21,11 @@ SYNOPSIS
PROG COMMANDS
=============
-| **bpftool** **prog { show | list }** [*PROG*]
+| **bpftool** **prog** { **show** | **list** } [*PROG*]
| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}]
| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes** | **linum**}]
| **bpftool** **prog pin** *PROG* *FILE*
-| **bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*]
+| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*]
| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog tracelog**
@@ -41,15 +41,16 @@ PROG COMMANDS
| **cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** |
| **lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** |
| **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
-| **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
+| **cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** |
+| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
| **cgroup/getsockopt** | **cgroup/setsockopt** |
-| **struct_ops** | **fentry** | **fexit** | **freplace**
+| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
| }
| *ATTACH_TYPE* := {
| **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
| }
-| *METRIC* := {
+| *METRICs* := {
| **cycles** | **instructions** | **l1d_loads** | **llc_misses**
| }
@@ -74,6 +75,11 @@ DESCRIPTION
program run. Activation or deactivation of the feature is
performed via the **kernel.bpf_stats_enabled** sysctl knob.
+ Since Linux 5.8 bpftool is able to discover information about
+ processes that hold open file descriptors (FDs) against BPF
+ programs. On such kernels bpftool will automatically emit this
+ information as well.
+
**bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }]
Dump eBPF instructions of the programs from the kernel. By
default, eBPF will be disassembled and printed to standard
@@ -155,7 +161,7 @@ DESCRIPTION
**bpftool prog tracelog**
Dump the trace pipe of the system to the console (stdout).
Hit <Ctrl+C> to stop printing. BPF programs can write to this
- trace pipe at runtime with the **bpf_trace_printk()** helper.
+ trace pipe at runtime with the **bpf_trace_printk**\ () helper.
This should be used only for debugging purposes. For
streaming data from BPF programs to user space, one can use
perf events (see also **bpftool-map**\ (8)).
@@ -195,27 +201,16 @@ DESCRIPTION
**bpftool prog profile** *PROG* [**duration** *DURATION*] *METRICs*
Profile *METRICs* for bpf program *PROG* for *DURATION*
- seconds or until user hits Ctrl-C. *DURATION* is optional.
+ seconds or until user hits <Ctrl+C>. *DURATION* is optional.
If *DURATION* is not specified, the profiling will run up to
- UINT_MAX seconds.
+ **UINT_MAX** seconds.
**bpftool prog help**
Print short help message.
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
+ .. include:: common_options.rst
-f, --bpffs
When showing BPF programs, show file names of pinned
@@ -228,11 +223,6 @@ OPTIONS
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
- -d, --debug
- Print all logs available, even debug-level information. This
- includes logs from libbpf as well as from the verifier, when
- attempting to load programs.
-
EXAMPLES
========
**# bpftool prog show**
@@ -242,6 +232,7 @@ EXAMPLES
10: xdp name some_prog tag 005a3d2123620c8b gpl run_time_ns 81632 run_cnt 10
loaded_at 2017-09-29T20:11:00+0000 uid 0
xlated 528B jited 370B memlock 4096B map_ids 10
+ pids systemd(1)
**# bpftool --json --pretty prog show**
@@ -261,13 +252,18 @@ EXAMPLES
"bytes_jited": 370,
"bytes_memlock": 4096,
"map_ids": [10
+ ],
+ "pids": [{
+ "pid": 1,
+ "comm": "systemd"
+ }
]
}
]
|
| **# bpftool prog dump xlated id 10 file /tmp/t**
-| **# ls -l /tmp/t**
+| **$ ls -l /tmp/t**
::
@@ -325,19 +321,8 @@ EXAMPLES
| **# bpftool prog profile id 337 duration 10 cycles instructions llc_misses**
::
+
51397 run_cnt
40176203 cycles (83.05%)
42518139 instructions # 1.06 insns per cycle (83.39%)
123 llc_misses # 2.89 LLC misses per million insns (83.15%)
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-map**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
index f045cc89dd6d..506e70ee78e9 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
@@ -60,23 +60,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available, even debug-level information. This
- includes logs from libbpf as well as from the verifier, when
- attempting to load programs.
+ .. include:: common_options.rst
EXAMPLES
========
@@ -98,19 +82,3 @@ EXAMPLES
::
Registered tcp_congestion_ops cubic id 110
-
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-map**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-btf**\ (8)
- **bpftool-gen**\ (8)
-
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 34239fda69ed..e7d949334961 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -46,18 +46,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
+ .. include:: common_options.rst
-m, --mapcompat
Allow loading maps with unknown map definitions.
@@ -65,21 +54,3 @@ OPTIONS
-n, --nomount
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
-
- -d, --debug
- Print all logs available, even debug-level information. This
- includes logs from libbpf as well as from the verifier, when
- attempting to load programs.
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool-prog**\ (8),
- **bpftool-map**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-gen**\ (8),
diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst
new file mode 100644
index 000000000000..05d06c74dcaa
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/common_options.rst
@@ -0,0 +1,22 @@
+-h, --help
+ Print short help message (similar to **bpftool help**).
+
+-V, --version
+ Print version number (similar to **bpftool version**), and optional
+ features that were included when bpftool was compiled. Optional
+ features include linking against libbfd to provide the disassembler
+ for JIT-ted programs (**bpftool prog dump jited**) and usage of BPF
+ skeletons (some features like **bpftool prog profile** or showing
+ pids associated to BPF objects may rely on it).
+
+-j, --json
+ Generate JSON output. For commands that cannot produce JSON, this
+ option has no effect.
+
+-p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+-d, --debug
+ Print all logs available, even debug-level information. This includes
+ logs from libbpf as well as from the verifier, when attempting to
+ load programs.
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index f584d1fdfc64..f60e6ad3a1df 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -25,7 +25,7 @@ endif
LIBBPF = $(LIBBPF_PATH)libbpf.a
-BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
+BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
$(LIBBPF): FORCE
$(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT))
@@ -40,8 +40,9 @@ bash_compdir ?= /usr/share/bash-completion/completions
CFLAGS += -O2
CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
-CFLAGS += $(filter-out -Wswitch-enum,$(EXTRA_WARNINGS))
+CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS))
CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
+ -I$(if $(OUTPUT),$(OUTPUT),.) \
-I$(srctree)/kernel/bpf/ \
-I$(srctree)/tools/include \
-I$(srctree)/tools/include/uapi \
@@ -55,16 +56,16 @@ ifneq ($(EXTRA_LDFLAGS),)
LDFLAGS += $(EXTRA_LDFLAGS)
endif
-LIBS = $(LIBBPF) -lelf -lz
-
INSTALL ?= install
RM ?= rm -f
CLANG ?= clang
+LLVM_STRIP ?= llvm-strip
FEATURE_USER = .bpftool
-FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib \
- clang-bpf-global-var
-FEATURE_DISPLAY = libbfd disassembler-four-args zlib clang-bpf-global-var
+FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \
+ clang-bpf-co-re
+FEATURE_DISPLAY = libbfd disassembler-four-args zlib libcap \
+ clang-bpf-co-re
check_feat := 1
NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
@@ -90,6 +91,12 @@ ifeq ($(feature-reallocarray), 0)
CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
endif
+LIBS = $(LIBBPF) -lelf -lz
+ifeq ($(feature-libcap), 1)
+CFLAGS += -DUSE_LIBCAP
+LIBS += -lcap
+endif
+
include $(wildcard $(OUTPUT)*.d)
all: $(OUTPUT)bpftool
@@ -111,48 +118,72 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT
SRCS += $(BFD_SRCS)
endif
+BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstrap)
+
+BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o)
OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
-_OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o
-ifeq ($(feature-clang-bpf-global-var),1)
- __OBJS = $(OBJS)
-else
- __OBJS = $(_OBJS)
-endif
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
+ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+ ../../../vmlinux \
+ /sys/kernel/btf/vmlinux \
+ /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
+ifneq ($(VMLINUX_BTF)$(VMLINUX_H),)
+ifeq ($(feature-clang-bpf-co-re),1)
-$(OUTPUT)_prog.o: prog.c
- $(QUIET_CC)$(COMPILE.c) -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $<
+BUILD_BPF_SKELS := 1
-$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS)
+$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP)
+ifeq ($(VMLINUX_H),)
+ $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) btf dump file $< format c > $@
+else
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
-skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF)
+$(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF)
$(QUIET_CLANG)$(CLANG) \
+ -I$(if $(OUTPUT),$(OUTPUT),.) \
-I$(srctree)/tools/include/uapi/ \
- -I$(LIBBPF_PATH) -I$(srctree)/tools/lib \
- -g -O2 -target bpf -c $< -o $@
+ -I$(LIBBPF_PATH) \
+ -I$(srctree)/tools/lib \
+ -g -O2 -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@
+
+$(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP)
+ $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@
-profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o
- $(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@
+$(OUTPUT)prog.o: $(OUTPUT)profiler.skel.h
-$(OUTPUT)prog.o: prog.c profiler.skel.h
- $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
+$(OUTPUT)pids.o: $(OUTPUT)pid_iter.skel.h
+
+endif
+endif
+
+CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS)
$(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
- $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
+ $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
$(OUTPUT)feature.o: | zdep
-$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS)
+$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) $(LIBS)
+
+$(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
$(OUTPUT)%.o: %.c
- $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
+ $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
+
+feature-detect-clean:
+ $(call QUIET_CLEAN, feature-detect)
+ $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
-clean: $(LIBBPF)-clean
+clean: $(LIBBPF)-clean feature-detect-clean
$(call QUIET_CLEAN, bpftool)
$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
- $(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o
+ $(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
$(Q)$(RM) -r -- $(OUTPUT)libbpf/
$(call QUIET_CLEAN, core-gen)
$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
@@ -187,6 +218,7 @@ FORCE:
zdep:
@if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi
+.SECONDARY:
.PHONY: all FORCE clean install uninstall zdep
.PHONY: doc doc-clean doc-install doc-uninstall
.DEFAULT_GOAL := all
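
The two-stage build above first links a bootstrap bpftool without skeleton support, uses it to dump vmlinux.h and to generate one .skel.h header per .bpf.o, and only then compiles the final binary against those headers. A sketch of how such a generated skeleton is consumed, assuming profiler.bpf.o as input (generated names follow bpftool's <object>__ prefix convention; run_profiler() is a hypothetical caller):

    #include "profiler.skel.h" /* produced by: bpftool gen skeleton profiler.bpf.o */

    static int run_profiler(void)
    {
            struct profiler_bpf *skel;
            int err;

            skel = profiler_bpf__open_and_load(); /* open, relocate and load */
            if (!skel)
                    return -1;

            err = profiler_bpf__attach(skel);     /* attach all programs */
            if (err)
                    goto cleanup;

            /* ... read skel->bss, skel->maps and skel->progs here ... */

    cleanup:
            profiler_bpf__destroy(skel);          /* detach, unload, free */
            return err;
    }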
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 45ee99b159e2..3f1da30c4da6 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -98,6 +98,12 @@ _bpftool_get_btf_ids()
command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
}
+_bpftool_get_link_ids()
+{
+ COMPREPLY+=( $( compgen -W "$( bpftool -jp link 2>&1 | \
+ command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
+}
+
_bpftool_get_obj_map_names()
{
local obj
@@ -466,12 +472,14 @@ _bpftool()
lwt_seg6local sockops sk_skb sk_msg \
lirc_mode2 cgroup/bind4 cgroup/bind6 \
cgroup/connect4 cgroup/connect6 \
+ cgroup/getpeername4 cgroup/getpeername6 \
+ cgroup/getsockname4 cgroup/getsockname6 \
cgroup/sendmsg4 cgroup/sendmsg6 \
cgroup/recvmsg4 cgroup/recvmsg6 \
cgroup/post_bind4 cgroup/post_bind6 \
cgroup/sysctl cgroup/getsockopt \
cgroup/setsockopt struct_ops \
- fentry fexit freplace" -- \
+ fentry fexit freplace sk_lookup" -- \
"$cur" ) )
return 0
;;
@@ -604,6 +612,35 @@ _bpftool()
;;
esac
;;
+ iter)
+ case $command in
+ pin)
+ case $prev in
+ $command)
+ _filedir
+ ;;
+ id)
+ _bpftool_get_map_ids
+ ;;
+ name)
+ _bpftool_get_map_names
+ ;;
+ pinned)
+ _filedir
+ ;;
+ *)
+ _bpftool_one_of_list $MAP_TYPE
+ ;;
+ esac
+ return 0
+ ;;
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'pin help' \
+ -- "$cur" ) )
+ ;;
+ esac
+ ;;
map)
local MAP_TYPE='id pinned name'
case $command in
@@ -667,13 +704,31 @@ _bpftool()
lru_percpu_hash lpm_trie array_of_maps \
hash_of_maps devmap devmap_hash sockmap cpumap \
xskmap sockhash cgroup_storage reuseport_sockarray \
- percpu_cgroup_storage queue stack' -- \
+ percpu_cgroup_storage queue stack sk_storage \
+ struct_ops inode_storage' -- \
"$cur" ) )
return 0
;;
- key|value|flags|name|entries)
+ key|value|flags|entries)
+ return 0
+ ;;
+ inner_map)
+ COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
return 0
;;
+ id)
+ _bpftool_get_map_ids
+ ;;
+ name)
+ case $pprev in
+ inner_map)
+ _bpftool_get_map_names
+ ;;
+ *)
+ return 0
+ ;;
+ esac
+ ;;
*)
_bpftool_once_attr 'type'
_bpftool_once_attr 'key'
@@ -681,6 +736,9 @@ _bpftool()
_bpftool_once_attr 'entries'
_bpftool_once_attr 'name'
_bpftool_once_attr 'flags'
+ if _bpftool_search_list 'array_of_maps' 'hash_of_maps'; then
+ _bpftool_once_attr 'inner_map'
+ fi
_bpftool_once_attr 'dev'
return 0
;;
@@ -947,9 +1005,10 @@ _bpftool()
;;
attach|detach)
local ATTACH_TYPES='ingress egress sock_create sock_ops \
- device bind4 bind6 post_bind4 post_bind6 connect4 \
- connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl \
- getsockopt setsockopt'
+ device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
+ getpeername4 getpeername6 getsockname4 getsockname6 \
+ sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
+ setsockopt'
local ATTACH_FLAGS='multi override'
local PROG_TYPE='id pinned tag name'
case $prev in
@@ -958,9 +1017,9 @@ _bpftool()
return 0
;;
ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
- post_bind4|post_bind6|connect4|connect6|sendmsg4|\
- sendmsg6|recvmsg4|recvmsg6|sysctl|getsockopt|\
- setsockopt)
+ post_bind4|post_bind6|connect4|connect6|getpeername4|\
+ getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\
+ recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt)
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
"$cur" ) )
return 0
@@ -1073,7 +1132,7 @@ _bpftool()
COMPREPLY+=( $( compgen -W 'macros' -- "$cur" ) )
fi
_bpftool_one_of_list 'kernel dev'
- _bpftool_once_attr 'full'
+ _bpftool_once_attr 'full unprivileged'
return 0
;;
*)
@@ -1082,6 +1141,39 @@ _bpftool()
;;
esac
;;
+ link)
+ case $command in
+ show|list|pin|detach)
+ case $prev in
+ id)
+ _bpftool_get_link_ids
+ return 0
+ ;;
+ esac
+ ;;
+ esac
+
+ local LINK_TYPE='id pinned'
+ case $command in
+ show|list)
+ [[ $prev != "$command" ]] && return 0
+ COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ pin|detach)
+ if [[ $prev == "$command" ]]; then
+ COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) )
+ else
+ _filedir
+ fi
+ return 0
+ ;;
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'help pin show list' -- "$cur" ) )
+ ;;
+ esac
+ ;;
esac
} &&
complete -F _bpftool bpftool
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index bcaf55b59498..8ab142ff5eac 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -15,7 +15,6 @@
#include <linux/hashtable.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include <unistd.h>
#include "json_writer.h"
#include "main.h"
@@ -423,54 +422,6 @@ done:
return err;
}
-static struct btf *btf__parse_raw(const char *file)
-{
- struct btf *btf;
- struct stat st;
- __u8 *buf;
- FILE *f;
-
- if (stat(file, &st))
- return NULL;
-
- f = fopen(file, "rb");
- if (!f)
- return NULL;
-
- buf = malloc(st.st_size);
- if (!buf) {
- btf = ERR_PTR(-ENOMEM);
- goto exit_close;
- }
-
- if ((size_t) st.st_size != fread(buf, 1, st.st_size, f)) {
- btf = ERR_PTR(-EINVAL);
- goto exit_free;
- }
-
- btf = btf__new(buf, st.st_size);
-
-exit_free:
- free(buf);
-exit_close:
- fclose(f);
- return btf;
-}
-
-static bool is_btf_raw(const char *file)
-{
- __u16 magic = 0;
- int fd, nb_read;
-
- fd = open(file, O_RDONLY);
- if (fd < 0)
- return false;
-
- nb_read = read(fd, &magic, sizeof(magic));
- close(fd);
- return nb_read == sizeof(magic) && magic == BTF_MAGIC;
-}
-
static int do_dump(int argc, char **argv)
{
struct btf *btf = NULL;
@@ -548,13 +499,9 @@ static int do_dump(int argc, char **argv)
}
NEXT_ARG();
} else if (is_prefix(src, "file")) {
- if (is_btf_raw(*argv))
- btf = btf__parse_raw(*argv);
- else
- btf = btf__parse_elf(*argv, NULL);
-
+ btf = btf__parse(*argv, NULL);
if (IS_ERR(btf)) {
- err = PTR_ERR(btf);
+ err = -PTR_ERR(btf);
btf = NULL;
p_err("failed to load BTF from %s: %s",
*argv, strerror(err));
@@ -597,7 +544,7 @@ static int do_dump(int argc, char **argv)
goto done;
}
if (!btf) {
- err = ENOENT;
+ err = -ENOENT;
p_err("can't find btf with ID (%u)", btf_id);
goto done;
}
@@ -810,6 +757,7 @@ show_btf_plain(struct bpf_btf_info *info, int fd,
printf("%s%u", n++ == 0 ? " map_ids " : ",",
obj->obj_id);
}
+ emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
printf("\n");
}
@@ -842,6 +790,9 @@ show_btf_json(struct bpf_btf_info *info, int fd,
jsonw_uint(json_wtr, obj->obj_id);
}
jsonw_end_array(json_wtr); /* map_ids */
+
+ emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */
+
jsonw_end_object(json_wtr); /* btf object */
}
@@ -894,6 +845,7 @@ static int do_show(int argc, char **argv)
close(fd);
return err;
}
+ build_obj_refs_table(&refs_table, BPF_OBJ_BTF);
if (fd >= 0) {
err = show_btf(fd, &btf_prog_table, &btf_map_table);
@@ -940,6 +892,7 @@ static int do_show(int argc, char **argv)
exit_free:
delete_btf_table(&btf_prog_table);
delete_btf_table(&btf_map_table);
+ delete_obj_refs_table(&refs_table);
return err;
}
@@ -952,9 +905,9 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s btf { show | list } [id BTF_ID]\n"
- " %s btf dump BTF_SRC [format FORMAT]\n"
- " %s btf help\n"
+ "Usage: %1$s %2$s { show | list } [id BTF_ID]\n"
+ " %1$s %2$s dump BTF_SRC [format FORMAT]\n"
+ " %1$s %2$s help\n"
"\n"
" BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n"
" FORMAT := { raw | c }\n"
@@ -962,7 +915,7 @@ static int do_help(int argc, char **argv)
" " HELP_SPEC_PROGRAM "\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name, bin_name, bin_name);
+ bin_name, "btf");
return 0;
}
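
The deleted btf__parse_raw()/is_btf_raw() pair is superseded by libbpf's btf__parse(), which inspects the file's magic bytes itself and handles both raw BTF blobs and ELF containers. A minimal caller sketch, following the same IS_ERR() convention the hunk above uses (load_btf() is a hypothetical wrapper):

    #include <linux/err.h> /* IS_ERR()/PTR_ERR() from tools/include */
    #include <bpf/btf.h>

    static int load_btf(const char *path)
    {
            struct btf *btf;

            btf = btf__parse(path, NULL); /* raw or ELF, detected automatically */
            if (IS_ERR(btf))
                    return PTR_ERR(btf);  /* negative errno-style code */

            /* ... walk types with btf__get_nr_types()/btf__type_by_id() ... */

            btf__free(btf);
            return 0;
    }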
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 497807bec675..0e9310727281 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -67,7 +67,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
if (!info->btf_id || !info->nr_func_info ||
btf__get_from_id(info->btf_id, &prog_btf))
goto print;
- finfo = (struct bpf_func_info *)info->func_info;
+ finfo = u64_to_ptr(info->func_info);
func_type = btf__type_by_id(prog_btf, finfo->type_id);
if (!func_type || !btf_is_func(func_type))
goto print;
@@ -271,8 +271,8 @@ static void btf_int128_print(json_writer_t *jw, const void *data,
}
}
-static void btf_int128_shift(__u64 *print_num, u16 left_shift_bits,
- u16 right_shift_bits)
+static void btf_int128_shift(__u64 *print_num, __u16 left_shift_bits,
+ __u16 right_shift_bits)
{
__u64 upper_num, lower_num;
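
The u64_to_ptr() helper replaces the bare cast because the kernel's info structs (struct bpf_prog_info and friends) deliberately carry user pointers as __u64 fields, keeping the ABI identical for 32-bit and 64-bit userspace. The helper is declared in main.h as part of this series and amounts to:

    /* Convert a __u64-encoded pointer from a kernel info struct back to a
     * usable pointer; the intermediate unsigned long cast keeps 32-bit
     * builds free of pointer-size warnings.
     */
    static inline void *u64_to_ptr(__u64 ptr)
    {
            return (void *)(unsigned long)ptr;
    }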
diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c
index 3e21f994f262..1951219a9af7 100644
--- a/tools/bpf/bpftool/cfg.c
+++ b/tools/bpf/bpftool/cfg.c
@@ -157,7 +157,7 @@ static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur,
return false;
}
-static bool is_jmp_insn(u8 code)
+static bool is_jmp_insn(__u8 code)
{
return BPF_CLASS(code) == BPF_JMP || BPF_CLASS(code) == BPF_JMP32;
}
@@ -176,7 +176,7 @@ static bool func_partition_bb_head(struct func_node *func)
for (; cur <= end; cur++) {
if (is_jmp_insn(cur->code)) {
- u8 opcode = BPF_OP(cur->code);
+ __u8 opcode = BPF_OP(cur->code);
if (opcode == BPF_EXIT || opcode == BPF_CALL)
continue;
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 62c6a1d7cd18..d901cc1b904a 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -25,48 +25,27 @@
" ATTACH_TYPE := { ingress | egress | sock_create |\n" \
" sock_ops | device | bind4 | bind6 |\n" \
" post_bind4 | post_bind6 | connect4 |\n" \
- " connect6 | sendmsg4 | sendmsg6 |\n" \
- " recvmsg4 | recvmsg6 | sysctl |\n" \
- " getsockopt | setsockopt }"
+ " connect6 | getpeername4 | getpeername6 |\n" \
+ " getsockname4 | getsockname6 | sendmsg4 |\n" \
+ " sendmsg6 | recvmsg4 | recvmsg6 |\n" \
+ " sysctl | getsockopt | setsockopt }"
static unsigned int query_flags;
-static const char * const attach_type_strings[] = {
- [BPF_CGROUP_INET_INGRESS] = "ingress",
- [BPF_CGROUP_INET_EGRESS] = "egress",
- [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
- [BPF_CGROUP_SOCK_OPS] = "sock_ops",
- [BPF_CGROUP_DEVICE] = "device",
- [BPF_CGROUP_INET4_BIND] = "bind4",
- [BPF_CGROUP_INET6_BIND] = "bind6",
- [BPF_CGROUP_INET4_CONNECT] = "connect4",
- [BPF_CGROUP_INET6_CONNECT] = "connect6",
- [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
- [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
- [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
- [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
- [BPF_CGROUP_SYSCTL] = "sysctl",
- [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
- [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
- [BPF_CGROUP_GETSOCKOPT] = "getsockopt",
- [BPF_CGROUP_SETSOCKOPT] = "setsockopt",
- [__MAX_BPF_ATTACH_TYPE] = NULL,
-};
-
static enum bpf_attach_type parse_attach_type(const char *str)
{
enum bpf_attach_type type;
for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
- if (attach_type_strings[type] &&
- is_prefix(str, attach_type_strings[type]))
+ if (attach_type_name[type] &&
+ is_prefix(str, attach_type_name[type]))
return type;
}
return __MAX_BPF_ATTACH_TYPE;
}
-static int show_bpf_prog(int id, const char *attach_type_str,
+static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
const char *attach_flags_str,
int level)
{
@@ -86,18 +65,22 @@ static int show_bpf_prog(int id, const char *attach_type_str,
if (json_output) {
jsonw_start_object(json_wtr);
jsonw_uint_field(json_wtr, "id", info.id);
- jsonw_string_field(json_wtr, "attach_type",
- attach_type_str);
+ if (attach_type < ARRAY_SIZE(attach_type_name))
+ jsonw_string_field(json_wtr, "attach_type",
+ attach_type_name[attach_type]);
+ else
+ jsonw_uint_field(json_wtr, "attach_type", attach_type);
jsonw_string_field(json_wtr, "attach_flags",
attach_flags_str);
jsonw_string_field(json_wtr, "name", info.name);
jsonw_end_object(json_wtr);
} else {
- printf("%s%-8u %-15s %-15s %-15s\n", level ? " " : "",
- info.id,
- attach_type_str,
- attach_flags_str,
- info.name);
+ printf("%s%-8u ", level ? " " : "", info.id);
+ if (attach_type < ARRAY_SIZE(attach_type_name))
+ printf("%-15s", attach_type_name[attach_type]);
+ else
+ printf("type %-10u", attach_type);
+ printf(" %-15s %-15s\n", attach_flags_str, info.name);
}
close(prog_fd);
@@ -171,7 +154,7 @@ static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
}
for (iter = 0; iter < prog_cnt; iter++)
- show_bpf_prog(prog_ids[iter], attach_type_strings[type],
+ show_bpf_prog(prog_ids[iter], type,
attach_flags_str, level);
return 0;
@@ -508,20 +491,18 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s { show | list } CGROUP [**effective**]\n"
- " %s %s tree [CGROUP_ROOT] [**effective**]\n"
- " %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
- " %s %s detach CGROUP ATTACH_TYPE PROG\n"
- " %s %s help\n"
+ "Usage: %1$s %2$s { show | list } CGROUP [**effective**]\n"
+ " %1$s %2$s tree [CGROUP_ROOT] [**effective**]\n"
+ " %1$s %2$s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
+ " %1$s %2$s detach CGROUP ATTACH_TYPE PROG\n"
+ " %1$s %2$s help\n"
"\n"
HELP_SPEC_ATTACH_TYPES "\n"
" " HELP_SPEC_ATTACH_FLAGS "\n"
" " HELP_SPEC_PROGRAM "\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2]);
+ bin_name, argv[-2]);
return 0;
}
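
The reworked do_help() relies on POSIX numbered argument conversions: %1$s and %2$s re-read the first and second argument on every line, so a single bin_name/argv[-2] pair now covers the whole usage string. A self-contained demonstration:

    #include <stdio.h>

    int main(void)
    {
            const char *bin_name = "bpftool", *object = "cgroup";

            /* Each %1$s/%2$s re-reads the same two arguments, so the
             * argument list no longer repeats bin_name once per line. */
            fprintf(stderr,
                    "Usage: %1$s %2$s { show | list } CGROUP\n"
                    "       %1$s %2$s help\n",
                    bin_name, object);
            return 0;
    }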
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index f2223dbdfb0a..65303664417e 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -1,10 +1,11 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
+#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
-#include <fts.h>
+#include <ftw.h>
#include <libgen.h>
#include <mntent.h>
#include <stdbool.h>
@@ -29,6 +30,44 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
+const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
+ [BPF_CGROUP_INET_INGRESS] = "ingress",
+ [BPF_CGROUP_INET_EGRESS] = "egress",
+ [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
+ [BPF_CGROUP_INET_SOCK_RELEASE] = "sock_release",
+ [BPF_CGROUP_SOCK_OPS] = "sock_ops",
+ [BPF_CGROUP_DEVICE] = "device",
+ [BPF_CGROUP_INET4_BIND] = "bind4",
+ [BPF_CGROUP_INET6_BIND] = "bind6",
+ [BPF_CGROUP_INET4_CONNECT] = "connect4",
+ [BPF_CGROUP_INET6_CONNECT] = "connect6",
+ [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
+ [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
+ [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4",
+ [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6",
+ [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4",
+ [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6",
+ [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
+ [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
+ [BPF_CGROUP_SYSCTL] = "sysctl",
+ [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
+ [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
+ [BPF_CGROUP_GETSOCKOPT] = "getsockopt",
+ [BPF_CGROUP_SETSOCKOPT] = "setsockopt",
+
+ [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
+ [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
+ [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
+ [BPF_LIRC_MODE2] = "lirc_mode2",
+ [BPF_FLOW_DISSECTOR] = "flow_dissector",
+ [BPF_TRACE_RAW_TP] = "raw_tp",
+ [BPF_TRACE_FENTRY] = "fentry",
+ [BPF_TRACE_FEXIT] = "fexit",
+ [BPF_MODIFY_RETURN] = "mod_ret",
+ [BPF_LSM_MAC] = "lsm_mac",
+ [BPF_SK_LOOKUP] = "sk_lookup",
+};
+
void p_err(const char *fmt, ...)
{
va_list ap;
@@ -123,24 +162,35 @@ int mount_tracefs(const char *target)
return err;
}
-int open_obj_pinned(char *path, bool quiet)
+int open_obj_pinned(const char *path, bool quiet)
{
- int fd;
+ char *pname;
+ int fd = -1;
+
+ pname = strdup(path);
+ if (!pname) {
+ if (!quiet)
+ p_err("mem alloc failed");
+ goto out_ret;
+ }
- fd = bpf_obj_get(path);
+ fd = bpf_obj_get(pname);
if (fd < 0) {
if (!quiet)
- p_err("bpf obj get (%s): %s", path,
- errno == EACCES && !is_bpffs(dirname(path)) ?
+ p_err("bpf obj get (%s): %s", pname,
+ errno == EACCES && !is_bpffs(dirname(pname)) ?
"directory not in bpf file system (bpffs)" :
strerror(errno));
- return -1;
+ goto out_free;
}
+out_free:
+ free(pname);
+out_ret:
return fd;
}
-int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
+int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type)
{
enum bpf_obj_type type;
int fd;
@@ -262,6 +312,8 @@ int get_fd_type(int fd)
return BPF_OBJ_MAP;
else if (strstr(buf, "bpf-prog"))
return BPF_OBJ_PROG;
+ else if (strstr(buf, "bpf-link"))
+ return BPF_OBJ_LINK;
return BPF_OBJ_UNKNOWN;
}
@@ -328,71 +380,82 @@ void print_hex_data_json(uint8_t *data, size_t len)
jsonw_end_array(json_wtr);
}
+/* extra params for nftw cb */
+static struct pinned_obj_table *build_fn_table;
+static enum bpf_obj_type build_fn_type;
+
+static int do_build_table_cb(const char *fpath, const struct stat *sb,
+ int typeflag, struct FTW *ftwbuf)
+{
+ struct bpf_prog_info pinned_info;
+ __u32 len = sizeof(pinned_info);
+ struct pinned_obj *obj_node;
+ enum bpf_obj_type objtype;
+ int fd, err = 0;
+
+ if (typeflag != FTW_F)
+ goto out_ret;
+
+ fd = open_obj_pinned(fpath, true);
+ if (fd < 0)
+ goto out_ret;
+
+ objtype = get_fd_type(fd);
+ if (objtype != build_fn_type)
+ goto out_close;
+
+ memset(&pinned_info, 0, sizeof(pinned_info));
+ if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len))
+ goto out_close;
+
+ obj_node = calloc(1, sizeof(*obj_node));
+ if (!obj_node) {
+ err = -1;
+ goto out_close;
+ }
+
+ obj_node->id = pinned_info.id;
+ obj_node->path = strdup(fpath);
+ if (!obj_node->path) {
+ err = -1;
+ free(obj_node);
+ goto out_close;
+ }
+
+ hash_add(build_fn_table->table, &obj_node->hash, obj_node->id);
+out_close:
+ close(fd);
+out_ret:
+ return err;
+}
+
int build_pinned_obj_table(struct pinned_obj_table *tab,
enum bpf_obj_type type)
{
- struct bpf_prog_info pinned_info = {};
- struct pinned_obj *obj_node = NULL;
- __u32 len = sizeof(pinned_info);
struct mntent *mntent = NULL;
- enum bpf_obj_type objtype;
FILE *mntfile = NULL;
- FTSENT *ftse = NULL;
- FTS *fts = NULL;
- int fd, err;
+ int flags = FTW_PHYS;
+ int nopenfd = 16;
+ int err = 0;
mntfile = setmntent("/proc/mounts", "r");
if (!mntfile)
return -1;
+ build_fn_table = tab;
+ build_fn_type = type;
+
while ((mntent = getmntent(mntfile))) {
- char *path[] = { mntent->mnt_dir, NULL };
+ char *path = mntent->mnt_dir;
if (strncmp(mntent->mnt_type, "bpf", 3) != 0)
continue;
-
- fts = fts_open(path, 0, NULL);
- if (!fts)
- continue;
-
- while ((ftse = fts_read(fts))) {
- if (!(ftse->fts_info & FTS_F))
- continue;
- fd = open_obj_pinned(ftse->fts_path, true);
- if (fd < 0)
- continue;
-
- objtype = get_fd_type(fd);
- if (objtype != type) {
- close(fd);
- continue;
- }
- memset(&pinned_info, 0, sizeof(pinned_info));
- err = bpf_obj_get_info_by_fd(fd, &pinned_info, &len);
- if (err) {
- close(fd);
- continue;
- }
-
- obj_node = malloc(sizeof(*obj_node));
- if (!obj_node) {
- close(fd);
- fts_close(fts);
- fclose(mntfile);
- return -1;
- }
-
- memset(obj_node, 0, sizeof(*obj_node));
- obj_node->id = pinned_info.id;
- obj_node->path = strdup(ftse->fts_path);
- hash_add(tab->table, &obj_node->hash, obj_node->id);
-
- close(fd);
- }
- fts_close(fts);
+ err = nftw(path, do_build_table_cb, nopenfd, flags);
+ if (err)
+ break;
}
fclose(mntfile);
- return 0;
+ return err;
}
void delete_pinned_obj_table(struct pinned_obj_table *tab)
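
The fts(3) walker is replaced with nftw(3), which drives the whole traversal from one callback and aborts as soon as that callback returns non-zero; because the callback signature has no private-data argument, the table pointer and object type are handed over through the file-scope statics above. A standalone walker under the same FTW_PHYS (do not follow symlinks) flag, with /sys/fs/bpf as an illustrative start directory:

    #define _GNU_SOURCE
    #include <ftw.h>
    #include <stdio.h>
    #include <sys/stat.h>

    static int visit(const char *fpath, const struct stat *sb,
                     int typeflag, struct FTW *ftwbuf)
    {
            if (typeflag == FTW_F) /* regular files only, like the cb above */
                    printf("%s (%lld bytes)\n", fpath, (long long)sb->st_size);
            return 0;              /* non-zero would stop the walk early */
    }

    int main(void)
    {
            /* up to 16 open fds during the walk, symlinks not followed */
            return nftw("/sys/fs/bpf", visit, 16, FTW_PHYS);
    }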
@@ -579,3 +642,311 @@ print_all_levels(__maybe_unused enum libbpf_print_level level,
{
return vfprintf(stderr, format, args);
}
+
+static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
+{
+ unsigned int id = 0;
+ int fd, nb_fds = 0;
+ void *tmp;
+ int err;
+
+ while (true) {
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+
+ err = bpf_prog_get_next_id(id, &id);
+ if (err) {
+ if (errno != ENOENT) {
+ p_err("%s", strerror(errno));
+ goto err_close_fds;
+ }
+ return nb_fds;
+ }
+
+ fd = bpf_prog_get_fd_by_id(id);
+ if (fd < 0) {
+ p_err("can't get prog by id (%u): %s",
+ id, strerror(errno));
+ goto err_close_fds;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ p_err("can't get prog info (%u): %s",
+ id, strerror(errno));
+ goto err_close_fd;
+ }
+
+ if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
+ (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
+ close(fd);
+ continue;
+ }
+
+ if (nb_fds > 0) {
+ tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
+ if (!tmp) {
+ p_err("failed to realloc");
+ goto err_close_fd;
+ }
+ *fds = tmp;
+ }
+ (*fds)[nb_fds++] = fd;
+ }
+
+err_close_fd:
+ close(fd);
+err_close_fds:
+ while (--nb_fds >= 0)
+ close((*fds)[nb_fds]);
+ return -1;
+}
+
+int prog_parse_fds(int *argc, char ***argv, int **fds)
+{
+ if (is_prefix(**argv, "id")) {
+ unsigned int id;
+ char *endptr;
+
+ NEXT_ARGP();
+
+ id = strtoul(**argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as ID", **argv);
+ return -1;
+ }
+ NEXT_ARGP();
+
+ (*fds)[0] = bpf_prog_get_fd_by_id(id);
+ if ((*fds)[0] < 0) {
+ p_err("get by id (%u): %s", id, strerror(errno));
+ return -1;
+ }
+ return 1;
+ } else if (is_prefix(**argv, "tag")) {
+ unsigned char tag[BPF_TAG_SIZE];
+
+ NEXT_ARGP();
+
+ if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2,
+ tag + 3, tag + 4, tag + 5, tag + 6, tag + 7)
+ != BPF_TAG_SIZE) {
+ p_err("can't parse tag");
+ return -1;
+ }
+ NEXT_ARGP();
+
+ return prog_fd_by_nametag(tag, fds, true);
+ } else if (is_prefix(**argv, "name")) {
+ char *name;
+
+ NEXT_ARGP();
+
+ name = **argv;
+ if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+ p_err("can't parse name");
+ return -1;
+ }
+ NEXT_ARGP();
+
+ return prog_fd_by_nametag(name, fds, false);
+ } else if (is_prefix(**argv, "pinned")) {
+ char *path;
+
+ NEXT_ARGP();
+
+ path = **argv;
+ NEXT_ARGP();
+
+ (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG);
+ if ((*fds)[0] < 0)
+ return -1;
+ return 1;
+ }
+
+ p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv);
+ return -1;
+}
+
+int prog_parse_fd(int *argc, char ***argv)
+{
+ int *fds = NULL;
+ int nb_fds, fd;
+
+ fds = malloc(sizeof(int));
+ if (!fds) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+ nb_fds = prog_parse_fds(argc, argv, &fds);
+ if (nb_fds != 1) {
+ if (nb_fds > 1) {
+ p_err("several programs match this handle");
+ while (nb_fds--)
+ close(fds[nb_fds]);
+ }
+ fd = -1;
+ goto exit_free;
+ }
+
+ fd = fds[0];
+exit_free:
+ free(fds);
+ return fd;
+}
+
+static int map_fd_by_name(char *name, int **fds)
+{
+ unsigned int id = 0;
+ int fd, nb_fds = 0;
+ void *tmp;
+ int err;
+
+ while (true) {
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+
+ err = bpf_map_get_next_id(id, &id);
+ if (err) {
+ if (errno != ENOENT) {
+ p_err("%s", strerror(errno));
+ goto err_close_fds;
+ }
+ return nb_fds;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0) {
+ p_err("can't get map by id (%u): %s",
+ id, strerror(errno));
+ goto err_close_fds;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ p_err("can't get map info (%u): %s",
+ id, strerror(errno));
+ goto err_close_fd;
+ }
+
+ if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) {
+ close(fd);
+ continue;
+ }
+
+ if (nb_fds > 0) {
+ tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
+ if (!tmp) {
+ p_err("failed to realloc");
+ goto err_close_fd;
+ }
+ *fds = tmp;
+ }
+ (*fds)[nb_fds++] = fd;
+ }
+
+err_close_fd:
+ close(fd);
+err_close_fds:
+ while (--nb_fds >= 0)
+ close((*fds)[nb_fds]);
+ return -1;
+}
+
+int map_parse_fds(int *argc, char ***argv, int **fds)
+{
+ if (is_prefix(**argv, "id")) {
+ unsigned int id;
+ char *endptr;
+
+ NEXT_ARGP();
+
+ id = strtoul(**argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as ID", **argv);
+ return -1;
+ }
+ NEXT_ARGP();
+
+ (*fds)[0] = bpf_map_get_fd_by_id(id);
+ if ((*fds)[0] < 0) {
+ p_err("get map by id (%u): %s", id, strerror(errno));
+ return -1;
+ }
+ return 1;
+ } else if (is_prefix(**argv, "name")) {
+ char *name;
+
+ NEXT_ARGP();
+
+ name = **argv;
+ if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+ p_err("can't parse name");
+ return -1;
+ }
+ NEXT_ARGP();
+
+ return map_fd_by_name(name, fds);
+ } else if (is_prefix(**argv, "pinned")) {
+ char *path;
+
+ NEXT_ARGP();
+
+ path = **argv;
+ NEXT_ARGP();
+
+ (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP);
+ if ((*fds)[0] < 0)
+ return -1;
+ return 1;
+ }
+
+ p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv);
+ return -1;
+}
+
+int map_parse_fd(int *argc, char ***argv)
+{
+ int *fds = NULL;
+ int nb_fds, fd;
+
+ fds = malloc(sizeof(int));
+ if (!fds) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+ nb_fds = map_parse_fds(argc, argv, &fds);
+ if (nb_fds != 1) {
+ if (nb_fds > 1) {
+ p_err("several maps match this handle");
+ while (nb_fds--)
+ close(fds[nb_fds]);
+ }
+ fd = -1;
+ goto exit_free;
+ }
+
+ fd = fds[0];
+exit_free:
+ free(fds);
+ return fd;
+}
+
+int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
+{
+ int err;
+ int fd;
+
+ fd = map_parse_fd(argc, argv);
+ if (fd < 0)
+ return -1;
+
+ err = bpf_obj_get_info_by_fd(fd, info, info_len);
+ if (err) {
+ p_err("can't get map info: %s", strerror(errno));
+ close(fd);
+ return err;
+ }
+
+ return fd;
+}
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 88718ee6a438..a43a6f10b564 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -6,6 +6,9 @@
#include <string.h>
#include <unistd.h>
#include <net/if.h>
+#ifdef USE_LIBCAP
+#include <sys/capability.h>
+#endif
#include <sys/utsname.h>
#include <sys/vfs.h>
@@ -35,6 +38,11 @@ static const char * const helper_name[] = {
#undef BPF_HELPER_MAKE_ENTRY
+static bool full_mode;
+#ifdef USE_LIBCAP
+static bool run_as_unprivileged;
+#endif
+
/* Miscellaneous utility functions */
static bool check_procfs(void)
@@ -72,13 +80,12 @@ print_bool_feature(const char *feat_name, const char *plain_name,
printf("%s is %savailable\n", plain_name, res ? "" : "NOT ");
}
-static void print_kernel_option(const char *name, const char *value)
+static void print_kernel_option(const char *name, const char *value,
+ const char *define_prefix)
{
char *endptr;
int res;
- /* No support for C-style ouptut */
-
if (json_output) {
if (!value) {
jsonw_null_field(json_wtr, name);
@@ -90,6 +97,12 @@ static void print_kernel_option(const char *name, const char *value)
jsonw_int_field(json_wtr, name, res);
else
jsonw_string_field(json_wtr, name, value);
+ } else if (define_prefix) {
+ if (value)
+ printf("#define %s%s %s\n", define_prefix,
+ name, value);
+ else
+ printf("/* %s%s is not set */\n", define_prefix, name);
} else {
if (value)
printf("%s is set to %s\n", name, value);
@@ -307,77 +320,84 @@ static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n,
return false;
}
-static void probe_kernel_image_config(void)
+static void probe_kernel_image_config(const char *define_prefix)
{
- static const char * const options[] = {
+ static const struct {
+ const char * const name;
+ bool macro_dump;
+ } options[] = {
/* Enable BPF */
- "CONFIG_BPF",
+ { "CONFIG_BPF", },
/* Enable bpf() syscall */
- "CONFIG_BPF_SYSCALL",
+ { "CONFIG_BPF_SYSCALL", },
/* Does selected architecture support eBPF JIT compiler */
- "CONFIG_HAVE_EBPF_JIT",
+ { "CONFIG_HAVE_EBPF_JIT", },
/* Compile eBPF JIT compiler */
- "CONFIG_BPF_JIT",
+ { "CONFIG_BPF_JIT", },
/* Avoid compiling eBPF interpreter (use JIT only) */
- "CONFIG_BPF_JIT_ALWAYS_ON",
+ { "CONFIG_BPF_JIT_ALWAYS_ON", },
/* cgroups */
- "CONFIG_CGROUPS",
+ { "CONFIG_CGROUPS", },
/* BPF programs attached to cgroups */
- "CONFIG_CGROUP_BPF",
+ { "CONFIG_CGROUP_BPF", },
/* bpf_get_cgroup_classid() helper */
- "CONFIG_CGROUP_NET_CLASSID",
+ { "CONFIG_CGROUP_NET_CLASSID", },
/* bpf_skb_{,ancestor_}cgroup_id() helpers */
- "CONFIG_SOCK_CGROUP_DATA",
+ { "CONFIG_SOCK_CGROUP_DATA", },
/* Tracing: attach BPF to kprobes, tracepoints, etc. */
- "CONFIG_BPF_EVENTS",
+ { "CONFIG_BPF_EVENTS", },
/* Kprobes */
- "CONFIG_KPROBE_EVENTS",
+ { "CONFIG_KPROBE_EVENTS", },
/* Uprobes */
- "CONFIG_UPROBE_EVENTS",
+ { "CONFIG_UPROBE_EVENTS", },
/* Tracepoints */
- "CONFIG_TRACING",
+ { "CONFIG_TRACING", },
/* Syscall tracepoints */
- "CONFIG_FTRACE_SYSCALLS",
+ { "CONFIG_FTRACE_SYSCALLS", },
/* bpf_override_return() helper support for selected arch */
- "CONFIG_FUNCTION_ERROR_INJECTION",
+ { "CONFIG_FUNCTION_ERROR_INJECTION", },
/* bpf_override_return() helper */
- "CONFIG_BPF_KPROBE_OVERRIDE",
+ { "CONFIG_BPF_KPROBE_OVERRIDE", },
/* Network */
- "CONFIG_NET",
+ { "CONFIG_NET", },
/* AF_XDP sockets */
- "CONFIG_XDP_SOCKETS",
+ { "CONFIG_XDP_SOCKETS", },
/* BPF_PROG_TYPE_LWT_* and related helpers */
- "CONFIG_LWTUNNEL_BPF",
+ { "CONFIG_LWTUNNEL_BPF", },
/* BPF_PROG_TYPE_SCHED_ACT, TC (traffic control) actions */
- "CONFIG_NET_ACT_BPF",
+ { "CONFIG_NET_ACT_BPF", },
/* BPF_PROG_TYPE_SCHED_CLS, TC filters */
- "CONFIG_NET_CLS_BPF",
+ { "CONFIG_NET_CLS_BPF", },
/* TC clsact qdisc */
- "CONFIG_NET_CLS_ACT",
+ { "CONFIG_NET_CLS_ACT", },
/* Ingress filtering with TC */
- "CONFIG_NET_SCH_INGRESS",
+ { "CONFIG_NET_SCH_INGRESS", },
/* bpf_skb_get_xfrm_state() helper */
- "CONFIG_XFRM",
+ { "CONFIG_XFRM", },
/* bpf_get_route_realm() helper */
- "CONFIG_IP_ROUTE_CLASSID",
+ { "CONFIG_IP_ROUTE_CLASSID", },
/* BPF_PROG_TYPE_LWT_SEG6_LOCAL and related helpers */
- "CONFIG_IPV6_SEG6_BPF",
+ { "CONFIG_IPV6_SEG6_BPF", },
/* BPF_PROG_TYPE_LIRC_MODE2 and related helpers */
- "CONFIG_BPF_LIRC_MODE2",
+ { "CONFIG_BPF_LIRC_MODE2", },
/* BPF stream parser and BPF socket maps */
- "CONFIG_BPF_STREAM_PARSER",
+ { "CONFIG_BPF_STREAM_PARSER", },
/* xt_bpf module for passing BPF programs to netfilter */
- "CONFIG_NETFILTER_XT_MATCH_BPF",
+ { "CONFIG_NETFILTER_XT_MATCH_BPF", },
/* bpfilter back-end for iptables */
- "CONFIG_BPFILTER",
+ { "CONFIG_BPFILTER", },
/* bpfilter module with "user mode helper" */
- "CONFIG_BPFILTER_UMH",
+ { "CONFIG_BPFILTER_UMH", },
/* test_bpf module for BPF tests */
- "CONFIG_TEST_BPF",
+ { "CONFIG_TEST_BPF", },
+
+ /* Misc configs useful in BPF C programs */
+ /* jiffies <-> sec conversion for bpf_jiffies64() helper */
+ { "CONFIG_HZ", true, }
};
char *values[ARRAY_SIZE(options)] = { };
struct utsname utsn;
@@ -419,7 +439,8 @@ static void probe_kernel_image_config(void)
while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) {
for (i = 0; i < ARRAY_SIZE(options); i++) {
- if (values[i] || strcmp(buf, options[i]))
+ if ((define_prefix && !options[i].macro_dump) ||
+ values[i] || strcmp(buf, options[i].name))
continue;
values[i] = strdup(value);
@@ -431,7 +452,9 @@ end_parse:
gzclose(file);
for (i = 0; i < ARRAY_SIZE(options); i++) {
- print_kernel_option(options[i], values[i]);
+ if (define_prefix && !options[i].macro_dump)
+ continue;
+ print_kernel_option(options[i].name, values[i], define_prefix);
free(values[i]);
}
}
@@ -471,9 +494,20 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
}
res = bpf_probe_prog_type(prog_type, ifindex);
+#ifdef USE_LIBCAP
+ /* The probe may succeed even if the program load fails; for
+ * unprivileged users, check that we did not fail because of
+ * insufficient permissions.
+ */
+ if (run_as_unprivileged && errno == EPERM)
+ res = false;
+#endif
supported_types[prog_type] |= res;
+ if (!prog_type_name[prog_type]) {
+ p_info("program type name not found (type %d)", prog_type);
+ return;
+ }
maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1;
if (strlen(prog_type_name[prog_type]) > maxlen) {
p_info("program type name too long");
@@ -499,6 +533,14 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix,
res = bpf_probe_map_type(map_type, ifindex);
+ /* The probe result depends on the success of map creation; no
+ * additional check is required for unprivileged users.
+ */
+
+ if (!map_type_name[map_type]) {
+ p_info("map type name not found (type %d)", map_type);
+ return;
+ }
maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1;
if (strlen(map_type_name[map_type]) > maxlen) {
p_info("map type name too long");
@@ -518,12 +560,19 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
const char *define_prefix, unsigned int id,
const char *ptype_name, __u32 ifindex)
{
- bool res;
+ bool res = false;
- if (!supported_type)
- res = false;
- else
+ if (supported_type) {
res = bpf_probe_helper(id, prog_type, ifindex);
+#ifdef USE_LIBCAP
+ /* The probe may succeed even if the program load fails; for
+ * unprivileged users, check that we did not fail because of
+ * insufficient permissions.
+ */
+ if (run_as_unprivileged && errno == EPERM)
+ res = false;
+#endif
+ }
if (json_output) {
if (res)
@@ -540,8 +589,7 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
static void
probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
- const char *define_prefix, bool full_mode,
- __u32 ifindex)
+ const char *define_prefix, __u32 ifindex)
{
const char *ptype_name = prog_type_name[prog_type];
char feat_name[128];
@@ -607,23 +655,22 @@ section_system_config(enum probe_component target, const char *define_prefix)
switch (target) {
case COMPONENT_KERNEL:
case COMPONENT_UNSPEC:
- if (define_prefix)
- break;
-
print_start_section("system_config",
"Scanning system configuration...",
- NULL, /* define_comment never used here */
- NULL); /* define_prefix always NULL here */
- if (check_procfs()) {
- probe_unprivileged_disabled();
- probe_jit_enable();
- probe_jit_harden();
- probe_jit_kallsyms();
- probe_jit_limit();
- } else {
- p_info("/* procfs not mounted, skipping related probes */");
+ "/*** Misc kernel config items ***/",
+ define_prefix);
+ if (!define_prefix) {
+ if (check_procfs()) {
+ probe_unprivileged_disabled();
+ probe_jit_enable();
+ probe_jit_harden();
+ probe_jit_kallsyms();
+ probe_jit_limit();
+ } else {
+ p_info("/* procfs not mounted, skipping related probes */");
+ }
}
- probe_kernel_image_config();
+ probe_kernel_image_config(define_prefix);
print_end_section();
break;
default:
@@ -656,7 +703,7 @@ section_program_types(bool *supported_types, const char *define_prefix,
"/*** eBPF program types ***/",
define_prefix);
- for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+ for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++)
probe_prog_type(i, supported_types, define_prefix, ifindex);
print_end_section();
@@ -678,8 +725,7 @@ static void section_map_types(const char *define_prefix, __u32 ifindex)
}
static void
-section_helpers(bool *supported_types, const char *define_prefix,
- bool full_mode, __u32 ifindex)
+section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex)
{
unsigned int i;
@@ -703,9 +749,9 @@ section_helpers(bool *supported_types, const char *define_prefix,
" %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n",
define_prefix, define_prefix, define_prefix,
define_prefix);
- for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
- probe_helpers_for_progtype(i, supported_types[i],
- define_prefix, full_mode, ifindex);
+ for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++)
+ probe_helpers_for_progtype(i, supported_types[i], define_prefix,
+ ifindex);
print_end_section();
}
@@ -720,23 +766,133 @@ static void section_misc(const char *define_prefix, __u32 ifindex)
print_end_section();
}
+#ifdef USE_LIBCAP
+#define capability(c) { c, false, #c }
+#define capability_msg(a, i) a[i].set ? "" : a[i].name, a[i].set ? "" : ", "
+#endif
+
+static int handle_perms(void)
+{
+#ifdef USE_LIBCAP
+ struct {
+ cap_value_t cap;
+ bool set;
+ char name[14]; /* sizeof("CAP_SYS_ADMIN") */
+ } bpf_caps[] = {
+ capability(CAP_SYS_ADMIN),
+#ifdef CAP_BPF
+ capability(CAP_BPF),
+ capability(CAP_NET_ADMIN),
+ capability(CAP_PERFMON),
+#endif
+ };
+ cap_value_t cap_list[ARRAY_SIZE(bpf_caps)];
+ unsigned int i, nb_bpf_caps = 0;
+ bool cap_sys_admin_only = true;
+ cap_flag_value_t val;
+ int res = -1;
+ cap_t caps;
+
+ caps = cap_get_proc();
+ if (!caps) {
+ p_err("failed to get capabilities for process: %s",
+ strerror(errno));
+ return -1;
+ }
+
+#ifdef CAP_BPF
+ if (CAP_IS_SUPPORTED(CAP_BPF))
+ cap_sys_admin_only = false;
+#endif
+
+ for (i = 0; i < ARRAY_SIZE(bpf_caps); i++) {
+ const char *cap_name = bpf_caps[i].name;
+ cap_value_t cap = bpf_caps[i].cap;
+
+ if (cap_get_flag(caps, cap, CAP_EFFECTIVE, &val)) {
+ p_err("bug: failed to retrieve %s status: %s", cap_name,
+ strerror(errno));
+ goto exit_free;
+ }
+
+ if (val == CAP_SET) {
+ bpf_caps[i].set = true;
+ cap_list[nb_bpf_caps++] = cap;
+ }
+
+ if (cap_sys_admin_only)
+ /* The system does not know about CAP_BPF, meaning that
+ * CAP_SYS_ADMIN is the only capability required. We
+ * just checked it; break.
+ */
+ break;
+ }
+
+ if ((run_as_unprivileged && !nb_bpf_caps) ||
+ (!run_as_unprivileged && nb_bpf_caps == ARRAY_SIZE(bpf_caps)) ||
+ (!run_as_unprivileged && cap_sys_admin_only && nb_bpf_caps)) {
+ /* We are all good, exit now */
+ res = 0;
+ goto exit_free;
+ }
+
+ if (!run_as_unprivileged) {
+ if (cap_sys_admin_only)
+ p_err("missing %s, required for full feature probing; run as root or use 'unprivileged'",
+ bpf_caps[0].name);
+ else
+ p_err("missing %s%s%s%s%s%s%s%srequired for full feature probing; run as root or use 'unprivileged'",
+ capability_msg(bpf_caps, 0),
+ capability_msg(bpf_caps, 1),
+ capability_msg(bpf_caps, 2),
+ capability_msg(bpf_caps, 3));
+ goto exit_free;
+ }
+
+ /* if (run_as_unprivileged && nb_bpf_caps > 0), drop capabilities. */
+ if (cap_set_flag(caps, CAP_EFFECTIVE, nb_bpf_caps, cap_list,
+ CAP_CLEAR)) {
+ p_err("bug: failed to clear capabilities: %s", strerror(errno));
+ goto exit_free;
+ }
+
+ if (cap_set_proc(caps)) {
+ p_err("failed to drop capabilities: %s", strerror(errno));
+ goto exit_free;
+ }
+
+ res = 0;
+
+exit_free:
+ if (cap_free(caps) && !res) {
+ p_err("failed to clear storage object for capabilities: %s",
+ strerror(errno));
+ res = -1;
+ }
+
+ return res;
+#else
+ /* Detection assumes the user has specific privileges.
+ * We do not use libcap, so approximate and restrict usage to the
+ * root user only.
+ */
+ if (geteuid()) {
+ p_err("full feature probing requires root privileges");
+ return -1;
+ }
+
+ return 0;
+#endif /* USE_LIBCAP */
+}
+
static int do_probe(int argc, char **argv)
{
enum probe_component target = COMPONENT_UNSPEC;
const char *define_prefix = NULL;
bool supported_types[128] = {};
- bool full_mode = false;
__u32 ifindex = 0;
char *ifname;
- /* Detection assumes user has sufficient privileges (CAP_SYS_ADMIN).
- * Let's approximate, and restrict usage to root user only.
- */
- if (geteuid()) {
- p_err("please run this command as root user");
- return -1;
- }
-
set_max_rlimit();
while (argc) {
@@ -785,6 +941,14 @@ static int do_probe(int argc, char **argv)
if (!REQ_ARGS(1))
return -1;
define_prefix = GET_ARG();
+ } else if (is_prefix(*argv, "unprivileged")) {
+#ifdef USE_LIBCAP
+ run_as_unprivileged = true;
+ NEXT_ARG();
+#else
+ p_err("unprivileged run not supported, recompile bpftool with libcap");
+ return -1;
+#endif
} else {
p_err("expected no more arguments, 'kernel', 'dev', 'macros' or 'prefix', got: '%s'?",
*argv);
@@ -792,6 +956,12 @@ static int do_probe(int argc, char **argv)
}
}
+ /* Full feature detection requires specific privileges.
+ * Let's approximate, and bail out if the user lacks them.
+ */
+ if (handle_perms())
+ return -1;
+
if (json_output) {
define_prefix = NULL;
jsonw_start_object(json_wtr);
@@ -803,7 +973,7 @@ static int do_probe(int argc, char **argv)
goto exit_close_json;
section_program_types(supported_types, define_prefix, ifindex);
section_map_types(define_prefix, ifindex);
- section_helpers(supported_types, define_prefix, full_mode, ifindex);
+ section_helpers(supported_types, define_prefix, ifindex);
section_misc(define_prefix, ifindex);
exit_close_json:
@@ -822,12 +992,12 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s probe [COMPONENT] [full] [macros [prefix PREFIX]]\n"
- " %s %s help\n"
+ "Usage: %1$s %2$s probe [COMPONENT] [full] [unprivileged] [macros [prefix PREFIX]]\n"
+ " %1$s %2$s help\n"
"\n"
" COMPONENT := { kernel | dev NAME }\n"
"",
- bin_name, argv[-2], bin_name, argv[-2]);
+ bin_name, argv[-2]);
return 0;
}
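
handle_perms() builds on the classic libcap triple of cap_get_proc()/cap_get_flag()/cap_set_proc(). A minimal standalone probe of the effective set using the same primitives (build with -lcap; this only checks, never drops, capabilities):

    #include <stdio.h>
    #include <sys/capability.h>

    int main(void)
    {
            cap_flag_value_t val = CAP_CLEAR;
            cap_t caps = cap_get_proc();   /* snapshot of our capability sets */

            if (!caps)
                    return 1;
            if (!cap_get_flag(caps, CAP_SYS_ADMIN, CAP_EFFECTIVE, &val))
                    printf("CAP_SYS_ADMIN is %seffective\n",
                           val == CAP_SET ? "" : "not ");
            cap_free(caps);                /* release the snapshot */
            return 0;
    }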
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index f8113b3646f5..4033c46d83e7 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -17,14 +17,11 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
-#include <unistd.h>
#include <bpf/btf.h>
-#include "bpf/libbpf_internal.h"
#include "json_writer.h"
#include "main.h"
-
#define MAX_OBJ_NAME_LEN 64
static void sanitize_identifier(char *name)
@@ -89,7 +86,7 @@ static const char *get_map_ident(const struct bpf_map *map)
return NULL;
}
-static void codegen_btf_dump_printf(void *ct, const char *fmt, va_list args)
+static void codegen_btf_dump_printf(void *ctx, const char *fmt, va_list args)
{
vprintf(fmt, args);
}
@@ -105,17 +102,20 @@ static int codegen_datasec_def(struct bpf_object *obj,
int i, err, off = 0, pad_cnt = 0, vlen = btf_vlen(sec);
const char *sec_ident;
char var_ident[256];
+ bool strip_mods = false;
- if (strcmp(sec_name, ".data") == 0)
+ if (strcmp(sec_name, ".data") == 0) {
sec_ident = "data";
- else if (strcmp(sec_name, ".bss") == 0)
+ } else if (strcmp(sec_name, ".bss") == 0) {
sec_ident = "bss";
- else if (strcmp(sec_name, ".rodata") == 0)
+ } else if (strcmp(sec_name, ".rodata") == 0) {
sec_ident = "rodata";
- else if (strcmp(sec_name, ".kconfig") == 0)
+ strip_mods = true;
+ } else if (strcmp(sec_name, ".kconfig") == 0) {
sec_ident = "kconfig";
- else
+ } else {
return 0;
+ }
printf(" struct %s__%s {\n", obj_name, sec_ident);
for (i = 0; i < vlen; i++, sec_var++) {
@@ -124,16 +124,10 @@ static int codegen_datasec_def(struct bpf_object *obj,
DECLARE_LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts,
.field_name = var_ident,
.indent_level = 2,
+ .strip_mods = strip_mods,
);
int need_off = sec_var->offset, align_off, align;
__u32 var_type_id = var->type;
- const struct btf_type *t;
-
- t = btf__type_by_id(btf, var_type_id);
- while (btf_is_mod(t)) {
- var_type_id = t->type;
- t = btf__type_by_id(btf, var_type_id);
- }
if (off > need_off) {
p_err("Something is wrong for %s's variable #%d: need offset %d, already at %d.\n",
@@ -147,6 +141,20 @@ static int codegen_datasec_def(struct bpf_object *obj,
var_name, align);
return -EINVAL;
}
+ /* Assume 32-bit architectures when generating data section
+ * struct memory layout. Given bpftool can't know which target
+ * host architecture it's emitting skeleton for, we need to be
+ * conservative and assume 32-bit one to ensure enough padding
+ * bytes are generated for pointer and long types. This will
+ * still work correctly for 64-bit architectures, because in
+ * the worst case we'll generate unnecessary padding field,
+ * which on 64-bit architectures is not strictly necessary and
+ * would be handled by natural 8-byte alignment. But it still
+ * will be a correct memory layout, based on recorded offsets
+ * in BTF.
+ */
+ if (align > 4)
+ align = 4;
align_off = (off + align - 1) / align * align;
if (align_off != need_off) {
@@ -201,7 +209,7 @@ out:
return err;
}
-static int codegen(const char *template, ...)
+static void codegen(const char *template, ...)
{
const char *src, *end;
int skip_tabs = 0, n;
@@ -212,7 +220,7 @@ static int codegen(const char *template, ...)
n = strlen(template);
s = malloc(n + 1);
if (!s)
- return -ENOMEM;
+ exit(-1);
src = template;
dst = s;
@@ -225,7 +233,8 @@ static int codegen(const char *template, ...)
} else {
p_err("unrecognized character at pos %td in template '%s'",
src - template - 1, template);
- return -EINVAL;
+ free(s);
+ exit(-1);
}
}
@@ -235,7 +244,8 @@ static int codegen(const char *template, ...)
if (*src != '\t') {
p_err("not enough tabs at pos %td in template '%s'",
src - template - 1, template);
- return -EINVAL;
+ free(s);
+ exit(-1);
}
}
/* trim trailing whitespace */
@@ -256,7 +266,6 @@ static int codegen(const char *template, ...)
va_end(args);
free(s);
- return n;
}
static int do_skeleton(int argc, char **argv)
@@ -305,8 +314,11 @@ static int do_skeleton(int argc, char **argv)
opts.object_name = obj_name;
obj = bpf_object__open_mem(obj_data, file_sz, &opts);
if (IS_ERR(obj)) {
+ char err_buf[256];
+
+ libbpf_strerror(PTR_ERR(obj), err_buf, sizeof(err_buf));
+ p_err("failed to open BPF object file: %s", err_buf);
obj = NULL;
- p_err("failed to open BPF object file: %ld", PTR_ERR(obj));
goto out;
}
@@ -397,7 +409,7 @@ static int do_skeleton(int argc, char **argv)
{ \n\
struct %1$s *obj; \n\
\n\
- obj = (typeof(obj))calloc(1, sizeof(*obj)); \n\
+ obj = (struct %1$s *)calloc(1, sizeof(*obj)); \n\
if (!obj) \n\
return NULL; \n\
if (%1$s__create_skeleton(obj)) \n\
@@ -461,7 +473,7 @@ static int do_skeleton(int argc, char **argv)
{ \n\
struct bpf_object_skeleton *s; \n\
\n\
- s = (typeof(s))calloc(1, sizeof(*s)); \n\
+ s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
if (!s) \n\
return -1; \n\
obj->skeleton = s; \n\
@@ -479,7 +491,7 @@ static int do_skeleton(int argc, char **argv)
/* maps */ \n\
s->map_cnt = %zu; \n\
s->map_skel_sz = sizeof(*s->maps); \n\
- s->maps = (typeof(s->maps))calloc(s->map_cnt, s->map_skel_sz);\n\
+ s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);\n\
if (!s->maps) \n\
goto err; \n\
",
@@ -515,7 +527,7 @@ static int do_skeleton(int argc, char **argv)
/* programs */ \n\
s->prog_cnt = %zu; \n\
s->prog_skel_sz = sizeof(*s->progs); \n\
- s->progs = (typeof(s->progs))calloc(s->prog_cnt, s->prog_skel_sz);\n\
+ s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);\n\
if (!s->progs) \n\
goto err; \n\
",
@@ -587,12 +599,12 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %1$s gen skeleton FILE\n"
- " %1$s gen help\n"
+ "Usage: %1$s %2$s skeleton FILE\n"
+ " %1$s %2$s help\n"
"\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name);
+ bin_name, "gen");
return 0;
}
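
The new comment in codegen_datasec_def() hinges on the usual round-up-to-alignment identity, plus capping align at 4 so that padding needed by 32-bit consumers is always emitted explicitly. A worked sketch of the arithmetic (round_up_align() is an illustrative helper, not part of gen.c):

    /* Round 'off' up to the next multiple of 'align'.
     * Example: off = 13, align = 4  ->  (13 + 3) / 4 * 4 = 16.
     *
     * gen.c caps align at 4: a pointer/long field then gets explicit
     * padding sized for 32-bit targets, while 64-bit consumers still
     * end up correctly laid out through natural 8-byte alignment.
     */
    static int round_up_align(int off, int align)
    {
            return (off + align - 1) / align * align;
    }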
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
new file mode 100644
index 000000000000..3b1aad7535dd
--- /dev/null
+++ b/tools/bpf/bpftool/iter.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (C) 2020 Facebook
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <linux/err.h>
+#include <bpf/libbpf.h>
+
+#include "main.h"
+
+static int do_pin(int argc, char **argv)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts);
+ union bpf_iter_link_info linfo;
+ const char *objfile, *path;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ struct bpf_link *link;
+ int err = -1, map_fd = -1;
+
+ if (!REQ_ARGS(2))
+ usage();
+
+ objfile = GET_ARG();
+ path = GET_ARG();
+
+ /* optional arguments */
+ if (argc) {
+ if (is_prefix(*argv, "map")) {
+ NEXT_ARG();
+
+ if (!REQ_ARGS(2)) {
+ p_err("incorrect map spec");
+ return -1;
+ }
+
+ map_fd = map_parse_fd(&argc, &argv);
+ if (map_fd < 0)
+ return -1;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ iter_opts.link_info = &linfo;
+ iter_opts.link_info_len = sizeof(linfo);
+ }
+ }
+
+ obj = bpf_object__open(objfile);
+ if (IS_ERR(obj)) {
+ p_err("can't open objfile %s", objfile);
+ goto close_map_fd;
+ }
+
+ err = bpf_object__load(obj);
+ if (err) {
+ p_err("can't load objfile %s", objfile);
+ goto close_obj;
+ }
+
+ prog = bpf_program__next(NULL, obj);
+ if (!prog) {
+ p_err("can't find bpf program in objfile %s", objfile);
+ goto close_obj;
+ }
+
+ link = bpf_program__attach_iter(prog, &iter_opts);
+ if (IS_ERR(link)) {
+ err = PTR_ERR(link);
+ p_err("attach_iter failed for program %s",
+ bpf_program__name(prog));
+ goto close_obj;
+ }
+
+ err = mount_bpffs_for_pin(path);
+ if (err)
+ goto close_link;
+
+ err = bpf_link__pin(link, path);
+ if (err) {
+ p_err("pin_iter failed for program %s to path %s",
+ bpf_program__name(prog), path);
+ goto close_link;
+ }
+
+close_link:
+ bpf_link__destroy(link);
+close_obj:
+ bpf_object__close(obj);
+close_map_fd:
+ if (map_fd >= 0)
+ close(map_fd);
+ return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+ fprintf(stderr,
+ "Usage: %1$s %2$s pin OBJ PATH [map MAP]\n"
+ " %1$s %2$s help\n"
+ " " HELP_SPEC_MAP "\n"
+ "",
+ bin_name, "iter");
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "help", do_help },
+ { "pin", do_pin },
+ { 0 }
+};
+
+int do_iter(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index f7f5885aa3ba..e7e7eee9f172 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -15,7 +15,6 @@
#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>
-#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>
diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c
index 86501cd3c763..7fea83bedf48 100644
--- a/tools/bpf/bpftool/json_writer.c
+++ b/tools/bpf/bpftool/json_writer.c
@@ -119,6 +119,12 @@ void jsonw_pretty(json_writer_t *self, bool on)
self->pretty = on;
}
+void jsonw_reset(json_writer_t *self)
+{
+ assert(self->depth == 0);
+ self->sep = '\0';
+}
+
/* Basic blocks */
static void jsonw_begin(json_writer_t *self, int c)
{
diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h
index 35cf1f00f96c..8ace65cdb92f 100644
--- a/tools/bpf/bpftool/json_writer.h
+++ b/tools/bpf/bpftool/json_writer.h
@@ -27,6 +27,9 @@ void jsonw_destroy(json_writer_t **self_p);
/* Cause output to have pretty whitespace */
void jsonw_pretty(json_writer_t *self, bool on);
+/* Reset separator to create new JSON */
+void jsonw_reset(json_writer_t *self);
+
/* Add property name */
void jsonw_name(json_writer_t *self, const char *name);
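
jsonw_reset() exists so one long-lived writer can emit several independent top-level JSON documents on the same stream: closing a document leaves a pending ',' separator behind, and the reset clears it (the assert confirms depth is back to zero). A sketch against this private json_writer API:

    #include <stdio.h>
    #include "json_writer.h"

    int main(void)
    {
            json_writer_t *w = jsonw_new(stdout);

            jsonw_start_object(w);
            jsonw_string_field(w, "event", "first");
            jsonw_end_object(w);

            jsonw_reset(w); /* clear the pending separator between documents */

            jsonw_start_object(w);
            jsonw_string_field(w, "event", "second");
            jsonw_end_object(w);

            jsonw_destroy(&w);
            return 0;
    }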
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
new file mode 100644
index 000000000000..e77e1525d20a
--- /dev/null
+++ b/tools/bpf/bpftool/link.c
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2020 Facebook */
+
+#include <errno.h>
+#include <net/if.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <bpf/bpf.h>
+
+#include "json_writer.h"
+#include "main.h"
+
+static const char * const link_type_name[] = {
+ [BPF_LINK_TYPE_UNSPEC] = "unspec",
+ [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
+ [BPF_LINK_TYPE_TRACING] = "tracing",
+ [BPF_LINK_TYPE_CGROUP] = "cgroup",
+ [BPF_LINK_TYPE_ITER] = "iter",
+ [BPF_LINK_TYPE_NETNS] = "netns",
+};
+
+static int link_parse_fd(int *argc, char ***argv)
+{
+ int fd;
+
+ if (is_prefix(**argv, "id")) {
+ unsigned int id;
+ char *endptr;
+
+ NEXT_ARGP();
+
+ id = strtoul(**argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as ID", **argv);
+ return -1;
+ }
+ NEXT_ARGP();
+
+ fd = bpf_link_get_fd_by_id(id);
+ if (fd < 0)
+ p_err("failed to get link with ID %d: %s", id, strerror(errno));
+ return fd;
+ } else if (is_prefix(**argv, "pinned")) {
+ char *path;
+
+ NEXT_ARGP();
+
+ path = **argv;
+ NEXT_ARGP();
+
+ return open_obj_pinned_any(path, BPF_OBJ_LINK);
+ }
+
+ p_err("expected 'id' or 'pinned', got: '%s'?", **argv);
+ return -1;
+}
+
+static void
+show_link_header_json(struct bpf_link_info *info, json_writer_t *wtr)
+{
+ jsonw_uint_field(wtr, "id", info->id);
+ if (info->type < ARRAY_SIZE(link_type_name))
+ jsonw_string_field(wtr, "type", link_type_name[info->type]);
+ else
+ jsonw_uint_field(wtr, "type", info->type);
+
+ jsonw_uint_field(wtr, "prog_id", info->prog_id);
+}
+
+static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr)
+{
+ if (attach_type < ARRAY_SIZE(attach_type_name))
+ jsonw_string_field(wtr, "attach_type",
+ attach_type_name[attach_type]);
+ else
+ jsonw_uint_field(wtr, "attach_type", attach_type);
+}
+
+static bool is_iter_map_target(const char *target_name)
+{
+ return strcmp(target_name, "bpf_map_elem") == 0 ||
+ strcmp(target_name, "bpf_sk_storage_map") == 0;
+}
+
+static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr)
+{
+ const char *target_name = u64_to_ptr(info->iter.target_name);
+
+ jsonw_string_field(wtr, "target_name", target_name);
+
+ if (is_iter_map_target(target_name))
+ jsonw_uint_field(wtr, "map_id", info->iter.map.map_id);
+}
+
+static int get_prog_info(int prog_id, struct bpf_prog_info *info)
+{
+ __u32 len = sizeof(*info);
+ int err, prog_fd;
+
+ prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (prog_fd < 0)
+ return prog_fd;
+
+ memset(info, 0, sizeof(*info));
+ err = bpf_obj_get_info_by_fd(prog_fd, info, &len);
+ if (err)
+ p_err("can't get prog info: %s", strerror(errno));
+ close(prog_fd);
+ return err;
+}
+
+static int show_link_close_json(int fd, struct bpf_link_info *info)
+{
+ struct bpf_prog_info prog_info;
+ int err;
+
+ jsonw_start_object(json_wtr);
+
+ show_link_header_json(info, json_wtr);
+
+ switch (info->type) {
+ case BPF_LINK_TYPE_RAW_TRACEPOINT:
+ jsonw_string_field(json_wtr, "tp_name",
+ u64_to_ptr(info->raw_tracepoint.tp_name));
+ break;
+ case BPF_LINK_TYPE_TRACING:
+ err = get_prog_info(info->prog_id, &prog_info);
+ if (err)
+ return err;
+
+ if (prog_info.type < prog_type_name_size)
+ jsonw_string_field(json_wtr, "prog_type",
+ prog_type_name[prog_info.type]);
+ else
+ jsonw_uint_field(json_wtr, "prog_type",
+ prog_info.type);
+
+ show_link_attach_type_json(info->tracing.attach_type,
+ json_wtr);
+ break;
+ case BPF_LINK_TYPE_CGROUP:
+ jsonw_lluint_field(json_wtr, "cgroup_id",
+ info->cgroup.cgroup_id);
+ show_link_attach_type_json(info->cgroup.attach_type, json_wtr);
+ break;
+ case BPF_LINK_TYPE_ITER:
+ show_iter_json(info, json_wtr);
+ break;
+ case BPF_LINK_TYPE_NETNS:
+ jsonw_uint_field(json_wtr, "netns_ino",
+ info->netns.netns_ino);
+ show_link_attach_type_json(info->netns.attach_type, json_wtr);
+ break;
+ default:
+ break;
+ }
+
+ if (!hash_empty(link_table.table)) {
+ struct pinned_obj *obj;
+
+ jsonw_name(json_wtr, "pinned");
+ jsonw_start_array(json_wtr);
+ hash_for_each_possible(link_table.table, obj, hash, info->id) {
+ if (obj->id == info->id)
+ jsonw_string(json_wtr, obj->path);
+ }
+ jsonw_end_array(json_wtr);
+ }
+
+ emit_obj_refs_json(&refs_table, info->id, json_wtr);
+
+ jsonw_end_object(json_wtr);
+
+ return 0;
+}
+
+static void show_link_header_plain(struct bpf_link_info *info)
+{
+ printf("%u: ", info->id);
+ if (info->type < ARRAY_SIZE(link_type_name))
+ printf("%s ", link_type_name[info->type]);
+ else
+ printf("type %u ", info->type);
+
+ printf("prog %u ", info->prog_id);
+}
+
+static void show_link_attach_type_plain(__u32 attach_type)
+{
+ if (attach_type < ARRAY_SIZE(attach_type_name))
+ printf("attach_type %s ", attach_type_name[attach_type]);
+ else
+ printf("attach_type %u ", attach_type);
+}
+
+static void show_iter_plain(struct bpf_link_info *info)
+{
+ const char *target_name = u64_to_ptr(info->iter.target_name);
+
+ printf("target_name %s ", target_name);
+
+ if (is_iter_map_target(target_name))
+ printf("map_id %u ", info->iter.map.map_id);
+}
+
+static int show_link_close_plain(int fd, struct bpf_link_info *info)
+{
+ struct bpf_prog_info prog_info;
+ int err;
+
+ show_link_header_plain(info);
+
+ switch (info->type) {
+ case BPF_LINK_TYPE_RAW_TRACEPOINT:
+ printf("\n\ttp '%s' ",
+ (const char *)u64_to_ptr(info->raw_tracepoint.tp_name));
+ break;
+ case BPF_LINK_TYPE_TRACING:
+ err = get_prog_info(info->prog_id, &prog_info);
+ if (err)
+ return err;
+
+ if (prog_info.type < prog_type_name_size)
+ printf("\n\tprog_type %s ",
+ prog_type_name[prog_info.type]);
+ else
+ printf("\n\tprog_type %u ", prog_info.type);
+
+ show_link_attach_type_plain(info->tracing.attach_type);
+ break;
+ case BPF_LINK_TYPE_CGROUP:
+ printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id);
+ show_link_attach_type_plain(info->cgroup.attach_type);
+ break;
+ case BPF_LINK_TYPE_ITER:
+ show_iter_plain(info);
+ break;
+ case BPF_LINK_TYPE_NETNS:
+ printf("\n\tnetns_ino %u ", info->netns.netns_ino);
+ show_link_attach_type_plain(info->netns.attach_type);
+ break;
+ default:
+ break;
+ }
+
+ if (!hash_empty(link_table.table)) {
+ struct pinned_obj *obj;
+
+ hash_for_each_possible(link_table.table, obj, hash, info->id) {
+ if (obj->id == info->id)
+ printf("\n\tpinned %s", obj->path);
+ }
+ }
+ emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
+
+ printf("\n");
+
+ return 0;
+}
+
+static int do_show_link(int fd)
+{
+ struct bpf_link_info info;
+ __u32 len = sizeof(info);
+ char buf[256];
+ int err;
+
+ memset(&info, 0, sizeof(info));
+again:
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ p_err("can't get link info: %s",
+ strerror(errno));
+ close(fd);
+ return err;
+ }
+ if (info.type == BPF_LINK_TYPE_RAW_TRACEPOINT &&
+ !info.raw_tracepoint.tp_name) {
+ info.raw_tracepoint.tp_name = (unsigned long)&buf;
+ info.raw_tracepoint.tp_name_len = sizeof(buf);
+ goto again;
+ }
+ if (info.type == BPF_LINK_TYPE_ITER &&
+ !info.iter.target_name) {
+ info.iter.target_name = (unsigned long)&buf;
+ info.iter.target_name_len = sizeof(buf);
+ goto again;
+ }
+
+ if (json_output)
+ show_link_close_json(fd, &info);
+ else
+ show_link_close_plain(fd, &info);
+
+ close(fd);
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ __u32 id = 0;
+ int err, fd;
+
+ if (show_pinned)
+ build_pinned_obj_table(&link_table, BPF_OBJ_LINK);
+ build_obj_refs_table(&refs_table, BPF_OBJ_LINK);
+
+ if (argc == 2) {
+ fd = link_parse_fd(&argc, &argv);
+ if (fd < 0)
+ return fd;
+ return do_show_link(fd);
+ }
+
+ if (argc)
+ return BAD_ARG();
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+ while (true) {
+ err = bpf_link_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT)
+ break;
+ p_err("can't get next link: %s%s", strerror(errno),
+ errno == EINVAL ? " -- kernel too old?" : "");
+ break;
+ }
+
+ fd = bpf_link_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ p_err("can't get link by id (%u): %s",
+ id, strerror(errno));
+ break;
+ }
+
+ err = do_show_link(fd);
+ if (err)
+ break;
+ }
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ delete_obj_refs_table(&refs_table);
+
+ return errno == ENOENT ? 0 : -1;
+}
+
+static int do_pin(int argc, char **argv)
+{
+ int err;
+
+ err = do_pin_any(argc, argv, link_parse_fd);
+ if (!err && json_output)
+ jsonw_null(json_wtr);
+ return err;
+}
+
+static int do_detach(int argc, char **argv)
+{
+ int err, fd;
+
+ if (argc != 2) {
+ p_err("link specifier is invalid or missing\n");
+ return 1;
+ }
+
+ fd = link_parse_fd(&argc, &argv);
+ if (fd < 0)
+ return 1;
+
+ err = bpf_link_detach(fd);
+ if (err)
+ err = -errno;
+ close(fd);
+ if (err) {
+ p_err("failed link detach: %s", strerror(-err));
+ return 1;
+ }
+
+ if (json_output)
+ jsonw_null(json_wtr);
+
+ return 0;
+}
+
+static int do_help(int argc, char **argv)
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %1$s %2$s { show | list } [LINK]\n"
+ " %1$s %2$s pin LINK FILE\n"
+ " %1$s %2$s detach LINK\n"
+ " %1$s %2$s help\n"
+ "\n"
+ " " HELP_SPEC_LINK "\n"
+ " " HELP_SPEC_OPTIONS "\n"
+ "",
+ bin_name, argv[-2]);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "list", do_show },
+ { "help", do_help },
+ { "pin", do_pin },
+ { "detach", do_detach },
+ { 0 }
+};
+
+int do_link(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
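A hedged sketch (not part of this patch) of the two-pass bpf_obj_get_info_by_fd() pattern that
do_show_link() above relies on: the first call fills the fixed-size fields, and for string members
such as raw_tracepoint.tp_name the caller then points the kernel at a buffer and queries again.
The helper name get_raw_tp_name() is made up purely for illustration.

#include <errno.h>
#include <stddef.h>
#include <bpf/bpf.h>

static int get_raw_tp_name(int link_fd, char *name, size_t name_sz)
{
	struct bpf_link_info info = {};
	__u32 len = sizeof(info);
	int err;

	/* first pass: fixed fields only */
	err = bpf_obj_get_info_by_fd(link_fd, &info, &len);
	if (err)
		return err;
	if (info.type != BPF_LINK_TYPE_RAW_TRACEPOINT)
		return -EINVAL;

	/* second pass: hand the kernel a buffer so it can copy the string out */
	info.raw_tracepoint.tp_name = (__u64)(unsigned long)name;
	info.raw_tracepoint.tp_name_len = name_sz;
	return bpf_obj_get_info_by_fd(link_fd, &info, &len);
}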
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 466c269eabdd..682daaa49e6a 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -30,6 +30,8 @@ bool verifier_logs;
bool relaxed_maps;
struct pinned_obj_table prog_table;
struct pinned_obj_table map_table;
+struct pinned_obj_table link_table;
+struct obj_refs_table refs_table;
static void __noreturn clean_and_exit(int i)
{
@@ -58,7 +60,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
- " OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen | struct_ops }\n"
+ " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, bin_name, bin_name);
@@ -68,13 +70,42 @@ static int do_help(int argc, char **argv)
static int do_version(int argc, char **argv)
{
+#ifdef HAVE_LIBBFD_SUPPORT
+ const bool has_libbfd = true;
+#else
+ const bool has_libbfd = false;
+#endif
+#ifdef BPFTOOL_WITHOUT_SKELETONS
+ const bool has_skeletons = false;
+#else
+ const bool has_skeletons = true;
+#endif
+
if (json_output) {
- jsonw_start_object(json_wtr);
+ jsonw_start_object(json_wtr); /* root object */
+
jsonw_name(json_wtr, "version");
jsonw_printf(json_wtr, "\"%s\"", BPFTOOL_VERSION);
- jsonw_end_object(json_wtr);
+
+ jsonw_name(json_wtr, "features");
+ jsonw_start_object(json_wtr); /* features */
+ jsonw_bool_field(json_wtr, "libbfd", has_libbfd);
+ jsonw_bool_field(json_wtr, "skeletons", has_skeletons);
+ jsonw_end_object(json_wtr); /* features */
+
+ jsonw_end_object(json_wtr); /* root object */
} else {
+ unsigned int nb_features = 0;
+
printf("%s v%s\n", bin_name, BPFTOOL_VERSION);
+ printf("features:");
+ if (has_libbfd) {
+ printf(" libbfd");
+ nb_features++;
+ }
+ if (has_skeletons)
+ printf("%s skeletons", nb_features++ ? "," : "");
+ printf("\n");
}
return 0;
}
@@ -91,9 +122,16 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv,
if (argc < 1 && cmds[0].func)
return cmds[0].func(argc, argv);
- for (i = 0; cmds[i].func; i++)
- if (is_prefix(*argv, cmds[i].cmd))
+ for (i = 0; cmds[i].cmd; i++) {
+ if (is_prefix(*argv, cmds[i].cmd)) {
+ if (!cmds[i].func) {
+ p_err("command '%s' is not supported in bootstrap mode",
+ cmds[i].cmd);
+ return -1;
+ }
return cmds[i].func(argc - 1, argv + 1);
+ }
+ }
help(argc - 1, argv + 1);
@@ -215,6 +253,7 @@ static const struct cmd cmds[] = {
{ "batch", do_batch },
{ "prog", do_prog },
{ "map", do_map },
+ { "link", do_link },
{ "cgroup", do_cgroup },
{ "perf", do_perf },
{ "net", do_net },
@@ -222,6 +261,7 @@ static const struct cmd cmds[] = {
{ "btf", do_btf },
{ "gen", do_gen },
{ "struct_ops", do_struct_ops },
+ { "iter", do_iter },
{ "version", do_version },
{ 0 }
};
@@ -364,6 +404,7 @@ int main(int argc, char **argv)
hash_init(prog_table.table);
hash_init(map_table.table);
+ hash_init(link_table.table);
opterr = 0;
while ((opt = getopt_long(argc, argv, "Vhpjfmnd",
@@ -422,6 +463,7 @@ int main(int argc, char **argv)
if (show_pinned) {
delete_pinned_obj_table(&prog_table);
delete_pinned_obj_table(&map_table);
+ delete_pinned_obj_table(&link_table);
}
return ret;
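The bootstrap-mode handling added to cmd_select() above works together with the __weak command
declarations introduced in main.h below: when a command's object file is not linked in, the weak
symbol resolves to NULL and the dispatcher can refuse it at run time. A minimal stand-alone sketch
of that idiom (names and the message are illustrative only):

#include <stdio.h>

int do_prog(int argc, char **argv) __attribute__((weak)); /* may be left undefined */

int main(int argc, char **argv)
{
	if (!do_prog) {	/* weak and undefined => NULL function pointer */
		fprintf(stderr, "command 'prog' is not supported in bootstrap mode\n");
		return 1;
	}
	return do_prog(argc - 1, argv + 1);
}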
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 86f14ce26fd7..c46e52137b87 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -18,7 +18,18 @@
#include "json_writer.h"
-#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
+/* Make sure we do not use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64)(unsigned long)ptr;
+}
+
+static inline void *u64_to_ptr(__u64 ptr)
+{
+ return (void *)(unsigned long)ptr;
+}
#define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); })
#define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); })
@@ -50,46 +61,24 @@
"\t {-m|--mapcompat} | {-n|--nomount} }"
#define HELP_SPEC_MAP \
"MAP := { id MAP_ID | pinned FILE | name MAP_NAME }"
+#define HELP_SPEC_LINK \
+ "LINK := { id LINK_ID | pinned FILE }"
-static const char * const prog_type_name[] = {
- [BPF_PROG_TYPE_UNSPEC] = "unspec",
- [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
- [BPF_PROG_TYPE_KPROBE] = "kprobe",
- [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
- [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
- [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
- [BPF_PROG_TYPE_XDP] = "xdp",
- [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
- [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
- [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
- [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
- [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
- [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
- [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
- [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
- [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
- [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
- [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
- [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
- [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
- [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
- [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
- [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
- [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
- [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
- [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
- [BPF_PROG_TYPE_TRACING] = "tracing",
- [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
- [BPF_PROG_TYPE_EXT] = "ext",
-};
+extern const char * const prog_type_name[];
+extern const size_t prog_type_name_size;
+
+extern const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE];
extern const char * const map_type_name[];
extern const size_t map_type_name_size;
+/* keep in sync with the definition in skeleton/pid_iter.bpf.c */
enum bpf_obj_type {
BPF_OBJ_UNKNOWN,
BPF_OBJ_PROG,
BPF_OBJ_MAP,
+ BPF_OBJ_LINK,
+ BPF_OBJ_BTF,
};
extern const char *bin_name;
@@ -97,11 +86,14 @@ extern const char *bin_name;
extern json_writer_t *json_wtr;
extern bool json_output;
extern bool show_pinned;
+extern bool show_pids;
extern bool block_mount;
extern bool verifier_logs;
extern bool relaxed_maps;
extern struct pinned_obj_table prog_table;
extern struct pinned_obj_table map_table;
+extern struct pinned_obj_table link_table;
+extern struct obj_refs_table refs_table;
void __printf(1, 2) p_err(const char *fmt, ...);
void __printf(1, 2) p_info(const char *fmt, ...);
@@ -125,12 +117,35 @@ struct pinned_obj {
struct hlist_node hash;
};
+struct obj_refs_table {
+ DECLARE_HASHTABLE(table, 16);
+};
+
+struct obj_ref {
+ int pid;
+ char comm[16];
+};
+
+struct obj_refs {
+ struct hlist_node node;
+ __u32 id;
+ int ref_cnt;
+ struct obj_ref *refs;
+};
+
struct btf;
struct bpf_line_info;
int build_pinned_obj_table(struct pinned_obj_table *table,
enum bpf_obj_type type);
void delete_pinned_obj_table(struct pinned_obj_table *tab);
+__weak int build_obj_refs_table(struct obj_refs_table *table,
+ enum bpf_obj_type type);
+__weak void delete_obj_refs_table(struct obj_refs_table *table);
+__weak void emit_obj_refs_json(struct obj_refs_table *table, __u32 id,
+ json_writer_t *json_wtr);
+__weak void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id,
+ const char *prefix);
void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
@@ -145,27 +160,34 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv,
int get_fd_type(int fd);
const char *get_fd_type_name(enum bpf_obj_type type);
char *get_fdinfo(int fd, const char *key);
-int open_obj_pinned(char *path, bool quiet);
-int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
+int open_obj_pinned(const char *path, bool quiet);
+int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type);
int mount_bpffs_for_pin(const char *name);
int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
int do_pin_fd(int fd, const char *name);
-int do_prog(int argc, char **arg);
-int do_map(int argc, char **arg);
-int do_event_pipe(int argc, char **argv);
-int do_cgroup(int argc, char **arg);
-int do_perf(int argc, char **arg);
-int do_net(int argc, char **arg);
-int do_tracelog(int argc, char **arg);
-int do_feature(int argc, char **argv);
-int do_btf(int argc, char **argv);
+/* commands available in bootstrap mode */
int do_gen(int argc, char **argv);
-int do_struct_ops(int argc, char **argv);
+int do_btf(int argc, char **argv);
+
+/* non-bootstrap only commands */
+int do_prog(int argc, char **arg) __weak;
+int do_map(int argc, char **arg) __weak;
+int do_link(int argc, char **arg) __weak;
+int do_event_pipe(int argc, char **argv) __weak;
+int do_cgroup(int argc, char **arg) __weak;
+int do_perf(int argc, char **arg) __weak;
+int do_net(int argc, char **arg) __weak;
+int do_tracelog(int argc, char **arg) __weak;
+int do_feature(int argc, char **argv) __weak;
+int do_struct_ops(int argc, char **argv) __weak;
+int do_iter(int argc, char **argv) __weak;
int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
int prog_parse_fd(int *argc, char ***argv);
+int prog_parse_fds(int *argc, char ***argv, int **fds);
int map_parse_fd(int *argc, char ***argv);
+int map_parse_fds(int *argc, char ***argv, int **fds);
int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
struct bpf_prog_linfo;
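The ptr_to_u64()/u64_to_ptr() helpers added above exist because the bpf_*_info structures exchange
user-space pointers with the kernel as __u64 fields. A short usage sketch mirroring how prog.c uses
them (illustrative only; print_map_ids() is a made-up name and assumes bpftool's main.h is in scope):

#include <stdio.h>
#include <bpf/bpf.h>
#include "main.h"	/* ptr_to_u64() */

static void print_map_ids(int prog_fd)
{
	__u32 map_ids[64];
	struct bpf_prog_info info = {};
	__u32 len = sizeof(info), i, n;

	info.nr_map_ids = 64;			/* capacity of the buffer */
	info.map_ids = ptr_to_u64(map_ids);	/* kernel writes the IDs here */
	if (bpf_obj_get_info_by_fd(prog_fd, &info, &len))
		return;

	n = info.nr_map_ids < 64 ? info.nr_map_ids : 64;
	for (i = 0; i < n; i++)
		printf("map_id %u\n", map_ids[i]);
}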
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 693a632f6813..a7efbd84fbcc 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -49,6 +49,8 @@ const char * const map_type_name[] = {
[BPF_MAP_TYPE_STACK] = "stack",
[BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
[BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
+ [BPF_MAP_TYPE_RINGBUF] = "ringbuf",
+ [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
};
const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
@@ -92,162 +94,6 @@ static void *alloc_value(struct bpf_map_info *info)
return malloc(info->value_size);
}
-static int map_fd_by_name(char *name, int **fds)
-{
- unsigned int id = 0;
- int fd, nb_fds = 0;
- void *tmp;
- int err;
-
- while (true) {
- struct bpf_map_info info = {};
- __u32 len = sizeof(info);
-
- err = bpf_map_get_next_id(id, &id);
- if (err) {
- if (errno != ENOENT) {
- p_err("%s", strerror(errno));
- goto err_close_fds;
- }
- return nb_fds;
- }
-
- fd = bpf_map_get_fd_by_id(id);
- if (fd < 0) {
- p_err("can't get map by id (%u): %s",
- id, strerror(errno));
- goto err_close_fds;
- }
-
- err = bpf_obj_get_info_by_fd(fd, &info, &len);
- if (err) {
- p_err("can't get map info (%u): %s",
- id, strerror(errno));
- goto err_close_fd;
- }
-
- if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) {
- close(fd);
- continue;
- }
-
- if (nb_fds > 0) {
- tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
- if (!tmp) {
- p_err("failed to realloc");
- goto err_close_fd;
- }
- *fds = tmp;
- }
- (*fds)[nb_fds++] = fd;
- }
-
-err_close_fd:
- close(fd);
-err_close_fds:
- while (--nb_fds >= 0)
- close((*fds)[nb_fds]);
- return -1;
-}
-
-static int map_parse_fds(int *argc, char ***argv, int **fds)
-{
- if (is_prefix(**argv, "id")) {
- unsigned int id;
- char *endptr;
-
- NEXT_ARGP();
-
- id = strtoul(**argv, &endptr, 0);
- if (*endptr) {
- p_err("can't parse %s as ID", **argv);
- return -1;
- }
- NEXT_ARGP();
-
- (*fds)[0] = bpf_map_get_fd_by_id(id);
- if ((*fds)[0] < 0) {
- p_err("get map by id (%u): %s", id, strerror(errno));
- return -1;
- }
- return 1;
- } else if (is_prefix(**argv, "name")) {
- char *name;
-
- NEXT_ARGP();
-
- name = **argv;
- if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
- p_err("can't parse name");
- return -1;
- }
- NEXT_ARGP();
-
- return map_fd_by_name(name, fds);
- } else if (is_prefix(**argv, "pinned")) {
- char *path;
-
- NEXT_ARGP();
-
- path = **argv;
- NEXT_ARGP();
-
- (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP);
- if ((*fds)[0] < 0)
- return -1;
- return 1;
- }
-
- p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv);
- return -1;
-}
-
-int map_parse_fd(int *argc, char ***argv)
-{
- int *fds = NULL;
- int nb_fds, fd;
-
- fds = malloc(sizeof(int));
- if (!fds) {
- p_err("mem alloc failed");
- return -1;
- }
- nb_fds = map_parse_fds(argc, argv, &fds);
- if (nb_fds != 1) {
- if (nb_fds > 1) {
- p_err("several maps match this handle");
- while (nb_fds--)
- close(fds[nb_fds]);
- }
- fd = -1;
- goto exit_free;
- }
-
- fd = fds[0];
-exit_free:
- free(fds);
- return fd;
-}
-
-int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
-{
- int err;
- int fd;
-
- fd = map_parse_fd(argc, argv);
- if (fd < 0)
- return -1;
-
- err = bpf_obj_get_info_by_fd(fd, info, info_len);
- if (err) {
- p_err("can't get map info: %s", strerror(errno));
- close(fd);
- return err;
- }
-
- return fd;
-}
-
static int do_dump_btf(const struct btf_dumper *d,
struct bpf_map_info *map_info, void *key,
void *value)
@@ -367,8 +213,9 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
jsonw_end_object(json_wtr);
}
-static void print_entry_error(struct bpf_map_info *info, unsigned char *key,
- const char *error_msg)
+static void
+print_entry_error_msg(struct bpf_map_info *info, unsigned char *key,
+ const char *error_msg)
{
int msg_size = strlen(error_msg);
bool single_line, break_names;
@@ -386,6 +233,40 @@ static void print_entry_error(struct bpf_map_info *info, unsigned char *key,
printf("\n");
}
+static void
+print_entry_error(struct bpf_map_info *map_info, void *key, int lookup_errno)
+{
+ /* For prog_array maps or arrays of maps, failure to lookup the value
+ * means there is no entry for that key. Do not print an error message
+ * in that case.
+ */
+ if ((map_is_map_of_maps(map_info->type) ||
+ map_is_map_of_progs(map_info->type)) && lookup_errno == ENOENT)
+ return;
+
+ if (json_output) {
+ jsonw_start_object(json_wtr); /* entry */
+ jsonw_name(json_wtr, "key");
+ print_hex_data_json(key, map_info->key_size);
+ jsonw_name(json_wtr, "value");
+ jsonw_start_object(json_wtr); /* error */
+ jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
+ jsonw_end_object(json_wtr); /* error */
+ jsonw_end_object(json_wtr); /* entry */
+ } else {
+ const char *msg = NULL;
+
+ if (lookup_errno == ENOENT)
+ msg = "<no entry>";
+ else if (lookup_errno == ENOSPC &&
+ map_info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
+ msg = "<cannot read>";
+
+ print_entry_error_msg(map_info, key,
+ msg ? : strerror(lookup_errno));
+ }
+}
+
static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
unsigned char *value)
{
@@ -628,7 +509,7 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
if (owner_prog_type) {
unsigned int prog_type = atoi(owner_prog_type);
- if (prog_type < ARRAY_SIZE(prog_type_name))
+ if (prog_type < prog_type_name_size)
jsonw_string_field(json_wtr, "owner_prog_type",
prog_type_name[prog_type]);
else
@@ -665,6 +546,8 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
jsonw_end_array(json_wtr);
}
+ emit_obj_refs_json(&refs_table, info->id, json_wtr);
+
jsonw_end_object(json_wtr);
return 0;
@@ -711,7 +594,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
if (owner_prog_type) {
unsigned int prog_type = atoi(owner_prog_type);
- if (prog_type < ARRAY_SIZE(prog_type_name))
+ if (prog_type < prog_type_name_size)
printf("owner_prog_type %s ",
prog_type_name[prog_type]);
else
@@ -752,6 +635,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
if (frozen)
printf("%sfrozen", info->btf_id ? " " : "");
+ emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
+
printf("\n");
return 0;
}
@@ -810,6 +695,7 @@ static int do_show(int argc, char **argv)
if (show_pinned)
build_pinned_obj_table(&map_table, BPF_OBJ_MAP);
+ build_obj_refs_table(&refs_table, BPF_OBJ_MAP);
if (argc == 2)
return do_show_subset(argc, argv);
@@ -853,6 +739,8 @@ static int do_show(int argc, char **argv)
if (json_output)
jsonw_end_array(json_wtr);
+ delete_obj_refs_table(&refs_table);
+
return errno == ENOENT ? 0 : -1;
}
@@ -860,56 +748,23 @@ static int dump_map_elem(int fd, void *key, void *value,
struct bpf_map_info *map_info, struct btf *btf,
json_writer_t *btf_wtr)
{
- int num_elems = 0;
- int lookup_errno;
-
- if (!bpf_map_lookup_elem(fd, key, value)) {
- if (json_output) {
- print_entry_json(map_info, key, value, btf);
- } else {
- if (btf) {
- struct btf_dumper d = {
- .btf = btf,
- .jw = btf_wtr,
- .is_plain_text = true,
- };
-
- do_dump_btf(&d, map_info, key, value);
- } else {
- print_entry_plain(map_info, key, value);
- }
- num_elems++;
- }
- return num_elems;
+ if (bpf_map_lookup_elem(fd, key, value)) {
+ print_entry_error(map_info, key, errno);
+ return -1;
}
- /* lookup error handling */
- lookup_errno = errno;
-
- if (map_is_map_of_maps(map_info->type) ||
- map_is_map_of_progs(map_info->type))
- return 0;
-
if (json_output) {
- jsonw_start_object(json_wtr);
- jsonw_name(json_wtr, "key");
- print_hex_data_json(key, map_info->key_size);
- jsonw_name(json_wtr, "value");
- jsonw_start_object(json_wtr);
- jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
- jsonw_end_object(json_wtr);
- jsonw_end_object(json_wtr);
- } else {
- const char *msg = NULL;
-
- if (lookup_errno == ENOENT)
- msg = "<no entry>";
- else if (lookup_errno == ENOSPC &&
- map_info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
- msg = "<cannot read>";
+ print_entry_json(map_info, key, value, btf);
+ } else if (btf) {
+ struct btf_dumper d = {
+ .btf = btf,
+ .jw = btf_wtr,
+ .is_plain_text = true,
+ };
- print_entry_error(map_info, key,
- msg ? : strerror(lookup_errno));
+ do_dump_btf(&d, map_info, key, value);
+ } else {
+ print_entry_plain(map_info, key, value);
}
return 0;
@@ -1020,7 +875,8 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr,
err = 0;
break;
}
- num_elems += dump_map_elem(fd, key, value, info, btf, wtr);
+ if (!dump_map_elem(fd, key, value, info, btf, wtr))
+ num_elems++;
prev_key = key;
}
@@ -1394,7 +1250,7 @@ static int do_create(int argc, char **argv)
{
struct bpf_create_map_attr attr = { NULL, };
const char *pinfile;
- int err, fd;
+ int err = -1, fd;
if (!REQ_ARGS(7))
return -1;
@@ -1409,13 +1265,13 @@ static int do_create(int argc, char **argv)
if (attr.map_type) {
p_err("map type already specified");
- return -1;
+ goto exit;
}
attr.map_type = map_type_from_str(*argv);
if ((int)attr.map_type < 0) {
p_err("unrecognized map type: %s", *argv);
- return -1;
+ goto exit;
}
NEXT_ARG();
} else if (is_prefix(*argv, "name")) {
@@ -1424,43 +1280,56 @@ static int do_create(int argc, char **argv)
} else if (is_prefix(*argv, "key")) {
if (parse_u32_arg(&argc, &argv, &attr.key_size,
"key size"))
- return -1;
+ goto exit;
} else if (is_prefix(*argv, "value")) {
if (parse_u32_arg(&argc, &argv, &attr.value_size,
"value size"))
- return -1;
+ goto exit;
} else if (is_prefix(*argv, "entries")) {
if (parse_u32_arg(&argc, &argv, &attr.max_entries,
"max entries"))
- return -1;
+ goto exit;
} else if (is_prefix(*argv, "flags")) {
if (parse_u32_arg(&argc, &argv, &attr.map_flags,
"flags"))
- return -1;
+ goto exit;
} else if (is_prefix(*argv, "dev")) {
NEXT_ARG();
if (attr.map_ifindex) {
p_err("offload device already specified");
- return -1;
+ goto exit;
}
attr.map_ifindex = if_nametoindex(*argv);
if (!attr.map_ifindex) {
p_err("unrecognized netdevice '%s': %s",
*argv, strerror(errno));
- return -1;
+ goto exit;
}
NEXT_ARG();
+ } else if (is_prefix(*argv, "inner_map")) {
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ int inner_map_fd;
+
+ NEXT_ARG();
+ if (!REQ_ARGS(2))
+ usage();
+ inner_map_fd = map_parse_fd_and_info(&argc, &argv,
+ &info, &len);
+ if (inner_map_fd < 0)
+ return -1;
+ attr.inner_map_fd = inner_map_fd;
} else {
p_err("unknown arg %s", *argv);
- return -1;
+ goto exit;
}
}
if (!attr.name) {
p_err("map name not specified");
- return -1;
+ goto exit;
}
set_max_rlimit();
@@ -1468,17 +1337,22 @@ static int do_create(int argc, char **argv)
fd = bpf_create_map_xattr(&attr);
if (fd < 0) {
p_err("map create failed: %s", strerror(errno));
- return -1;
+ goto exit;
}
err = do_pin_fd(fd, pinfile);
close(fd);
if (err)
- return err;
+ goto exit;
if (json_output)
jsonw_null(json_wtr);
- return 0;
+
+exit:
+ if (attr.inner_map_fd > 0)
+ close(attr.inner_map_fd);
+
+ return err;
}
static int do_pop_dequeue(int argc, char **argv)
@@ -1561,24 +1435,24 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s { show | list } [MAP]\n"
- " %s %s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n"
- " entries MAX_ENTRIES name NAME [flags FLAGS] \\\n"
- " [dev NAME]\n"
- " %s %s dump MAP\n"
- " %s %s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n"
- " %s %s lookup MAP [key DATA]\n"
- " %s %s getnext MAP [key DATA]\n"
- " %s %s delete MAP key DATA\n"
- " %s %s pin MAP FILE\n"
- " %s %s event_pipe MAP [cpu N index M]\n"
- " %s %s peek MAP\n"
- " %s %s push MAP value VALUE\n"
- " %s %s pop MAP\n"
- " %s %s enqueue MAP value VALUE\n"
- " %s %s dequeue MAP\n"
- " %s %s freeze MAP\n"
- " %s %s help\n"
+ "Usage: %1$s %2$s { show | list } [MAP]\n"
+ " %1$s %2$s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n"
+ " entries MAX_ENTRIES name NAME [flags FLAGS] \\\n"
+ " [inner_map MAP] [dev NAME]\n"
+ " %1$s %2$s dump MAP\n"
+ " %1$s %2$s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n"
+ " %1$s %2$s lookup MAP [key DATA]\n"
+ " %1$s %2$s getnext MAP [key DATA]\n"
+ " %1$s %2$s delete MAP key DATA\n"
+ " %1$s %2$s pin MAP FILE\n"
+ " %1$s %2$s event_pipe MAP [cpu N index M]\n"
+ " %1$s %2$s peek MAP\n"
+ " %1$s %2$s push MAP value VALUE\n"
+ " %1$s %2$s pop MAP\n"
+ " %1$s %2$s enqueue MAP value VALUE\n"
+ " %1$s %2$s dequeue MAP\n"
+ " %1$s %2$s freeze MAP\n"
+ " %1$s %2$s help\n"
"\n"
" " HELP_SPEC_MAP "\n"
" DATA := { [hex] BYTES }\n"
@@ -1589,14 +1463,10 @@ static int do_help(int argc, char **argv)
" percpu_array | stack_trace | cgroup_array | lru_hash |\n"
" lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
- " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage }\n"
+ " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
+ " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
bin_name, argv[-2]);
return 0;
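A brief sketch of what the new "inner_map MAP" keyword above boils down to: the fd of an existing
map is passed as inner_map_fd so the kernel knows the value type of the outer map-in-map. The
function name and map parameters below are illustrative, not taken from the patch:

#include <bpf/bpf.h>

static int create_outer_map(int inner_map_fd)
{
	struct bpf_create_map_attr attr = {
		.name         = "outer",
		.map_type     = BPF_MAP_TYPE_HASH_OF_MAPS,
		.key_size     = sizeof(int),
		.value_size   = sizeof(int),	/* each entry holds an inner-map fd/id */
		.max_entries  = 16,
		.inner_map_fd = inner_map_fd,	/* fd of an already created map */
	};

	return bpf_create_map_xattr(&attr);	/* new map fd, or < 0 on error */
}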
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
index d9b29c17fbb8..825f29f93a57 100644
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -39,7 +39,7 @@ struct event_ring_info {
struct perf_event_sample {
struct perf_event_header header;
- u64 time;
+ __u64 time;
__u32 size;
unsigned char data[];
};
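The small change above (u64 -> __u64) is forced by the "#pragma GCC poison" added to main.h earlier
in this patch: once poisoned, any later use of the kernel-only typedef names is a hard compile error.
A minimal stand-alone illustration (not from the patch):

#include <linux/types.h>

#pragma GCC poison u64 u32

__u64 sample_time;	/* uapi-style types remain usable */

/* u64 sample_time2;	   would now fail: "attempt to use poisoned \"u64\"" */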
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index c5e3895b7c8b..910e7bac6e9e 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -6,22 +6,27 @@
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
+#include <time.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <net/if.h>
#include <linux/if.h>
#include <linux/rtnetlink.h>
+#include <linux/socket.h>
#include <linux/tc_act/tc_bpf.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "bpf/nlattr.h"
-#include "bpf/libbpf_internal.h"
#include "main.h"
#include "netlink_dumper.h"
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
struct ip_devname_ifindex {
char devname[64];
int ifindex;
@@ -85,6 +90,266 @@ static enum net_attach_type parse_attach_type(const char *str)
return net_attach_type_size;
}
+typedef int (*dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+
+typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, dump_nlmsg_t, void *cookie);
+
+static int netlink_open(__u32 *nl_pid)
+{
+ struct sockaddr_nl sa;
+ socklen_t addrlen;
+ int one = 1, ret;
+ int sock;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.nl_family = AF_NETLINK;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0)
+ return -errno;
+
+ if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one)) < 0) {
+ p_err("Netlink error reporting not supported");
+ }
+
+ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ addrlen = sizeof(sa);
+ if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ if (addrlen != sizeof(sa)) {
+ ret = -LIBBPF_ERRNO__INTERNAL;
+ goto cleanup;
+ }
+
+ *nl_pid = sa.nl_pid;
+ return sock;
+
+cleanup:
+ close(sock);
+ return ret;
+}
+
+static int netlink_recv(int sock, __u32 nl_pid, __u32 seq,
+ __dump_nlmsg_t _fn, dump_nlmsg_t fn,
+ void *cookie)
+{
+ bool multipart = true;
+ struct nlmsgerr *err;
+ struct nlmsghdr *nh;
+ char buf[4096];
+ int len, ret;
+
+ while (multipart) {
+ multipart = false;
+ len = recv(sock, buf, sizeof(buf), 0);
+ if (len < 0) {
+ ret = -errno;
+ goto done;
+ }
+
+ if (len == 0)
+ break;
+
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_pid != nl_pid) {
+ ret = -LIBBPF_ERRNO__WRNGPID;
+ goto done;
+ }
+ if (nh->nlmsg_seq != seq) {
+ ret = -LIBBPF_ERRNO__INVSEQ;
+ goto done;
+ }
+ if (nh->nlmsg_flags & NLM_F_MULTI)
+ multipart = true;
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ if (!err->error)
+ continue;
+ ret = err->error;
+ libbpf_nla_dump_errormsg(nh);
+ goto done;
+ case NLMSG_DONE:
+ return 0;
+ default:
+ break;
+ }
+ if (_fn) {
+ ret = _fn(nh, fn, cookie);
+ if (ret)
+ return ret;
+ }
+ }
+ }
+ ret = 0;
+done:
+ return ret;
+}
+
+static int __dump_class_nlmsg(struct nlmsghdr *nlh,
+ dump_nlmsg_t dump_class_nlmsg,
+ void *cookie)
+{
+ struct nlattr *tb[TCA_MAX + 1], *attr;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+ attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+ if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_class_nlmsg(cookie, t, tb);
+}
+
+static int netlink_get_class(int sock, unsigned int nl_pid, int ifindex,
+ dump_nlmsg_t dump_class_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct tcmsg t;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+ .nlh.nlmsg_type = RTM_GETTCLASS,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .t.tcm_family = AF_UNSPEC,
+ .t.tcm_ifindex = ifindex,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg,
+ dump_class_nlmsg, cookie);
+}
+
+static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh,
+ dump_nlmsg_t dump_qdisc_nlmsg,
+ void *cookie)
+{
+ struct nlattr *tb[TCA_MAX + 1], *attr;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+ attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+ if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_qdisc_nlmsg(cookie, t, tb);
+}
+
+static int netlink_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
+ dump_nlmsg_t dump_qdisc_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct tcmsg t;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+ .nlh.nlmsg_type = RTM_GETQDISC,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .t.tcm_family = AF_UNSPEC,
+ .t.tcm_ifindex = ifindex,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg,
+ dump_qdisc_nlmsg, cookie);
+}
+
+static int __dump_filter_nlmsg(struct nlmsghdr *nlh,
+ dump_nlmsg_t dump_filter_nlmsg,
+ void *cookie)
+{
+ struct nlattr *tb[TCA_MAX + 1], *attr;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+ attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+ if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_filter_nlmsg(cookie, t, tb);
+}
+
+static int netlink_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
+ dump_nlmsg_t dump_filter_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct tcmsg t;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+ .nlh.nlmsg_type = RTM_GETTFILTER,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .t.tcm_family = AF_UNSPEC,
+ .t.tcm_ifindex = ifindex,
+ .t.tcm_parent = handle,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg,
+ dump_filter_nlmsg, cookie);
+}
+
+static int __dump_link_nlmsg(struct nlmsghdr *nlh,
+ dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+ struct nlattr *tb[IFLA_MAX + 1], *attr;
+ struct ifinfomsg *ifi = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
+ attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi)));
+ if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_link_nlmsg(cookie, ifi, tb);
+}
+
+static int netlink_get_link(int sock, unsigned int nl_pid,
+ dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct ifinfomsg ifm;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+ .nlh.nlmsg_type = RTM_GETLINK,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .ifm.ifi_family = AF_PACKET,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
+ dump_link_nlmsg, cookie);
+}
+
static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb)
{
struct bpf_netdev_t *netinfo = cookie;
@@ -168,14 +433,14 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid,
tcinfo.array_len = 0;
tcinfo.is_qdisc = false;
- ret = libbpf_nl_get_class(sock, nl_pid, dev->ifindex,
- dump_class_qdisc_nlmsg, &tcinfo);
+ ret = netlink_get_class(sock, nl_pid, dev->ifindex,
+ dump_class_qdisc_nlmsg, &tcinfo);
if (ret)
goto out;
tcinfo.is_qdisc = true;
- ret = libbpf_nl_get_qdisc(sock, nl_pid, dev->ifindex,
- dump_class_qdisc_nlmsg, &tcinfo);
+ ret = netlink_get_qdisc(sock, nl_pid, dev->ifindex,
+ dump_class_qdisc_nlmsg, &tcinfo);
if (ret)
goto out;
@@ -183,9 +448,9 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid,
filter_info.ifindex = dev->ifindex;
for (i = 0; i < tcinfo.used_len; i++) {
filter_info.kind = tcinfo.handle_array[i].kind;
- ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex,
- tcinfo.handle_array[i].handle,
- dump_filter_nlmsg, &filter_info);
+ ret = netlink_get_filter(sock, nl_pid, dev->ifindex,
+ tcinfo.handle_array[i].handle,
+ dump_filter_nlmsg, &filter_info);
if (ret)
goto out;
}
@@ -193,22 +458,22 @@ static int show_dev_tc_bpf(int sock, unsigned int nl_pid,
/* root, ingress and egress handle */
handle = TC_H_ROOT;
filter_info.kind = "root";
- ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle,
- dump_filter_nlmsg, &filter_info);
+ ret = netlink_get_filter(sock, nl_pid, dev->ifindex, handle,
+ dump_filter_nlmsg, &filter_info);
if (ret)
goto out;
handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
filter_info.kind = "clsact/ingress";
- ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle,
- dump_filter_nlmsg, &filter_info);
+ ret = netlink_get_filter(sock, nl_pid, dev->ifindex, handle,
+ dump_filter_nlmsg, &filter_info);
if (ret)
goto out;
handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS);
filter_info.kind = "clsact/egress";
- ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle,
- dump_filter_nlmsg, &filter_info);
+ ret = netlink_get_filter(sock, nl_pid, dev->ifindex, handle,
+ dump_filter_nlmsg, &filter_info);
if (ret)
goto out;
@@ -386,7 +651,7 @@ static int do_show(int argc, char **argv)
struct bpf_attach_info attach_info = {};
int i, sock, ret, filter_idx = -1;
struct bpf_netdev_t dev_array;
- unsigned int nl_pid;
+ unsigned int nl_pid = 0;
char err_buf[256];
if (argc == 2) {
@@ -401,7 +666,7 @@ static int do_show(int argc, char **argv)
if (ret)
return -1;
- sock = libbpf_netlink_open(&nl_pid);
+ sock = netlink_open(&nl_pid);
if (sock < 0) {
fprintf(stderr, "failed to open netlink sock\n");
return -1;
@@ -416,7 +681,7 @@ static int do_show(int argc, char **argv)
jsonw_start_array(json_wtr);
NET_START_OBJECT;
NET_START_ARRAY("xdp", "%s:\n");
- ret = libbpf_nl_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array);
+ ret = netlink_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array);
NET_END_ARRAY("\n");
if (!ret) {
@@ -458,10 +723,10 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s { show | list } [dev <devname>]\n"
- " %s %s attach ATTACH_TYPE PROG dev <devname> [ overwrite ]\n"
- " %s %s detach ATTACH_TYPE dev <devname>\n"
- " %s %s help\n"
+ "Usage: %1$s %2$s { show | list } [dev <devname>]\n"
+ " %1$s %2$s attach ATTACH_TYPE PROG dev <devname> [ overwrite ]\n"
+ " %1$s %2$s detach ATTACH_TYPE dev <devname>\n"
+ " %1$s %2$s help\n"
"\n"
" " HELP_SPEC_PROGRAM "\n"
" ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n"
@@ -470,8 +735,8 @@ static int do_help(int argc, char **argv)
" For progs attached to cgroups, use \"bpftool cgroup\"\n"
" to dump program attachments. For program types\n"
" sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n"
- " consult iproute2.\n",
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+ " consult iproute2.\n"
+ "",
bin_name, argv[-2]);
return 0;
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
index 3341aa14acda..ad23934819c7 100644
--- a/tools/bpf/bpftool/perf.c
+++ b/tools/bpf/bpftool/perf.c
@@ -231,7 +231,7 @@ static int do_show(int argc, char **argv)
static int do_help(int argc, char **argv)
{
fprintf(stderr,
- "Usage: %s %s { show | list | help }\n"
+ "Usage: %1$s %2$s { show | list | help }\n"
"",
bin_name, argv[-2]);
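The usage strings in this and the other files of the patch move to positional conversions, so a
single bin_name/argv[-2] pair can be reused by every line of the format string. %n$ specifiers are
a POSIX printf feature; a small stand-alone illustration:

#include <stdio.h>

int main(void)
{
	/* both lines reuse argument 1 ("bpftool") and argument 2 ("perf") */
	printf("Usage: %1$s %2$s { show | list }\n"
	       "       %1$s %2$s help\n",
	       "bpftool", "perf");
	return 0;
}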
diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
new file mode 100644
index 000000000000..df7d8ec76036
--- /dev/null
+++ b/tools/bpf/bpftool/pids.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2020 Facebook */
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+
+#include "main.h"
+#include "skeleton/pid_iter.h"
+
+#ifdef BPFTOOL_WITHOUT_SKELETONS
+
+int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
+{
+ return -ENOTSUP;
+}
+void delete_obj_refs_table(struct obj_refs_table *table) {}
+void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) {}
+void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_writer) {}
+
+#else /* BPFTOOL_WITHOUT_SKELETONS */
+
+#include "pid_iter.skel.h"
+
+static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e)
+{
+ struct obj_refs *refs;
+ struct obj_ref *ref;
+ void *tmp;
+ int i;
+
+ hash_for_each_possible(table->table, refs, node, e->id) {
+ if (refs->id != e->id)
+ continue;
+
+ for (i = 0; i < refs->ref_cnt; i++) {
+ if (refs->refs[i].pid == e->pid)
+ return;
+ }
+
+ tmp = realloc(refs->refs, (refs->ref_cnt + 1) * sizeof(*ref));
+ if (!tmp) {
+ p_err("failed to re-alloc memory for ID %u, PID %d, COMM %s...",
+ e->id, e->pid, e->comm);
+ return;
+ }
+ refs->refs = tmp;
+ ref = &refs->refs[refs->ref_cnt];
+ ref->pid = e->pid;
+ memcpy(ref->comm, e->comm, sizeof(ref->comm));
+ refs->ref_cnt++;
+
+ return;
+ }
+
+ /* new ref */
+ refs = calloc(1, sizeof(*refs));
+ if (!refs) {
+ p_err("failed to alloc memory for ID %u, PID %d, COMM %s...",
+ e->id, e->pid, e->comm);
+ return;
+ }
+
+ refs->id = e->id;
+ refs->refs = malloc(sizeof(*refs->refs));
+ if (!refs->refs) {
+ free(refs);
+ p_err("failed to alloc memory for ID %u, PID %d, COMM %s...",
+ e->id, e->pid, e->comm);
+ return;
+ }
+ ref = &refs->refs[0];
+ ref->pid = e->pid;
+ memcpy(ref->comm, e->comm, sizeof(ref->comm));
+ refs->ref_cnt = 1;
+ hash_add(table->table, &refs->node, e->id);
+}
+
+static int __printf(2, 0)
+libbpf_print_none(__maybe_unused enum libbpf_print_level level,
+ __maybe_unused const char *format,
+ __maybe_unused va_list args)
+{
+ return 0;
+}
+
+int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
+{
+ char buf[4096];
+ struct pid_iter_bpf *skel;
+ struct pid_iter_entry *e;
+ int err, ret, fd = -1, i;
+ libbpf_print_fn_t default_print;
+
+ hash_init(table->table);
+ set_max_rlimit();
+
+ skel = pid_iter_bpf__open();
+ if (!skel) {
+ p_err("failed to open PID iterator skeleton");
+ return -1;
+ }
+
+ skel->rodata->obj_type = type;
+
+ /* we don't want output polluted with libbpf errors if bpf_iter is not
+ * supported
+ */
+ default_print = libbpf_set_print(libbpf_print_none);
+ err = pid_iter_bpf__load(skel);
+ libbpf_set_print(default_print);
+ if (err) {
+ /* too bad, kernel doesn't support BPF iterators yet */
+ err = 0;
+ goto out;
+ }
+ err = pid_iter_bpf__attach(skel);
+ if (err) {
+ /* if we loaded above successfully, attach has to succeed */
+ p_err("failed to attach PID iterator: %d", err);
+ goto out;
+ }
+
+ fd = bpf_iter_create(bpf_link__fd(skel->links.iter));
+ if (fd < 0) {
+ err = -errno;
+ p_err("failed to create PID iterator session: %d", err);
+ goto out;
+ }
+
+ while (true) {
+ ret = read(fd, buf, sizeof(buf));
+ if (ret < 0) {
+ if (errno == EAGAIN)
+ continue;
+ err = -errno;
+ p_err("failed to read PID iterator output: %d", err);
+ goto out;
+ }
+ if (ret == 0)
+ break;
+ if (ret % sizeof(*e)) {
+ err = -EINVAL;
+ p_err("invalid PID iterator output format");
+ goto out;
+ }
+ ret /= sizeof(*e);
+
+ e = (void *)buf;
+ for (i = 0; i < ret; i++, e++) {
+ add_ref(table, e);
+ }
+ }
+ err = 0;
+out:
+ if (fd >= 0)
+ close(fd);
+ pid_iter_bpf__destroy(skel);
+ return err;
+}
+
+void delete_obj_refs_table(struct obj_refs_table *table)
+{
+ struct obj_refs *refs;
+ struct hlist_node *tmp;
+ unsigned int bkt;
+
+ hash_for_each_safe(table->table, bkt, tmp, refs, node) {
+ hash_del(&refs->node);
+ free(refs->refs);
+ free(refs);
+ }
+}
+
+void emit_obj_refs_json(struct obj_refs_table *table, __u32 id,
+ json_writer_t *json_writer)
+{
+ struct obj_refs *refs;
+ struct obj_ref *ref;
+ int i;
+
+ if (hash_empty(table->table))
+ return;
+
+ hash_for_each_possible(table->table, refs, node, id) {
+ if (refs->id != id)
+ continue;
+ if (refs->ref_cnt == 0)
+ break;
+
+ jsonw_name(json_writer, "pids");
+ jsonw_start_array(json_writer);
+ for (i = 0; i < refs->ref_cnt; i++) {
+ ref = &refs->refs[i];
+ jsonw_start_object(json_writer);
+ jsonw_int_field(json_writer, "pid", ref->pid);
+ jsonw_string_field(json_writer, "comm", ref->comm);
+ jsonw_end_object(json_writer);
+ }
+ jsonw_end_array(json_writer);
+ break;
+ }
+}
+
+void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix)
+{
+ struct obj_refs *refs;
+ struct obj_ref *ref;
+ int i;
+
+ if (hash_empty(table->table))
+ return;
+
+ hash_for_each_possible(table->table, refs, node, id) {
+ if (refs->id != id)
+ continue;
+ if (refs->ref_cnt == 0)
+ break;
+
+ printf("%s", prefix);
+ for (i = 0; i < refs->ref_cnt; i++) {
+ ref = &refs->refs[i];
+ printf("%s%s(%d)", i == 0 ? "" : ", ", ref->comm, ref->pid);
+ }
+ break;
+ }
+}
+
+
+#endif
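add_ref() and the emit helpers above share one idiom of the tools/include hashtable:
hash_for_each_possible() only walks the bucket the key hashes to, so entries that merely collide
must still be skipped by re-checking the stored id. A condensed sketch of that lookup (find_refs()
is an illustrative name, assuming the obj_refs layout declared in main.h):

static struct obj_refs *find_refs(struct obj_refs_table *table, __u32 id)
{
	struct obj_refs *refs;

	hash_for_each_possible(table->table, refs, node, id) {
		if (refs->id == id)	/* the bucket may hold other IDs too */
			return refs;
	}
	return NULL;
}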
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f6a5974a7b0a..d942c1e3372c 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -29,6 +29,45 @@
#include "main.h"
#include "xlated_dumper.h"
+#define BPF_METADATA_PREFIX "bpf_metadata_"
+#define BPF_METADATA_PREFIX_LEN (sizeof(BPF_METADATA_PREFIX) - 1)
+
+const char * const prog_type_name[] = {
+ [BPF_PROG_TYPE_UNSPEC] = "unspec",
+ [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
+ [BPF_PROG_TYPE_KPROBE] = "kprobe",
+ [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
+ [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
+ [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
+ [BPF_PROG_TYPE_XDP] = "xdp",
+ [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
+ [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
+ [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
+ [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
+ [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
+ [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
+ [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
+ [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
+ [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
+ [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
+ [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
+ [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
+ [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
+ [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
+ [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
+ [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
+ [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
+ [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
+ [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
+ [BPF_PROG_TYPE_TRACING] = "tracing",
+ [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
+ [BPF_PROG_TYPE_EXT] = "ext",
+ [BPF_PROG_TYPE_LSM] = "lsm",
+ [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
+};
+
+const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name);
+
enum dump_mode {
DUMP_JITED,
DUMP_XLATED,
@@ -86,185 +125,225 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
strftime(buf, size, "%FT%T%z", &load_tm);
}
-static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
+static void show_prog_maps(int fd, __u32 num_maps)
{
- unsigned int id = 0;
- int fd, nb_fds = 0;
- void *tmp;
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+ __u32 map_ids[num_maps];
+ unsigned int i;
int err;
- while (true) {
- struct bpf_prog_info info = {};
- __u32 len = sizeof(info);
+ info.nr_map_ids = num_maps;
+ info.map_ids = ptr_to_u64(map_ids);
- err = bpf_prog_get_next_id(id, &id);
- if (err) {
- if (errno != ENOENT) {
- p_err("%s", strerror(errno));
- goto err_close_fds;
- }
- return nb_fds;
- }
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err || !info.nr_map_ids)
+ return;
- fd = bpf_prog_get_fd_by_id(id);
- if (fd < 0) {
- p_err("can't get prog by id (%u): %s",
- id, strerror(errno));
- goto err_close_fds;
- }
+ if (json_output) {
+ jsonw_name(json_wtr, "map_ids");
+ jsonw_start_array(json_wtr);
+ for (i = 0; i < info.nr_map_ids; i++)
+ jsonw_uint(json_wtr, map_ids[i]);
+ jsonw_end_array(json_wtr);
+ } else {
+ printf(" map_ids ");
+ for (i = 0; i < info.nr_map_ids; i++)
+ printf("%u%s", map_ids[i],
+ i == info.nr_map_ids - 1 ? "" : ",");
+ }
+}
- err = bpf_obj_get_info_by_fd(fd, &info, &len);
- if (err) {
- p_err("can't get prog info (%u): %s",
- id, strerror(errno));
- goto err_close_fd;
+static void *find_metadata(int prog_fd, struct bpf_map_info *map_info)
+{
+ struct bpf_prog_info prog_info;
+ __u32 prog_info_len;
+ __u32 map_info_len;
+ void *value = NULL;
+ __u32 *map_ids;
+ int nr_maps;
+ int key = 0;
+ int map_fd;
+ int ret;
+ __u32 i;
+
+ memset(&prog_info, 0, sizeof(prog_info));
+ prog_info_len = sizeof(prog_info);
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ if (ret)
+ return NULL;
+
+ if (!prog_info.nr_map_ids)
+ return NULL;
+
+ map_ids = calloc(prog_info.nr_map_ids, sizeof(__u32));
+ if (!map_ids)
+ return NULL;
+
+ nr_maps = prog_info.nr_map_ids;
+ memset(&prog_info, 0, sizeof(prog_info));
+ prog_info.nr_map_ids = nr_maps;
+ prog_info.map_ids = ptr_to_u64(map_ids);
+ prog_info_len = sizeof(prog_info);
+
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ if (ret)
+ goto free_map_ids;
+
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ map_fd = bpf_map_get_fd_by_id(map_ids[i]);
+ if (map_fd < 0)
+ goto free_map_ids;
+
+ memset(map_info, 0, sizeof(*map_info));
+ map_info_len = sizeof(*map_info);
+ ret = bpf_obj_get_info_by_fd(map_fd, map_info, &map_info_len);
+ if (ret < 0) {
+ close(map_fd);
+ goto free_map_ids;
}
- if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
- (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
- close(fd);
+ if (map_info->type != BPF_MAP_TYPE_ARRAY ||
+ map_info->key_size != sizeof(int) ||
+ map_info->max_entries != 1 ||
+ !map_info->btf_value_type_id ||
+ !strstr(map_info->name, ".rodata")) {
+ close(map_fd);
continue;
}
- if (nb_fds > 0) {
- tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
- if (!tmp) {
- p_err("failed to realloc");
- goto err_close_fd;
- }
- *fds = tmp;
+ value = malloc(map_info->value_size);
+ if (!value) {
+ close(map_fd);
+ goto free_map_ids;
}
- (*fds)[nb_fds++] = fd;
+
+ if (bpf_map_lookup_elem(map_fd, &key, value)) {
+ close(map_fd);
+ free(value);
+ value = NULL;
+ goto free_map_ids;
+ }
+
+ close(map_fd);
+ break;
}
-err_close_fd:
- close(fd);
-err_close_fds:
- while (--nb_fds >= 0)
- close((*fds)[nb_fds]);
- return -1;
+free_map_ids:
+ free(map_ids);
+ return value;
}
-static int prog_parse_fds(int *argc, char ***argv, int **fds)
+static bool has_metadata_prefix(const char *s)
{
- if (is_prefix(**argv, "id")) {
- unsigned int id;
- char *endptr;
-
- NEXT_ARGP();
-
- id = strtoul(**argv, &endptr, 0);
- if (*endptr) {
- p_err("can't parse %s as ID", **argv);
- return -1;
- }
- NEXT_ARGP();
-
- (*fds)[0] = bpf_prog_get_fd_by_id(id);
- if ((*fds)[0] < 0) {
- p_err("get by id (%u): %s", id, strerror(errno));
- return -1;
- }
- return 1;
- } else if (is_prefix(**argv, "tag")) {
- unsigned char tag[BPF_TAG_SIZE];
+ return strncmp(s, BPF_METADATA_PREFIX, BPF_METADATA_PREFIX_LEN) == 0;
+}
- NEXT_ARGP();
+static void show_prog_metadata(int fd, __u32 num_maps)
+{
+ const struct btf_type *t_datasec, *t_var;
+ struct bpf_map_info map_info;
+ struct btf_var_secinfo *vsi;
+ bool printed_header = false;
+ struct btf *btf = NULL;
+ unsigned int i, vlen;
+ void *value = NULL;
+ const char *name;
+ int err;
- if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2,
- tag + 3, tag + 4, tag + 5, tag + 6, tag + 7)
- != BPF_TAG_SIZE) {
- p_err("can't parse tag");
- return -1;
- }
- NEXT_ARGP();
+ if (!num_maps)
+ return;
- return prog_fd_by_nametag(tag, fds, true);
- } else if (is_prefix(**argv, "name")) {
- char *name;
+ memset(&map_info, 0, sizeof(map_info));
+ value = find_metadata(fd, &map_info);
+ if (!value)
+ return;
- NEXT_ARGP();
+ err = btf__get_from_id(map_info.btf_id, &btf);
+ if (err || !btf)
+ goto out_free;
- name = **argv;
- if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
- p_err("can't parse name");
- return -1;
- }
- NEXT_ARGP();
+ t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id);
+ if (!btf_is_datasec(t_datasec))
+ goto out_free;
- return prog_fd_by_nametag(name, fds, false);
- } else if (is_prefix(**argv, "pinned")) {
- char *path;
+ vlen = btf_vlen(t_datasec);
+ vsi = btf_var_secinfos(t_datasec);
- NEXT_ARGP();
+ /* We don't proceed to check the kinds of the elements of the DATASEC.
+ * The verifier enforces them to be BTF_KIND_VAR.
+ */
- path = **argv;
- NEXT_ARGP();
+ if (json_output) {
+ struct btf_dumper d = {
+ .btf = btf,
+ .jw = json_wtr,
+ .is_plain_text = false,
+ };
- (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG);
- if ((*fds)[0] < 0)
- return -1;
- return 1;
- }
+ for (i = 0; i < vlen; i++, vsi++) {
+ t_var = btf__type_by_id(btf, vsi->type);
+ name = btf__name_by_offset(btf, t_var->name_off);
- p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv);
- return -1;
-}
+ if (!has_metadata_prefix(name))
+ continue;
-int prog_parse_fd(int *argc, char ***argv)
-{
- int *fds = NULL;
- int nb_fds, fd;
+ if (!printed_header) {
+ jsonw_name(json_wtr, "metadata");
+ jsonw_start_object(json_wtr);
+ printed_header = true;
+ }
- fds = malloc(sizeof(int));
- if (!fds) {
- p_err("mem alloc failed");
- return -1;
- }
- nb_fds = prog_parse_fds(argc, argv, &fds);
- if (nb_fds != 1) {
- if (nb_fds > 1) {
- p_err("several programs match this handle");
- while (nb_fds--)
- close(fds[nb_fds]);
+ jsonw_name(json_wtr, name + BPF_METADATA_PREFIX_LEN);
+ err = btf_dumper_type(&d, t_var->type, value + vsi->offset);
+ if (err) {
+ p_err("btf dump failed: %d", err);
+ break;
+ }
+ }
+ if (printed_header)
+ jsonw_end_object(json_wtr);
+ } else {
+ json_writer_t *btf_wtr = jsonw_new(stdout);
+ struct btf_dumper d = {
+ .btf = btf,
+ .jw = btf_wtr,
+ .is_plain_text = true,
+ };
+
+ if (!btf_wtr) {
+ p_err("jsonw alloc failed");
+ goto out_free;
}
- fd = -1;
- goto exit_free;
- }
- fd = fds[0];
-exit_free:
- free(fds);
- return fd;
-}
+ for (i = 0; i < vlen; i++, vsi++) {
+ t_var = btf__type_by_id(btf, vsi->type);
+ name = btf__name_by_offset(btf, t_var->name_off);
-static void show_prog_maps(int fd, u32 num_maps)
-{
- struct bpf_prog_info info = {};
- __u32 len = sizeof(info);
- __u32 map_ids[num_maps];
- unsigned int i;
- int err;
+ if (!has_metadata_prefix(name))
+ continue;
- info.nr_map_ids = num_maps;
- info.map_ids = ptr_to_u64(map_ids);
+ if (!printed_header) {
+ printf("\tmetadata:");
+ printed_header = true;
+ }
- err = bpf_obj_get_info_by_fd(fd, &info, &len);
- if (err || !info.nr_map_ids)
- return;
+ printf("\n\t\t%s = ", name + BPF_METADATA_PREFIX_LEN);
- if (json_output) {
- jsonw_name(json_wtr, "map_ids");
- jsonw_start_array(json_wtr);
- for (i = 0; i < info.nr_map_ids; i++)
- jsonw_uint(json_wtr, map_ids[i]);
- jsonw_end_array(json_wtr);
- } else {
- printf(" map_ids ");
- for (i = 0; i < info.nr_map_ids; i++)
- printf("%u%s", map_ids[i],
- i == info.nr_map_ids - 1 ? "" : ",");
+ jsonw_reset(btf_wtr);
+ err = btf_dumper_type(&d, t_var->type, value + vsi->offset);
+ if (err) {
+ p_err("btf dump failed: %d", err);
+ break;
+ }
+ }
+ if (printed_header)
+ jsonw_destroy(&btf_wtr);
}
+
+out_free:
+ btf__free(btf);
+ free(value);
}
static void print_prog_header_json(struct bpf_prog_info *info)
@@ -342,6 +421,10 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
jsonw_end_array(json_wtr);
}
+ emit_obj_refs_json(&refs_table, info->id, json_wtr);
+
+ show_prog_metadata(fd, info->nr_map_ids);
+
jsonw_end_object(json_wtr);
}
@@ -408,7 +491,11 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
if (info->btf_id)
printf("\n\tbtf_id %d", info->btf_id);
+ emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
+
printf("\n");
+
+ show_prog_metadata(fd, info->nr_map_ids);
}
static int show_prog(int fd)
@@ -473,6 +560,7 @@ static int do_show(int argc, char **argv)
if (show_pinned)
build_pinned_obj_table(&prog_table, BPF_OBJ_PROG);
+ build_obj_refs_table(&refs_table, BPF_OBJ_PROG);
if (argc == 2)
return do_show_subset(argc, argv);
@@ -514,6 +602,8 @@ static int do_show(int argc, char **argv)
if (json_output)
jsonw_end_array(json_wtr);
+ delete_obj_refs_table(&refs_table);
+
return err;
}
@@ -537,14 +627,14 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
p_info("no instructions returned");
return -1;
}
- buf = (unsigned char *)(info->jited_prog_insns);
+ buf = u64_to_ptr(info->jited_prog_insns);
member_len = info->jited_prog_len;
} else { /* DUMP_XLATED */
if (info->xlated_prog_len == 0 || !info->xlated_prog_insns) {
p_err("error retrieving insn dump: kernel.kptr_restrict set?");
return -1;
}
- buf = (unsigned char *)info->xlated_prog_insns;
+ buf = u64_to_ptr(info->xlated_prog_insns);
member_len = info->xlated_prog_len;
}
@@ -553,7 +643,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
return -1;
}
- func_info = (void *)info->func_info;
+ func_info = u64_to_ptr(info->func_info);
if (info->nr_line_info) {
prog_linfo = bpf_prog_linfo__new(info);
@@ -571,7 +661,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
n = write(fd, buf, member_len);
close(fd);
- if (n != member_len) {
+ if (n != (ssize_t)member_len) {
p_err("error writing output file: %s",
n < 0 ? strerror(errno) : "short write");
return -1;
@@ -601,13 +691,13 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
__u32 i;
if (info->nr_jited_ksyms) {
kernel_syms_load(&dd);
- ksyms = (__u64 *) info->jited_ksyms;
+ ksyms = u64_to_ptr(info->jited_ksyms);
}
if (json_output)
jsonw_start_array(json_wtr);
- lens = (__u32 *) info->jited_func_lens;
+ lens = u64_to_ptr(info->jited_func_lens);
for (i = 0; i < info->nr_jited_func_lens; i++) {
if (ksyms) {
sym = kernel_syms_search(&dd, ksyms[i]);
@@ -668,7 +758,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
} else {
kernel_syms_load(&dd);
dd.nr_jited_ksyms = info->nr_jited_ksyms;
- dd.jited_ksyms = (__u64 *) info->jited_ksyms;
+ dd.jited_ksyms = u64_to_ptr(info->jited_ksyms);
dd.btf = btf;
dd.func_info = func_info;
dd.finfo_rec_size = info->func_info_rec_size;
@@ -1413,7 +1503,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
enum bpf_prog_type prog_type = common_prog_type;
if (prog_type == BPF_PROG_TYPE_UNSPEC) {
- const char *sec_name = bpf_program__title(pos, false);
+ const char *sec_name = bpf_program__section_name(pos);
err = get_prog_type_by_name(sec_name, &prog_type,
&expected_attach_type);
@@ -1507,7 +1597,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
err = bpf_obj_pin(bpf_program__fd(prog), pinfile);
if (err) {
p_err("failed to pin program %s",
- bpf_program__title(prog, false));
+ bpf_program__section_name(prog));
goto err_close_obj;
}
} else {
@@ -1790,7 +1880,7 @@ static char *profile_target_name(int tgt_fd)
goto out;
}
- func_info = (struct bpf_func_info *)(info_linear->info.func_info);
+ func_info = u64_to_ptr(info_linear->info.func_info);
t = btf__type_by_id(btf, func_info[0].type_id);
if (!t) {
p_err("btf %d doesn't have type %d",
@@ -1984,24 +2074,24 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s { show | list } [PROG]\n"
- " %s %s dump xlated PROG [{ file FILE | opcodes | visual | linum }]\n"
- " %s %s dump jited PROG [{ file FILE | opcodes | linum }]\n"
- " %s %s pin PROG FILE\n"
- " %s %s { load | loadall } OBJ PATH \\\n"
+ "Usage: %1$s %2$s { show | list } [PROG]\n"
+ " %1$s %2$s dump xlated PROG [{ file FILE | opcodes | visual | linum }]\n"
+ " %1$s %2$s dump jited PROG [{ file FILE | opcodes | linum }]\n"
+ " %1$s %2$s pin PROG FILE\n"
+ " %1$s %2$s { load | loadall } OBJ PATH \\\n"
" [type TYPE] [dev NAME] \\\n"
" [map { idx IDX | name NAME } MAP]\\\n"
" [pinmaps MAP_DIR]\n"
- " %s %s attach PROG ATTACH_TYPE [MAP]\n"
- " %s %s detach PROG ATTACH_TYPE [MAP]\n"
- " %s %s run PROG \\\n"
+ " %1$s %2$s attach PROG ATTACH_TYPE [MAP]\n"
+ " %1$s %2$s detach PROG ATTACH_TYPE [MAP]\n"
+ " %1$s %2$s run PROG \\\n"
" data_in FILE \\\n"
" [data_out FILE [data_size_out L]] \\\n"
" [ctx_in FILE [ctx_out FILE [ctx_size_out M]]] \\\n"
" [repeat N]\n"
- " %s %s profile PROG [duration DURATION] METRICs\n"
- " %s %s tracelog\n"
- " %s %s help\n"
+ " %1$s %2$s profile PROG [duration DURATION] METRICs\n"
+ " %1$s %2$s tracelog\n"
+ " %1$s %2$s help\n"
"\n"
" " HELP_SPEC_MAP "\n"
" " HELP_SPEC_PROGRAM "\n"
@@ -2012,18 +2102,17 @@ static int do_help(int argc, char **argv)
" sk_reuseport | flow_dissector | cgroup/sysctl |\n"
" cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
" cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
- " cgroup/sendmsg4 | cgroup/sendmsg6 | cgroup/recvmsg4 |\n"
- " cgroup/recvmsg6 | cgroup/getsockopt | cgroup/setsockopt |\n"
- " struct_ops | fentry | fexit | freplace }\n"
+ " cgroup/getpeername4 | cgroup/getpeername6 |\n"
+ " cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
+ " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
+ " cgroup/getsockopt | cgroup/setsockopt |\n"
+ " struct_ops | fentry | fexit | freplace | sk_lookup }\n"
" ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
" flow_dissector }\n"
" METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2]);
+ bin_name, argv[-2]);
return 0;
}
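
The help-text change above relies on POSIX numbered argument
conversions: "%n$s" refers to the n-th variadic argument, so the
(bin_name, argv[-2]) pair only needs to be passed once instead of once
per format line. A minimal standalone sketch of the mechanism:

	#include <stdio.h>

	int main(void)
	{
		/* prints "bpftool prog | bpftool prog"; %1$s and %2$s
		 * reuse the same two arguments */
		printf("%1$s %2$s | %1$s %2$s\n", "bpftool", "prog");
		return 0;
	}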
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
new file mode 100644
index 000000000000..d9b420972934
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (c) 2020 Facebook */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include "pid_iter.h"
+
+/* keep in sync with the definition in main.h */
+enum bpf_obj_type {
+ BPF_OBJ_UNKNOWN,
+ BPF_OBJ_PROG,
+ BPF_OBJ_MAP,
+ BPF_OBJ_LINK,
+ BPF_OBJ_BTF,
+};
+
+extern const void bpf_link_fops __ksym;
+extern const void bpf_map_fops __ksym;
+extern const void bpf_prog_fops __ksym;
+extern const void btf_fops __ksym;
+
+const volatile enum bpf_obj_type obj_type = BPF_OBJ_UNKNOWN;
+
+static __always_inline __u32 get_obj_id(void *ent, enum bpf_obj_type type)
+{
+ switch (type) {
+ case BPF_OBJ_PROG:
+ return BPF_CORE_READ((struct bpf_prog *)ent, aux, id);
+ case BPF_OBJ_MAP:
+ return BPF_CORE_READ((struct bpf_map *)ent, id);
+ case BPF_OBJ_BTF:
+ return BPF_CORE_READ((struct btf *)ent, id);
+ case BPF_OBJ_LINK:
+ return BPF_CORE_READ((struct bpf_link *)ent, id);
+ default:
+ return 0;
+ }
+}
+
+SEC("iter/task_file")
+int iter(struct bpf_iter__task_file *ctx)
+{
+ struct file *file = ctx->file;
+ struct task_struct *task = ctx->task;
+ struct pid_iter_entry e;
+ const void *fops;
+
+ if (!file || !task)
+ return 0;
+
+ switch (obj_type) {
+ case BPF_OBJ_PROG:
+ fops = &bpf_prog_fops;
+ break;
+ case BPF_OBJ_MAP:
+ fops = &bpf_map_fops;
+ break;
+ case BPF_OBJ_BTF:
+ fops = &btf_fops;
+ break;
+ case BPF_OBJ_LINK:
+ fops = &bpf_link_fops;
+ break;
+ default:
+ return 0;
+ }
+
+ if (file->f_op != fops)
+ return 0;
+
+ e.pid = task->tgid;
+ e.id = get_obj_id(file->private_data, obj_type);
+ bpf_probe_read_kernel(&e.comm, sizeof(e.comm),
+ task->group_leader->comm);
+ bpf_seq_write(ctx->meta->seq, &e, sizeof(e));
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.h b/tools/bpf/bpftool/skeleton/pid_iter.h
new file mode 100644
index 000000000000..5692cf257adb
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/pid_iter.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2020 Facebook */
+#ifndef __PID_ITER_H
+#define __PID_ITER_H
+
+struct pid_iter_entry {
+ __u32 id;
+ int pid;
+ char comm[16];
+};
+
+#endif
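
A minimal sketch of how the fixed-size records above can be consumed
from userspace (an illustration, not bpftool's actual pids.c; it
assumes iter_fd was obtained elsewhere, e.g. from bpf_iter_create() on
an attached iterator link):

	#include <stdio.h>
	#include <unistd.h>
	#include <linux/types.h>
	#include "pid_iter.h"

	static void drain_iter(int iter_fd)
	{
		struct pid_iter_entry e;

		/* each bpf_seq_write() in the iterator emits one entry */
		while (read(iter_fd, &e, sizeof(e)) == (ssize_t)sizeof(e))
			printf("id %u held by pid %d (%.16s)\n",
			       e.id, e.pid, e.comm);
	}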
diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
index 20034c12f7c5..4e3512f700c0 100644
--- a/tools/bpf/bpftool/skeleton/profiler.bpf.c
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -1,7 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2020 Facebook
-#include "profiler.h"
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -116,4 +115,4 @@ int BPF_PROG(fexit_XXX)
return 0;
}
-char LICENSE[] SEC("license") = "GPL";
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h
deleted file mode 100644
index 1f767e9510f7..000000000000
--- a/tools/bpf/bpftool/skeleton/profiler.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __PROFILER_H
-#define __PROFILER_H
-
-/* useful typedefs from vmlinux.h */
-
-typedef signed char __s8;
-typedef unsigned char __u8;
-typedef short int __s16;
-typedef short unsigned int __u16;
-typedef int __s32;
-typedef unsigned int __u32;
-typedef long long int __s64;
-typedef long long unsigned int __u64;
-
-typedef __s8 s8;
-typedef __u8 u8;
-typedef __s16 s16;
-typedef __u16 u16;
-typedef __s32 s32;
-typedef __u32 u32;
-typedef __s64 s64;
-typedef __u64 u64;
-
-enum {
- false = 0,
- true = 1,
-};
-
-#ifdef __CHECKER__
-#define __bitwise__ __attribute__((bitwise))
-#else
-#define __bitwise__
-#endif
-
-typedef __u16 __bitwise__ __le16;
-typedef __u16 __bitwise__ __be16;
-typedef __u32 __bitwise__ __le32;
-typedef __u32 __bitwise__ __be32;
-typedef __u64 __bitwise__ __le64;
-typedef __u64 __bitwise__ __be64;
-
-typedef __u16 __bitwise__ __sum16;
-typedef __u32 __bitwise__ __wsum;
-
-#endif /* __PROFILER_H */
diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
index e17738479edc..b58b91f62ffb 100644
--- a/tools/bpf/bpftool/struct_ops.c
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -566,16 +566,15 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s { show | list } [STRUCT_OPS_MAP]\n"
- " %s %s dump [STRUCT_OPS_MAP]\n"
- " %s %s register OBJ\n"
- " %s %s unregister STRUCT_OPS_MAP\n"
- " %s %s help\n"
+ "Usage: %1$s %2$s { show | list } [STRUCT_OPS_MAP]\n"
+ " %1$s %2$s dump [STRUCT_OPS_MAP]\n"
+ " %1$s %2$s register OBJ\n"
+ " %1$s %2$s unregister STRUCT_OPS_MAP\n"
+ " %1$s %2$s help\n"
"\n"
" OPTIONS := { {-j|--json} [{-p|--pretty}] }\n"
- " STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n",
- bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2],
+ " STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n"
+ "",
bin_name, argv[-2]);
return 0;
diff --git a/tools/bpf/resolve_btfids/.gitignore b/tools/bpf/resolve_btfids/.gitignore
new file mode 100644
index 000000000000..a026df7dc280
--- /dev/null
+++ b/tools/bpf/resolve_btfids/.gitignore
@@ -0,0 +1,4 @@
+/FEATURE-DUMP.libbpf
+/bpf_helper_defs.h
+/fixdep
+/resolve_btfids
diff --git a/tools/bpf/resolve_btfids/Build b/tools/bpf/resolve_btfids/Build
new file mode 100644
index 000000000000..ae82da03f9bf
--- /dev/null
+++ b/tools/bpf/resolve_btfids/Build
@@ -0,0 +1,10 @@
+resolve_btfids-y += main.o
+resolve_btfids-y += rbtree.o
+resolve_btfids-y += zalloc.o
+resolve_btfids-y += string.o
+resolve_btfids-y += ctype.o
+resolve_btfids-y += str_error_r.o
+
+$(OUTPUT)%.o: ../../lib/%.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
new file mode 100644
index 000000000000..66cb92136de4
--- /dev/null
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: GPL-2.0-only
+include ../../scripts/Makefile.include
+include ../../scripts/Makefile.arch
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+ifeq ($(V),1)
+ Q =
+ msg =
+else
+ Q = @
+ msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))";
+ MAKEFLAGS=--no-print-directory
+endif
+
+# always use the host compiler
+ifneq ($(LLVM),)
+HOSTAR ?= llvm-ar
+HOSTCC ?= clang
+HOSTLD ?= ld.lld
+else
+HOSTAR ?= ar
+HOSTCC ?= gcc
+HOSTLD ?= ld
+endif
+AR = $(HOSTAR)
+CC = $(HOSTCC)
+LD = $(HOSTLD)
+ARCH = $(HOSTARCH)
+
+OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/
+
+LIBBPF_SRC := $(srctree)/tools/lib/bpf/
+SUBCMD_SRC := $(srctree)/tools/lib/subcmd/
+
+BPFOBJ := $(OUTPUT)/libbpf.a
+SUBCMDOBJ := $(OUTPUT)/libsubcmd.a
+
+BINARY := $(OUTPUT)/resolve_btfids
+BINARY_IN := $(BINARY)-in.o
+
+all: $(BINARY)
+
+$(OUTPUT):
+ $(call msg,MKDIR,,$@)
+ $(Q)mkdir -p $(OUTPUT)
+
+$(SUBCMDOBJ): fixdep FORCE
+ $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(OUTPUT)
+
+$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)
+ $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@)
+
+CFLAGS := -g \
+ -I$(srctree)/tools/include \
+ -I$(srctree)/tools/include/uapi \
+ -I$(LIBBPF_SRC) \
+ -I$(SUBCMD_SRC)
+
+LIBS = -lelf -lz
+
+export srctree OUTPUT CFLAGS Q
+include $(srctree)/tools/build/Makefile.include
+
+$(BINARY_IN): fixdep FORCE
+ $(Q)$(MAKE) $(build)=resolve_btfids
+
+$(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN)
+ $(call msg,LINK,$@)
+ $(Q)$(CC) $(BINARY_IN) $(LDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS)
+
+libsubcmd-clean:
+ $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(OUTPUT) clean
+
+libbpf-clean:
+ $(Q)$(MAKE) -C $(LIBBPF_SRC) OUTPUT=$(OUTPUT) clean
+
+clean: libsubcmd-clean libbpf-clean fixdep-clean
+ $(call msg,CLEAN,$(BINARY))
+ $(Q)$(RM) -f $(BINARY); \
+ $(RM) -rf $(if $(OUTPUT),$(OUTPUT),.)/feature; \
+ find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
+
+tags:
+ $(call msg,GEN,,tags)
+ $(Q)ctags -R . $(LIBBPF_SRC) $(SUBCMD_SRC)
+
+FORCE:
+
+.PHONY: all FORCE clean tags
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
new file mode 100644
index 000000000000..dfa540d8a02d
--- /dev/null
+++ b/tools/bpf/resolve_btfids/main.c
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * resolve_btfids scans an ELF object for the .BTF_ids section and
+ * resolves its symbols to BTF ID values.
+ *
+ * Each symbol points to 4 bytes of data and is expected to have the
+ * following name syntax:
+ *
+ * __BTF_ID__<type>__<symbol>[__<id>]
+ *
+ * type is:
+ *
+ * func - lookup BTF_KIND_FUNC symbol with <symbol> name
+ * and store its ID into the data:
+ *
+ * __BTF_ID__func__vfs_close__1:
+ * .zero 4
+ *
+ * struct - lookup BTF_KIND_STRUCT symbol with <symbol> name
+ * and store its ID into the data:
+ *
+ * __BTF_ID__struct__sk_buff__1:
+ * .zero 4
+ *
+ * union - lookup BTF_KIND_UNION symbol with <symbol> name
+ * and store its ID into the data:
+ *
+ * __BTF_ID__union__thread_union__1:
+ * .zero 4
+ *
+ * typedef - lookup BTF_KIND_TYPEDEF symbol with <symbol> name
+ * and store its ID into the data:
+ *
+ * __BTF_ID__typedef__pid_t__1:
+ * .zero 4
+ *
+ * set - store the symbol size into the first 4 bytes and sort the
+ * following ID list
+ *
+ * __BTF_ID__set__list:
+ * .zero 4
+ * list:
+ * __BTF_ID__func__vfs_getattr__3:
+ * .zero 4
+ * __BTF_ID__func__vfs_fallocate__4:
+ * .zero 4
+ */
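+
+/*
+ * Illustration only (an assumption, not something this tool relies
+ * on): a symbol of the shape above could be emitted from C with
+ * file-scope inline asm, e.g.:
+ *
+ * asm(
+ * ".pushsection .BTF_ids,\"a\" \n"
+ * ".global __BTF_ID__func__vfs_close__1 \n"
+ * "__BTF_ID__func__vfs_close__1: \n"
+ * ".zero 4 \n"
+ * ".popsection \n");
+ */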
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <linux/rbtree.h>
+#include <linux/zalloc.h>
+#include <linux/err.h>
+#include <btf.h>
+#include <libbpf.h>
+#include <parse-options.h>
+
+#define BTF_IDS_SECTION ".BTF_ids"
+#define BTF_ID "__BTF_ID__"
+
+#define BTF_STRUCT "struct"
+#define BTF_UNION "union"
+#define BTF_TYPEDEF "typedef"
+#define BTF_FUNC "func"
+#define BTF_SET "set"
+
+#define ADDR_CNT 100
+
+struct btf_id {
+ struct rb_node rb_node;
+ char *name;
+ union {
+ int id;
+ int cnt;
+ };
+ int addr_cnt;
+ Elf64_Addr addr[ADDR_CNT];
+};
+
+struct object {
+ const char *path;
+ const char *btf;
+
+ struct {
+ int fd;
+ Elf *elf;
+ Elf_Data *symbols;
+ Elf_Data *idlist;
+ int symbols_shndx;
+ int idlist_shndx;
+ size_t strtabidx;
+ unsigned long idlist_addr;
+ } efile;
+
+ struct rb_root sets;
+ struct rb_root structs;
+ struct rb_root unions;
+ struct rb_root typedefs;
+ struct rb_root funcs;
+
+ int nr_funcs;
+ int nr_structs;
+ int nr_unions;
+ int nr_typedefs;
+};
+
+static int verbose;
+
+int eprintf(int level, int var, const char *fmt, ...)
+{
+ va_list args;
+ int ret = 0;
+
+ if (var >= level) {
+ va_start(args, fmt);
+ ret = vfprintf(stderr, fmt, args);
+ va_end(args);
+ }
+ return ret;
+}
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+#define pr_debug(fmt, ...) \
+ eprintf(1, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debugN(n, fmt, ...) \
+ eprintf(n, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err(fmt, ...) \
+ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+
+static bool is_btf_id(const char *name)
+{
+ return name && !strncmp(name, BTF_ID, sizeof(BTF_ID) - 1);
+}
+
+static struct btf_id *btf_id__find(struct rb_root *root, const char *name)
+{
+ struct rb_node *p = root->rb_node;
+ struct btf_id *id;
+ int cmp;
+
+ while (p) {
+ id = rb_entry(p, struct btf_id, rb_node);
+ cmp = strcmp(id->name, name);
+ if (cmp < 0)
+ p = p->rb_left;
+ else if (cmp > 0)
+ p = p->rb_right;
+ else
+ return id;
+ }
+ return NULL;
+}
+
+static struct btf_id*
+btf_id__add(struct rb_root *root, char *name, bool unique)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct btf_id *id;
+ int cmp;
+
+ while (*p != NULL) {
+ parent = *p;
+ id = rb_entry(parent, struct btf_id, rb_node);
+ cmp = strcmp(id->name, name);
+ if (cmp < 0)
+ p = &(*p)->rb_left;
+ else if (cmp > 0)
+ p = &(*p)->rb_right;
+ else
+ return unique ? NULL : id;
+ }
+
+ id = zalloc(sizeof(*id));
+ if (id) {
+ pr_debug("adding symbol %s\n", name);
+ id->name = name;
+ rb_link_node(&id->rb_node, parent, p);
+ rb_insert_color(&id->rb_node, root);
+ }
+ return id;
+}
+
+static char *get_id(const char *prefix_end)
+{
+ /*
+ * __BTF_ID__func__vfs_truncate__0
+ * prefix_end = ^
+ * pos = ^
+ */
+ int len = strlen(prefix_end);
+ int pos = sizeof("__") - 1;
+ char *p, *id;
+
+ if (pos >= len)
+ return NULL;
+
+ id = strdup(prefix_end + pos);
+ if (id) {
+ /*
+ * __BTF_ID__func__vfs_truncate__0
+ * id = ^
+ *
+ * cut the unique id part
+ */
+ p = strrchr(id, '_');
+ /* guard against malformed names that lack the "__<id>" suffix */
+ if (!p || p == id) {
+ free(id);
+ return NULL;
+ }
+ p--;
+ if (*p != '_') {
+ free(id);
+ return NULL;
+ }
+ *p = '\0';
+ }
+ return id;
+}
+
+static struct btf_id *add_set(struct object *obj, char *name)
+{
+ /*
+ * __BTF_ID__set__name
+ * name = ^
+ * id = ^
+ */
+ char *id = name + sizeof(BTF_SET "__") - 1;
+ int len = strlen(name);
+
+ if (id >= name + len) {
+ pr_err("FAILED to parse set name: %s\n", name);
+ return NULL;
+ }
+
+ return btf_id__add(&obj->sets, id, true);
+}
+
+static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
+{
+ char *id;
+
+ id = get_id(name + size);
+ if (!id) {
+ pr_err("FAILED to parse symbol name: %s\n", name);
+ return NULL;
+ }
+
+ return btf_id__add(root, id, false);
+}
+
+/*
+ * The data of a compressed section should be aligned to 4
+ * (for 32-bit) or 8 (for 64-bit) bytes. The binutils ld
+ * sets sh_addralign to 1, which makes libelf fail with a
+ * misaligned section error during the update:
+ * FAILED elf_update(WRITE): invalid section alignment
+ *
+ * While waiting for an ld fix, we fix the compressed section's
+ * sh_addralign value manually.
+ */
+static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh)
+{
+ int expected = gelf_getclass(elf) == ELFCLASS32 ? 4 : 8;
+
+ if (!(sh->sh_flags & SHF_COMPRESSED))
+ return 0;
+
+ if (sh->sh_addralign == expected)
+ return 0;
+
+ pr_debug2(" - fixing wrong alignment sh_addralign %u, expected %u\n",
+ sh->sh_addralign, expected);
+
+ sh->sh_addralign = expected;
+
+ if (gelf_update_shdr(scn, sh) == 0) {
+ printf("FAILED cannot update section header: %s\n",
+ elf_errmsg(-1));
+ return -1;
+ }
+ return 0;
+}
+
+static int elf_collect(struct object *obj)
+{
+ Elf_Scn *scn = NULL;
+ size_t shdrstrndx;
+ int idx = 0;
+ Elf *elf;
+ int fd;
+
+ fd = open(obj->path, O_RDWR, 0666);
+ if (fd == -1) {
+ pr_err("FAILED cannot open %s: %s\n",
+ obj->path, strerror(errno));
+ return -1;
+ }
+
+ elf_version(EV_CURRENT);
+
+ elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL);
+ if (!elf) {
+ pr_err("FAILED cannot create ELF descriptor: %s\n",
+ elf_errmsg(-1));
+ return -1;
+ }
+
+ obj->efile.fd = fd;
+ obj->efile.elf = elf;
+
+ elf_flagelf(elf, ELF_C_SET, ELF_F_LAYOUT);
+
+ if (elf_getshdrstrndx(elf, &shdrstrndx) != 0) {
+ pr_err("FAILED cannot get shdr str ndx\n");
+ return -1;
+ }
+
+ /*
+ * Scan all the ELF sections and save the data from
+ * the .BTF_ids section and the symbol table.
+ */
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ Elf_Data *data;
+ GElf_Shdr sh;
+ char *name;
+
+ idx++;
+ if (gelf_getshdr(scn, &sh) != &sh) {
+ pr_err("FAILED get section(%d) header\n", idx);
+ return -1;
+ }
+
+ name = elf_strptr(elf, shdrstrndx, sh.sh_name);
+ if (!name) {
+ pr_err("FAILED get section(%d) name\n", idx);
+ return -1;
+ }
+
+ data = elf_getdata(scn, 0);
+ if (!data) {
+ pr_err("FAILED to get section(%d) data from %s\n",
+ idx, name);
+ return -1;
+ }
+
+ pr_debug2("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
+ idx, name, (unsigned long) data->d_size,
+ (int) sh.sh_link, (unsigned long) sh.sh_flags,
+ (int) sh.sh_type);
+
+ if (sh.sh_type == SHT_SYMTAB) {
+ obj->efile.symbols = data;
+ obj->efile.symbols_shndx = idx;
+ obj->efile.strtabidx = sh.sh_link;
+ } else if (!strcmp(name, BTF_IDS_SECTION)) {
+ obj->efile.idlist = data;
+ obj->efile.idlist_shndx = idx;
+ obj->efile.idlist_addr = sh.sh_addr;
+ }
+
+ if (compressed_section_fix(elf, scn, &sh))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int symbols_collect(struct object *obj)
+{
+ Elf_Scn *scn = NULL;
+ int n, i, err = 0;
+ GElf_Shdr sh;
+ char *name;
+
+ scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx);
+ if (!scn)
+ return -1;
+
+ if (gelf_getshdr(scn, &sh) != &sh)
+ return -1;
+
+ n = sh.sh_size / sh.sh_entsize;
+
+ /*
+ * Scan the symbols and collect the ones that start with
+ * __BTF_ID__ and live in the .BTF_ids section.
+ */
+ for (i = 0; !err && i < n; i++) {
+ char *prefix;
+ struct btf_id *id;
+ GElf_Sym sym;
+
+ if (!gelf_getsym(obj->efile.symbols, i, &sym))
+ return -1;
+
+ if (sym.st_shndx != obj->efile.idlist_shndx)
+ continue;
+
+ name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+ sym.st_name);
+
+ if (!is_btf_id(name))
+ continue;
+
+ /*
+ * __BTF_ID__TYPE__vfs_truncate__0
+ * prefix = ^
+ */
+ prefix = name + sizeof(BTF_ID) - 1;
+
+ /* struct */
+ if (!strncmp(prefix, BTF_STRUCT, sizeof(BTF_STRUCT) - 1)) {
+ obj->nr_structs++;
+ id = add_symbol(&obj->structs, prefix, sizeof(BTF_STRUCT) - 1);
+ /* union */
+ } else if (!strncmp(prefix, BTF_UNION, sizeof(BTF_UNION) - 1)) {
+ obj->nr_unions++;
+ id = add_symbol(&obj->unions, prefix, sizeof(BTF_UNION) - 1);
+ /* typedef */
+ } else if (!strncmp(prefix, BTF_TYPEDEF, sizeof(BTF_TYPEDEF) - 1)) {
+ obj->nr_typedefs++;
+ id = add_symbol(&obj->typedefs, prefix, sizeof(BTF_TYPEDEF) - 1);
+ /* func */
+ } else if (!strncmp(prefix, BTF_FUNC, sizeof(BTF_FUNC) - 1)) {
+ obj->nr_funcs++;
+ id = add_symbol(&obj->funcs, prefix, sizeof(BTF_FUNC) - 1);
+ /* set */
+ } else if (!strncmp(prefix, BTF_SET, sizeof(BTF_SET) - 1)) {
+ id = add_set(obj, prefix);
+ /*
+ * A SET symbol's size covers both the 'cnt' field and the
+ * ID list that follows it, hence the - 1 when computing
+ * the list count.
+ */
+ if (id)
+ id->cnt = sym.st_size / sizeof(int) - 1;
+ } else {
+ pr_err("FAILED unsupported prefix %s\n", prefix);
+ return -1;
+ }
+
+ if (!id)
+ return -ENOMEM;
+
+ if (id->addr_cnt >= ADDR_CNT) {
+ pr_err("FAILED symbol %s crossed the number of allowed lists",
+ id->name);
+ return -1;
+ }
+ id->addr[id->addr_cnt++] = sym.st_value;
+ }
+
+ return 0;
+}
+
+static int symbols_resolve(struct object *obj)
+{
+ int nr_typedefs = obj->nr_typedefs;
+ int nr_structs = obj->nr_structs;
+ int nr_unions = obj->nr_unions;
+ int nr_funcs = obj->nr_funcs;
+ int err, type_id;
+ struct btf *btf;
+ __u32 nr;
+
+ btf = btf__parse(obj->btf ?: obj->path, NULL);
+ err = libbpf_get_error(btf);
+ if (err) {
+ pr_err("FAILED: load BTF from %s: %s",
+ obj->path, strerror(err));
+ return -1;
+ }
+
+ err = -1;
+ nr = btf__get_nr_types(btf);
+
+ /*
+ * Iterate all the BTF types and search for collected symbol IDs.
+ */
+ for (type_id = 1; type_id <= nr; type_id++) {
+ const struct btf_type *type;
+ struct rb_root *root;
+ struct btf_id *id;
+ const char *str;
+ int *nr;
+
+ type = btf__type_by_id(btf, type_id);
+ if (!type) {
+ pr_err("FAILED: malformed BTF, can't resolve type for ID %d\n",
+ type_id);
+ goto out;
+ }
+
+ if (btf_is_func(type) && nr_funcs) {
+ nr = &nr_funcs;
+ root = &obj->funcs;
+ } else if (btf_is_struct(type) && nr_structs) {
+ nr = &nr_structs;
+ root = &obj->structs;
+ } else if (btf_is_union(type) && nr_unions) {
+ nr = &nr_unions;
+ root = &obj->unions;
+ } else if (btf_is_typedef(type) && nr_typedefs) {
+ nr = &nr_typedefs;
+ root = &obj->typedefs;
+ } else
+ continue;
+
+ str = btf__name_by_offset(btf, type->name_off);
+ if (!str) {
+ pr_err("FAILED: malformed BTF, can't resolve name for ID %d\n",
+ type_id);
+ goto out;
+ }
+
+ id = btf_id__find(root, str);
+ if (id) {
+ id->id = type_id;
+ (*nr)--;
+ }
+ }
+
+ err = 0;
+out:
+ btf__free(btf);
+ return err;
+}
+
+static int id_patch(struct object *obj, struct btf_id *id)
+{
+ Elf_Data *data = obj->efile.idlist;
+ int *ptr = data->d_buf;
+ int i;
+
+ if (!id->id) {
+ pr_err("FAILED unresolved symbol %s\n", id->name);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < id->addr_cnt; i++) {
+ unsigned long addr = id->addr[i];
+ unsigned long idx = addr - obj->efile.idlist_addr;
+
+ pr_debug("patching addr %5lu: ID %7d [%s]\n",
+ idx, id->id, id->name);
+
+ if (idx >= data->d_size) {
+ pr_err("FAILED patching index %lu out of bounds %lu\n",
+ idx, data->d_size);
+ return -1;
+ }
+
+ idx = idx / sizeof(int);
+ ptr[idx] = id->id;
+ }
+
+ return 0;
+}
+
+static int __symbols_patch(struct object *obj, struct rb_root *root)
+{
+ struct rb_node *next;
+ struct btf_id *id;
+
+ next = rb_first(root);
+ while (next) {
+ id = rb_entry(next, struct btf_id, rb_node);
+
+ if (id_patch(obj, id))
+ return -1;
+
+ next = rb_next(next);
+ }
+ return 0;
+}
+
+static int cmp_id(const void *pa, const void *pb)
+{
+ const int *a = pa, *b = pb;
+
+ return *a - *b;
+}
+
+static int sets_patch(struct object *obj)
+{
+ Elf_Data *data = obj->efile.idlist;
+ int *ptr = data->d_buf;
+ struct rb_node *next;
+
+ next = rb_first(&obj->sets);
+ while (next) {
+ unsigned long addr, idx;
+ struct btf_id *id;
+ int *base;
+ int cnt;
+
+ id = rb_entry(next, struct btf_id, rb_node);
+ addr = id->addr[0];
+ idx = addr - obj->efile.idlist_addr;
+
+ /* sets are unique */
+ if (id->addr_cnt != 1) {
+ pr_err("FAILED malformed data for set '%s'\n",
+ id->name);
+ return -1;
+ }
+
+ idx = idx / sizeof(int);
+ base = &ptr[idx] + 1;
+ cnt = ptr[idx];
+
+ pr_debug("sorting addr %5lu: cnt %6d [%s]\n",
+ (idx + 1) * sizeof(int), cnt, id->name);
+
+ qsort(base, cnt, sizeof(int), cmp_id);
+
+ next = rb_next(next);
+ }
+ return 0;
+}
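+
+/*
+ * Resulting layout of a patched set in .BTF_ids (illustration):
+ *
+ * ptr[idx] = cnt <- count, filled in from the symbol size
+ * ptr[idx + 1 .. idx + cnt] = IDs <- sorted ascending by sets_patch()
+ */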
+
+static int symbols_patch(struct object *obj)
+{
+ int err;
+
+ if (__symbols_patch(obj, &obj->structs) ||
+ __symbols_patch(obj, &obj->unions) ||
+ __symbols_patch(obj, &obj->typedefs) ||
+ __symbols_patch(obj, &obj->funcs) ||
+ __symbols_patch(obj, &obj->sets))
+ return -1;
+
+ if (sets_patch(obj))
+ return -1;
+
+ elf_flagdata(obj->efile.idlist, ELF_C_SET, ELF_F_DIRTY);
+
+ err = elf_update(obj->efile.elf, ELF_C_WRITE);
+ if (err < 0) {
+ pr_err("FAILED elf_update(WRITE): %s\n",
+ elf_errmsg(-1));
+ }
+
+ pr_debug("update %s for %s\n",
+ err >= 0 ? "ok" : "failed", obj->path);
+ return err < 0 ? -1 : 0;
+}
+
+static const char * const resolve_btfids_usage[] = {
+ "resolve_btfids [<options>] <ELF object>",
+ NULL
+};
+
+int main(int argc, const char **argv)
+{
+ bool no_fail = false;
+ struct object obj = {
+ .efile = {
+ .fd = -1,
+ .idlist_shndx = -1,
+ .symbols_shndx = -1,
+ },
+ .structs = RB_ROOT,
+ .unions = RB_ROOT,
+ .typedefs = RB_ROOT,
+ .funcs = RB_ROOT,
+ .sets = RB_ROOT,
+ };
+ struct option btfid_options[] = {
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show errors, etc)"),
+ OPT_STRING(0, "btf", &obj.btf, "BTF data",
+ "BTF data"),
+ OPT_BOOLEAN(0, "no-fail", &no_fail,
+ "do not fail if " BTF_IDS_SECTION " section is not found"),
+ OPT_END()
+ };
+ int err = -1;
+
+ argc = parse_options(argc, argv, btfid_options, resolve_btfids_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ if (argc != 1)
+ usage_with_options(resolve_btfids_usage, btfid_options);
+
+ obj.path = argv[0];
+
+ if (elf_collect(&obj))
+ goto out;
+
+ /*
+ * We did not find the .BTF_ids section or the symbol table;
+ * nothing to do.
+ */
+ if (obj.efile.idlist_shndx == -1 ||
+ obj.efile.symbols_shndx == -1) {
+ if (no_fail)
+ return 0;
+ pr_err("FAILED to find needed sections\n");
+ return -1;
+ }
+
+ if (symbols_collect(&obj))
+ goto out;
+
+ if (symbols_resolve(&obj))
+ goto out;
+
+ if (symbols_patch(&obj))
+ goto out;
+
+ err = 0;
+out:
+ if (obj.efile.elf)
+ elf_end(obj.efile.elf);
+ if (obj.efile.fd != -1)
+ close(obj.efile.fd);
+ return err;
+}
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index 8a6f82e56a24..fb1337d69868 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -8,7 +8,8 @@ BPFTOOL ?= $(DEFAULT_BPFTOOL)
LIBBPF_SRC := $(abspath ../../lib/bpf)
BPFOBJ := $(OUTPUT)/libbpf.a
BPF_INCLUDE := $(OUTPUT)
-INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib)
+INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib) \
+ -I$(abspath ../../include/uapi)
CFLAGS := -g -Wall
# Try to detect best kernel BTF source
diff --git a/tools/build/Build.include b/tools/build/Build.include
index 9ec01f4454f9..585486e40995 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -74,7 +74,8 @@ dep-cmd = $(if $(wildcard $(fixdep)),
# dependencies in the cmd file
if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)), \
@set -e; \
- $(echo-cmd) $(cmd_$(1)) && $(dep-cmd))
+ $(echo-cmd) $(cmd_$(1)); \
+ $(dep-cmd))
# if_changed - execute command if any prerequisite is newer than
# target, or command line has changed
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 727050c40f09..722f1700d96a 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -38,6 +38,8 @@ clean:
$(call QUIET_CLEAN, fixdep)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)rm -f $(OUTPUT)fixdep
+ $(call QUIET_CLEAN, feature-detect)
+ $(Q)$(MAKE) -C feature/ clean >/dev/null
$(OUTPUT)fixdep-in.o: FORCE
$(Q)$(MAKE) $(build)=fixdep
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 3e0c019ef297..97cbfb31b762 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -8,7 +8,7 @@ endif
feature_check = $(eval $(feature_check_code))
define feature_check_code
- feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
+ feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC="$(CC)" CXX="$(CXX)" CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
endef
feature_set = $(eval $(feature_set_code))
@@ -38,16 +38,13 @@ FEATURE_TESTS_BASIC := \
get_current_dir_name \
gettid \
glibc \
- gtk2 \
- gtk2-infobar \
- libaudit \
libbfd \
+ libbfd-buildid \
libcap \
libelf \
libelf-getphdrnum \
libelf-gelf_getnote \
libelf-getshdrstrndx \
- libelf-mmap \
libnuma \
numa_num_possible_cpus \
libperl \
@@ -82,6 +79,8 @@ FEATURE_TESTS_EXTRA := \
compile-32 \
compile-x32 \
cplus-demangle \
+ gtk2 \
+ gtk2-infobar \
hello \
libbabeltrace \
libbfd-liberty \
@@ -98,7 +97,9 @@ FEATURE_TESTS_EXTRA := \
llvm \
llvm-version \
clang \
- libbpf
+ libbpf \
+ libpfm4 \
+ libdebuginfod
FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
@@ -110,9 +111,8 @@ FEATURE_DISPLAY ?= \
dwarf \
dwarf_getlocations \
glibc \
- gtk2 \
- libaudit \
libbfd \
+ libbfd-buildid \
libcap \
libelf \
libnuma \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 92012381393a..cdde783f3018 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -15,6 +15,7 @@ FILES= \
test-hello.bin \
test-libaudit.bin \
test-libbfd.bin \
+ test-libbfd-buildid.bin \
test-disassembler-four-args.bin \
test-reallocarray.bin \
test-libbfd-liberty.bin \
@@ -25,7 +26,7 @@ FILES= \
test-libelf-getphdrnum.bin \
test-libelf-gelf_getnote.bin \
test-libelf-getshdrstrndx.bin \
- test-libelf-mmap.bin \
+ test-libdebuginfod.bin \
test-libnuma.bin \
test-numa_num_possible_cpus.bin \
test-libperl.bin \
@@ -68,13 +69,12 @@ FILES= \
test-llvm-version.bin \
test-libaio.bin \
test-libzstd.bin \
- test-clang-bpf-global-var.bin \
- test-file-handle.bin
+ test-clang-bpf-co-re.bin \
+ test-file-handle.bin \
+ test-libpfm4.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
-CC ?= $(CROSS_COMPILE)gcc
-CXX ?= $(CROSS_COMPILE)g++
PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
LLVM_CONFIG ?= llvm-config
CLANG ?= clang
@@ -90,7 +90,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(
###############################
$(OUTPUT)test-all.bin:
- $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma
+ $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd
$(OUTPUT)test-hello.bin:
$(BUILD)
@@ -146,9 +146,6 @@ $(OUTPUT)test-dwarf.bin:
$(OUTPUT)test-dwarf_getlocations.bin:
$(BUILD) $(DWARFLIBS)
-$(OUTPUT)test-libelf-mmap.bin:
- $(BUILD) -lelf
-
$(OUTPUT)test-libelf-getphdrnum.bin:
$(BUILD) -lelf
@@ -158,6 +155,9 @@ $(OUTPUT)test-libelf-gelf_getnote.bin:
$(OUTPUT)test-libelf-getshdrstrndx.bin:
$(BUILD) -lelf
+$(OUTPUT)test-libdebuginfod.bin:
+ $(BUILD) -ldebuginfod
+
$(OUTPUT)test-libnuma.bin:
$(BUILD) -lnuma
@@ -226,6 +226,9 @@ $(OUTPUT)test-libpython-version.bin:
$(OUTPUT)test-libbfd.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
+$(OUTPUT)test-libbfd-buildid.bin:
+ $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
+
$(OUTPUT)test-disassembler-four-args.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
@@ -324,13 +327,16 @@ $(OUTPUT)test-libaio.bin:
$(OUTPUT)test-libzstd.bin:
$(BUILD) -lzstd
-$(OUTPUT)test-clang-bpf-global-var.bin:
+$(OUTPUT)test-clang-bpf-co-re.bin:
$(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \
grep BTF_KIND_VAR
$(OUTPUT)test-file-handle.bin:
$(BUILD)
+$(OUTPUT)test-libpfm4.bin:
+ $(BUILD) -lpfm
+
###############################
clean:
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 88145e8cde1a..a04e81321c66 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -30,10 +30,6 @@
# include "test-libelf.c"
#undef main
-#define main main_test_libelf_mmap
-# include "test-libelf-mmap.c"
-#undef main
-
#define main main_test_get_current_dir_name
# include "test-get_current_dir_name.c"
#undef main
@@ -74,26 +70,18 @@
# include "test-libunwind.c"
#undef main
-#define main main_test_libaudit
-# include "test-libaudit.c"
-#undef main
-
#define main main_test_libslang
# include "test-libslang.c"
#undef main
-#define main main_test_gtk2
-# include "test-gtk2.c"
-#undef main
-
-#define main main_test_gtk2_infobar
-# include "test-gtk2-infobar.c"
-#undef main
-
#define main main_test_libbfd
# include "test-libbfd.c"
#undef main
+#define main main_test_libbfd_buildid
+# include "test-libbfd-buildid.c"
+#undef main
+
#define main main_test_backtrace
# include "test-backtrace.c"
#undef main
@@ -208,11 +196,9 @@ int main(int argc, char *argv[])
main_test_libelf_gelf_getnote();
main_test_libelf_getshdrstrndx();
main_test_libunwind();
- main_test_libaudit();
main_test_libslang();
- main_test_gtk2(argc, argv);
- main_test_gtk2_infobar(argc, argv);
main_test_libbfd();
+ main_test_libbfd_buildid();
main_test_backtrace();
main_test_libnuma();
main_test_numa_num_possible_cpus();
diff --git a/tools/build/feature/test-clang-bpf-co-re.c b/tools/build/feature/test-clang-bpf-co-re.c
new file mode 100644
index 000000000000..cb5265bfdd83
--- /dev/null
+++ b/tools/build/feature/test-clang-bpf-co-re.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+struct test {
+ int a;
+ int b;
+} __attribute__((preserve_access_index));
+
+volatile struct test global_value_for_test = {};
diff --git a/tools/build/feature/test-clang-bpf-global-var.c b/tools/build/feature/test-clang-bpf-global-var.c
deleted file mode 100644
index 221f1481d52e..000000000000
--- a/tools/build/feature/test-clang-bpf-global-var.c
+++ /dev/null
@@ -1,4 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2020 Facebook
-
-volatile int global_value_for_test = 1;
diff --git a/tools/build/feature/test-libbfd-buildid.c b/tools/build/feature/test-libbfd-buildid.c
new file mode 100644
index 000000000000..157644b04c05
--- /dev/null
+++ b/tools/build/feature/test-libbfd-buildid.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bfd.h>
+
+int main(void)
+{
+ bfd *abfd = bfd_openr("Pedro", 0);
+ return abfd && (!abfd->build_id || abfd->build_id->size > 0x506564726f);
+}
diff --git a/tools/build/feature/test-libdebuginfod.c b/tools/build/feature/test-libdebuginfod.c
new file mode 100644
index 000000000000..da22548b8413
--- /dev/null
+++ b/tools/build/feature/test-libdebuginfod.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elfutils/debuginfod.h>
+
+int main(void)
+{
+ debuginfod_client* c = debuginfod_begin();
+ return (long)c;
+}
diff --git a/tools/build/feature/test-libelf-mmap.c b/tools/build/feature/test-libelf-mmap.c
deleted file mode 100644
index 2c3ef81affe2..000000000000
--- a/tools/build/feature/test-libelf-mmap.c
+++ /dev/null
@@ -1,9 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <libelf.h>
-
-int main(void)
-{
- Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0);
-
- return (long)elf;
-}
diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
index 2b0e02c38870..1547bc2c0950 100644
--- a/tools/build/feature/test-libopencsd.c
+++ b/tools/build/feature/test-libopencsd.c
@@ -4,9 +4,9 @@
/*
* Check OpenCSD library version is sufficient to provide required features
*/
-#define OCSD_MIN_VER ((0 << 16) | (11 << 8) | (0))
+#define OCSD_MIN_VER ((0 << 16) | (14 << 8) | (0))
#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
-#error "OpenCSD >= 0.11.0 is required"
+#error "OpenCSD >= 0.14.0 is required"
#endif
int main(void)
diff --git a/tools/build/feature/test-libpfm4.c b/tools/build/feature/test-libpfm4.c
new file mode 100644
index 000000000000..af49b259459e
--- /dev/null
+++ b/tools/build/feature/test-libpfm4.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <perfmon/pfmlib.h>
+
+int main(void)
+{
+ pfm_initialize();
+ return 0;
+}
diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py
index 9d8e9613008a..c4ff907c078b 100644
--- a/tools/cgroup/iocost_monitor.py
+++ b/tools/cgroup/iocost_monitor.py
@@ -28,7 +28,8 @@ parser.add_argument('devname', metavar='DEV',
parser.add_argument('--cgroup', action='append', metavar='REGEX',
help='Regex for target cgroups, ')
parser.add_argument('--interval', '-i', metavar='SECONDS', type=float, default=1,
- help='Monitoring interval in seconds')
+ help='Monitoring interval in seconds (0 exits immediately '
+ 'after checking requirements)')
parser.add_argument('--json', action='store_true',
help='Output in json')
args = parser.parse_args()
@@ -44,8 +45,7 @@ except:
err('The kernel does not have iocost enabled')
IOC_RUNNING = prog['IOC_RUNNING'].value_()
-NR_USAGE_SLOTS = prog['NR_USAGE_SLOTS'].value_()
-HWEIGHT_WHOLE = prog['HWEIGHT_WHOLE'].value_()
+WEIGHT_ONE = prog['WEIGHT_ONE'].value_()
VTIME_PER_SEC = prog['VTIME_PER_SEC'].value_()
VTIME_PER_USEC = prog['VTIME_PER_USEC'].value_()
AUTOP_SSD_FAST = prog['AUTOP_SSD_FAST'].value_()
@@ -99,7 +99,7 @@ class IocStat:
self.period_ms = ioc.period_us.value_() / 1_000
self.period_at = ioc.period_at.value_() / 1_000_000
self.vperiod_at = ioc.period_at_vtime.value_() / VTIME_PER_SEC
- self.vrate_pct = ioc.vtime_rate.counter.value_() * 100 / VTIME_PER_USEC
+ self.vrate_pct = ioc.vtime_base_rate.value_() * 100 / VTIME_PER_USEC
self.busy_level = ioc.busy_level.value_()
self.autop_idx = ioc.autop_idx.value_()
self.user_cost_model = ioc.user_cost_model.value_()
@@ -112,14 +112,14 @@ class IocStat:
def dict(self, now):
return { 'device' : devname,
- 'timestamp' : str(now),
- 'enabled' : str(int(self.enabled)),
- 'running' : str(int(self.running)),
- 'period_ms' : str(self.period_ms),
- 'period_at' : str(self.period_at),
- 'period_vtime_at' : str(self.vperiod_at),
- 'busy_level' : str(self.busy_level),
- 'vrate_pct' : str(self.vrate_pct), }
+ 'timestamp' : now,
+ 'enabled' : self.enabled,
+ 'running' : self.running,
+ 'period_ms' : self.period_ms,
+ 'period_at' : self.period_at,
+ 'period_vtime_at' : self.vperiod_at,
+ 'busy_level' : self.busy_level,
+ 'vrate_pct' : self.vrate_pct, }
def table_preamble_str(self):
state = ('RUN' if self.running else 'IDLE') if self.enabled else 'OFF'
@@ -135,7 +135,7 @@ class IocStat:
def table_header_str(self):
return f'{"":25} active {"weight":>9} {"hweight%":>13} {"inflt%":>6} ' \
- f'{"dbt":>3} {"delay":>6} {"usages%"}'
+ f'{"debt":>7} {"delay":>7} {"usage%"}'
class IocgStat:
def __init__(self, iocg):
@@ -143,11 +143,11 @@ class IocgStat:
blkg = iocg.pd.blkg
self.is_active = not list_empty(iocg.active_list.address_of_())
- self.weight = iocg.weight.value_()
- self.active = iocg.active.value_()
- self.inuse = iocg.inuse.value_()
- self.hwa_pct = iocg.hweight_active.value_() * 100 / HWEIGHT_WHOLE
- self.hwi_pct = iocg.hweight_inuse.value_() * 100 / HWEIGHT_WHOLE
+ self.weight = iocg.weight.value_() / WEIGHT_ONE
+ self.active = iocg.active.value_() / WEIGHT_ONE
+ self.inuse = iocg.inuse.value_() / WEIGHT_ONE
+ self.hwa_pct = iocg.hweight_active.value_() * 100 / WEIGHT_ONE
+ self.hwi_pct = iocg.hweight_inuse.value_() * 100 / WEIGHT_ONE
self.address = iocg.value_()
vdone = iocg.done_vtime.counter.value_()
@@ -159,54 +159,39 @@ class IocgStat:
else:
self.inflight_pct = 0
- # vdebt used to be an atomic64_t and is now u64, support both
- try:
- self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
- except:
- self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000
-
- self.use_delay = blkg.use_delay.counter.value_()
- self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000
-
- usage_idx = iocg.usage_idx.value_()
- self.usages = []
- self.usage = 0
- for i in range(NR_USAGE_SLOTS):
- usage = iocg.usages[(usage_idx + i) % NR_USAGE_SLOTS].value_()
- upct = usage * 100 / HWEIGHT_WHOLE
- self.usages.append(upct)
- self.usage = max(self.usage, upct)
+ self.usage = (100 * iocg.usage_delta_us.value_() /
+ ioc.period_us.value_()) if self.active else 0
+ self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000
+ if blkg.use_delay.counter.value_() != 0:
+ self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000
+ else:
+ self.delay_ms = 0
def dict(self, now, path):
out = { 'cgroup' : path,
- 'timestamp' : str(now),
- 'is_active' : str(int(self.is_active)),
- 'weight' : str(self.weight),
- 'weight_active' : str(self.active),
- 'weight_inuse' : str(self.inuse),
- 'hweight_active_pct' : str(self.hwa_pct),
- 'hweight_inuse_pct' : str(self.hwi_pct),
- 'inflight_pct' : str(self.inflight_pct),
- 'debt_ms' : str(self.debt_ms),
- 'use_delay' : str(self.use_delay),
- 'delay_ms' : str(self.delay_ms),
- 'usage_pct' : str(self.usage),
- 'address' : str(hex(self.address)) }
- for i in range(len(self.usages)):
- out[f'usage_pct_{i}'] = str(self.usages[i])
+ 'timestamp' : now,
+ 'is_active' : self.is_active,
+ 'weight' : self.weight,
+ 'weight_active' : self.active,
+ 'weight_inuse' : self.inuse,
+ 'hweight_active_pct' : self.hwa_pct,
+ 'hweight_inuse_pct' : self.hwi_pct,
+ 'inflight_pct' : self.inflight_pct,
+ 'debt_ms' : self.debt_ms,
+ 'delay_ms' : self.delay_ms,
+ 'usage_pct' : self.usage,
+ 'address' : self.address }
return out
def table_row_str(self, path):
out = f'{path[-28:]:28} ' \
f'{"*" if self.is_active else " "} ' \
- f'{self.inuse:5}/{self.active:5} ' \
+ f'{round(self.inuse):5}/{round(self.active):5} ' \
f'{self.hwi_pct:6.2f}/{self.hwa_pct:6.2f} ' \
f'{self.inflight_pct:6.2f} ' \
- f'{min(math.ceil(self.debt_ms), 999):3} ' \
- f'{min(self.use_delay, 99):2}*'\
- f'{min(math.ceil(self.delay_ms), 999):03} '
- for u in self.usages:
- out += f'{min(round(u), 999):03d}:'
+ f'{self.debt_ms:7.2f} ' \
+ f'{self.delay_ms:7.2f} '\
+ f'{min(self.usage, 999):6.2f}'
out = out.rstrip(':')
return out
@@ -248,6 +233,9 @@ for i, ptr in radix_tree_for_each(blkcg_root.blkg_tree.address_of_()):
if ioc is None:
err(f'Could not find ioc for {devname}');
+if interval == 0:
+ sys.exit(0)
+
# Keep printing
while True:
now = time.time()
diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py
new file mode 100644
index 000000000000..c4225ed63565
--- /dev/null
+++ b/tools/cgroup/memcg_slabinfo.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env drgn
+#
+# Copyright (C) 2020 Roman Gushchin <guro@fb.com>
+# Copyright (C) 2020 Facebook
+
+from os import stat
+import argparse
+import sys
+
+from drgn.helpers.linux import list_for_each_entry, list_empty
+from drgn.helpers.linux import for_each_page
+from drgn.helpers.linux.cpumask import for_each_online_cpu
+from drgn.helpers.linux.percpu import per_cpu_ptr
+from drgn import container_of, FaultError, Object
+
+
+DESC = """
+This is a drgn script to provide slab statistics for memory cgroups.
+It supports cgroup v2 and v1 and can emulate memory.kmem.slabinfo
+interface of cgroup v1.
+For drgn, visit https://github.com/osandov/drgn.
+"""
+
+
+MEMCGS = {}
+
+OO_SHIFT = 16
+OO_MASK = ((1 << OO_SHIFT) - 1)
+
+
+def err(s):
+ print('slabinfo.py: error: %s' % s, file=sys.stderr, flush=True)
+ sys.exit(1)
+
+
+def find_memcg_ids(css=prog['root_mem_cgroup'].css, prefix=''):
+ if not list_empty(css.children.address_of_()):
+ for css in list_for_each_entry('struct cgroup_subsys_state',
+ css.children.address_of_(),
+ 'sibling'):
+ name = prefix + '/' + css.cgroup.kn.name.string_().decode('utf-8')
+ memcg = container_of(css, 'struct mem_cgroup', 'css')
+ MEMCGS[css.cgroup.kn.id.value_()] = memcg
+ find_memcg_ids(css, name)
+
+
+def is_root_cache(s):
+ try:
+ return False if s.memcg_params.root_cache else True
+ except AttributeError:
+ return True
+
+
+def cache_name(s):
+ if is_root_cache(s):
+ return s.name.string_().decode('utf-8')
+ else:
+ return s.memcg_params.root_cache.name.string_().decode('utf-8')
+
+
+# SLUB
+
+def oo_order(s):
+ return s.oo.x >> OO_SHIFT
+
+
+def oo_objects(s):
+ return s.oo.x & OO_MASK
+
+
+def count_partial(n, fn):
+ nr_pages = 0
+ for page in list_for_each_entry('struct page', n.partial.address_of_(),
+ 'lru'):
+ nr_pages += fn(page)
+ return nr_pages
+
+
+def count_free(page):
+ return page.objects - page.inuse
+
+
+def slub_get_slabinfo(s, cfg):
+ nr_slabs = 0
+ nr_objs = 0
+ nr_free = 0
+
+ for node in range(cfg['nr_nodes']):
+ n = s.node[node]
+ nr_slabs += n.nr_slabs.counter.value_()
+ nr_objs += n.total_objects.counter.value_()
+ nr_free += count_partial(n, count_free)
+
+ return {'active_objs': nr_objs - nr_free,
+ 'num_objs': nr_objs,
+ 'active_slabs': nr_slabs,
+ 'num_slabs': nr_slabs,
+ 'objects_per_slab': oo_objects(s),
+ 'cache_order': oo_order(s),
+ 'limit': 0,
+ 'batchcount': 0,
+ 'shared': 0,
+ 'shared_avail': 0}
+
+
+def cache_show(s, cfg, objs):
+ if cfg['allocator'] == 'SLUB':
+ sinfo = slub_get_slabinfo(s, cfg)
+ else:
+ err('SLAB isn\'t supported yet')
+
+ if cfg['shared_slab_pages']:
+ sinfo['active_objs'] = objs
+ sinfo['num_objs'] = objs
+
+ print('%-17s %6lu %6lu %6u %4u %4d'
+ ' : tunables %4u %4u %4u'
+ ' : slabdata %6lu %6lu %6lu' % (
+ cache_name(s), sinfo['active_objs'], sinfo['num_objs'],
+ s.size, sinfo['objects_per_slab'], 1 << sinfo['cache_order'],
+ sinfo['limit'], sinfo['batchcount'], sinfo['shared'],
+ sinfo['active_slabs'], sinfo['num_slabs'],
+ sinfo['shared_avail']))
+
+
+def detect_kernel_config():
+ cfg = {}
+
+ cfg['nr_nodes'] = prog['nr_online_nodes'].value_()
+
+ if prog.type('struct kmem_cache').members[1][1] == 'flags':
+ cfg['allocator'] = 'SLUB'
+ elif prog.type('struct kmem_cache').members[1][1] == 'batchcount':
+ cfg['allocator'] = 'SLAB'
+ else:
+ err('Can\'t determine the slab allocator')
+
+ cfg['shared_slab_pages'] = False
+ try:
+ if prog.type('struct obj_cgroup'):
+ cfg['shared_slab_pages'] = True
+ except:
+ pass
+
+ return cfg
+
+
+def for_each_slab_page(prog):
+ PGSlab = 1 << prog.constant('PG_slab')
+ PGHead = 1 << prog.constant('PG_head')
+
+ for page in for_each_page(prog):
+ try:
+ if page.flags.value_() & PGSlab:
+ yield page
+ except FaultError:
+ pass
+
+
+def main():
+ parser = argparse.ArgumentParser(description=DESC,
+ formatter_class=
+ argparse.RawTextHelpFormatter)
+ parser.add_argument('cgroup', metavar='CGROUP',
+ help='Target memory cgroup')
+ args = parser.parse_args()
+
+ try:
+ cgroup_id = stat(args.cgroup).st_ino
+ find_memcg_ids()
+ memcg = MEMCGS[cgroup_id]
+ except KeyError:
+ err('Can\'t find the memory cgroup')
+
+ cfg = detect_kernel_config()
+
+ print('# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>'
+ ' : tunables <limit> <batchcount> <sharedfactor>'
+ ' : slabdata <active_slabs> <num_slabs> <sharedavail>')
+
+ if cfg['shared_slab_pages']:
+ obj_cgroups = set()
+ stats = {}
+ caches = {}
+
+ # find memcg pointers belonging to the specified cgroup
+ obj_cgroups.add(memcg.objcg.value_())
+ for ptr in list_for_each_entry('struct obj_cgroup',
+ memcg.objcg_list.address_of_(),
+ 'list'):
+ obj_cgroups.add(ptr.value_())
+
+ # look over all slab pages, belonging to non-root memcgs
+ # and look for objects belonging to the given memory cgroup
+ for page in for_each_slab_page(prog):
+ objcg_vec_raw = page.obj_cgroups.value_()
+ if objcg_vec_raw == 0:
+ continue
+ cache = page.slab_cache
+ if not cache:
+ continue
+ addr = cache.value_()
+ caches[addr] = cache
+ # clear the lowest bit to get the true obj_cgroups
+ objcg_vec = Object(prog, page.obj_cgroups.type_,
+ value=objcg_vec_raw & ~1)
+
+ if addr not in stats:
+ stats[addr] = 0
+
+ for i in range(oo_objects(cache)):
+ if objcg_vec[i].value_() in obj_cgroups:
+ stats[addr] += 1
+
+ for addr in caches:
+ if stats[addr] > 0:
+ cache_show(caches[addr], cfg, stats[addr])
+
+ else:
+ for s in list_for_each_entry('struct kmem_cache',
+ memcg.kmem_caches.address_of_(),
+ 'memcg_params.kmem_caches_node'):
+ cache_show(s, cfg, None)
+
+
+main()
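
The oo_order()/oo_objects() helpers above decode SLUB's packed
kmem_cache.oo field: the page allocation order lives above bit
OO_SHIFT and the objects-per-slab count in the low OO_SHIFT bits. The
same decode written in C (a sketch mirroring the Python):

	#define OO_SHIFT 16
	#define OO_MASK  ((1U << OO_SHIFT) - 1)

	static inline unsigned int oo_order(unsigned int x)
	{
		return x >> OO_SHIFT;	/* page allocation order */
	}

	static inline unsigned int oo_objects(unsigned int x)
	{
		return x & OO_MASK;	/* objects per slab */
	}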
diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c
index 30ed0e06f52a..90c3155f05b1 100644
--- a/tools/gpio/gpio-event-mon.c
+++ b/tools/gpio/gpio-event-mon.c
@@ -23,17 +23,17 @@
#include <sys/ioctl.h>
#include <sys/types.h>
#include <linux/gpio.h>
+#include "gpio-utils.h"
int monitor_device(const char *device_name,
- unsigned int line,
- uint32_t handleflags,
- uint32_t eventflags,
+ unsigned int *lines,
+ unsigned int num_lines,
+ struct gpio_v2_line_config *config,
unsigned int loops)
{
- struct gpioevent_request req;
- struct gpiohandle_data data;
+ struct gpio_v2_line_values values;
char *chrdev_name;
- int fd;
+ int cfd, lfd;
int ret;
int i = 0;
@@ -41,44 +41,55 @@ int monitor_device(const char *device_name,
if (ret < 0)
return -ENOMEM;
- fd = open(chrdev_name, 0);
- if (fd == -1) {
+ cfd = open(chrdev_name, 0);
+ if (cfd == -1) {
ret = -errno;
fprintf(stderr, "Failed to open %s\n", chrdev_name);
- goto exit_close_error;
+ goto exit_free_name;
}
- req.lineoffset = line;
- req.handleflags = handleflags;
- req.eventflags = eventflags;
- strcpy(req.consumer_label, "gpio-event-mon");
-
- ret = ioctl(fd, GPIO_GET_LINEEVENT_IOCTL, &req);
- if (ret == -1) {
- ret = -errno;
- fprintf(stderr, "Failed to issue GET EVENT "
- "IOCTL (%d)\n",
- ret);
- goto exit_close_error;
- }
+ ret = gpiotools_request_line(device_name, lines, num_lines, config,
+ "gpio-event-mon");
+ if (ret < 0)
+ goto exit_device_close;
+ else
+ lfd = ret;
/* Read initial states */
- ret = ioctl(req.fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &data);
- if (ret == -1) {
- ret = -errno;
- fprintf(stderr, "Failed to issue GPIOHANDLE GET LINE "
- "VALUES IOCTL (%d)\n",
+ values.mask = 0;
+ values.bits = 0;
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&values.mask, i);
+ ret = gpiotools_get_values(lfd, &values);
+ if (ret < 0) {
+ fprintf(stderr,
+ "Failed to issue GPIO LINE GET VALUES IOCTL (%d)\n",
ret);
- goto exit_close_error;
+ goto exit_line_close;
}
- fprintf(stdout, "Monitoring line %d on %s\n", line, device_name);
- fprintf(stdout, "Initial line value: %d\n", data.values[0]);
+ if (num_lines == 1) {
+ fprintf(stdout, "Monitoring line %d on %s\n", lines[0], device_name);
+ fprintf(stdout, "Initial line value: %d\n",
+ gpiotools_test_bit(values.bits, 0));
+ } else {
+ fprintf(stdout, "Monitoring lines %d", lines[0]);
+ for (i = 1; i < num_lines - 1; i++)
+ fprintf(stdout, ", %d", lines[i]);
+ fprintf(stdout, " and %d on %s\n", lines[i], device_name);
+ fprintf(stdout, "Initial line values: %d",
+ gpiotools_test_bit(values.bits, 0));
+ for (i = 1; i < num_lines - 1; i++)
+ fprintf(stdout, ", %d",
+ gpiotools_test_bit(values.bits, i));
+ fprintf(stdout, " and %d\n",
+ gpiotools_test_bit(values.bits, i));
+ }
while (1) {
- struct gpioevent_data event;
+ struct gpio_v2_line_event event;
- ret = read(req.fd, &event, sizeof(event));
+ ret = read(lfd, &event, sizeof(event));
if (ret == -1) {
if (errno == -EAGAIN) {
fprintf(stderr, "nothing available\n");
@@ -96,12 +107,14 @@ int monitor_device(const char *device_name,
ret = -EIO;
break;
}
- fprintf(stdout, "GPIO EVENT %llu: ", event.timestamp);
+ fprintf(stdout, "GPIO EVENT at %llu on line %d (%d|%d) ",
+ event.timestamp_ns, event.offset, event.line_seqno,
+ event.seqno);
switch (event.id) {
- case GPIOEVENT_EVENT_RISING_EDGE:
+ case GPIO_V2_LINE_EVENT_RISING_EDGE:
fprintf(stdout, "rising edge");
break;
- case GPIOEVENT_EVENT_FALLING_EDGE:
+ case GPIO_V2_LINE_EVENT_FALLING_EDGE:
fprintf(stdout, "falling edge");
break;
default:
@@ -114,9 +127,13 @@ int monitor_device(const char *device_name,
break;
}
-exit_close_error:
- if (close(fd) == -1)
+exit_line_close:
+ if (close(lfd) == -1)
+ perror("Failed to close line file");
+exit_device_close:
+ if (close(cfd) == -1)
perror("Failed to close GPIO character device file");
+exit_free_name:
free(chrdev_name);
return ret;
}
@@ -126,29 +143,37 @@ void print_usage(void)
fprintf(stderr, "Usage: gpio-event-mon [options]...\n"
"Listen to events on GPIO lines, 0->1 1->0\n"
" -n <name> Listen on GPIOs on a named device (must be stated)\n"
- " -o <n> Offset to monitor\n"
+ " -o <n> Offset of line to monitor (may be repeated)\n"
" -d Set line as open drain\n"
" -s Set line as open source\n"
" -r Listen for rising edges\n"
" -f Listen for falling edges\n"
+ " -b <n> Debounce the line with period n microseconds\n"
" [-c <n>] Do <n> loops (optional, infinite loop if not stated)\n"
" -? This helptext\n"
"\n"
"Example:\n"
- "gpio-event-mon -n gpiochip0 -o 4 -r -f\n"
+ "gpio-event-mon -n gpiochip0 -o 4 -r -f -b 10000\n"
);
}
+#define EDGE_FLAGS \
+ (GPIO_V2_LINE_FLAG_EDGE_RISING | \
+ GPIO_V2_LINE_FLAG_EDGE_FALLING)
+
int main(int argc, char **argv)
{
const char *device_name = NULL;
- unsigned int line = -1;
+ unsigned int lines[GPIO_V2_LINES_MAX];
+ unsigned int num_lines = 0;
unsigned int loops = 0;
- uint32_t handleflags = GPIOHANDLE_REQUEST_INPUT;
- uint32_t eventflags = 0;
- int c;
+ struct gpio_v2_line_config config;
+ int c, attr, i;
+ unsigned long debounce_period_us = 0;
- while ((c = getopt(argc, argv, "c:n:o:dsrf?")) != -1) {
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_INPUT;
+ while ((c = getopt(argc, argv, "c:n:o:b:dsrf?")) != -1) {
switch (c) {
case 'c':
loops = strtoul(optarg, NULL, 10);
@@ -157,19 +182,27 @@ int main(int argc, char **argv)
device_name = optarg;
break;
case 'o':
- line = strtoul(optarg, NULL, 10);
+ if (num_lines >= GPIO_V2_LINES_MAX) {
+ print_usage();
+ return -1;
+ }
+ lines[num_lines] = strtoul(optarg, NULL, 10);
+ num_lines++;
+ break;
+ case 'b':
+ debounce_period_us = strtoul(optarg, NULL, 10);
break;
case 'd':
- handleflags |= GPIOHANDLE_REQUEST_OPEN_DRAIN;
+ config.flags |= GPIO_V2_LINE_FLAG_OPEN_DRAIN;
break;
case 's':
- handleflags |= GPIOHANDLE_REQUEST_OPEN_SOURCE;
+ config.flags |= GPIO_V2_LINE_FLAG_OPEN_SOURCE;
break;
case 'r':
- eventflags |= GPIOEVENT_REQUEST_RISING_EDGE;
+ config.flags |= GPIO_V2_LINE_FLAG_EDGE_RISING;
break;
case 'f':
- eventflags |= GPIOEVENT_REQUEST_FALLING_EDGE;
+ config.flags |= GPIO_V2_LINE_FLAG_EDGE_FALLING;
break;
case '?':
print_usage();
@@ -177,15 +210,23 @@ int main(int argc, char **argv)
}
}
- if (!device_name || line == -1) {
+ if (debounce_period_us) {
+ attr = config.num_attrs;
+ config.num_attrs++;
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&config.attrs[attr].mask, i);
+ config.attrs[attr].attr.id = GPIO_V2_LINE_ATTR_ID_DEBOUNCE;
+ config.attrs[attr].attr.debounce_period_us = debounce_period_us;
+ }
+
+ if (!device_name || num_lines == 0) {
print_usage();
return -1;
}
- if (!eventflags) {
+ if (!(config.flags & EDGE_FLAGS)) {
printf("No flags specified, listening on both rising and "
"falling edges\n");
- eventflags = GPIOEVENT_REQUEST_BOTH_EDGES;
+ config.flags |= EDGE_FLAGS;
}
- return monitor_device(device_name, line, handleflags,
- eventflags, loops);
+ return monitor_device(device_name, lines, num_lines, &config, loops);
}
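
For context, a minimal sketch of the v2 attribute pattern used in the hunk above: a single debounce attribute is appended to the line config and its mask selects every requested line. This is not part of the patch; the flag combination, the 10000us period and the two-line count are illustrative, and gpiotools_set_bit() is the helper added to gpio-utils.h later in this series.

	struct gpio_v2_line_config config;
	unsigned int i, num_lines = 2;

	memset(&config, 0, sizeof(config));
	config.flags = GPIO_V2_LINE_FLAG_INPUT | GPIO_V2_LINE_FLAG_EDGE_RISING;
	config.attrs[0].attr.id = GPIO_V2_LINE_ATTR_ID_DEBOUNCE;
	config.attrs[0].attr.debounce_period_us = 10000;	/* 10 ms debounce */
	for (i = 0; i < num_lines; i++)
		gpiotools_set_bit(&config.attrs[0].mask, i);	/* attribute covers lines 0..num_lines-1 */
	config.num_attrs = 1;
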
diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c
index 9fd926e8cb52..54fdf59dd320 100644
--- a/tools/gpio/gpio-hammer.c
+++ b/tools/gpio/gpio-hammer.c
@@ -22,39 +22,46 @@
#include <linux/gpio.h>
#include "gpio-utils.h"
-int hammer_device(const char *device_name, unsigned int *lines, int nlines,
+int hammer_device(const char *device_name, unsigned int *lines, int num_lines,
unsigned int loops)
{
- struct gpiohandle_data data;
+ struct gpio_v2_line_values values;
+ struct gpio_v2_line_config config;
char swirr[] = "-\\|/";
int fd;
int ret;
int i, j;
unsigned int iteration = 0;
- memset(&data.values, 0, sizeof(data.values));
- ret = gpiotools_request_linehandle(device_name, lines, nlines,
- GPIOHANDLE_REQUEST_OUTPUT, &data,
- "gpio-hammer");
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_OUTPUT;
+
+ ret = gpiotools_request_line(device_name, lines, num_lines,
+ &config, "gpio-hammer");
if (ret < 0)
goto exit_error;
else
fd = ret;
- ret = gpiotools_get_values(fd, &data);
+ values.mask = 0;
+ values.bits = 0;
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&values.mask, i);
+
+ ret = gpiotools_get_values(fd, &values);
if (ret < 0)
goto exit_close_error;
fprintf(stdout, "Hammer lines [");
- for (i = 0; i < nlines; i++) {
+ for (i = 0; i < num_lines; i++) {
fprintf(stdout, "%d", lines[i]);
- if (i != (nlines - 1))
+ if (i != (num_lines - 1))
fprintf(stdout, ", ");
}
fprintf(stdout, "] on %s, initial states: [", device_name);
- for (i = 0; i < nlines; i++) {
- fprintf(stdout, "%d", data.values[i]);
- if (i != (nlines - 1))
+ for (i = 0; i < num_lines; i++) {
+ fprintf(stdout, "%d", gpiotools_test_bit(values.bits, i));
+ if (i != (num_lines - 1))
fprintf(stdout, ", ");
}
fprintf(stdout, "]\n");
@@ -63,15 +70,15 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
j = 0;
while (1) {
/* Invert all lines so we blink */
- for (i = 0; i < nlines; i++)
- data.values[i] = !data.values[i];
+ for (i = 0; i < num_lines; i++)
+ gpiotools_change_bit(&values.bits, i);
- ret = gpiotools_set_values(fd, &data);
+ ret = gpiotools_set_values(fd, &values);
if (ret < 0)
goto exit_close_error;
/* Re-read values to get status */
- ret = gpiotools_get_values(fd, &data);
+ ret = gpiotools_get_values(fd, &values);
if (ret < 0)
goto exit_close_error;
@@ -81,9 +88,10 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
j = 0;
fprintf(stdout, "[");
- for (i = 0; i < nlines; i++) {
- fprintf(stdout, "%d: %d", lines[i], data.values[i]);
- if (i != (nlines - 1))
+ for (i = 0; i < num_lines; i++) {
+ fprintf(stdout, "%d: %d", lines[i],
+ gpiotools_test_bit(values.bits, i));
+ if (i != (num_lines - 1))
fprintf(stdout, ", ");
}
fprintf(stdout, "]\r");
@@ -97,7 +105,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
ret = 0;
exit_close_error:
- gpiotools_release_linehandle(fd);
+ gpiotools_release_line(fd);
exit_error:
return ret;
}
@@ -121,7 +129,7 @@ int main(int argc, char **argv)
const char *device_name = NULL;
unsigned int lines[GPIOHANDLES_MAX];
unsigned int loops = 0;
- int nlines;
+ int num_lines;
int c;
int i;
@@ -158,11 +166,11 @@ int main(int argc, char **argv)
return -1;
}
- nlines = i;
+ num_lines = i;
- if (!device_name || !nlines) {
+ if (!device_name || !num_lines) {
print_usage();
return -1;
}
- return hammer_device(device_name, lines, nlines, loops);
+ return hammer_device(device_name, lines, num_lines, loops);
}
diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c
index 06003789e7c7..37187e056c8b 100644
--- a/tools/gpio/gpio-utils.c
+++ b/tools/gpio/gpio-utils.c
@@ -38,7 +38,7 @@
* such as "gpiochip0"
 * @lines: An array of desired lines, specified by offset
* index for the associated GPIO device.
- * @nline: The number of lines to request.
+ * @num_lines: The number of lines to request.
 * @flag: The new flag for requested gpio. Reference
* "linux/gpio.h" for the meaning of flag.
* @data: Default value will be set to gpio when flag is
@@ -56,7 +56,7 @@
* On failure return the errno.
*/
int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
- unsigned int nlines, unsigned int flag,
+ unsigned int num_lines, unsigned int flag,
struct gpiohandle_data *data,
const char *consumer_label)
{
@@ -75,15 +75,15 @@ int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
ret = -errno;
fprintf(stderr, "Failed to open %s, %s\n",
chrdev_name, strerror(errno));
- goto exit_close_error;
+ goto exit_free_name;
}
- for (i = 0; i < nlines; i++)
+ for (i = 0; i < num_lines; i++)
req.lineoffsets[i] = lines[i];
req.flags = flag;
strcpy(req.consumer_label, consumer_label);
- req.lines = nlines;
+ req.lines = num_lines;
if (flag & GPIOHANDLE_REQUEST_OUTPUT)
memcpy(req.default_values, data, sizeof(req.default_values));
@@ -94,26 +94,93 @@ int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
"GPIO_GET_LINEHANDLE_IOCTL", ret, strerror(errno));
}
-exit_close_error:
if (close(fd) == -1)
perror("Failed to close GPIO character device file");
+exit_free_name:
free(chrdev_name);
return ret < 0 ? ret : req.fd;
}
+
+/**
+ * gpiotools_request_line() - request gpio lines in a gpiochip
+ * @device_name: The name of gpiochip without prefix "/dev/",
+ * such as "gpiochip0"
+ * @lines: An array of desired lines, specified by offset
+ * index for the associated GPIO device.
+ * @num_lines: The number of lines to request.
+ * @config: The new config for requested gpio. Reference
+ * "linux/gpio.h" for config details.
+ * @consumer: The name of the consumer, such as "sysfs" or
+ * "powerkey". This is useful for other users to
+ * know who is using the lines.
+ *
+ * Request gpio lines through the ioctl provided by the chardev. The
+ * user can call gpiotools_set_values() and gpiotools_get_values() to
+ * write and read them respectively through the returned fd. Call
+ * gpiotools_release_line() to release these lines after use.
+ *
+ * Return: On success return the fd;
+ * On failure return the errno.
+ */
+int gpiotools_request_line(const char *device_name, unsigned int *lines,
+ unsigned int num_lines,
+ struct gpio_v2_line_config *config,
+ const char *consumer)
+{
+ struct gpio_v2_line_request req;
+ char *chrdev_name;
+ int fd;
+ int i;
+ int ret;
+
+ ret = asprintf(&chrdev_name, "/dev/%s", device_name);
+ if (ret < 0)
+ return -ENOMEM;
+
+ fd = open(chrdev_name, 0);
+ if (fd == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to open %s, %s\n",
+ chrdev_name, strerror(errno));
+ goto exit_free_name;
+ }
+
+ memset(&req, 0, sizeof(req));
+ for (i = 0; i < num_lines; i++)
+ req.offsets[i] = lines[i];
+
+ req.config = *config;
+ strcpy(req.consumer, consumer);
+ req.num_lines = num_lines;
+
+ ret = ioctl(fd, GPIO_V2_GET_LINE_IOCTL, &req);
+ if (ret == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to issue %s (%d), %s\n",
+ "GPIO_GET_LINE_IOCTL", ret, strerror(errno));
+ }
+
+ if (close(fd) == -1)
+ perror("Failed to close GPIO character device file");
+exit_free_name:
+ free(chrdev_name);
+ return ret < 0 ? ret : req.fd;
+}
+
/**
* gpiotools_set_values(): Set the value of gpio(s)
* @fd: The fd returned by
- * gpiotools_request_linehandle().
- * @data: The array of values want to set.
+ * gpiotools_request_line().
+ * @values: The array of values to set.
*
* Return: On success return 0;
* On failure return the errno.
*/
-int gpiotools_set_values(const int fd, struct gpiohandle_data *data)
+int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values)
{
int ret;
- ret = ioctl(fd, GPIOHANDLE_SET_LINE_VALUES_IOCTL, data);
+ ret = ioctl(fd, GPIO_V2_LINE_SET_VALUES_IOCTL, values);
if (ret == -1) {
ret = -errno;
fprintf(stderr, "Failed to issue %s (%d), %s\n",
@@ -127,17 +194,17 @@ int gpiotools_set_values(const int fd, struct gpiohandle_data *data)
/**
* gpiotools_get_values(): Get the value of gpio(s)
* @fd: The fd returned by
- * gpiotools_request_linehandle().
- * @data: The array of values get from hardware.
+ * gpiotools_request_line().
+ * @values: The array of values read from the hardware.
*
* Return: On success return 0;
* On failure return the errno.
*/
-int gpiotools_get_values(const int fd, struct gpiohandle_data *data)
+int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values)
{
int ret;
- ret = ioctl(fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, data);
+ ret = ioctl(fd, GPIO_V2_LINE_GET_VALUES_IOCTL, values);
if (ret == -1) {
ret = -errno;
fprintf(stderr, "Failed to issue %s (%d), %s\n",
@@ -170,6 +237,27 @@ int gpiotools_release_linehandle(const int fd)
}
/**
+ * gpiotools_release_line(): Release the line(s) of gpiochip
+ * @fd: The fd returned by
+ * gpiotools_request_line().
+ *
+ * Return: On success return 0;
+ * On failure return the errno.
+ */
+int gpiotools_release_line(const int fd)
+{
+ int ret;
+
+ ret = close(fd);
+ if (ret == -1) {
+ perror("Failed to close GPIO LINE device file");
+ ret = -errno;
+ }
+
+ return ret;
+}
+
+/**
* gpiotools_get(): Get value from specific line
* @device_name: The name of gpiochip without prefix "/dev/",
* such as "gpiochip0"
@@ -180,11 +268,14 @@ int gpiotools_release_linehandle(const int fd)
*/
int gpiotools_get(const char *device_name, unsigned int line)
{
- struct gpiohandle_data data;
+ int ret;
+ unsigned int value;
unsigned int lines[] = {line};
- gpiotools_gets(device_name, lines, 1, &data);
- return data.values[0];
+ ret = gpiotools_gets(device_name, lines, 1, &value);
+ if (ret)
+ return ret;
+ return value;
}
@@ -194,28 +285,36 @@ int gpiotools_get(const char *device_name, unsigned int line)
* such as "gpiochip0".
 * @lines: An array of desired lines, specified by offset
* index for the associated GPIO device.
- * @nline: The number of lines to request.
- * @data: The array of values get from gpiochip.
+ * @num_lines: The number of lines to request.
+ * @values: The array of values read from the gpiochip.
*
* Return: On success return 0;
* On failure return the errno.
*/
int gpiotools_gets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data)
+ unsigned int num_lines, unsigned int *values)
{
- int fd;
+ int fd, i;
int ret;
int ret_close;
+ struct gpio_v2_line_config config;
+ struct gpio_v2_line_values lv;
- ret = gpiotools_request_linehandle(device_name, lines, nlines,
- GPIOHANDLE_REQUEST_INPUT, data,
- CONSUMER);
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_INPUT;
+ ret = gpiotools_request_line(device_name, lines, num_lines,
+ &config, CONSUMER);
if (ret < 0)
return ret;
fd = ret;
- ret = gpiotools_get_values(fd, data);
- ret_close = gpiotools_release_linehandle(fd);
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&lv.mask, i);
+ ret = gpiotools_get_values(fd, &lv);
+ if (!ret)
+ for (i = 0; i < num_lines; i++)
+ values[i] = gpiotools_test_bit(lv.bits, i);
+ ret_close = gpiotools_release_line(fd);
return ret < 0 ? ret : ret_close;
}
@@ -232,11 +331,9 @@ int gpiotools_gets(const char *device_name, unsigned int *lines,
int gpiotools_set(const char *device_name, unsigned int line,
unsigned int value)
{
- struct gpiohandle_data data;
unsigned int lines[] = {line};
- data.values[0] = value;
- return gpiotools_sets(device_name, lines, 1, &data);
+ return gpiotools_sets(device_name, lines, 1, &value);
}
/**
@@ -245,23 +342,32 @@ int gpiotools_set(const char *device_name, unsigned int line,
* such as "gpiochip0".
 * @lines: An array of desired lines, specified by offset
* index for the associated GPIO device.
- * @nline: The number of lines to request.
- * @data: The array of values set to gpiochip, must be
+ * @num_lines: The number of lines to request.
+ * @values: The array of values to set on the gpiochip, must be
* 0(low) or 1(high).
*
* Return: On success return 0;
* On failure return the errno.
*/
int gpiotools_sets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data)
+ unsigned int num_lines, unsigned int *values)
{
- int ret;
+ int ret, i;
+ struct gpio_v2_line_config config;
- ret = gpiotools_request_linehandle(device_name, lines, nlines,
- GPIOHANDLE_REQUEST_OUTPUT, data,
- CONSUMER);
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_OUTPUT;
+ config.num_attrs = 1;
+ config.attrs[0].attr.id = GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES;
+ for (i = 0; i < num_lines; i++) {
+ gpiotools_set_bit(&config.attrs[0].mask, i);
+ gpiotools_assign_bit(&config.attrs[0].attr.values,
+ i, values[i]);
+ }
+ ret = gpiotools_request_line(device_name, lines, num_lines,
+ &config, CONSUMER);
if (ret < 0)
return ret;
- return gpiotools_release_linehandle(ret);
+ return gpiotools_release_line(ret);
}
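
To show how the new helpers fit together, here is a self-contained sketch of a caller reading two input lines with gpiotools_request_line(), gpiotools_get_values() and gpiotools_release_line(). It is not part of the patch; "gpiochip0", the offsets and the consumer string are placeholder values.

	#include <stdio.h>
	#include <linux/gpio.h>
	#include "gpio-utils.h"

	static int read_two_lines(void)
	{
		struct gpio_v2_line_config config;
		struct gpio_v2_line_values values;
		unsigned int offsets[] = { 3, 4 };
		int fd, ret, i;

		memset(&config, 0, sizeof(config));
		config.flags = GPIO_V2_LINE_FLAG_INPUT;

		fd = gpiotools_request_line("gpiochip0", offsets, 2, &config, "sketch");
		if (fd < 0)
			return fd;

		values.bits = 0;
		values.mask = 0;
		for (i = 0; i < 2; i++)
			gpiotools_set_bit(&values.mask, i);	/* read both requested lines */

		ret = gpiotools_get_values(fd, &values);
		if (!ret)
			for (i = 0; i < 2; i++)
				printf("offset %u = %d\n", offsets[i],
				       gpiotools_test_bit(values.bits, i));

		gpiotools_release_line(fd);
		return ret;
	}
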
diff --git a/tools/gpio/gpio-utils.h b/tools/gpio/gpio-utils.h
index cf37f13f3dcb..6c69a9f1c253 100644
--- a/tools/gpio/gpio-utils.h
+++ b/tools/gpio/gpio-utils.h
@@ -12,7 +12,9 @@
#ifndef _GPIO_UTILS_H_
#define _GPIO_UTILS_H_
+#include <stdbool.h>
#include <string.h>
+#include <linux/types.h>
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
@@ -23,19 +25,55 @@ static inline int check_prefix(const char *str, const char *prefix)
}
int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
- unsigned int nlines, unsigned int flag,
+ unsigned int num_lines, unsigned int flag,
struct gpiohandle_data *data,
const char *consumer_label);
-int gpiotools_set_values(const int fd, struct gpiohandle_data *data);
-int gpiotools_get_values(const int fd, struct gpiohandle_data *data);
int gpiotools_release_linehandle(const int fd);
+int gpiotools_request_line(const char *device_name,
+ unsigned int *lines,
+ unsigned int num_lines,
+ struct gpio_v2_line_config *config,
+ const char *consumer);
+int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values);
+int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values);
+int gpiotools_release_line(const int fd);
+
int gpiotools_get(const char *device_name, unsigned int line);
int gpiotools_gets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data);
+ unsigned int num_lines, unsigned int *values);
int gpiotools_set(const char *device_name, unsigned int line,
unsigned int value);
int gpiotools_sets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data);
+ unsigned int num_lines, unsigned int *values);
+
+/* helper functions for gpio_v2_line_values bits */
+static inline void gpiotools_set_bit(__u64 *b, int n)
+{
+ *b |= _BITULL(n);
+}
+
+static inline void gpiotools_change_bit(__u64 *b, int n)
+{
+ *b ^= _BITULL(n);
+}
+
+static inline void gpiotools_clear_bit(__u64 *b, int n)
+{
+ *b &= ~_BITULL(n);
+}
+
+static inline int gpiotools_test_bit(__u64 b, int n)
+{
+ return !!(b & _BITULL(n));
+}
+
+static inline void gpiotools_assign_bit(__u64 *b, int n, bool value)
+{
+ if (value)
+ gpiotools_set_bit(b, n);
+ else
+ gpiotools_clear_bit(b, n);
+}
#endif /* _GPIO_UTILS_H_ */
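
A short worked example of the new bit helpers, since they operate on the packed 64-bit mask/bits words of struct gpio_v2_line_values rather than on a value array; the results in the comments follow from the definitions above.

	__u64 mask = 0;

	gpiotools_set_bit(&mask, 0);		/* mask == 0x1 */
	gpiotools_assign_bit(&mask, 2, true);	/* mask == 0x5 */
	gpiotools_change_bit(&mask, 0);		/* mask == 0x4 */
	gpiotools_test_bit(mask, 2);		/* returns 1 */
	gpiotools_clear_bit(&mask, 2);		/* mask == 0x0 */
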
diff --git a/tools/gpio/gpio-watch.c b/tools/gpio/gpio-watch.c
index 5cea24fddfa7..f229ec62301b 100644
--- a/tools/gpio/gpio-watch.c
+++ b/tools/gpio/gpio-watch.c
@@ -21,8 +21,8 @@
int main(int argc, char **argv)
{
- struct gpioline_info_changed chg;
- struct gpioline_info req;
+ struct gpio_v2_line_info_changed chg;
+ struct gpio_v2_line_info req;
struct pollfd pfd;
int fd, i, j, ret;
char *event, *end;
@@ -40,11 +40,11 @@ int main(int argc, char **argv)
for (i = 0, j = 2; i < argc - 2; i++, j++) {
memset(&req, 0, sizeof(req));
- req.line_offset = strtoul(argv[j], &end, 0);
+ req.offset = strtoul(argv[j], &end, 0);
if (*end != '\0')
goto err_usage;
- ret = ioctl(fd, GPIO_GET_LINEINFO_WATCH_IOCTL, &req);
+ ret = ioctl(fd, GPIO_V2_GET_LINEINFO_WATCH_IOCTL, &req);
if (ret) {
perror("unable to set up line watch");
return EXIT_FAILURE;
@@ -71,13 +71,13 @@ int main(int argc, char **argv)
}
switch (chg.event_type) {
- case GPIOLINE_CHANGED_REQUESTED:
+ case GPIO_V2_LINE_CHANGED_REQUESTED:
event = "requested";
break;
- case GPIOLINE_CHANGED_RELEASED:
+ case GPIO_V2_LINE_CHANGED_RELEASED:
event = "released";
break;
- case GPIOLINE_CHANGED_CONFIG:
+ case GPIO_V2_LINE_CHANGED_CONFIG:
event = "config changed";
break;
default:
@@ -87,7 +87,7 @@ int main(int argc, char **argv)
}
printf("line %u: %s at %llu\n",
- chg.info.line_offset, event, chg.timestamp);
+ chg.info.offset, event, chg.timestamp_ns);
}
}
diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c
index e1430f504c13..5a05a454d0c9 100644
--- a/tools/gpio/lsgpio.c
+++ b/tools/gpio/lsgpio.c
@@ -25,45 +25,73 @@
struct gpio_flag {
char *name;
- unsigned long mask;
+ unsigned long long mask;
};
struct gpio_flag flagnames[] = {
{
- .name = "kernel",
- .mask = GPIOLINE_FLAG_KERNEL,
+ .name = "used",
+ .mask = GPIO_V2_LINE_FLAG_USED,
+ },
+ {
+ .name = "input",
+ .mask = GPIO_V2_LINE_FLAG_INPUT,
},
{
.name = "output",
- .mask = GPIOLINE_FLAG_IS_OUT,
+ .mask = GPIO_V2_LINE_FLAG_OUTPUT,
},
{
.name = "active-low",
- .mask = GPIOLINE_FLAG_ACTIVE_LOW,
+ .mask = GPIO_V2_LINE_FLAG_ACTIVE_LOW,
},
{
.name = "open-drain",
- .mask = GPIOLINE_FLAG_OPEN_DRAIN,
+ .mask = GPIO_V2_LINE_FLAG_OPEN_DRAIN,
},
{
.name = "open-source",
- .mask = GPIOLINE_FLAG_OPEN_SOURCE,
+ .mask = GPIO_V2_LINE_FLAG_OPEN_SOURCE,
+ },
+ {
+ .name = "pull-up",
+ .mask = GPIO_V2_LINE_FLAG_BIAS_PULL_UP,
+ },
+ {
+ .name = "pull-down",
+ .mask = GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN,
+ },
+ {
+ .name = "bias-disabled",
+ .mask = GPIO_V2_LINE_FLAG_BIAS_DISABLED,
},
};
-void print_flags(unsigned long flags)
+static void print_attributes(struct gpio_v2_line_info *info)
{
int i;
- int printed = 0;
+ const char *field_format = "%s";
for (i = 0; i < ARRAY_SIZE(flagnames); i++) {
- if (flags & flagnames[i].mask) {
- if (printed)
- fprintf(stdout, " ");
- fprintf(stdout, "%s", flagnames[i].name);
- printed++;
+ if (info->flags & flagnames[i].mask) {
+ fprintf(stdout, field_format, flagnames[i].name);
+ field_format = ", %s";
}
}
+
+ if ((info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING) &&
+ (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING))
+ fprintf(stdout, field_format, "both-edges");
+ else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING)
+ fprintf(stdout, field_format, "rising-edge");
+ else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING)
+ fprintf(stdout, field_format, "falling-edge");
+
+ for (i = 0; i < info->num_attrs; i++) {
+ if (info->attrs[i].id == GPIO_V2_LINE_ATTR_ID_DEBOUNCE)
+ fprintf(stdout, ", debounce_period=%dusec",
+ info->attrs[i].debounce_period_us);
+ }
}
int list_device(const char *device_name)
@@ -82,7 +110,7 @@ int list_device(const char *device_name)
if (fd == -1) {
ret = -errno;
fprintf(stderr, "Failed to open %s\n", chrdev_name);
- goto exit_close_error;
+ goto exit_free_name;
}
/* Inspect this GPIO chip */
@@ -97,18 +125,18 @@ int list_device(const char *device_name)
/* Loop over the lines and print info */
for (i = 0; i < cinfo.lines; i++) {
- struct gpioline_info linfo;
+ struct gpio_v2_line_info linfo;
memset(&linfo, 0, sizeof(linfo));
- linfo.line_offset = i;
+ linfo.offset = i;
- ret = ioctl(fd, GPIO_GET_LINEINFO_IOCTL, &linfo);
+ ret = ioctl(fd, GPIO_V2_GET_LINEINFO_IOCTL, &linfo);
if (ret == -1) {
ret = -errno;
perror("Failed to issue LINEINFO IOCTL\n");
goto exit_close_error;
}
- fprintf(stdout, "\tline %2d:", linfo.line_offset);
+ fprintf(stdout, "\tline %2d:", linfo.offset);
if (linfo.name[0])
fprintf(stdout, " \"%s\"", linfo.name);
else
@@ -119,7 +147,7 @@ int list_device(const char *device_name)
fprintf(stdout, " unused");
if (linfo.flags) {
fprintf(stdout, " [");
- print_flags(linfo.flags);
+ print_attributes(&linfo);
fprintf(stdout, "]");
}
fprintf(stdout, "\n");
@@ -129,6 +157,7 @@ int list_device(const char *device_name)
exit_close_error:
if (close(fd) == -1)
perror("Failed to close GPIO character device file");
+exit_free_name:
free(chrdev_name);
return ret;
}
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index ee9c1bb2293e..1e6fd6ca513b 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -437,7 +437,7 @@ void kvp_get_os_info(void)
/*
* Parse the /etc/os-release file if present:
- * http://www.freedesktop.org/software/systemd/man/os-release.html
+ * https://www.freedesktop.org/software/systemd/man/os-release.html
*/
file = fopen("/etc/os-release", "r");
if (file != NULL) {
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index f115d166c985..bb03859db89d 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -119,6 +119,7 @@ static const char * const iio_modifier_names[] = {
[IIO_MOD_PM2P5] = "pm2p5",
[IIO_MOD_PM4] = "pm4",
[IIO_MOD_PM10] = "pm10",
+ [IIO_MOD_O2] = "o2",
};
static bool event_is_known(struct iio_event_data *event)
@@ -211,6 +212,7 @@ static bool event_is_known(struct iio_event_data *event)
case IIO_MOD_PM2P5:
case IIO_MOD_PM4:
case IIO_MOD_PM10:
+ case IIO_MOD_O2:
break;
default:
return false;
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
index 4671fbf28842..7f475d59a097 100644
--- a/tools/include/linux/bits.h
+++ b/tools/include/linux/bits.h
@@ -18,8 +18,7 @@
* position @h. For example
* GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
*/
-#if !defined(__ASSEMBLY__) && \
- (!defined(CONFIG_CC_IS_GCC) || CONFIG_GCC_VERSION >= 49000)
+#if !defined(__ASSEMBLY__)
#include <linux/build_bug.h>
#define GENMASK_INPUT_CHECK(h, l) \
(BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
new file mode 100644
index 000000000000..57890b357f85
--- /dev/null
+++ b/tools/include/linux/btf_ids.h
@@ -0,0 +1,187 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_BTF_IDS_H
+#define _LINUX_BTF_IDS_H
+
+struct btf_id_set {
+ u32 cnt;
+ u32 ids[];
+};
+
+#ifdef CONFIG_DEBUG_INFO_BTF
+
+#include <linux/compiler.h> /* for __PASTE */
+
+/*
+ * The following macros help to define lists of BTF IDs placed
+ * in the .BTF_ids section. They are initially filled with zeros
+ * (during compilation) and resolved later, during the linking
+ * phase, by the resolve_btfids tool.
+ *
+ * Any change in list layout must be reflected in resolve_btfids
+ * tool logic.
+ */
+
+#define BTF_IDS_SECTION ".BTF_ids"
+
+#define ____BTF_ID(symbol) \
+asm( \
+".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
+".local " #symbol " ; \n" \
+".type " #symbol ", STT_OBJECT; \n" \
+".size " #symbol ", 4; \n" \
+#symbol ": \n" \
+".zero 4 \n" \
+".popsection; \n");
+
+#define __BTF_ID(symbol) \
+ ____BTF_ID(symbol)
+
+#define __ID(prefix) \
+ __PASTE(prefix, __COUNTER__)
+
+/*
+ * The BTF_ID macro defines a unique symbol for each ID, pointing
+ * to 4 zero bytes.
+ */
+#define BTF_ID(prefix, name) \
+ __BTF_ID(__ID(__BTF_ID__##prefix##__##name##__))
+
+/*
+ * The BTF_ID_LIST macro defines a pure (unsorted) list
+ * of BTF IDs, with the following layout:
+ *
+ * BTF_ID_LIST(list1)
+ * BTF_ID(type1, name1)
+ * BTF_ID(type2, name2)
+ *
+ * list1:
+ * __BTF_ID__type1__name1__1:
+ * .zero 4
+ * __BTF_ID__type2__name2__2:
+ * .zero 4
+ *
+ */
+#define __BTF_ID_LIST(name, scope) \
+asm( \
+".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
+"." #scope " " #name "; \n" \
+#name ":; \n" \
+".popsection; \n");
+
+#define BTF_ID_LIST(name) \
+__BTF_ID_LIST(name, local) \
+extern u32 name[];
+
+#define BTF_ID_LIST_GLOBAL(name) \
+__BTF_ID_LIST(name, globl)
+
+/* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with
+ * a single entry.
+ */
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) \
+ BTF_ID_LIST(name) \
+ BTF_ID(prefix, typename)
+
+/*
+ * The BTF_ID_UNUSED macro defines 4 zero bytes.
+ * It's used when we want to define an 'unused' entry
+ * in a BTF_ID_LIST, like:
+ *
+ * BTF_ID_LIST(bpf_skb_output_btf_ids)
+ * BTF_ID(struct, sk_buff)
+ * BTF_ID_UNUSED
+ * BTF_ID(struct, task_struct)
+ */
+
+#define BTF_ID_UNUSED \
+asm( \
+".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
+".zero 4 \n" \
+".popsection; \n");
+
+/*
+ * The BTF_SET_START/END macro pair defines a sorted list of
+ * BTF IDs plus its member count, with the following layout:
+ *
+ * BTF_SET_START(list)
+ * BTF_ID(type1, name1)
+ * BTF_ID(type2, name2)
+ * BTF_SET_END(list)
+ *
+ * __BTF_ID__set__list:
+ * .zero 4
+ * list:
+ * __BTF_ID__type1__name1__3:
+ * .zero 4
+ * __BTF_ID__type2__name2__4:
+ * .zero 4
+ *
+ */
+#define __BTF_SET_START(name, scope) \
+asm( \
+".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
+"." #scope " __BTF_ID__set__" #name "; \n" \
+"__BTF_ID__set__" #name ":; \n" \
+".zero 4 \n" \
+".popsection; \n");
+
+#define BTF_SET_START(name) \
+__BTF_ID_LIST(name, local) \
+__BTF_SET_START(name, local)
+
+#define BTF_SET_START_GLOBAL(name) \
+__BTF_ID_LIST(name, globl) \
+__BTF_SET_START(name, globl)
+
+#define BTF_SET_END(name) \
+asm( \
+".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
+".size __BTF_ID__set__" #name ", .-" #name " \n" \
+".popsection; \n"); \
+extern struct btf_id_set name;
+
+#else
+
+#define BTF_ID_LIST(name) static u32 name[5];
+#define BTF_ID(prefix, name)
+#define BTF_ID_UNUSED
+#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
+#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
+#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
+#define BTF_SET_END(name)
+
+#endif /* CONFIG_DEBUG_INFO_BTF */
+
+#ifdef CONFIG_NET
+/* Define a list of socket types which can be the argument for
+ * skc_to_*_sock() helpers. All these sockets should have
+ * sock_common as the first member in their memory layout.
+ */
+#define BTF_SOCK_TYPE_xxx \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
+
+enum {
+#define BTF_SOCK_TYPE(name, str) name,
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+MAX_BTF_SOCK_TYPE,
+};
+
+extern u32 btf_sock_ids[];
+#endif
+
+#endif
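
A minimal sketch of how the set macros above are meant to be used; the set name and member types are only examples. At link time resolve_btfids fills in the IDs, and the resulting symbol can be read through the struct btf_id_set declared at the top of the header (cnt followed by the sorted ids[]).

	BTF_SET_START(example_btf_ids)
	BTF_ID(struct, task_struct)
	BTF_ID(struct, sk_buff)
	BTF_SET_END(example_btf_ids)
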
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index 95c072b70d0e..b9d4322e1e65 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -27,6 +27,18 @@
#define __pure __attribute__((pure))
#endif
#define noinline __attribute__((noinline))
+#ifdef __has_attribute
+#if __has_attribute(disable_tail_calls)
+#define __no_tail_call __attribute__((disable_tail_calls))
+#endif
+#endif
+#ifndef __no_tail_call
+#if GCC_VERSION > 40201
+#define __no_tail_call __attribute__((optimize("no-optimize-sibling-calls")))
+#else
+#define __no_tail_call
+#endif
+#endif
#ifndef __packed
#define __packed __attribute__((packed))
#endif
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 180f7714a5f1..2b3f7353e891 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -47,6 +47,9 @@
#ifndef noinline
#define noinline
#endif
+#ifndef __no_tail_call
+#define __no_tail_call
+#endif
/* Are two types/vars the same type (ignoring qualifiers)? */
#ifndef __same_type
@@ -108,8 +111,6 @@
# define noinline
#endif
-#define uninitialized_var(x) x = *(&(x))
-
#include <linux/types.h>
/*
@@ -198,4 +199,8 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
# define __fallthrough
#endif
+/* Indirect macros required for expanded argument pasting, eg. __LINE__. */
+#define ___PASTE(a, b) a##b
+#define __PASTE(a, b) ___PASTE(a, b)
+
#endif /* _TOOLS_LINUX_COMPILER_H */
diff --git a/tools/include/linux/irqflags.h b/tools/include/linux/irqflags.h
index 67e01bbadbfe..501262aee8ff 100644
--- a/tools/include/linux/irqflags.h
+++ b/tools/include/linux/irqflags.h
@@ -2,9 +2,9 @@
#ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
#define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
-# define lockdep_hardirq_context(p) 0
+# define lockdep_hardirq_context() 0
# define lockdep_softirq_context(p) 0
-# define lockdep_hardirqs_enabled(p) 0
+# define lockdep_hardirqs_enabled() 0
# define lockdep_softirqs_enabled(p) 0
# define lockdep_hardirq_enter() do { } while (0)
# define lockdep_hardirq_exit() do { } while (0)
diff --git a/tools/include/linux/jhash.h b/tools/include/linux/jhash.h
index 348c6f47e4cc..af8d0fe1c6ce 100644
--- a/tools/include/linux/jhash.h
+++ b/tools/include/linux/jhash.h
@@ -5,7 +5,7 @@
*
* Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
*
- * http://burtleburtle.net/bob/hash/
+ * https://burtleburtle.net/bob/hash/
*
* These are the credits from Bob's sources:
*
diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h
index 89ca6fe257cc..efb6c3f5f2a9 100644
--- a/tools/include/linux/kallsyms.h
+++ b/tools/include/linux/kallsyms.h
@@ -20,7 +20,7 @@ static inline const char *kallsyms_lookup(unsigned long addr,
#include <execinfo.h>
#include <stdlib.h>
-static inline void print_ip_sym(unsigned long ip)
+static inline void print_ip_sym(const char *loglvl, unsigned long ip)
{
char **name;
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
new file mode 100644
index 000000000000..ab82c793c897
--- /dev/null
+++ b/tools/include/linux/objtool.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_OBJTOOL_H
+#define _LINUX_OBJTOOL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+/*
+ * This struct is used by asm and inline asm code to manually annotate the
+ * location of registers on the stack.
+ */
+struct unwind_hint {
+ u32 ip;
+ s16 sp_offset;
+ u8 sp_reg;
+ u8 type;
+ u8 end;
+};
+#endif
+
+/*
+ * UNWIND_HINT_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP
+ * (the caller's SP right before it made the call). Used for all callable
+ * functions, i.e. all C code and all callable asm functions.
+ *
+ * UNWIND_HINT_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset
+ * points to a fully populated pt_regs from a syscall, interrupt, or exception.
+ *
+ * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that
+ * sp_reg+sp_offset points to the iret return frame.
+ */
+#define UNWIND_HINT_TYPE_CALL 0
+#define UNWIND_HINT_TYPE_REGS 1
+#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+#define UNWIND_HINT_TYPE_RET_OFFSET 3
+
+#ifdef CONFIG_STACK_VALIDATION
+
+#ifndef __ASSEMBLY__
+
+#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
+ "987: \n\t" \
+ ".pushsection .discard.unwind_hints\n\t" \
+ /* struct unwind_hint */ \
+ ".long 987b - .\n\t" \
+ ".short " __stringify(sp_offset) "\n\t" \
+ ".byte " __stringify(sp_reg) "\n\t" \
+ ".byte " __stringify(type) "\n\t" \
+ ".byte " __stringify(end) "\n\t" \
+ ".balign 4 \n\t" \
+ ".popsection\n\t"
+
+/*
+ * This macro marks the given function's stack frame as "non-standard", which
+ * tells objtool to ignore the function when doing stack metadata validation.
+ * It should only be used in special cases where you're 100% sure it won't
+ * affect the reliability of frame pointers and kernel stack traces.
+ *
+ * For more information, see tools/objtool/Documentation/stack-validation.txt.
+ */
+#define STACK_FRAME_NON_STANDARD(func) \
+ static void __used __section(.discard.func_stack_frame_non_standard) \
+ *__func_stack_frame_non_standard_##func = func
+
+#else /* __ASSEMBLY__ */
+
+/*
+ * This macro indicates that the following intra-function call is valid.
+ * Any non-annotated intra-function call will cause objtool to issue a warning.
+ */
+#define ANNOTATE_INTRA_FUNCTION_CALL \
+ 999: \
+ .pushsection .discard.intra_function_calls; \
+ .long 999b; \
+ .popsection;
+
+/*
+ * In asm, there are two kinds of code: normal C-type callable functions and
+ * the rest. The normal callable functions can be called by other code, and
+ * don't do anything unusual with the stack. Such normal callable functions
+ * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this
+ * category. In this case, no special debugging annotations are needed because
+ * objtool can automatically generate the ORC data for the ORC unwinder to read
+ * at runtime.
+ *
+ * Anything which doesn't fall into the above category, such as syscall and
+ * interrupt handlers, tends to not be called directly by other functions, and
+ * often does unusual non-C-function-type things with the stack pointer. Such
+ * code needs to be annotated such that objtool can understand it. The
+ * following CFI hint macros are for this type of code.
+ *
+ * These macros provide hints to objtool about the state of the stack at each
+ * instruction. Objtool starts from the hints and follows the code flow,
+ * making automatic CFI adjustments when it sees pushes and pops, filling out
+ * the debuginfo as necessary. It will also warn if it sees any
+ * inconsistencies.
+ */
+.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.Lunwind_hint_ip_\@:
+ .pushsection .discard.unwind_hints
+ /* struct unwind_hint */
+ .long .Lunwind_hint_ip_\@ - .
+ .short \sp_offset
+ .byte \sp_reg
+ .byte \type
+ .byte \end
+ .balign 4
+ .popsection
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#else /* !CONFIG_STACK_VALIDATION */
+
+#ifndef __ASSEMBLY__
+
+#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
+ "\n\t"
+#define STACK_FRAME_NON_STANDARD(func)
+#else
+#define ANNOTATE_INTRA_FUNCTION_CALL
+.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.endm
+#endif
+
+#endif /* CONFIG_STACK_VALIDATION */
+
+#endif /* _LINUX_OBJTOOL_H */
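
A small usage sketch for the C-side annotation defined above: marking a function whose stack manipulation objtool cannot follow. The function name is a placeholder, and the usual kernel __used/__section attribute macros are assumed to be available.

	static void weird_trampoline(void)
	{
		/* hand-rolled stack switching that breaks frame-pointer rules */
	}
	STACK_FRAME_NON_STANDARD(weird_trampoline);
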
diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h
index e03b1ea23e0e..30dd21f976c3 100644
--- a/tools/include/linux/rbtree.h
+++ b/tools/include/linux/rbtree.h
@@ -11,7 +11,7 @@
I know it's not the cleaner way, but in C (not in C++) to get
performances and genericity...
- See Documentation/rbtree.txt for documentation and samples.
+ See Documentation/core-api/rbtree.rst for documentation and samples.
*/
#ifndef __TOOLS_LINUX_PERF_RBTREE_H
diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h
index 381aa948610d..570bb9794421 100644
--- a/tools/include/linux/rbtree_augmented.h
+++ b/tools/include/linux/rbtree_augmented.h
@@ -23,7 +23,7 @@
* rb_insert_augmented() and rb_erase_augmented() are intended to be public.
* The rest are implementation details you are not expected to depend on.
*
- * See Documentation/rbtree.txt for documentation and samples.
+ * See Documentation/core-api/rbtree.rst for documentation and samples.
*/
struct rb_augment_callbacks {
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
new file mode 100644
index 000000000000..89135bb35bf7
--- /dev/null
+++ b/tools/include/linux/static_call_types.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _STATIC_CALL_TYPES_H
+#define _STATIC_CALL_TYPES_H
+
+#include <linux/types.h>
+#include <linux/stringify.h>
+
+#define STATIC_CALL_KEY_PREFIX __SCK__
+#define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX)
+#define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1)
+#define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name)
+
+#define STATIC_CALL_TRAMP_PREFIX __SCT__
+#define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX)
+#define STATIC_CALL_TRAMP_PREFIX_LEN (sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1)
+#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name)
+#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name))
+
+/*
+ * Flags in the low bits of static_call_site::key.
+ */
+#define STATIC_CALL_SITE_TAIL 1UL /* tail call */
+#define STATIC_CALL_SITE_INIT 2UL /* init section */
+#define STATIC_CALL_SITE_FLAGS 3UL
+
+/*
+ * The static call site table needs to be created by external tooling (objtool
+ * or a compiler plugin).
+ */
+struct static_call_site {
+ s32 addr;
+ s32 key;
+};
+
+#endif /* _STATIC_CALL_TYPES_H */
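
As a worked expansion of the pasting macros above (the call name "sched_clock" is only illustrative):

	/*
	 * STATIC_CALL_KEY(sched_clock)       ->  __SCK__sched_clock
	 * STATIC_CALL_TRAMP(sched_clock)     ->  __SCT__sched_clock
	 * STATIC_CALL_TRAMP_STR(sched_clock) ->  "__SCT__sched_clock"
	 */
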
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 3a3201e4618e..f2b5d72a46c2 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -140,7 +140,7 @@ __SYSCALL(__NR_renameat, sys_renameat)
#define __NR_umount2 39
__SYSCALL(__NR_umount2, sys_umount)
#define __NR_mount 40
-__SC_COMP(__NR_mount, sys_mount, compat_sys_mount)
+__SYSCALL(__NR_mount, sys_mount)
#define __NR_pivot_root 41
__SYSCALL(__NR_pivot_root, sys_pivot_root)
@@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read)
#define __NR_write 64
__SYSCALL(__NR_write, sys_write)
#define __NR_readv 65
-__SC_COMP(__NR_readv, sys_readv, compat_sys_readv)
+__SC_COMP(__NR_readv, sys_readv, sys_readv)
#define __NR_writev 66
-__SC_COMP(__NR_writev, sys_writev, compat_sys_writev)
+__SC_COMP(__NR_writev, sys_writev, sys_writev)
#define __NR_pread64 67
__SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64)
#define __NR_pwrite64 68
@@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
/* fs/splice.c */
#define __NR_vmsplice 75
-__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
#define __NR_splice 76
__SYSCALL(__NR_splice, sys_splice)
#define __NR_tee 77
@@ -606,9 +606,9 @@ __SYSCALL(__NR_sendto, sys_sendto)
#define __NR_recvfrom 207
__SC_COMP(__NR_recvfrom, sys_recvfrom, compat_sys_recvfrom)
#define __NR_setsockopt 208
-__SC_COMP(__NR_setsockopt, sys_setsockopt, compat_sys_setsockopt)
+__SC_COMP(__NR_setsockopt, sys_setsockopt, sys_setsockopt)
#define __NR_getsockopt 209
-__SC_COMP(__NR_getsockopt, sys_getsockopt, compat_sys_getsockopt)
+__SC_COMP(__NR_getsockopt, sys_getsockopt, sys_getsockopt)
#define __NR_shutdown 210
__SYSCALL(__NR_shutdown, sys_shutdown)
#define __NR_sendmsg 211
@@ -727,11 +727,9 @@ __SYSCALL(__NR_setns, sys_setns)
#define __NR_sendmmsg 269
__SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg)
#define __NR_process_vm_readv 270
-__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \
- compat_sys_process_vm_readv)
+__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
#define __NR_process_vm_writev 271
-__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
- compat_sys_process_vm_writev)
+__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
#define __NR_kcmp 272
__SYSCALL(__NR_kcmp, sys_kcmp)
#define __NR_finit_module 273
@@ -850,14 +848,18 @@ __SYSCALL(__NR_pidfd_open, sys_pidfd_open)
#define __NR_clone3 435
__SYSCALL(__NR_clone3, sys_clone3)
#endif
+#define __NR_close_range 436
+__SYSCALL(__NR_close_range, sys_close_range)
#define __NR_openat2 437
__SYSCALL(__NR_openat2, sys_openat2)
#define __NR_pidfd_getfd 438
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
+#define __NR_faccessat2 439
+__SYSCALL(__NR_faccessat2, sys_faccessat2)
#undef __NR_syscalls
-#define __NR_syscalls 439
+#define __NR_syscalls 440
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 2813e579b480..00546062e023 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -55,7 +55,7 @@ extern "C" {
* cause the related events to not be seen.
*
* I915_RESET_UEVENT - Event is generated just before an attempt to reset the
- * the GPU. The value supplied with the event is always 1. NOTE: Disable
+ * GPU. The value supplied with the event is always 1. NOTE: Disable
* reset via module parameter will cause this event to not be seen.
*/
#define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR"
@@ -1934,7 +1934,7 @@ enum drm_i915_perf_property_id {
/**
* The value specifies which set of OA unit metrics should be
- * be configured, defining the contents of any OA unit reports.
+ * configured, defining the contents of any OA unit reports.
*
* This property is available in perf revision 1.
*/
@@ -1969,6 +1969,30 @@ enum drm_i915_perf_property_id {
*/
DRM_I915_PERF_PROP_HOLD_PREEMPTION,
+ /**
+ * Specifying this pins all contexts to the specified SSEU power
+ * configuration for the duration of the recording.
+ *
+ * This parameter's value is a pointer to a struct
+ * drm_i915_gem_context_param_sseu.
+ *
+ * This property is available in perf revision 4.
+ */
+ DRM_I915_PERF_PROP_GLOBAL_SSEU,
+
+ /**
+ * This optional parameter specifies the timer interval in nanoseconds
+ * at which the i915 driver will check the OA buffer for available data.
+ * Minimum allowed value is 100 microseconds. A default value is used by
+ * the driver if this parameter is not specified. Note that larger timer
+ * values will reduce cpu consumption during OA perf captures. However,
+ * excessively large values would potentially result in OA buffer
+ * overwrites as captures reach the end of the OA buffer.
+ *
+ * This property is available in perf revision 5.
+ */
+ DRM_I915_PERF_PROP_POLL_OA_PERIOD,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7bbf1b65be10..e6ceac3f7d62 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -73,7 +73,7 @@ struct bpf_insn {
/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
struct bpf_lpm_trie_key {
__u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
- __u8 data[]; /* Arbitrary size */
+ __u8 data[0]; /* Arbitrary size */
};
struct bpf_cgroup_storage_key {
@@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key {
__u32 attach_type; /* program attach type */
};
+union bpf_iter_link_info {
+ struct {
+ __u32 map_fd;
+ } map;
+};
+
/* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd {
BPF_MAP_CREATE,
@@ -113,6 +119,12 @@ enum bpf_cmd {
BPF_MAP_DELETE_BATCH,
BPF_LINK_CREATE,
BPF_LINK_UPDATE,
+ BPF_LINK_GET_FD_BY_ID,
+ BPF_LINK_GET_NEXT_ID,
+ BPF_ENABLE_STATS,
+ BPF_ITER_CREATE,
+ BPF_LINK_DETACH,
+ BPF_PROG_BIND_MAP,
};
enum bpf_map_type {
@@ -143,6 +155,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_SK_STORAGE,
BPF_MAP_TYPE_DEVMAP_HASH,
BPF_MAP_TYPE_STRUCT_OPS,
+ BPF_MAP_TYPE_RINGBUF,
+ BPF_MAP_TYPE_INODE_STORAGE,
};
/* Note that tracing related programs such as
@@ -184,6 +198,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_STRUCT_OPS,
BPF_PROG_TYPE_EXT,
BPF_PROG_TYPE_LSM,
+ BPF_PROG_TYPE_SK_LOOKUP,
};
enum bpf_attach_type {
@@ -215,11 +230,33 @@ enum bpf_attach_type {
BPF_TRACE_FEXIT,
BPF_MODIFY_RETURN,
BPF_LSM_MAC,
+ BPF_TRACE_ITER,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ BPF_XDP_DEVMAP,
+ BPF_CGROUP_INET_SOCK_RELEASE,
+ BPF_XDP_CPUMAP,
+ BPF_SK_LOOKUP,
+ BPF_XDP,
__MAX_BPF_ATTACH_TYPE
};
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+enum bpf_link_type {
+ BPF_LINK_TYPE_UNSPEC = 0,
+ BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
+ BPF_LINK_TYPE_TRACING = 2,
+ BPF_LINK_TYPE_CGROUP = 3,
+ BPF_LINK_TYPE_ITER = 4,
+ BPF_LINK_TYPE_NETNS = 5,
+ BPF_LINK_TYPE_XDP = 6,
+
+ MAX_BPF_LINK_TYPE,
+};
+
/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
*
* NONE(default): No further bpf programs allowed in the subtree.
@@ -310,19 +347,45 @@ enum bpf_attach_type {
/* The verifier internal test flag. Behavior is undefined */
#define BPF_F_TEST_STATE_FREQ (1U << 3)
+/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will
+ * restrict map and helper usage for such programs. Sleepable BPF programs can
+ * only be attached to hooks where kernel execution context allows sleeping.
+ * Such programs are allowed to use helpers that may sleep like
+ * bpf_copy_from_user().
+ */
+#define BPF_F_SLEEPABLE (1U << 4)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
- * two extensions:
- *
- * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
- * insn[0].imm: map fd map fd
- * insn[1].imm: 0 offset into value
- * insn[0].off: 0 0
- * insn[1].off: 0 0
- * ldimm64 rewrite: address of map address of map[0]+offset
- * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
+ * the following extensions:
+ *
+ * insn[0].src_reg: BPF_PSEUDO_MAP_FD
+ * insn[0].imm: map fd
+ * insn[1].imm: 0
+ * insn[0].off: 0
+ * insn[1].off: 0
+ * ldimm64 rewrite: address of map
+ * verifier type: CONST_PTR_TO_MAP
*/
#define BPF_PSEUDO_MAP_FD 1
+/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm: map fd
+ * insn[1].imm: offset into value
+ * insn[0].off: 0
+ * insn[1].off: 0
+ * ldimm64 rewrite: address of map[0]+offset
+ * verifier type: PTR_TO_MAP_VALUE
+ */
#define BPF_PSEUDO_MAP_VALUE 2
+/* insn[0].src_reg: BPF_PSEUDO_BTF_ID
+ * insn[0].imm: kernel btf id of VAR
+ * insn[1].imm: 0
+ * insn[0].off: 0
+ * insn[1].off: 0
+ * ldimm64 rewrite: address of the kernel variable
+ * verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
+ * is struct/union.
+ */
+#define BPF_PSEUDO_BTF_ID 3
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
@@ -369,6 +432,12 @@ enum {
/* Enable memory-mapping BPF map */
BPF_F_MMAPABLE = (1U << 10),
+
+/* Share perf_event among processes */
+ BPF_F_PRESERVE_ELEMS = (1U << 11),
+
+/* Create a map that is suitable to be an inner map with dynamic max entries */
+ BPF_F_INNER_MAP = (1U << 12),
};
/* Flags for BPF_PROG_QUERY. */
@@ -379,6 +448,17 @@ enum {
*/
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
+/* Flags for BPF_PROG_TEST_RUN */
+
+/* If set, run the test on the cpu specified by bpf_attr.test.cpu */
+#define BPF_F_TEST_RUN_ON_CPU (1U << 0)
+
+/* type for BPF_ENABLE_STATS */
+enum bpf_stats_type {
+ /* enabled run_time_ns and run_cnt */
+ BPF_STATS_RUN_TIME = 0,
+};
+
enum bpf_stack_build_id_status {
/* user space need an empty entry to identify end of a trace */
BPF_STACK_BUILD_ID_EMPTY = 0,
@@ -515,6 +595,8 @@ union bpf_attr {
*/
__aligned_u64 ctx_in;
__aligned_u64 ctx_out;
+ __u32 flags;
+ __u32 cpu;
} test;
struct { /* anonymous struct used by BPF_*_GET_*_ID */
@@ -523,6 +605,7 @@ union bpf_attr {
__u32 prog_id;
__u32 map_id;
__u32 btf_id;
+ __u32 link_id;
};
__u32 next_id;
__u32 open_flags;
@@ -574,9 +657,19 @@ union bpf_attr {
struct { /* struct used by BPF_LINK_CREATE command */
__u32 prog_fd; /* eBPF program to attach */
- __u32 target_fd; /* object to attach to */
+ union {
+ __u32 target_fd; /* object to attach to */
+ __u32 target_ifindex; /* target ifindex */
+ };
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
+ union {
+ __u32 target_btf_id; /* btf_id of target to attach to */
+ struct {
+ __aligned_u64 iter_info; /* extra bpf_iter_link_info */
+ __u32 iter_info_len; /* iter_info length */
+ };
+ };
} link_create;
struct { /* struct used by BPF_LINK_UPDATE command */
@@ -589,6 +682,25 @@ union bpf_attr {
__u32 old_prog_fd;
} link_update;
+ struct {
+ __u32 link_fd;
+ } link_detach;
+
+ struct { /* struct used by BPF_ENABLE_STATS command */
+ __u32 type;
+ } enable_stats;
+
+ struct { /* struct used by BPF_ITER_CREATE command */
+ __u32 link_fd;
+ __u32 flags;
+ } iter_create;
+
+ struct { /* struct used by BPF_PROG_BIND_MAP command */
+ __u32 prog_fd;
+ __u32 map_fd;
+ __u32 flags; /* extra flags */
+ } prog_bind_map;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -615,7 +727,7 @@ union bpf_attr {
* Map value associated to *key*, or **NULL** if no entry was
* found.
*
- * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
* Description
* Add or update the value of the entry associated to *key* in
* *map* with *value*. *flags* is one of:
@@ -633,29 +745,31 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * long bpf_map_delete_elem(struct bpf_map *map, const void *key)
* Description
* Delete entry with *key* from *map*.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
* Description
* For tracing programs, safely attempt to read *size* bytes from
* kernel space address *unsafe_ptr* and store the data in *dst*.
*
- * Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
- * instead.
+ * Generally, use **bpf_probe_read_user**\ () or
+ * **bpf_probe_read_kernel**\ () instead.
* Return
* 0 on success, or a negative error in case of failure.
*
* u64 bpf_ktime_get_ns(void)
* Description
* Return the time elapsed since system boot, in nanoseconds.
+ * Does not include time the system was suspended.
+ * See: **clock_gettime**\ (**CLOCK_MONOTONIC**)
* Return
* Current *ktime*.
*
- * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * long bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
* Description
* This helper is a "printk()-like" facility for debugging. It
* prints a message defined by format *fmt* (of size *fmt_size*)
@@ -705,7 +819,7 @@ union bpf_attr {
*
* Also, note that **bpf_trace_printk**\ () is slow, and should
* only be used for debugging purposes. For this reason, a notice
- * bloc (spanning several lines) is printed to kernel logs and
+ * block (spanning several lines) is printed to kernel logs and
* states that the helper should not be used "for production use"
* the first time this helper is used (or more precisely, when
* **trace_printk**\ () buffers are allocated). For passing values
@@ -735,7 +849,7 @@ union bpf_attr {
* Return
* The SMP id of the processor running the program.
*
- * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
* Description
* Store *len* bytes from address *from* into the packet
* associated to *skb*, at *offset*. *flags* are a combination of
@@ -752,7 +866,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * long bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
* Description
* Recompute the layer 3 (e.g. IP) checksum for the packet
* associated to *skb*. Computation is incremental, so the helper
@@ -777,7 +891,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * long bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
* Description
* Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
* packet associated to *skb*. Computation is incremental, so the
@@ -809,7 +923,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * long bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
* Description
* This special helper is used to trigger a "tail call", or in
* other words, to jump into another eBPF program. The same stack
@@ -840,7 +954,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * long bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
* Description
* Clone and redirect the packet associated to *skb* to another
* net device of index *ifindex*. Both ingress and egress
@@ -876,7 +990,7 @@ union bpf_attr {
* A 64-bit integer containing the current GID and UID, and
* created as such: *current_gid* **<< 32 \|** *current_uid*.
*
- * int bpf_get_current_comm(void *buf, u32 size_of_buf)
+ * long bpf_get_current_comm(void *buf, u32 size_of_buf)
* Description
* Copy the **comm** attribute of the current task into *buf* of
* *size_of_buf*. The **comm** attribute contains the name of
@@ -913,7 +1027,7 @@ union bpf_attr {
* Return
* The classid, or 0 for the default unconfigured classid.
*
- * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * long bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
* Description
* Push a *vlan_tci* (VLAN tag control information) of protocol
* *vlan_proto* to the packet associated to *skb*, then update
@@ -929,7 +1043,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * long bpf_skb_vlan_pop(struct sk_buff *skb)
* Description
* Pop a VLAN header from the packet associated to *skb*.
*
@@ -941,7 +1055,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * long bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
* Description
* Get tunnel metadata. This helper takes a pointer *key* to an
* empty **struct bpf_tunnel_key** of **size**, that will be
@@ -971,14 +1085,14 @@ union bpf_attr {
*
* int ret;
* struct bpf_tunnel_key key = {};
- *
+ *
* ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
* if (ret < 0)
* return TC_ACT_SHOT; // drop packet
- *
+ *
* if (key.remote_ipv4 != 0x0a000001)
* return TC_ACT_SHOT; // drop packet
- *
+ *
* return TC_ACT_OK; // accept packet
*
* This interface can also be used with all encapsulation devices
@@ -992,7 +1106,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * long bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
* Description
* Populate tunnel metadata for packet associated to *skb.* The
* tunnel metadata is set to the contents of *key*, of *size*. The
@@ -1058,7 +1172,7 @@ union bpf_attr {
* The value of the perf event counter read from the map, or a
* negative error code in case of failure.
*
- * int bpf_redirect(u32 ifindex, u64 flags)
+ * long bpf_redirect(u32 ifindex, u64 flags)
* Description
* Redirect the packet to another net device of index *ifindex*.
* This helper is somewhat similar to **bpf_clone_redirect**\
@@ -1085,7 +1199,7 @@ union bpf_attr {
* Description
* Retrieve the realm or the route, that is to say the
* **tclassid** field of the destination for the *skb*. The
- * indentifier retrieved is a user-provided tag, similar to the
+ * identifier retrieved is a user-provided tag, similar to the
* one used with the net_cls cgroup (see description for
* **bpf_get_cgroup_classid**\ () helper), but here this tag is
* held by a route (a destination entry), not by a task.
@@ -1105,7 +1219,7 @@ union bpf_attr {
* The realm of the route for the packet associated to *skb*, or 0
* if none was found.
*
- * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * long bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
* Description
* Write raw *data* blob into a special BPF perf event held by
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -1150,7 +1264,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len)
+ * long bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len)
* Description
* This helper was provided as an easy way to load data from a
* packet. It can be used to load *len* bytes from *offset* from
@@ -1167,7 +1281,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags)
+ * long bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags)
* Description
* Walk a user or a kernel stack and return its id. To achieve
* this, the helper needs *ctx*, which is a pointer to the context
@@ -1236,7 +1350,7 @@ union bpf_attr {
* The checksum result, or a negative error code in case of
* failure.
*
- * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
+ * long bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
* Description
* Retrieve tunnel options metadata for the packet associated to
* *skb*, and store the raw tunnel option data to the buffer *opt*
@@ -1254,7 +1368,7 @@ union bpf_attr {
* Return
* The size of the option data retrieved.
*
- * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
+ * long bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
* Description
* Set tunnel options metadata for the packet associated to *skb*
* to the option data contained in the raw buffer *opt* of *size*.
@@ -1264,7 +1378,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * long bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
* Description
* Change the protocol of the *skb* to *proto*. Currently
* supported are transition from IPv4 to IPv6, and from IPv6 to
@@ -1291,7 +1405,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * long bpf_skb_change_type(struct sk_buff *skb, u32 type)
* Description
* Change the packet type for the packet associated to *skb*. This
* comes down to setting *skb*\ **->pkt_type** to *type*, except
@@ -1318,7 +1432,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * long bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
* Description
* Check whether *skb* is a descendant of the cgroup2 held by
* *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
@@ -1349,7 +1463,7 @@ union bpf_attr {
* Return
* A pointer to the current task struct.
*
- * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * long bpf_probe_write_user(void *dst, const void *src, u32 len)
* Description
* Attempt in a safe way to write *len* bytes from the buffer
* *src* to *dst* in memory. It only works for threads that are in
@@ -1368,7 +1482,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * long bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
* Description
* Check whether the probe is being run in the context of a given
* subset of the cgroup2 hierarchy. The cgroup2 to test is held by
@@ -1376,11 +1490,11 @@ union bpf_attr {
* Return
* The return value depends on the result of the test, and can be:
*
- * * 0, if the *skb* task belongs to the cgroup2.
- * * 1, if the *skb* task does not belong to the cgroup2.
+ * * 0, if current task belongs to the cgroup2.
+ * * 1, if current task does not belong to the cgroup2.
* * A negative error code, if an error occurred.
*
- * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
* Description
* Resize (trim or grow) the packet associated to *skb* to the
* new *len*. The *flags* are reserved for future usage, and must
@@ -1404,7 +1518,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * long bpf_skb_pull_data(struct sk_buff *skb, u32 len)
* Description
* Pull in non-linear data in case the *skb* is non-linear and not
* all of *len* are part of the linear section. Make *len* bytes
@@ -1460,7 +1574,7 @@ union bpf_attr {
* recalculation the next time the kernel tries to access this
* hash or when the **bpf_get_hash_recalc**\ () helper is called.
*
- * int bpf_get_numa_node_id(void)
+ * long bpf_get_numa_node_id(void)
* Description
* Return the id of the current NUMA node. The primary use case
* for this helper is the selection of sockets for the local NUMA
@@ -1471,7 +1585,7 @@ union bpf_attr {
* Return
* The id of current NUMA node.
*
- * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * long bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
* Description
* Grows headroom of packet associated to *skb* and adjusts the
* offset of the MAC header accordingly, adding *len* bytes of
@@ -1492,7 +1606,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * long bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
* Description
* Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
* it is possible to use a negative value for *delta*. This helper
@@ -1507,14 +1621,14 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe kernel address
- * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+ * *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for
* more details.
*
- * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
- * instead.
+ * Generally, use **bpf_probe_read_user_str**\ () or
+ * **bpf_probe_read_kernel_str**\ () instead.
* Return
* On success, the strictly positive length of the string,
* including the trailing NUL character. On error, a negative
@@ -1542,7 +1656,7 @@ union bpf_attr {
*
* u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
* Description
- * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts
* *skb*, but gets socket from **struct bpf_sock_ops** context.
* Return
* A 8-byte long non-decreasing number.
@@ -1555,14 +1669,14 @@ union bpf_attr {
* is returned (note that **overflowuid** might also be the actual
* UID value for the socket).
*
- * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * long bpf_set_hash(struct sk_buff *skb, u32 hash)
* Description
* Set the full hash for *skb* (set the field *skb*\ **->hash**)
* to value *hash*.
* Return
* 0
*
- * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
+ * long bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
* Description
* Emulate a call to **setsockopt()** on the socket associated to
* *bpf_socket*, which must be a full socket. The *level* at
@@ -1570,25 +1684,41 @@ union bpf_attr {
* must be specified, see **setsockopt(2)** for more information.
* The option value of length *optlen* is pointed by *optval*.
*
+ * *bpf_socket* should be one of the following:
+ *
+ * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
+ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
+ * and **BPF_CGROUP_INET6_CONNECT**.
+ *
* This helper actually implements a subset of **setsockopt()**.
* It supports the following *level*\ s:
*
* * **SOL_SOCKET**, which supports the following *optname*\ s:
* **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
- * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
+ * **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
* * **IPPROTO_TCP**, which supports the following *optname*\ s:
* **TCP_CONGESTION**, **TCP_BPF_IW**,
- * **TCP_BPF_SNDCWND_CLAMP**.
+ * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
+ * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
+ * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
* Return
* 0 on success, or a negative error in case of failure.
*
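A minimal sketch of how bpf_setsockopt() might be called from a BPF_PROG_TYPE_SOCK_OPS program to switch congestion control once a connection is established; the numeric constants are spelled out to keep the snippet self-contained, and the choice of "cubic" is illustrative:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define SOL_TCP		6	/* IPPROTO_TCP */
#define TCP_CONGESTION	13	/* TCP socket option from the UAPI */

SEC("sockops")
int set_cc(struct bpf_sock_ops *skops)
{
	char cc[] = "cubic";

	/* Only act once the connection is established. */
	if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB ||
	    skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
		bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, cc, sizeof(cc));
	return 1;
}

char LICENSE[] SEC("license") = "GPL";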
- * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
+ * long bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
* Description
* Grow or shrink the room for data in the packet associated to
* *skb* by *len_diff*, and according to the selected *mode*.
*
+ * By default, the helper will reset any offloaded checksum
+ * indicator of the skb to CHECKSUM_NONE. This can be avoided
+ * by the following flag:
+ *
+ * * **BPF_F_ADJ_ROOM_NO_CSUM_RESET**: Do not reset offloaded
+ * checksum data of the skb to CHECKSUM_NONE.
+ *
* There are two supported modes at this time:
*
* * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
@@ -1623,7 +1753,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
* Description
* Redirect the packet to the endpoint referenced by *map* at
* index *key*. Depending on its type, this *map* can contain
@@ -1634,17 +1764,17 @@ union bpf_attr {
*
* The lower two bits of *flags* are used as the return code if
* the map lookup fails. This is so that the return value can be
- * one of the XDP program return codes up to XDP_TX, as chosen by
- * the caller. Any higher bits in the *flags* argument must be
+ * one of the XDP program return codes up to **XDP_TX**, as chosen
+ * by the caller. Any higher bits in the *flags* argument must be
* unset.
*
- * See also bpf_redirect(), which only supports redirecting to an
- * ifindex, but doesn't require a map to do so.
+ * See also **bpf_redirect**\ (), which only supports redirecting
+ * to an ifindex, but doesn't require a map to do so.
* Return
* **XDP_REDIRECT** on success, or the value of the two lower bits
* of the *flags* argument on error.
*
- * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
* Description
* Redirect the packet to the socket referenced by *map* (of type
* **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
@@ -1655,7 +1785,7 @@ union bpf_attr {
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
- * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
* Description
* Add an entry to, or update a *map* referencing sockets. The
* *skops* is used as a new value for the entry associated to
@@ -1674,7 +1804,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * long bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
* Description
* Adjust the address pointed by *xdp_md*\ **->data_meta** by
* *delta* (which can be positive or negative). Note that this
@@ -1703,7 +1833,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * long bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
* Description
* Read the value of a perf event counter, and store it into *buf*
* of size *buf_size*. This helper relies on a *map* of type
@@ -1747,13 +1877,13 @@ union bpf_attr {
* the time running for event since last normalization. The
* enabled and running times are accumulated since the perf event
* open. To achieve scaling factor between two invocations of an
- * eBPF program, users can can use CPU id as the key (which is
+ * eBPF program, users can use CPU id as the key (which is
* typical for perf array usage model) to remember the previous
* value and do the calculation inside the eBPF program.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
* Description
* For an eBPF program attached to a perf event, retrieve the
* value of the event counter associated to *ctx* and store it in
@@ -1764,7 +1894,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
+ * long bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
* Description
* Emulate a call to **getsockopt()** on the socket associated to
* *bpf_socket*, which must be a full socket. The *level* at
@@ -1773,6 +1903,12 @@ union bpf_attr {
* The retrieved value is stored in the structure pointed by
* *optval* and of length *optlen*.
*
+ * *bpf_socket* should be one of the following:
+ *
+ * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
+ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
+ * and **BPF_CGROUP_INET6_CONNECT**.
+ *
* This helper actually implements a subset of **getsockopt()**.
* It supports the following *level*\ s:
*
@@ -1783,14 +1919,14 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_override_return(struct pt_regs *regs, u64 rc)
+ * long bpf_override_return(struct pt_regs *regs, u64 rc)
* Description
* Used for error injection, this helper uses kprobes to override
* the return value of the probed function, and to set it to *rc*.
* The first argument is the context *regs* on which the kprobe
* works.
*
- * This helper works by setting setting the PC (program counter)
+ * This helper works by setting the PC (program counter)
* to an override function which is run in place of the original
* probed function. This means the probed function is not run at
* all. The replacement function just returns with the required
@@ -1808,7 +1944,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
+ * long bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
* Description
* Attempt to set the value of the **bpf_sock_ops_cb_flags** field
* for the full TCP socket associated to *bpf_sock_ops* to
@@ -1852,7 +1988,7 @@ union bpf_attr {
* be set is returned (which comes down to 0 if all bits were set
* as required).
*
- * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
* Description
* This helper is used in programs implementing policies at the
* socket level. If the message *msg* is allowed to pass (i.e. if
@@ -1866,7 +2002,7 @@ union bpf_attr {
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
- * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
* Description
* For socket policies, apply the verdict of the eBPF program to
* the next *bytes* (number of bytes) of message *msg*.
@@ -1900,7 +2036,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
* Description
* For socket policies, prevent the execution of the verdict eBPF
* program for message *msg* until *bytes* (byte number) have been
@@ -1918,7 +2054,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
* Description
* For socket policies, pull in non-linear data from user space
* for *msg* and set pointers *msg*\ **->data** and *msg*\
@@ -1949,7 +2085,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
+ * long bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
* Description
* Bind the socket associated to *ctx* to the address pointed by
* *addr*, of length *addr_len*. This allows for making outgoing
@@ -1959,18 +2095,19 @@ union bpf_attr {
*
* This helper works for IPv4 and IPv6, TCP and UDP sockets. The
* domain (*addr*\ **->sa_family**) must be **AF_INET** (or
- * **AF_INET6**). Looking for a free port to bind to can be
- * expensive, therefore binding to port is not permitted by the
- * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
- * must be set to zero.
+ * **AF_INET6**). It's advised to pass a zero port (**sin_port**
+ * or **sin6_port**) which triggers IP_BIND_ADDRESS_NO_PORT-like
+ * behavior and lets the kernel efficiently pick an unused port,
+ * as long as the 4-tuple is unique. Passing a non-zero port might
+ * lead to degraded performance.
* Return
* 0 on success, or a negative error in case of failure.
*
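A sketch of calling bpf_bind() from a BPF_CGROUP_INET4_CONNECT program with the port left at zero, as recommended above. The header set follows the convention used by the kernel selftests, and the 10.0.0.1 source address is made up:

#include <linux/bpf.h>
#include <linux/in.h>
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("cgroup/connect4")
int bind_src(struct bpf_sock_addr *ctx)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = 0,				/* zero port: kernel picks one */
		.sin_addr.s_addr = bpf_htonl(0x0a000001),	/* 10.0.0.1 */
	};

	/* A real program would check the return value. */
	bpf_bind(ctx, (struct sockaddr *)&addr, sizeof(addr));
	return 1;	/* let the connect() proceed */
}

char LICENSE[] SEC("license") = "GPL";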
- * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * long bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
* Description
* Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
- * only possible to shrink the packet as of this writing,
- * therefore *delta* must be a negative integer.
+ * possible to both shrink and grow the packet tail:
+ * shrinking is done by passing a negative *delta*.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
@@ -1980,7 +2117,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
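A sketch of shrinking the tail from an XDP program; the 256-byte snap length is arbitrary:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int snap_256(struct xdp_md *ctx)
{
	long len = (long)ctx->data_end - (long)ctx->data;

	/* A negative delta trims bytes off the end of the frame. */
	if (len > 256)
		bpf_xdp_adjust_tail(ctx, 256 - len);
	return XDP_PASS;
}

char LICENSE[] SEC("license") = "GPL";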
- * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * long bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
* Description
* Retrieve the XFRM state (IP transform framework, see also
* **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
@@ -1996,7 +2133,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags)
+ * long bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags)
* Description
* Return a user or a kernel stack in bpf program provided buffer.
* To achieve this, the helper needs *ctx*, which is a pointer
@@ -2029,7 +2166,7 @@ union bpf_attr {
* A non-negative value equal to or less than *size* on success,
* or a negative error in case of failure.
*
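A sketch of collecting a user-space stack from a perf_event program and pushing it through a perf event array; the map name and fixed depth are illustrative:

#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include <bpf/bpf_helpers.h>

#define MAX_FRAMES 32

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} events SEC(".maps");

SEC("perf_event")
int sample_user_stack(struct bpf_perf_event_data *ctx)
{
	__u64 frames[MAX_FRAMES];	/* 256 bytes, fits the BPF stack */

	if (bpf_get_stack(ctx, frames, sizeof(frames), BPF_F_USER_STACK) < 0)
		return 0;
	/* Ship the (possibly partially filled) buffer to user space. */
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
			      frames, sizeof(frames));
	return 0;
}

char LICENSE[] SEC("license") = "GPL";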
- * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
* Description
* This helper is similar to **bpf_skb_load_bytes**\ () in that
* it provides an easy way to load *len* bytes from *offset*
@@ -2051,7 +2188,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ * long bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
* Description
* Do FIB lookup in kernel tables using parameters in *params*.
* If lookup is successful and result shows packet is to be
@@ -2082,7 +2219,7 @@ union bpf_attr {
* * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
* packet is not forwarded or needs assist from full stack
*
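A sketch of an XDP forwarding decision using bpf_fib_lookup() for IPv4. MAC rewriting and the actual redirect are left out, and AF_INET is written numerically to keep the snippet header-independent:

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("xdp")
int fib_fwd(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;
	struct iphdr *iph = data + sizeof(*eth);
	struct bpf_fib_lookup params = {};

	if ((void *)(iph + 1) > data_end || eth->h_proto != bpf_htons(ETH_P_IP))
		return XDP_PASS;

	params.family      = 2;			/* AF_INET */
	params.l4_protocol = iph->protocol;
	params.tos         = iph->tos;
	params.tot_len     = bpf_ntohs(iph->tot_len);
	params.ipv4_src    = iph->saddr;
	params.ipv4_dst    = iph->daddr;
	params.ifindex     = ctx->ingress_ifindex;

	if (bpf_fib_lookup(ctx, &params, sizeof(params), 0) ==
	    BPF_FIB_LKUP_RET_SUCCESS) {
		/* A real forwarder would copy params.smac/params.dmac into
		 * the Ethernet header and return bpf_redirect(params.ifindex, 0).
		 */
	}
	return XDP_PASS;
}

char LICENSE[] SEC("license") = "GPL";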
- * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
* Description
* Add an entry to, or update a sockhash *map* referencing sockets.
* The *skops* is used as a new value for the entry associated to
@@ -2101,7 +2238,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
* Description
* This helper is used in programs implementing policies at the
* socket level. If the message *msg* is allowed to pass (i.e. if
@@ -2115,11 +2252,11 @@ union bpf_attr {
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
- * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
* Description
* This helper is used in programs implementing policies at the
* skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
- * if the verdeict eBPF program returns **SK_PASS**), redirect it
+ * if the verdict eBPF program returns **SK_PASS**), redirect it
* to the socket referenced by *map* (of type
* **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
* egress interfaces can be used for redirection. The
@@ -2129,7 +2266,7 @@ union bpf_attr {
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
- * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ * long bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
* Description
* Encapsulate the packet associated to *skb* within a Layer 3
* protocol header. This header is provided in the buffer at
@@ -2166,7 +2303,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ * long bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
* Description
* Store *len* bytes from address *from* into the packet
* associated to *skb*, at *offset*. Only the flags, tag and TLVs
@@ -2181,7 +2318,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ * long bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
* Description
* Adjust the size allocated to TLVs in the outermost IPv6
* Segment Routing Header contained in the packet associated to
@@ -2197,7 +2334,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ * long bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
* Description
* Apply an IPv6 Segment Routing action of type *action* to the
* packet associated to *skb*. Each action takes a parameter
@@ -2226,7 +2363,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_rc_repeat(void *ctx)
+ * long bpf_rc_repeat(void *ctx)
* Description
* This helper is used in programs implementing IR decoding, to
* report a successfully decoded repeat key message. This delays
@@ -2245,7 +2382,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * long bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
* Description
* This helper is used in programs implementing IR decoding, to
* report a successfully decoded key press with *scancode*,
@@ -2256,7 +2393,7 @@ union bpf_attr {
* **bpf_rc_keydown**\ () again with the same values, or calling
* **bpf_rc_repeat**\ ().
*
- * Some protocols include a toggle bit, in case the button was
+ * Some protocols include a toggle bit, in case the button was
* released and pressed again between consecutive scancodes.
*
* The *ctx* should point to the lirc sample as passed into
@@ -2310,7 +2447,7 @@ union bpf_attr {
* Return
* A pointer to the local storage area.
*
- * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
* Description
* Select a **SO_REUSEPORT** socket from a
* **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
@@ -2355,7 +2492,7 @@ union bpf_attr {
* Look for an IPv6 socket.
*
* If the *netns* is a negative signed 32-bit integer, then the
- * socket lookup table in the netns associated with the *ctx* will
+ * socket lookup table in the netns associated with the *ctx*
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
@@ -2392,7 +2529,7 @@ union bpf_attr {
* Look for an IPv6 socket.
*
* If the *netns* is a negative signed 32-bit integer, then the
- * socket lookup table in the netns associated with the *ctx* will
+ * socket lookup table in the netns associated with the *ctx*
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
@@ -2411,7 +2548,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
- * int bpf_sk_release(struct bpf_sock *sock)
+ * long bpf_sk_release(void *sock)
* Description
* Release the reference held by *sock*. *sock* must be a
* non-**NULL** pointer that was returned from
@@ -2419,7 +2556,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
* Description
* Push an element *value* in *map*. *flags* is one of:
*
@@ -2429,19 +2566,19 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * long bpf_map_pop_elem(struct bpf_map *map, void *value)
* Description
* Pop an element from *map*.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * long bpf_map_peek_elem(struct bpf_map *map, void *value)
* Description
* Get an element from *map* without removing it.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
+ * long bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
* Description
* For socket policies, insert *len* bytes into *msg* at offset
* *start*.
@@ -2457,7 +2594,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
+ * long bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
* Description
* Will remove *len* bytes from a *msg* starting at byte *start*.
* This may result in **ENOMEM** errors under certain situations if
@@ -2469,7 +2606,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
+ * long bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
* Description
* This helper is used in programs implementing IR decoding, to
* report a successfully decoded pointer movement.
@@ -2483,7 +2620,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_spin_lock(struct bpf_spin_lock *lock)
+ * long bpf_spin_lock(struct bpf_spin_lock *lock)
* Description
* Acquire a spinlock represented by the pointer *lock*, which is
* stored as part of a value of a map. Taking the lock allows to
@@ -2531,7 +2668,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_spin_unlock(struct bpf_spin_lock *lock)
+ * long bpf_spin_unlock(struct bpf_spin_lock *lock)
* Description
* Release the *lock* previously locked by a call to
* **bpf_spin_lock**\ (\ *lock*\ ).
@@ -2554,7 +2691,7 @@ union bpf_attr {
* A **struct bpf_tcp_sock** pointer on success, or **NULL** in
* case of failure.
*
- * int bpf_skb_ecn_set_ce(struct sk_buff *skb)
+ * long bpf_skb_ecn_set_ce(struct sk_buff *skb)
* Description
* Set ECN (Explicit Congestion Notification) field of IP header
* to **CE** (Congestion Encountered) if current value is **ECT**
@@ -2591,7 +2728,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
- * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK for
* the listening socket in *sk*.
@@ -2602,12 +2739,11 @@ union bpf_attr {
*
* *th* points to the start of the TCP header, while *th_len*
* contains **sizeof**\ (**struct tcphdr**).
- *
* Return
* 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
* error otherwise.
*
- * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
+ * long bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
* Description
* Get name of sysctl in /proc/sys/ and copy it into provided by
* program buffer *buf* of size *buf_len*.
@@ -2623,7 +2759,7 @@ union bpf_attr {
* **-E2BIG** if the buffer wasn't big enough (*buf* will contain
* truncated name in this case).
*
- * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * long bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
* Description
* Get current value of sysctl as it is presented in /proc/sys
* (incl. newline, etc), and copy it as a string into provided
@@ -2642,7 +2778,7 @@ union bpf_attr {
* **-EINVAL** if current value was unavailable, e.g. because
* sysctl is uninitialized and read returns -EIO for it.
*
- * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * long bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
* Description
* Get new value being written by user space to sysctl (before
* the actual write happens) and copy it as a string into
@@ -2659,7 +2795,7 @@ union bpf_attr {
*
* **-EINVAL** if sysctl is being read.
*
- * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
+ * long bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
* Description
* Override new value being written by user space to sysctl with
* value provided by program in buffer *buf* of size *buf_len*.
@@ -2676,7 +2812,7 @@ union bpf_attr {
*
* **-EINVAL** if sysctl is being read.
*
- * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
+ * long bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
* Description
* Convert the initial part of the string from buffer *buf* of
* size *buf_len* to a long integer according to the given base
@@ -2700,7 +2836,7 @@ union bpf_attr {
*
* **-ERANGE** if resulting value was out of range.
*
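A sketch of the typical pairing of bpf_sysctl_get_new_value() and bpf_strtol() in a BPF_PROG_TYPE_CGROUP_SYSCTL program; the buffer size and the limit of 100 are arbitrary:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/sysctl")
int cap_value(struct bpf_sysctl *ctx)
{
	char buf[16] = {};
	long val = 0;

	if (!ctx->write)
		return 1;				/* reads are always allowed */
	if (bpf_sysctl_get_new_value(ctx, buf, sizeof(buf)) < 0)
		return 0;				/* reject unreadable input */
	if (bpf_strtol(buf, sizeof(buf), 0, &val) < 0 || val > 100)
		return 0;				/* reject out-of-range writes */
	return 1;
}

char LICENSE[] SEC("license") = "GPL";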
- * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
+ * long bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
* Description
* Convert the initial part of the string from buffer *buf* of
* size *buf_len* to an unsigned long integer according to the
@@ -2723,7 +2859,7 @@ union bpf_attr {
*
* **-ERANGE** if resulting value was out of range.
*
- * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags)
+ * void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags)
* Description
* Get a bpf-local-storage from a *sk*.
*
@@ -2739,6 +2875,9 @@ union bpf_attr {
* "type". The bpf-local-storage "type" (i.e. the *map*) is
* searched against all bpf-local-storages residing at *sk*.
*
+ * *sk* is a kernel **struct sock** pointer for LSM programs.
+ * *sk* is a **struct bpf_sock** pointer for other program types.
+ *
* An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be
* used such that a new bpf-local-storage will be
* created if one does not exist. *value* can be used
@@ -2751,15 +2890,16 @@ union bpf_attr {
* **NULL** if not found or there was an error in adding
* a new bpf-local-storage.
*
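A sketch of the create-on-demand pattern for socket local storage from a sockops program; the map and struct names are illustrative:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct sk_stats {
	__u64 events;
};

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct sk_stats);
} sk_stats_map SEC(".maps");

SEC("sockops")
int count_events(struct bpf_sock_ops *skops)
{
	struct sk_stats *stats;
	struct bpf_sock *sk = skops->sk;

	if (!sk)
		return 1;
	/* Create the per-socket storage on first use. */
	stats = bpf_sk_storage_get(&sk_stats_map, sk, NULL,
				   BPF_SK_STORAGE_GET_F_CREATE);
	if (stats)
		stats->events++;
	return 1;
}

char LICENSE[] SEC("license") = "GPL";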
- * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
+ * long bpf_sk_storage_delete(struct bpf_map *map, void *sk)
* Description
* Delete a bpf-local-storage from a *sk*.
* Return
* 0 on success.
*
* **-ENOENT** if the bpf-local-storage cannot be found.
+ * **-EINVAL** if sk is not a fullsock (e.g. a request_sock).
*
- * int bpf_send_signal(u32 sig)
+ * long bpf_send_signal(u32 sig)
* Description
* Send signal *sig* to the process of the current task.
* The signal may be delivered to any of this process's threads.
@@ -2774,7 +2914,7 @@ union bpf_attr {
*
* **-EAGAIN** if bpf program can try again.
*
- * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
@@ -2785,7 +2925,6 @@ union bpf_attr {
*
* *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header.
- *
* Return
* On success, lower 32 bits hold the generated SYN cookie in
* followed by 16 bits which hold the MSS value for that cookie,
@@ -2801,7 +2940,7 @@ union bpf_attr {
*
* **-EPROTONOSUPPORT** IP packet version is not 4 or 6
*
- * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * long bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
* Description
* Write raw *data* blob into a special BPF perf event held by
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -2825,21 +2964,21 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Safely attempt to read *size* bytes from user space address
* *unsafe_ptr* and store the data in *dst*.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Safely attempt to read *size* bytes from kernel space address
* *unsafe_ptr* and store the data in *dst*.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe user address
* *unsafe_ptr* to *dst*. The *size* should include the
@@ -2868,7 +3007,7 @@ union bpf_attr {
* // size, after checking its boundaries.
* }
*
- * In comparison, using **bpf_probe_read_user()** helper here
+ * In comparison, using **bpf_probe_read_user**\ () helper here
* instead to read the string would require to estimate the length
* at compile time, and would often result in copying more memory
* than necessary.
@@ -2883,22 +3022,22 @@ union bpf_attr {
* including the trailing NUL character. On error, a negative
* value.
*
- * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
- * to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply.
* Return
- * On success, the strictly positive length of the string, including
+ * On success, the strictly positive length of the string, including
* the trailing NUL character. On error, a negative value.
*
- * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
+ * long bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
* Description
- * Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock.
+ * Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**.
* *rcv_nxt* is the ack_seq to be sent out.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_send_signal_thread(u32 sig)
+ * long bpf_send_signal_thread(u32 sig)
* Description
* Send signal *sig* to the thread corresponding to the current task.
* Return
@@ -2918,38 +3057,38 @@ union bpf_attr {
* Return
* The 64 bit jiffies
*
- * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
+ * long bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
* Description
* For an eBPF program attached to a perf event, retrieve the
- * branch records (struct perf_branch_entry) associated to *ctx*
- * and store it in the buffer pointed by *buf* up to size
+ * branch records (**struct perf_branch_entry**) associated to *ctx*
+ * and store it in the buffer pointed by *buf* up to size
* *size* bytes.
* Return
* On success, number of bytes written to *buf*. On error, a
* negative value.
*
* The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to
- * instead return the number of bytes required to store all the
+ * instead return the number of bytes required to store all the
* branch entries. If this flag is set, *buf* may be NULL.
*
* **-EINVAL** if arguments invalid or **size** not a multiple
- * of sizeof(struct perf_branch_entry).
+ * of **sizeof**\ (**struct perf_branch_entry**\ ).
*
* **-ENOENT** if architecture does not support branch records.
*
- * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
+ * long bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
* Description
* Returns 0 on success, values for *pid* and *tgid* as seen from the current
* *namespace* will be returned in *nsdata*.
- *
- * On failure, the returned value is one of the following:
+ * Return
+ * 0 on success, or one of the following in case of failure:
*
* **-EINVAL** if dev and inum supplied don't match dev_t and inode number
* with nsfs of current task, or if dev conversion to dev_t lost high bits.
*
* **-ENOENT** if pidns does not exists for the current task.
*
- * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * long bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
* Description
* Write raw *data* blob into a special BPF perf event held by
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -2981,8 +3120,8 @@ union bpf_attr {
* a global identifier that can be assumed unique. If *ctx* is
* NULL, then the helper returns the cookie for the initial
* network namespace. The cookie itself is very similar to that
- * of bpf_get_socket_cookie() helper, but for network namespaces
- * instead of sockets.
+ * of **bpf_get_socket_cookie**\ () helper, but for network
+ * namespaces instead of sockets.
* Return
* A 8-byte long opaque number.
*
@@ -3004,8 +3143,12 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
- * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags)
* Description
+ * Helper is overloaded depending on BPF program type. This
+ * description applies to **BPF_PROG_TYPE_SCHED_CLS** and
+ * **BPF_PROG_TYPE_SCHED_ACT** programs.
+ *
* Assign the *sk* to the *skb*. When combined with appropriate
* routing configuration to receive the packet towards the socket,
* will cause *skb* to be delivered to the specified socket.
@@ -3017,14 +3160,588 @@ union bpf_attr {
*
* The *flags* argument must be zero.
* Return
+ * 0 on success, or a negative error in case of failure:
+ *
+ * **-EINVAL** if specified *flags* are not supported.
+ *
+ * **-ENOENT** if the socket is unavailable for assignment.
+ *
+ * **-ENETUNREACH** if the socket is unreachable (wrong netns).
+ *
+ * **-EOPNOTSUPP** if the operation is not supported, for example
+ * a call from outside of TC ingress.
+ *
+ * **-ESOCKTNOSUPPORT** if the socket type is not supported
+ * (reuseport).
+ *
+ * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags)
+ * Description
+ * Helper is overloaded depending on BPF program type. This
+ * description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs.
+ *
+ * Select the *sk* as a result of a socket lookup.
+ *
+ * For the operation to succeed, the passed socket must be
+ * compatible with the packet description provided by the *ctx*
+ * object.
+ *
+ * L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must
+ * be an exact match. While IP family (**AF_INET** or
+ * **AF_INET6**) must be compatible, that is IPv6 sockets
+ * that are not v6-only can be selected for IPv4 packets.
+ *
+ * Only TCP listeners and UDP unconnected sockets can be
+ * selected. *sk* can also be NULL to reset any previous
+ * selection.
+ *
+ * The *flags* argument can be a combination of the following values:
+ *
+ * * **BPF_SK_LOOKUP_F_REPLACE** to override the previous
+ * socket selection, potentially done by a BPF program
+ * that ran before us.
+ *
+ * * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip
+ * load-balancing within reuseport group for the socket
+ * being selected.
+ *
+ * On success *ctx->sk* will point to the selected socket.
+ *
+ * Return
* 0 on success, or a negative errno in case of failure.
*
- * * **-EINVAL** Unsupported flags specified.
- * * **-ENOENT** Socket is unavailable for assignment.
- * * **-ENETUNREACH** Socket is unreachable (wrong netns).
- * * **-EOPNOTSUPP** Unsupported operation, for example a
- * call from outside of TC ingress.
- * * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport).
+ * * **-EAFNOSUPPORT** if socket family (*sk->family*) is
+ * not compatible with packet family (*ctx->family*).
+ *
+ * * **-EEXIST** if socket has been already selected,
+ * potentially by another program, and
+ * **BPF_SK_LOOKUP_F_REPLACE** flag was not specified.
+ *
+ * * **-EINVAL** if unsupported flags were specified.
+ *
+ * * **-EPROTOTYPE** if socket L4 protocol
+ * (*sk->protocol*) doesn't match packet protocol
+ * (*ctx->protocol*).
+ *
+ * * **-ESOCKTNOSUPPORT** if socket is not in allowed
+ * state (TCP listening or UDP unconnected).
+ *
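A sketch of the sk_lookup flavor, close to the pattern used in the kernel selftests: steer new connections to local port 1234 to a socket stored in a SOCKMAP. The map, port, and section name are illustrative (section naming depends on the libbpf version):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} dispatch_map SEC(".maps");

SEC("sk_lookup")
int steer(struct bpf_sk_lookup *ctx)
{
	struct bpf_sock *sk;
	__u32 key = 0;
	long err;

	if (ctx->local_port != 1234)
		return SK_PASS;

	sk = bpf_map_lookup_elem(&dispatch_map, &key);
	if (!sk)
		return SK_DROP;

	err = bpf_sk_assign(ctx, sk, 0);
	bpf_sk_release(sk);		/* always drop the acquired reference */
	return err ? SK_DROP : SK_PASS;
}

char LICENSE[] SEC("license") = "GPL";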
+ * u64 bpf_ktime_get_boot_ns(void)
+ * Description
+ * Return the time elapsed since system boot, in nanoseconds.
+ * Does include the time the system was suspended.
+ * See: **clock_gettime**\ (**CLOCK_BOOTTIME**)
+ * Return
+ * Current *ktime*.
+ *
+ * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ * Description
+ * **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
+ * out the format string.
+ * The *m* represents the seq_file. The *fmt* and *fmt_size* are for
+ * the format string itself. The *data* and *data_len* are format string
+ * arguments. The *data* is a **u64** array and corresponding format string
+ * values are stored in the array. For strings and pointers where pointees
+ * are accessed, only the pointer values are stored in the *data* array.
+ * The *data_len* is the size of *data* in bytes.
+ *
+ * Formats **%s** and **%p{i,I}{4,6}** require reading kernel memory.
+ * Reading kernel memory may fail due to either invalid address or
+ * valid address but requiring a major memory fault. If reading kernel memory
+ * fails, the string for **%s** will be an empty string, and the ip
+ * address for **%p{i,I}{4,6}** will be 0. Not returning an error
+ * to the bpf program is consistent with what **bpf_trace_printk**\ ()
+ * does for now.
+ * Return
+ * 0 on success, or a negative error in case of failure:
+ *
+ * **-EBUSY** if the per-CPU memory copy buffer is busy; the bpf
+ * program can try again by returning 1.
+ *
+ * **-EINVAL** if arguments are invalid, or if *fmt* is invalid/unsupported.
+ *
+ * **-E2BIG** if *fmt* contains too many format specifiers.
+ *
+ * **-EOVERFLOW** if an overflow happened: The same object will be tried again.
+ *
+ * long bpf_seq_write(struct seq_file *m, const void *data, u32 len)
+ * Description
+ * **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data.
+ * The *m* represents the seq_file. The *data* and *len* represent the
+ * data to write in bytes.
+ * Return
+ * 0 on success, or a negative error in case of failure:
+ *
+ * **-EOVERFLOW** if an overflow happened: The same object will be tried again.
+ *
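A sketch of a task iterator using the BPF_SEQ_PRINTF() convenience wrapper from <bpf/bpf_tracing.h> around this helper; it assumes a BTF-generated vmlinux.h for the iterator context types:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	struct task_struct *task = ctx->task;

	if (!task)
		return 0;
	/* Each %d/%s consumes one u64 cell of the helper's data array. */
	BPF_SEQ_PRINTF(seq, "%8d %16s\n", task->pid, task->comm);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";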
+ * u64 bpf_sk_cgroup_id(void *sk)
+ * Description
+ * Return the cgroup v2 id of the socket *sk*.
+ *
+ * *sk* must be a non-**NULL** pointer to a socket, e.g. one
+ * returned from **bpf_sk_lookup_xxx**\ (),
+ * **bpf_sk_fullsock**\ (), etc. The format of returned id is
+ * same as in **bpf_skb_cgroup_id**\ ().
+ *
+ * This helper is available only if the kernel was compiled with
+ * the **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *sk* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with *sk*, then return value will be same as that
+ * of **bpf_sk_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with *sk*.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_sk_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * long bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
+ * Description
+ * Copy *size* bytes from *data* into a ring buffer *ringbuf*.
+ * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification
+ * of new data availability is sent.
+ * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
+ * of new data availability is sent unconditionally.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
+ * Description
+ * Reserve *size* bytes of payload in a ring buffer *ringbuf*.
+ * Return
+ * Valid pointer with *size* bytes of memory available; **NULL**
+ * otherwise.
+ *
+ * void bpf_ringbuf_submit(void *data, u64 flags)
+ * Description
+ * Submit reserved ring buffer sample, pointed to by *data*.
+ * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification
+ * of new data availability is sent.
+ * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
+ * of new data availability is sent unconditionally.
+ * Return
+ * Nothing. Always succeeds.
+ *
+ * void bpf_ringbuf_discard(void *data, u64 flags)
+ * Description
+ * Discard reserved ring buffer sample, pointed to by *data*.
+ * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification
+ * of new data availability is sent.
+ * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
+ * of new data availability is sent unconditionally.
+ * Return
+ * Nothing. Always succeeds.
+ *
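A sketch of the reserve/submit pattern these helpers are built around; the event layout, ring size and tracepoint are illustrative:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct event {
	__u32 pid;
};

struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 1 << 12);	/* one page; must be a power-of-two page multiple */
} rb SEC(".maps");

SEC("tracepoint/syscalls/sys_enter_execve")
int on_execve(void *ctx)
{
	struct event *e;

	e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
	if (!e)
		return 0;			/* ring full: reservation failed */

	e->pid = bpf_get_current_pid_tgid() >> 32;
	bpf_ringbuf_submit(e, 0);		/* or bpf_ringbuf_discard(e, 0) */
	return 0;
}

char LICENSE[] SEC("license") = "GPL";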
+ * u64 bpf_ringbuf_query(void *ringbuf, u64 flags)
+ * Description
+ * Query various characteristics of provided ring buffer. What
+ * exactly is queried is determined by *flags*:
+ *
+ * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed.
+ * * **BPF_RB_RING_SIZE**: The size of ring buffer.
+ * * **BPF_RB_CONS_POS**: Consumer position (can wrap around).
+ * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around).
+ *
+ * Data returned is just a momentary snapshot of actual values
+ * and could be inaccurate, so this facility should be used to
+ * power heuristics and for reporting, not to make 100% correct
+ * calculation.
+ * Return
+ * Requested value, or 0, if *flags* are not recognized.
+ *
+ * long bpf_csum_level(struct sk_buff *skb, u64 level)
+ * Description
+ * Change the skb's checksum level by one layer up or down, or
+ * reset it entirely to none in order to have the stack perform
+ * checksum validation. The level is applicable to the following
+ * protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of
+ * | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP |
+ * through **bpf_skb_adjust_room**\ () helper with passing in
+ * **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call
+ * to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since
+ * the UDP header is removed. Similarly, an encap of the latter
+ * into the former could be accompanied by a helper call to
+ * **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the
+ * skb is still intended to be processed in higher layers of the
+ * stack instead of just egressing at tc.
+ *
+ * There are three supported level settings at this time:
+ *
+ * * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs
+ * with CHECKSUM_UNNECESSARY.
+ * * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs
+ * with CHECKSUM_UNNECESSARY.
+ * * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and
+ * sets CHECKSUM_NONE to force checksum validation by the stack.
+ * * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current
+ * skb->csum_level.
+ * Return
+ * 0 on success, or a negative error in case of failure. In the
+ * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
+ * is returned or the error code -EACCES in case the skb is not
+ * subject to CHECKSUM_UNNECESSARY.
+ *
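A sketch tying this to the decap scenario described above: shrink away an outer header with BPF_F_ADJ_ROOM_NO_CSUM_RESET, then step the checksum level down one layer. The 28-byte outer header (IPv4 + UDP) and the section name follow common TC usage and are illustrative:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

#define OUTER_HLEN (20 + 8)	/* outer IPv4 + UDP, illustrative */

SEC("classifier")
int decap(struct __sk_buff *skb)
{
	/* Remove the outer headers without clobbering the csum state... */
	if (bpf_skb_adjust_room(skb, -OUTER_HLEN, BPF_ADJ_ROOM_MAC,
				BPF_F_ADJ_ROOM_NO_CSUM_RESET))
		return TC_ACT_SHOT;

	/* ...then account for the tunnel layer that disappeared. */
	bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC);
	return TC_ACT_OK;
}

char LICENSE[] SEC("license") = "GPL";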
+ * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk)
+ * Description
+ * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
+ * Return
+ * *sk* if casting is valid, or **NULL** otherwise.
+ *
+ * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk)
+ * Description
+ * Dynamically cast a *sk* pointer to a *tcp_sock* pointer.
+ * Return
+ * *sk* if casting is valid, or **NULL** otherwise.
+ *
+ * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk)
+ * Description
+ * Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer.
+ * Return
+ * *sk* if casting is valid, or **NULL** otherwise.
+ *
+ * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk)
+ * Description
+ * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
+ * Return
+ * *sk* if casting is valid, or **NULL** otherwise.
+ *
+ * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
+ * Description
+ * Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
+ * Return
+ * *sk* if casting is valid, or **NULL** otherwise.
+ *
+ * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
+ * Description
+ * Return a user or a kernel stack in bpf program provided buffer.
+ * To achieve this, the helper needs *task*, which is a valid
+ * pointer to **struct task_struct**. To store the stacktrace, the
+ * bpf program provides *buf* with a nonnegative *size*.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_USER_BUILD_ID**
+ * Collect buildid+offset instead of ips for user stack,
+ * only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ * **bpf_get_task_stack**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ * to a sufficiently large buffer size. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+ * A non-negative value equal to or less than *size* on success,
+ * or a negative error in case of failure.
+ *
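A sketch, close to the kernel selftest for this helper, of dumping each task's kernel stack from an iter/task program; a BTF-generated vmlinux.h is assumed:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

#define MAX_FRAMES 64

/* Global scratch buffer: too large for the 512-byte BPF stack. */
__u64 frames[MAX_FRAMES];

SEC("iter/task")
int dump_kernel_stack(struct bpf_iter__task *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	struct task_struct *task = ctx->task;
	long n;

	if (!task)
		return 0;
	n = bpf_get_task_stack(task, frames, sizeof(frames), 0);
	if (n > 0)
		bpf_seq_write(seq, frames, sizeof(frames));
	return 0;
}

char LICENSE[] SEC("license") = "GPL";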
+ * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
+ * Description
+ * Load header option. Support reading a particular TCP header
+ * option for bpf program (**BPF_PROG_TYPE_SOCK_OPS**).
+ *
+ * If *flags* is 0, it will search the option from the
+ * *skops*\ **->skb_data**. The comment in **struct bpf_sock_ops**
+ * has details on what skb_data contains under different
+ * *skops*\ **->op**.
+ *
+ * The first byte of the *searchby_res* specifies the
+ * kind that it wants to search.
+ *
+ * If the searching kind is an experimental kind
+ * (i.e. 253 or 254 according to RFC6994), it also
+ * needs to specify the "magic", which is either
+ * 2 bytes or 4 bytes. It then also needs to
+ * specify the size of the magic by using
+ * the 2nd byte which is "kind-length" of a TCP
+ * header option and the "kind-length" also
+ * includes the first 2 bytes "kind" and "kind-length"
+ * itself as a normal TCP header option also does.
+ *
+ * For example, to search experimental kind 254 with
+ * 2 byte magic 0xeB9F, the searchby_res should be
+ * [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ].
+ *
+ * To search for the standard window scale option (3),
+ * the *searchby_res* should be [ 3, 0, 0, .... 0 ].
+ * Note that kind-length must be 0 for a regular option.
+ *
+ * Searching for No-Op (0) and End-of-Option-List (1) is
+ * not supported.
+ *
+ * *len* must be at least 2 bytes which is the minimal size
+ * of a header option.
+ *
+ * Supported flags:
+ *
+ * * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the
+ * saved_syn packet or the just-received syn packet.
+ *
+ * Return
+ * > 0 when found, the header option is copied to *searchby_res*.
+ * The return value is the total length copied. On failure, a
+ * negative error code is returned:
+ *
+ * **-EINVAL** if a parameter is invalid.
+ *
+ * **-ENOMSG** if the option is not found.
+ *
+ * **-ENOENT** if no syn packet is available when
+ * **BPF_LOAD_HDR_OPT_TCP_SYN** is used.
+ *
+ * **-ENOSPC** if there is not enough space. Only *len* number of
+ * bytes are copied.
+ *
+ * **-EFAULT** on failure to parse the header options in the
+ * packet.
+ *
+ * **-EPERM** if the helper cannot be used under the current
+ * *skops*\ **->op**.
+ *
+ * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags)
+ * Description
+ * Store header option. The data will be copied
+ * from buffer *from* with length *len* to the TCP header.
+ *
+ * The buffer *from* should have the whole option that
+ * includes the kind, kind-length, and the actual
+ * option data. The *len* must be at least kind-length
+ * long. The kind-length does not have to be 4 byte
+ * aligned. The kernel will take care of the padding
+ * and of setting the 4-byte aligned value in th->doff.
+ *
+ * This helper will check for duplicated option
+ * by searching the same option in the outgoing skb.
+ *
+ * This helper can only be called during
+ * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**.
+ *
+ * Return
+ * 0 on success, or negative error in case of failure:
+ *
+ * **-EINVAL** if a parameter is invalid.
+ *
+ * **-ENOSPC** if there is not enough space in the header.
+ * Nothing has been written.
+ *
+ * **-EEXIST** if the option already exists.
+ *
+ * **-EFAULT** on failure to parse the existing header options.
+ *
+ * **-EPERM** if the helper cannot be used under the current
+ * *skops*\ **->op**.
+ *
+ * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags)
+ * Description
+ * Reserve *len* bytes for the bpf header option. The
+ * space will be used by **bpf_store_hdr_opt**\ () later in
+ * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**.
+ *
+ * If **bpf_reserve_hdr_opt**\ () is called multiple times,
+ * the total number of bytes will be reserved.
+ *
+ * This helper can only be called during
+ * **BPF_SOCK_OPS_HDR_OPT_LEN_CB**.
+ *
+ * Return
+ * 0 on success, or negative error in case of failure:
+ *
+ * **-EINVAL** if a parameter is invalid.
+ *
+ * **-ENOSPC** if there is not enough space in the header.
+ *
+ * **-EPERM** if the helper cannot be used under the current
+ * *skops*\ **->op**.
+ *
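A sketch of the intended pairing of bpf_reserve_hdr_opt() and bpf_store_hdr_opt() across the two callbacks. The write callback must first be enabled with bpf_sock_ops_cb_flags_set(); the experimental kind, magic and payload below are illustrative:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int tcp_opt_writer(struct bpf_sock_ops *skops)
{
	/* kind 254, kind-len 5, 2-byte magic 0xeB9F, 1 byte of payload */
	__u8 opt[5] = { 254, 5, 0xeb, 0x9f, 0x01 };

	switch (skops->op) {
	case BPF_SOCK_OPS_TCP_CONNECT_CB:
		/* Ask for the header-option callbacks on this socket. */
		bpf_sock_ops_cb_flags_set(skops,
					  BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
		break;
	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
		bpf_reserve_hdr_opt(skops, sizeof(opt), 0);
		break;
	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
		bpf_store_hdr_opt(skops, opt, sizeof(opt), 0);
		break;
	}
	return 1;
}

char LICENSE[] SEC("license") = "GPL";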
+ * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags)
+ * Description
+ * Get a bpf_local_storage from an *inode*.
+ *
+ * Logically, it could be thought of as getting the value from
+ * a *map* with *inode* as the **key**. From this
+ * perspective, the usage is not much different from
+ * **bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this
+ * helper enforces the key must be an inode and the map must also
+ * be a **BPF_MAP_TYPE_INODE_STORAGE**.
+ *
+ * Underneath, the value is stored locally at *inode* instead of
+ * the *map*. The *map* is used as the bpf-local-storage
+ * "type". The bpf-local-storage "type" (i.e. the *map*) is
+ * searched against all bpf_local_storage residing at *inode*.
+ *
+ * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ * used such that a new bpf_local_storage will be
+ * created if one does not exist. *value* can be used
+ * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ * the initial value of a bpf_local_storage. If *value* is
+ * **NULL**, the new bpf_local_storage will be zero initialized.
+ * Return
+ * A bpf_local_storage pointer is returned on success.
+ *
+ * **NULL** if not found or there was an error in adding
+ * a new bpf_local_storage.
+ *
+ * int bpf_inode_storage_delete(struct bpf_map *map, void *inode)
+ * Description
+ * Delete a bpf_local_storage from an *inode*.
+ * Return
+ * 0 on success.
+ *
+ * **-ENOENT** if the bpf_local_storage cannot be found.
+ *
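As a rough illustration (not from this patch; the value type and LSM hook are assumptions), a sketch that counts unlinks per inode using inode local storage:

/* Sketch: per-inode counter kept in a BPF_MAP_TYPE_INODE_STORAGE map.
 * Assumes vmlinux.h, <bpf/bpf_helpers.h> and <bpf/bpf_tracing.h>.
 */
struct inode_note {
	__u64 unlink_cnt;
};

struct {
	__uint(type, BPF_MAP_TYPE_INODE_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct inode_note);
} inode_notes SEC(".maps");

SEC("lsm/inode_unlink")
int BPF_PROG(count_unlink, struct inode *dir, struct dentry *victim)
{
	struct inode_note *note;

	note = bpf_inode_storage_get(&inode_notes, victim->d_inode, 0,
				     BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (note)
		__sync_fetch_and_add(&note->unlink_cnt, 1);
	return 0;
}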
+ * long bpf_d_path(struct path *path, char *buf, u32 sz)
+ * Description
+ * Return full path for given **struct path** object, which
+ * needs to be the kernel BTF *path* object. The path is
+ * returned in the provided buffer *buf* of size *sz* and
+ * is zero terminated.
+ *
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
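A small tracing sketch of typical usage (assumptions: an fentry attachment to filp_close, which is one of the functions the kernel allows this helper to run from, and a vmlinux.h build):

/* Sketch: print the path of every file being closed. */
SEC("fentry/filp_close")
int BPF_PROG(log_close, struct file *filp)
{
	char buf[256];
	long len;

	len = bpf_d_path(&filp->f_path, buf, sizeof(buf));
	if (len > 0)
		bpf_printk("closed: %s", buf);
	return 0;
}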
+ * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr)
+ * Description
+ * Read *size* bytes from user space address *user_ptr* and store
+ * the data in *dst*. This is a wrapper of **copy_from_user**\ ().
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags)
+ * Description
+ * Use BTF to store a string representation of *ptr*->ptr in *str*,
+ * using *ptr*->type_id. This value should specify the type
+ * that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1)
+ * can be used to look up vmlinux BTF type ids. Traversing the
+ * data structure using BTF, the type information and values are
+ * stored in the first *str_size* - 1 bytes of *str*. Safe copy of
+ * the pointer data is carried out to avoid kernel crashes during
+ * operation. Smaller types can use string space on the stack;
+ *		larger types can use map data to store the string
+ * representation.
+ *
+ * The string can be subsequently shared with userspace via
+ * bpf_perf_event_output() or ring buffer interfaces.
+ * bpf_trace_printk() is to be avoided as it places too small
+ * a limit on string size to be useful.
+ *
+ * *flags* is a combination of
+ *
+ * **BTF_F_COMPACT**
+ * no formatting around type information
+ * **BTF_F_NONAME**
+ * no struct/union member names/types
+ * **BTF_F_PTR_RAW**
+ * show raw (unobfuscated) pointer values;
+ * equivalent to printk specifier %px.
+ * **BTF_F_ZERO**
+ * show zero-valued struct/union members; they
+ * are not displayed by default
+ *
+ * Return
+ * The number of bytes that were written (or would have been
+ * written if output had to be truncated due to string size),
+ * or a negative error in cases of failure.
+ *
+ * long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 ptr_size, u64 flags)
+ * Description
+ * Use BTF to write to seq_write a string representation of
+ * *ptr*->ptr, using *ptr*->type_id as per bpf_snprintf_btf().
+ * *flags* are identical to those used for bpf_snprintf_btf.
+ * Return
+ * 0 on success or a negative error in case of failure.
+ *
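A hedged sketch of the typed-dump flow (the tracepoint, buffer size, and the use of libbpf's bpf_core_type_id_kernel() are illustrative assumptions):

/* Sketch: render the next task_struct as text on every sched_switch. */
static char str[4096];	/* single shared buffer, acceptable for a sketch */

SEC("tp_btf/sched_switch")
int BPF_PROG(dump_task, bool preempt, struct task_struct *prev,
	     struct task_struct *next)
{
	struct btf_ptr ptr = {
		.ptr = next,
		.type_id = bpf_core_type_id_kernel(struct task_struct),
	};

	bpf_snprintf_btf(str, sizeof(str), &ptr, sizeof(ptr), BTF_F_COMPACT);
	return 0;
}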
+ * u64 bpf_skb_cgroup_classid(struct sk_buff *skb)
+ * Description
+ * See **bpf_get_cgroup_classid**\ () for the main description.
+ * This helper differs from **bpf_get_cgroup_classid**\ () in that
+ * the cgroup v1 net_cls class is retrieved only from the *skb*'s
+ * associated socket instead of the current process.
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)
+ * Description
+ * Redirect the packet to another net device of index *ifindex*
+ * and fill in L2 addresses from neighboring subsystem. This helper
+ * is somewhat similar to **bpf_redirect**\ (), except that it
+ * populates L2 addresses as well, meaning, internally, the helper
+ * relies on the neighbor lookup for the L2 address of the nexthop.
+ *
+ * The helper will perform a FIB lookup based on the skb's
+ * networking header to get the address of the next hop, unless
+ * this is supplied by the caller in the *params* argument. The
+ *		*plen* argument indicates the length of *params* and should be set
+ * to 0 if *params* is NULL.
+ *
+ * The *flags* argument is reserved and must be 0. The helper is
+ * currently only supported for tc BPF program types, and enabled
+ * for IPv4 and IPv6 protocols.
+ * Return
+ * The helper returns **TC_ACT_REDIRECT** on success or
+ * **TC_ACT_SHOT** on error.
+ *
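A minimal tc sketch (ifindex 2 is an assumption) that lets the kernel resolve the next hop's L2 address:

/* Sketch: forward every packet out of ifindex 2 via the neighbor subsystem. */
SEC("classifier")
int redirect_gw(struct __sk_buff *skb)
{
	/* NULL params / plen 0: derive the next hop from the packet headers. */
	return bpf_redirect_neigh(2, NULL, 0, 0);
}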
+ * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
+ * Description
+ * Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+ * pointer to the percpu kernel variable on *cpu*. A ksym is an
+ * extern variable decorated with '__ksym'. For ksym, there is a
+ * global var (either static or global) defined of the same name
+ * in the kernel. The ksym is percpu if the global var is percpu.
+ * The returned pointer points to the global percpu var on *cpu*.
+ *
+ * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
+ * kernel, except that bpf_per_cpu_ptr() may return NULL. This
+ * happens if *cpu* is larger than nr_cpu_ids. The caller of
+ * bpf_per_cpu_ptr() must check the returned value.
+ * Return
+ * A pointer pointing to the kernel percpu variable on *cpu*, or
+ * NULL, if *cpu* is invalid.
+ *
+ * void *bpf_this_cpu_ptr(const void *percpu_ptr)
+ * Description
+ * Take a pointer to a percpu ksym, *percpu_ptr*, and return a
+ * pointer to the percpu kernel variable on this cpu. See the
+ * description of 'ksym' in **bpf_per_cpu_ptr**\ ().
+ *
+ * bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in
+ * the kernel. Different from **bpf_per_cpu_ptr**\ (), it would
+ * never return NULL.
+ * Return
+ * A pointer pointing to the kernel percpu variable on this cpu.
+ *
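A sketch of the ksym pattern these two helpers expect (assumes vmlinux.h provides struct rq and that the __ksym attribute comes from bpf_helpers.h):

/* Sketch: peek at per-CPU runqueues through a percpu ksym. */
extern const struct rq runqueues __ksym;

SEC("raw_tp/sys_enter")
int peek_rq(const void *ctx)
{
	const struct rq *this_rq = bpf_this_cpu_ptr(&runqueues);
	const struct rq *rq1 = bpf_per_cpu_ptr(&runqueues, 1);

	if (!rq1)		/* bpf_per_cpu_ptr() may return NULL */
		return 0;
	/* ... read fields of this_rq / rq1 ... */
	return 0;
}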
+ * long bpf_redirect_peer(u32 ifindex, u64 flags)
+ * Description
+ * Redirect the packet to another net device of index *ifindex*.
+ * This helper is somewhat similar to **bpf_redirect**\ (), except
+ * that the redirection happens to the *ifindex*' peer device and
+ * the netns switch takes place from ingress to ingress without
+ * going through the CPU's backlog queue.
+ *
+ * The *flags* argument is reserved and must be 0. The helper is
+ * currently only supported for tc BPF program types at the ingress
+ * hook and for veth device types. The peer device must reside in a
+ * different network namespace.
+ * Return
+ * The helper returns **TC_ACT_REDIRECT** on success or
+ * **TC_ACT_SHOT** on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3151,7 +3868,39 @@ union bpf_attr {
FN(xdp_output), \
FN(get_netns_cookie), \
FN(get_current_ancestor_cgroup_id), \
- FN(sk_assign),
+ FN(sk_assign), \
+ FN(ktime_get_boot_ns), \
+ FN(seq_printf), \
+ FN(seq_write), \
+ FN(sk_cgroup_id), \
+ FN(sk_ancestor_cgroup_id), \
+ FN(ringbuf_output), \
+ FN(ringbuf_reserve), \
+ FN(ringbuf_submit), \
+ FN(ringbuf_discard), \
+ FN(ringbuf_query), \
+ FN(csum_level), \
+ FN(skc_to_tcp6_sock), \
+ FN(skc_to_tcp_sock), \
+ FN(skc_to_tcp_timewait_sock), \
+ FN(skc_to_tcp_request_sock), \
+ FN(skc_to_udp6_sock), \
+ FN(get_task_stack), \
+ FN(load_hdr_opt), \
+ FN(store_hdr_opt), \
+ FN(reserve_hdr_opt), \
+ FN(inode_storage_get), \
+ FN(inode_storage_delete), \
+ FN(d_path), \
+ FN(copy_from_user), \
+ FN(snprintf_btf), \
+ FN(seq_printf_btf), \
+ FN(skb_cgroup_classid), \
+ FN(redirect_neigh), \
+ FN(bpf_per_cpu_ptr), \
+ FN(bpf_this_cpu_ptr), \
+ FN(redirect_peer), \
+ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -3228,6 +3977,14 @@ enum {
BPF_F_CURRENT_NETNS = (-1L),
};
+/* BPF_FUNC_csum_level level values. */
+enum {
+ BPF_CSUM_LEVEL_QUERY,
+ BPF_CSUM_LEVEL_INC,
+ BPF_CSUM_LEVEL_DEC,
+ BPF_CSUM_LEVEL_RESET,
+};
+
/* BPF_FUNC_skb_adjust_room flags. */
enum {
BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0),
@@ -3235,6 +3992,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2),
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
};
enum {
@@ -3251,9 +4009,13 @@ enum {
BPF_F_SYSCTL_BASE_NAME = (1ULL << 0),
};
-/* BPF_FUNC_sk_storage_get flags */
+/* BPF_FUNC_<kernel_obj>_storage_get flags */
enum {
- BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0),
+ BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0),
+ /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
+ * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
+ */
+ BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE,
};
/* BPF_FUNC_read_branch_records flags. */
@@ -3261,6 +4023,35 @@ enum {
BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0),
};
+/* BPF_FUNC_bpf_ringbuf_commit, BPF_FUNC_bpf_ringbuf_discard, and
+ * BPF_FUNC_bpf_ringbuf_output flags.
+ */
+enum {
+ BPF_RB_NO_WAKEUP = (1ULL << 0),
+ BPF_RB_FORCE_WAKEUP = (1ULL << 1),
+};
+
+/* BPF_FUNC_bpf_ringbuf_query flags */
+enum {
+ BPF_RB_AVAIL_DATA = 0,
+ BPF_RB_RING_SIZE = 1,
+ BPF_RB_CONS_POS = 2,
+ BPF_RB_PROD_POS = 3,
+};
+
+/* BPF ring buffer constants */
+enum {
+ BPF_RINGBUF_BUSY_BIT = (1U << 31),
+ BPF_RINGBUF_DISCARD_BIT = (1U << 30),
+ BPF_RINGBUF_HDR_SZ = 8,
+};
+
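A sketch of how the flags above are used on the producer side (the map size, event layout, and tracepoint are assumptions):

/* Sketch: reserve/submit into a ring buffer without waking the consumer. */
struct event {
	__u32 pid;
};

struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 256 * 1024);	/* power-of-2 number of bytes */
} rb SEC(".maps");

SEC("tracepoint/syscalls/sys_enter_execve")
int trace_execve(void *ctx)
{
	struct event *e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);

	if (!e)
		return 0;
	e->pid = bpf_get_current_pid_tgid() >> 32;
	/* Batch notifications; a later submission with BPF_RB_FORCE_WAKEUP
	 * (or consumer polling) picks the data up.
	 */
	bpf_ringbuf_submit(e, BPF_RB_NO_WAKEUP);
	return 0;
}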
+/* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */
+enum {
+ BPF_SK_LOOKUP_F_REPLACE = (1ULL << 0),
+ BPF_SK_LOOKUP_F_NO_REUSEPORT = (1ULL << 1),
+};
+
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
@@ -3393,6 +4184,7 @@ struct bpf_sock {
__u32 dst_ip4;
__u32 dst_ip6[4];
__u32 state;
+ __s32 rx_queue_mapping;
};
struct bpf_tcp_sock {
@@ -3486,6 +4278,34 @@ struct xdp_md {
/* Below access go through struct xdp_rxq_info */
__u32 ingress_ifindex; /* rxq->dev->ifindex */
__u32 rx_queue_index; /* rxq->queue_index */
+
+ __u32 egress_ifindex; /* txq->dev->ifindex */
+};
+
+/* DEVMAP map-value layout
+ *
+ * The struct data-layout of map-value is a configuration interface.
+ * New members can only be added to the end of this structure.
+ */
+struct bpf_devmap_val {
+ __u32 ifindex; /* device index */
+ union {
+ int fd; /* prog fd on map write */
+ __u32 id; /* prog id on map read */
+ } bpf_prog;
+};
+
+/* CPUMAP map-value layout
+ *
+ * The struct data-layout of map-value is a configuration interface.
+ * New members can only be added to the end of this structure.
+ */
+struct bpf_cpumap_val {
+ __u32 qsize; /* queue size to remote target CPU */
+ union {
+ int fd; /* prog fd on map write */
+ __u32 id; /* prog id on map read */
+ } bpf_prog;
};
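From user space the bpf_prog union is written as a program fd and read back as an id; a minimal sketch (the descriptors and key are assumptions):

/* Sketch: install an egress XDP program on one devmap slot. */
static int add_devmap_entry(int devmap_fd, __u32 key, __u32 ifindex, int prog_fd)
{
	struct bpf_devmap_val val = {
		.ifindex = ifindex,
		.bpf_prog.fd = prog_fd,		/* fd on write, id on read */
	};

	return bpf_map_update_elem(devmap_fd, &key, &val, 0);
}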
enum sk_action {
@@ -3508,6 +4328,8 @@ struct sk_msg_md {
__u32 remote_port; /* Stored in network byte order */
__u32 local_port; /* stored in host byte order */
__u32 size; /* Total size of sk_msg */
+
+ __bpf_md_ptr(struct bpf_sock *, sk); /* current socket */
};
struct sk_reuseport_md {
@@ -3598,9 +4420,44 @@ struct bpf_btf_info {
__u32 id;
} __attribute__((aligned(8)));
+struct bpf_link_info {
+ __u32 type;
+ __u32 id;
+ __u32 prog_id;
+ union {
+ struct {
+ __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */
+ __u32 tp_name_len; /* in/out: tp_name buffer len */
+ } raw_tracepoint;
+ struct {
+ __u32 attach_type;
+ } tracing;
+ struct {
+ __u64 cgroup_id;
+ __u32 attach_type;
+ } cgroup;
+ struct {
+ __aligned_u64 target_name; /* in/out: target_name buffer ptr */
+ __u32 target_name_len; /* in/out: target_name buffer len */
+ union {
+ struct {
+ __u32 map_id;
+ } map;
+ };
+ } iter;
+ struct {
+ __u32 netns_ino;
+ __u32 attach_type;
+ } netns;
+ struct {
+ __u32 ifindex;
+ } xdp;
+ };
+} __attribute__((aligned(8)));
+
/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
* by user and intended to be used by socket (e.g. to bind to, depends on
- * attach attach type).
+ * attach type).
*/
struct bpf_sock_addr {
__u32 user_family; /* Allows 4-byte read, but no write. */
@@ -3610,7 +4467,7 @@ struct bpf_sock_addr {
__u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write.
* Stored in network byte order.
*/
- __u32 user_port; /* Allows 4-byte read and write.
+ __u32 user_port; /* Allows 1,2,4-byte read and 4-byte write.
* Stored in network byte order
*/
__u32 family; /* Allows 4-byte read, but no write */
@@ -3675,6 +4532,36 @@ struct bpf_sock_ops {
__u64 bytes_received;
__u64 bytes_acked;
__bpf_md_ptr(struct bpf_sock *, sk);
+ /* [skb_data, skb_data_end) covers the whole TCP header.
+ *
+ * BPF_SOCK_OPS_PARSE_HDR_OPT_CB: The packet received
+ * BPF_SOCK_OPS_HDR_OPT_LEN_CB: Not useful because the
+ * header has not been written.
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB: The header and options have
+ * been written so far.
+ * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: The SYNACK that concludes
+ * the 3WHS.
+ * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: The ACK that concludes
+ * the 3WHS.
+ *
+ * bpf_load_hdr_opt() can also be used to read a particular option.
+ */
+ __bpf_md_ptr(void *, skb_data);
+ __bpf_md_ptr(void *, skb_data_end);
+ __u32 skb_len; /* The total length of a packet.
+ * It includes the header, options,
+ * and payload.
+ */
+ __u32 skb_tcp_flags; /* tcp_flags of the header. It provides
+ * an easy way to check for tcp_flags
+ * without parsing skb_data.
+ *
+ * In particular, the skb_tcp_flags
+ * will still be available in
+				 * BPF_SOCK_OPS_HDR_OPT_LEN_CB even though
+ * the outgoing header has not
+ * been written yet.
+ */
};
/* Definitions for bpf_sock_ops_cb_flags */
@@ -3683,8 +4570,51 @@ enum {
BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1),
BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2),
BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3),
+ /* Call bpf for all received TCP headers. The bpf prog will be
+ * called under sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+ *
+ * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+ * for the header option related helpers that will be useful
+ * to the bpf programs.
+ *
+ * It could be used at the client/active side (i.e. connect() side)
+ * when the server told it that the server was in syncookie
+ * mode and required the active side to resend the bpf-written
+ * options. The active side can keep writing the bpf-options until
+ * it received a valid packet from the server side to confirm
+ * the earlier packet (and options) has been received. The later
+ * example patch is using it like this at the active side when the
+ * server is in syncookie mode.
+ *
+ * The bpf prog will usually turn this off in the common cases.
+ */
+ BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4),
+ /* Call bpf when kernel has received a header option that
+ * the kernel cannot handle. The bpf prog will be called under
+ * sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB.
+ *
+ * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB
+ * for the header option related helpers that will be useful
+ * to the bpf programs.
+ */
+ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5),
+ /* Call bpf when the kernel is writing header options for the
+ * outgoing packet. The bpf prog will first be called
+ * to reserve space in a skb under
+ * sock_ops->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB. Then
+ * the bpf prog will be called to write the header option(s)
+ * under sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ *
+ * Please refer to the comment in BPF_SOCK_OPS_HDR_OPT_LEN_CB
+ * and BPF_SOCK_OPS_WRITE_HDR_OPT_CB for the header option
+ * related helpers that will be useful to the bpf programs.
+ *
+ * The kernel gets its chance to reserve space and write
+ * options first before the BPF program does.
+ */
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6),
/* Mask of all currently supported cb flags */
- BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF,
+ BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F,
};
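A sketch of how a sock_ops program opts a connection in to the new callbacks once it is established (the chosen flag combination is an assumption):

/* Sketch: enable header-option callbacks for established connections. */
SEC("sockops")
int enable_hdr_opt_cbs(struct bpf_sock_ops *skops)
{
	switch (skops->op) {
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		bpf_sock_ops_cb_flags_set(skops,
					  skops->bpf_sock_ops_cb_flags |
					  BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG |
					  BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG);
		break;
	}
	return 1;
}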
/* List of known BPF sock_ops operators.
@@ -3740,6 +4670,63 @@ enum {
*/
BPF_SOCK_OPS_RTT_CB, /* Called on every RTT.
*/
+ BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option.
+ * It will be called to handle
+ * the packets received at
+ * an already established
+ * connection.
+ *
+ * sock_ops->skb_data:
+ * Referring to the received skb.
+ * It covers the TCP header only.
+ *
+ * bpf_load_hdr_opt() can also
+ * be used to search for a
+ * particular option.
+ */
+ BPF_SOCK_OPS_HDR_OPT_LEN_CB, /* Reserve space for writing the
+ * header option later in
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ * Arg1: bool want_cookie. (in
+ * writing SYNACK only)
+ *
+ * sock_ops->skb_data:
+ * Not available because no header has
+ * been written yet.
+ *
+ * sock_ops->skb_tcp_flags:
+ * The tcp_flags of the
+ * outgoing skb. (e.g. SYN, ACK, FIN).
+ *
+ * bpf_reserve_hdr_opt() should
+ * be used to reserve space.
+ */
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB, /* Write the header options
+ * Arg1: bool want_cookie. (in
+ * writing SYNACK only)
+ *
+ * sock_ops->skb_data:
+ * Referring to the outgoing skb.
+ * It covers the TCP header
+ * that has already been written
+ * by the kernel and the
+ * earlier bpf-progs.
+ *
+ * sock_ops->skb_tcp_flags:
+ * The tcp_flags of the outgoing
+ * skb. (e.g. SYN, ACK, FIN).
+ *
+ * bpf_store_hdr_opt() should
+ * be used to write the
+ * option.
+ *
+ * bpf_load_hdr_opt() can also
+ * be used to search for a
+ * particular option that
+ * has already been written
+ * by the kernel or the
+ * earlier bpf-progs.
+ */
};
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
@@ -3767,6 +4754,63 @@ enum {
enum {
TCP_BPF_IW = 1001, /* Set TCP initial congestion window */
TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */
+ TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */
+	TCP_BPF_RTO_MIN		= 1004,	/* Min RTO in usecs */
+ /* Copy the SYN pkt to optval
+ *
+ * BPF_PROG_TYPE_SOCK_OPS only. It is similar to the
+ * bpf_getsockopt(TCP_SAVED_SYN) but it does not limit
+ * to only getting from the saved_syn. It can either get the
+ * syn packet from:
+ *
+ * 1. the just-received SYN packet (only available when writing the
+ * SYNACK). It will be useful when it is not necessary to
	 * save the SYN packet for later use.  It is also the only way
+ * to get the SYN during syncookie mode because the syn
+ * packet cannot be saved during syncookie.
+ *
+ * OR
+ *
+ * 2. the earlier saved syn which was done by
+ * bpf_setsockopt(TCP_SAVE_SYN).
+ *
+ * The bpf_getsockopt(TCP_BPF_SYN*) option will hide where the
+ * SYN packet is obtained.
+ *
+ * If the bpf-prog does not need the IP[46] header, the
+ * bpf-prog can avoid parsing the IP header by using
+ * TCP_BPF_SYN. Otherwise, the bpf-prog can get both
+ * IP[46] and TCP header by using TCP_BPF_SYN_IP.
+ *
+ * >0: Total number of bytes copied
+ * -ENOSPC: Not enough space in optval. Only optlen number of
+ * bytes is copied.
+ * -ENOENT: The SYN skb is not available now and the earlier SYN pkt
+ * is not saved by setsockopt(TCP_SAVE_SYN).
+ */
+ TCP_BPF_SYN = 1005, /* Copy the TCP header */
+ TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */
+ TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
+};
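A hedged sketch of reading the just-received SYN while the SYNACK is being written (the SOL_TCP definition and buffer size are assumptions):

/* Sketch: copy the IP[46] + TCP header of the incoming SYN into a buffer. */
#ifndef SOL_TCP
#define SOL_TCP 6
#endif

SEC("sockops")
int read_syn(struct bpf_sock_ops *skops)
{
	char syn[128];		/* illustrative size */
	long ret;

	if (skops->op != BPF_SOCK_OPS_WRITE_HDR_OPT_CB)
		return 1;

	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, syn, sizeof(syn));
	if (ret > 0) {
		/* ret bytes of IP[46] + TCP header are now in syn[] */
	}
	return 1;
}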
+
+enum {
+ BPF_LOAD_HDR_OPT_TCP_SYN = (1ULL << 0),
+};
+
+/* args[0] value during BPF_SOCK_OPS_HDR_OPT_LEN_CB and
+ * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ */
+enum {
+ BPF_WRITE_HDR_TCP_CURRENT_MSS = 1, /* Kernel is finding the
+ * total option spaces
+ * required for an established
+ * sk in order to calculate the
+ * MSS. No skb is actually
+ * sent.
+ */
+ BPF_WRITE_HDR_TCP_SYNACK_COOKIE = 2, /* Kernel is in syncookie mode
+ * when sending a SYN.
+ */
};
struct bpf_perf_event_value {
@@ -3866,6 +4910,16 @@ struct bpf_fib_lookup {
__u8 dmac[6]; /* ETH_ALEN */
};
+struct bpf_redir_neigh {
+ /* network family for lookup (AF_INET, AF_INET6) */
+ __u32 nh_family;
+ /* network address of nexthop; skips fib lookup to find gateway */
+ union {
+ __be32 ipv4_nh;
+ __u32 ipv6_nh[4]; /* in6_addr; network order */
+ };
+};
+
enum bpf_task_fd_type {
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
BPF_FD_TYPE_TRACEPOINT, /* tp name */
@@ -3949,4 +5003,49 @@ struct bpf_pidns_info {
__u32 pid;
__u32 tgid;
};
+
+/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
+struct bpf_sk_lookup {
+ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+
+ __u32 family; /* Protocol family (AF_INET, AF_INET6) */
+ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
+ __u32 remote_ip4; /* Network byte order */
+ __u32 remote_ip6[4]; /* Network byte order */
+ __u32 remote_port; /* Network byte order */
+ __u32 local_ip4; /* Network byte order */
+ __u32 local_ip6[4]; /* Network byte order */
+ __u32 local_port; /* Host byte order */
+};
+
+/*
+ * struct btf_ptr is used for typed pointer representation; the
+ * type id is used to render the pointer data as the appropriate type
+ * via the bpf_snprintf_btf() helper described above. A flags field -
+ * potentially to specify additional details about the BTF pointer
+ * (rather than its mode of display) - is included for future use.
+ * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately.
+ */
+struct btf_ptr {
+ void *ptr;
+ __u32 type_id;
+ __u32 flags; /* BTF ptr flags; unused at present. */
+};
+
+/*
+ * Flags to control bpf_snprintf_btf() behaviour.
+ * - BTF_F_COMPACT: no formatting around type information
+ * - BTF_F_NONAME: no struct/union member names/types
+ * - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values;
+ * equivalent to %px.
+ * - BTF_F_ZERO: show zero-valued struct/union members; they
+ * are not displayed by default
+ */
+enum {
+ BTF_F_COMPACT = (1ULL << 0),
+ BTF_F_NONAME = (1ULL << 1),
+ BTF_F_PTR_RAW = (1ULL << 2),
+ BTF_F_ZERO = (1ULL << 3),
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
index ca88b7bce553..2f86b2ad6d7e 100644
--- a/tools/include/uapi/linux/fcntl.h
+++ b/tools/include/uapi/linux/fcntl.h
@@ -84,10 +84,20 @@
#define DN_ATTRIB 0x00000020 /* File changed attibutes */
#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
+/*
+ * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is
+ * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
+ * unlinkat. The two functions do completely different things and therefore,
+ * the flags are allowed to overlap. For example, passing AT_REMOVEDIR to
+ * faccessat would be undefined behavior, so treating it as equivalent to
+ * AT_EACCESS is an acceptable way to resolve that undefined behavior.
+ */
#define AT_FDCWD -100 /* Special value used to indicate
openat should use the current
working directory. */
#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
+#define AT_EACCESS 0x200 /* Test access permitted for
+ effective IDs, not real IDs. */
#define AT_REMOVEDIR 0x200 /* Remove directory instead of
unlinking file. */
#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
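A user-space sketch of the distinction (the path is illustrative): the default access check uses the real IDs, while AT_EACCESS asks about the effective IDs.

/* Sketch: check readability under the effective uid/gid. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	if (faccessat(AT_FDCWD, "/etc/shadow", R_OK, AT_EACCESS) == 0)
		printf("effective IDs may read it\n");
	else
		perror("faccessat");
	return 0;
}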
diff --git a/tools/include/uapi/linux/filter.h b/tools/include/uapi/linux/filter.h
new file mode 100644
index 000000000000..eaef459e7bd4
--- /dev/null
+++ b/tools/include/uapi/linux/filter.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Linux Socket Filter Data Structures
+ */
+
+#ifndef __LINUX_FILTER_H__
+#define __LINUX_FILTER_H__
+
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/*
+ * Current version of the filter code architecture.
+ */
+#define BPF_MAJOR_VERSION 1
+#define BPF_MINOR_VERSION 1
+
+/*
+ * Try and keep these values and structures similar to BSD, especially
+ * the BPF code definitions which need to match so you can share filters
+ */
+
+struct sock_filter { /* Filter block */
+ __u16 code; /* Actual filter code */
+ __u8 jt; /* Jump true */
+ __u8 jf; /* Jump false */
+ __u32 k; /* Generic multiuse field */
+};
+
+struct sock_fprog { /* Required for SO_ATTACH_FILTER. */
+ unsigned short len; /* Number of filter blocks */
+ struct sock_filter *filter;
+};
+
+/* ret - BPF_K and BPF_X also apply */
+#define BPF_RVAL(code) ((code) & 0x18)
+#define BPF_A 0x10
+
+/* misc */
+#define BPF_MISCOP(code) ((code) & 0xf8)
+#define BPF_TAX 0x00
+#define BPF_TXA 0x80
+
+/*
+ * Macros for filter block array initializers.
+ */
+#ifndef BPF_STMT
+#define BPF_STMT(code, k) { (unsigned short)(code), 0, 0, k }
+#endif
+#ifndef BPF_JUMP
+#define BPF_JUMP(code, k, jt, jf) { (unsigned short)(code), jt, jf, k }
+#endif
+
+/*
+ * Number of scratch memory words for: BPF_ST and BPF_STX
+ */
+#define BPF_MEMWORDS 16
+
+/* RATIONALE. Negative offsets are invalid in BPF.
+ We use them to reference ancillary data.
+ Unlike introduction new instructions, it does not break
+ existing compilers/optimizers.
+ */
+#define SKF_AD_OFF (-0x1000)
+#define SKF_AD_PROTOCOL 0
+#define SKF_AD_PKTTYPE 4
+#define SKF_AD_IFINDEX 8
+#define SKF_AD_NLATTR 12
+#define SKF_AD_NLATTR_NEST 16
+#define SKF_AD_MARK 20
+#define SKF_AD_QUEUE 24
+#define SKF_AD_HATYPE 28
+#define SKF_AD_RXHASH 32
+#define SKF_AD_CPU 36
+#define SKF_AD_ALU_XOR_X 40
+#define SKF_AD_VLAN_TAG 44
+#define SKF_AD_VLAN_TAG_PRESENT 48
+#define SKF_AD_PAY_OFFSET 52
+#define SKF_AD_RANDOM 56
+#define SKF_AD_VLAN_TPID 60
+#define SKF_AD_MAX 64
+
+#define SKF_NET_OFF (-0x100000)
+#define SKF_LL_OFF (-0x200000)
+
+#define BPF_NET_OFF SKF_NET_OFF
+#define BPF_LL_OFF SKF_LL_OFF
+
+#endif /* __LINUX_FILTER_H__ */
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index 379a612f8f1d..f44eb0a04afd 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -262,6 +262,7 @@ struct fsxattr {
#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
+#define FS_DAX_FL 0x02000000 /* Inode is DAX */
#define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */
#define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */
diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h
index a10e3cdc2839..7875709ccfeb 100644
--- a/tools/include/uapi/linux/fscrypt.h
+++ b/tools/include/uapi/linux/fscrypt.h
@@ -19,7 +19,8 @@
#define FSCRYPT_POLICY_FLAGS_PAD_MASK 0x03
#define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04
#define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08
-#define FSCRYPT_POLICY_FLAGS_VALID 0x0F
+#define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32 0x10
+#define FSCRYPT_POLICY_FLAGS_VALID 0x1F
/* Encryption algorithms */
#define FSCRYPT_MODE_AES_256_XTS 1
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index ca6665ea758a..781e482dc499 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -343,6 +343,8 @@ enum {
IFLA_BRPORT_NEIGH_SUPPRESS,
IFLA_BRPORT_ISOLATED,
IFLA_BRPORT_BACKUP_PORT,
+ IFLA_BRPORT_MRP_RING_OPEN,
+ IFLA_BRPORT_MRP_IN_OPEN,
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index be328c59389d..a78a8096f4ce 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -73,9 +73,12 @@ struct xdp_umem_reg {
};
struct xdp_statistics {
- __u64 rx_dropped; /* Dropped for reasons other than invalid desc */
+ __u64 rx_dropped; /* Dropped for other reasons */
__u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
__u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
+ __u64 rx_ring_full; /* Dropped due to rx ring being full */
+ __u64 rx_fill_ring_empty_descs; /* Failed to retrieve item from fill ring */
+ __u64 tx_ring_empty_descs; /* Failed to retrieve item from tx ring */
};
struct xdp_options {
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 8533bf07450f..7d6687618d80 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -123,6 +123,7 @@ struct in_addr {
#define IP_CHECKSUM 23
#define IP_BIND_ADDRESS_NO_PORT 24
#define IP_RECVFRAGSIZE 25
+#define IP_RECVERR_RFC4884 26
/* IP_MTU_DISCOVER values */
#define IP_PMTUDISC_DONT 0 /* Never send DF frames */
@@ -134,7 +135,7 @@ struct in_addr {
* this socket to prevent accepting spoofed ones.
*/
#define IP_PMTUDISC_INTERFACE 4
-/* weaker version of IP_PMTUDISC_INTERFACE, which allos packets to get
+/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get
* fragmented if they exeed the interface mtu
*/
#define IP_PMTUDISC_OMIT 5
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 428c7dde6b4b..7d8eced6f459 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -116,7 +116,7 @@ struct kvm_irq_level {
* ACPI gsi notion of irq.
* For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
* For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
- * For ARM: See Documentation/virt/kvm/api.txt
+ * For ARM: See Documentation/virt/kvm/api.rst
*/
union {
__u32 irq;
@@ -188,10 +188,13 @@ struct kvm_s390_cmma_log {
struct kvm_hyperv_exit {
#define KVM_EXIT_HYPERV_SYNIC 1
#define KVM_EXIT_HYPERV_HCALL 2
+#define KVM_EXIT_HYPERV_SYNDBG 3
__u32 type;
+ __u32 pad1;
union {
struct {
__u32 msr;
+ __u32 pad2;
__u64 control;
__u64 evt_page;
__u64 msg_page;
@@ -201,6 +204,15 @@ struct kvm_hyperv_exit {
__u64 result;
__u64 params[2];
} hcall;
+ struct {
+ __u32 msr;
+ __u32 pad2;
+ __u64 control;
+ __u64 status;
+ __u64 send_page;
+ __u64 recv_page;
+ __u64 pending_page;
+ } syndbg;
} u;
};
@@ -277,6 +289,7 @@ struct kvm_run {
/* KVM_EXIT_FAIL_ENTRY */
struct {
__u64 hardware_entry_failure_reason;
+ __u32 cpu;
} fail_entry;
/* KVM_EXIT_EXCEPTION */
struct {
@@ -777,9 +790,10 @@ struct kvm_ppc_resize_hpt {
#define KVM_VM_PPC_HV 1
#define KVM_VM_PPC_PR 2
-/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */
-#define KVM_VM_MIPS_TE 0
+/* on MIPS, 0 indicates auto, 1 forces VZ ASE, 2 forces trap & emulate */
+#define KVM_VM_MIPS_AUTO 0
#define KVM_VM_MIPS_VZ 1
+#define KVM_VM_MIPS_TE 2
#define KVM_S390_SIE_PAGE_OFFSET 1
@@ -1017,6 +1031,12 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_VCPU_RESETS 179
#define KVM_CAP_S390_PROTECTED 180
#define KVM_CAP_PPC_SECURE_GUEST 181
+#define KVM_CAP_HALT_POLL 182
+#define KVM_CAP_ASYNC_PF_INT 183
+#define KVM_CAP_LAST_CPU 184
+#define KVM_CAP_SMALLER_MAXPHYADDR 185
+#define KVM_CAP_S390_DIAG318 186
+#define KVM_CAP_STEAL_TIME 187
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1107,7 +1127,7 @@ struct kvm_xen_hvm_config {
*
* KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
* the irqfd to operate in resampling mode for level triggered interrupt
- * emulation. See Documentation/virt/kvm/api.txt.
+ * emulation. See Documentation/virt/kvm/api.rst.
*/
#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 7b2d6fc9e6ed..3e5dcdd48a49 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -383,7 +383,8 @@ struct perf_event_attr {
bpf_event : 1, /* include bpf events */
aux_output : 1, /* generate AUX records instead of events */
cgroup : 1, /* include cgroup events */
- __reserved_1 : 31;
+ text_poke : 1, /* include text poke events */
+ __reserved_1 : 30;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -532,9 +533,10 @@ struct perf_event_mmap_page {
cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */
cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */
- cap_user_time : 1, /* The time_* fields are used */
+ cap_user_time : 1, /* The time_{shift,mult,offset} fields are used */
cap_user_time_zero : 1, /* The time_zero field is used */
- cap_____res : 59;
+ cap_user_time_short : 1, /* the time_{cycle,mask} fields are used */
+ cap_____res : 58;
};
};
@@ -593,13 +595,29 @@ struct perf_event_mmap_page {
* ((rem * time_mult) >> time_shift);
*/
__u64 time_zero;
+
__u32 size; /* Header size up to __reserved[] fields. */
+ __u32 __reserved_1;
+
+ /*
+ * If cap_usr_time_short, the hardware clock is less than 64bit wide
+ * and we must compute the 'cyc' value, as used by cap_usr_time, as:
+ *
+ * cyc = time_cycles + ((cyc - time_cycles) & time_mask)
+ *
+ * NOTE: this form is explicitly chosen such that cap_usr_time_short
+ * is a correction on top of cap_usr_time, and code that doesn't
+ * know about cap_usr_time_short still works under the assumption
+ * the counter doesn't wrap.
+ */
+ __u64 time_cycles;
+ __u64 time_mask;
/*
* Hole for extension of the self monitor capabilities
*/
- __u8 __reserved[118*8+4]; /* align to 1k. */
+ __u8 __reserved[116*8]; /* align to 1k. */
/*
* Control data for the mmap() data buffer.
@@ -1024,12 +1042,35 @@ enum perf_event_type {
*/
PERF_RECORD_CGROUP = 19,
+ /*
+ * Records changes to kernel text i.e. self-modified code. 'old_len' is
+ * the number of old bytes, 'new_len' is the number of new bytes. Either
+ * 'old_len' or 'new_len' may be zero to indicate, for example, the
+ * addition or removal of a trampoline. 'bytes' contains the old bytes
+ * followed immediately by the new bytes.
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u64 addr;
+ * u16 old_len;
+ * u16 new_len;
+ * u8 bytes[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_TEXT_POKE = 20,
+
PERF_RECORD_MAX, /* non-ABI */
};
enum perf_record_ksymbol_type {
PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0,
PERF_RECORD_KSYMBOL_TYPE_BPF = 1,
+ /*
+ * Out of line code such as kprobe-replaced instructions or optimized
+ * kprobes or ftrace trampolines.
+ */
+ PERF_RECORD_KSYMBOL_TYPE_OOL = 2,
PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */
};
@@ -1155,7 +1196,7 @@ union perf_mem_data_src {
#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
/* 1 free */
-#define PERF_MEM_SNOOPX_SHIFT 37
+#define PERF_MEM_SNOOPX_SHIFT 38
/* locked instruction */
#define PERF_MEM_LOCK_NA 0x01 /* not available */
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index ad80a5c885d5..82cc58fe9368 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -123,7 +123,10 @@ struct statx {
__u32 stx_dev_major; /* ID of device containing file [uncond] */
__u32 stx_dev_minor;
/* 0x90 */
- __u64 __spare2[14]; /* Spare space for future expansion */
+ __u64 stx_mnt_id;
+ __u64 __spare2;
+ /* 0xa0 */
+ __u64 __spare3[12]; /* Spare space for future expansion */
/* 0x100 */
};
@@ -148,9 +151,19 @@ struct statx {
#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
-#define STATX_ALL 0x00000fffU /* All currently supported flags */
+#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
+
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
+#ifndef __KERNEL__
+/*
+ * This is deprecated, and shall remain the same value in the future. To avoid
+ * confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME)
+ * instead.
+ */
+#define STATX_ALL 0x00000fffU
+#endif
+
/*
* Attributes to be found in stx_attributes and masked in stx_attributes_mask.
*
@@ -168,7 +181,9 @@ struct statx {
#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
+#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
+#define STATX_ATTR_DAX 0x00002000 /* [I] File is DAX */
#endif /* _UAPI_LINUX_STAT_H */
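A user-space sketch requesting the new mount-ID field (the raw syscall is used to avoid depending on a libc wrapper; the path is illustrative):

/* Sketch: fetch stx_mnt_id together with the basic stats. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/stat.h>

int main(void)
{
	struct statx stx;

	if (syscall(SYS_statx, AT_FDCWD, "/etc/hostname", 0,
		    STATX_BASIC_STATS | STATX_MNT_ID, &stx))
		return 1;
	if (stx.stx_mask & STATX_MNT_ID)
		printf("mount id: %llu\n", (unsigned long long)stx.stx_mnt_id);
	return 0;
}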
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index 9fe72e4b1373..75232185324a 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -15,6 +15,8 @@
#include <linux/types.h>
#include <linux/ioctl.h>
+#define VHOST_FILE_UNBIND -1
+
/* ioctls */
#define VHOST_VIRTIO 0xAF
@@ -89,6 +91,8 @@
/* Use message type V2 */
#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
+/* IOTLB can accept batching hints */
+#define VHOST_BACKEND_F_IOTLB_BATCH 0x2
#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
@@ -140,4 +144,6 @@
/* Get the max ring size. */
#define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16)
+/* Set event fd for config interrupt*/
+#define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int)
#endif
diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c
index 0f257139b003..7703f0118385 100644
--- a/tools/io_uring/io_uring-bench.c
+++ b/tools/io_uring/io_uring-bench.c
@@ -130,7 +130,7 @@ static int io_uring_register_files(struct submitter *s)
s->nr_files);
}
-static int gettid(void)
+static int lk_gettid(void)
{
return syscall(__NR_gettid);
}
@@ -281,7 +281,7 @@ static void *submitter_fn(void *data)
struct io_sq_ring *ring = &s->sq_ring;
int ret, prepped;
- printf("submitter=%d\n", gettid());
+ printf("submitter=%d\n", lk_gettid());
srand48_r(pthread_self(), &s->rand);
diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h
index 5f305c86b892..28a837b6069d 100644
--- a/tools/io_uring/liburing.h
+++ b/tools/io_uring/liburing.h
@@ -10,6 +10,7 @@ extern "C" {
#include <string.h>
#include "../../include/uapi/linux/io_uring.h"
#include <inttypes.h>
+#include <linux/swab.h>
#include "barrier.h"
/*
@@ -145,11 +146,14 @@ static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
}
static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
- short poll_mask)
+ unsigned poll_mask)
{
memset(sqe, 0, sizeof(*sqe));
sqe->opcode = IORING_OP_POLL_ADD;
sqe->fd = fd;
+#if __BYTE_ORDER == __BIG_ENDIAN
+ poll_mask = __swahw32(poll_mask);
+#endif
sqe->poll_events = poll_mask;
}
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index e83fc8e868f4..d199a3694be8 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -32,6 +32,7 @@ import resource
import struct
import re
import subprocess
+import signal
from collections import defaultdict, namedtuple
from functools import reduce
from datetime import datetime
@@ -228,6 +229,8 @@ IOCTL_NUMBERS = {
'RESET': 0x00002403,
}
+signal_received = False
+
ENCODING = locale.getpreferredencoding(False)
TRACE_FILTER = re.compile(r'^[^\(]*$')
@@ -1500,8 +1503,7 @@ class StdFormat(object):
def get_banner(self):
return self._banner
- @staticmethod
- def get_statline(keys, s):
+ def get_statline(self, keys, s):
res = ''
for key in keys:
res += ' %9d' % s[key].delta
@@ -1517,27 +1519,71 @@ class CSVFormat(object):
def get_banner(self):
return self._banner
- @staticmethod
- def get_statline(keys, s):
+ def get_statline(self, keys, s):
return reduce(lambda res, key: "{},{!s}".format(res, s[key].delta),
keys, '')
def log(stats, opts, frmt, keys):
"""Prints statistics as reiterating key block, multiple value blocks."""
+ global signal_received
line = 0
banner_repeat = 20
+ f = None
+
+ def do_banner(opts):
+ nonlocal f
+ if opts.log_to_file:
+ if not f:
+ try:
+ f = open(opts.log_to_file, 'a')
+ except (IOError, OSError):
+ sys.exit("Error: Could not open file: %s" %
+ opts.log_to_file)
+ if isinstance(frmt, CSVFormat) and f.tell() != 0:
+ return
+ print(frmt.get_banner(), file=f or sys.stdout)
+
+ def do_statline(opts, values):
+ statline = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + \
+ frmt.get_statline(keys, values)
+ print(statline, file=f or sys.stdout)
+
+ do_banner(opts)
+ banner_printed = True
while True:
try:
time.sleep(opts.set_delay)
- if line % banner_repeat == 0:
- print(frmt.get_banner())
- print(datetime.now().strftime("%Y-%m-%d %H:%M:%S") +
- frmt.get_statline(keys, stats.get()))
- line += 1
+ if signal_received:
+ banner_printed = True
+ line = 0
+ f.close()
+ do_banner(opts)
+ signal_received = False
+ if (line % banner_repeat == 0 and not banner_printed and
+ not (opts.log_to_file and isinstance(frmt, CSVFormat))):
+ do_banner(opts)
+ banner_printed = True
+ values = stats.get()
+ if (not opts.skip_zero_records or
+ any(values[k].delta != 0 for k in keys)):
+ do_statline(opts, values)
+ line += 1
+ banner_printed = False
except KeyboardInterrupt:
break
+ if opts.log_to_file:
+ f.close()
+
+
+def handle_signal(sig, frame):
+ global signal_received
+
+ signal_received = True
+
+ return
+
def is_delay_valid(delay):
"""Verify delay is in valid value range."""
@@ -1610,7 +1656,7 @@ Press any other key to refresh statistics immediately.
argparser.add_argument('-c', '--csv',
action='store_true',
default=False,
- help='log in csv format - requires option -l/--log',
+ help='log in csv format - requires option -l/-L',
)
argparser.add_argument('-d', '--debugfs',
action='store_true',
@@ -1638,6 +1684,11 @@ Press any other key to refresh statistics immediately.
default=False,
help='run in logging mode (like vmstat)',
)
+ argparser.add_argument('-L', '--log-to-file',
+ type=str,
+ metavar='FILE',
+ help="like '--log', but logging to a file"
+ )
argparser.add_argument('-p', '--pid',
type=int,
default=0,
@@ -1655,9 +1706,16 @@ Press any other key to refresh statistics immediately.
default=False,
help='retrieve statistics from tracepoints',
)
+ argparser.add_argument('-z', '--skip-zero-records',
+ action='store_true',
+ default=False,
+ help='omit records with all zeros in logging mode',
+ )
options = argparser.parse_args()
- if options.csv and not options.log:
+ if options.csv and not (options.log or options.log_to_file):
sys.exit('Error: Option -c/--csv requires -l/--log')
+ if options.skip_zero_records and not (options.log or options.log_to_file):
+ sys.exit('Error: Option -z/--skip-zero-records requires -l/-L')
try:
# verify that we were passed a valid regex up front
re.compile(options.fields)
@@ -1737,7 +1795,9 @@ def main():
sys.stdout.write(' ' + '\n '.join(sorted(set(event_list))) + '\n')
sys.exit(0)
- if options.log:
+ if options.log or options.log_to_file:
+ if options.log_to_file:
+ signal.signal(signal.SIGHUP, handle_signal)
keys = sorted(stats.get().keys())
if options.csv:
frmt = CSVFormat(keys)
diff --git a/tools/kvm/kvm_stat/kvm_stat.service b/tools/kvm/kvm_stat/kvm_stat.service
new file mode 100644
index 000000000000..71aabaffe779
--- /dev/null
+++ b/tools/kvm/kvm_stat/kvm_stat.service
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+[Unit]
+Description=Service that logs KVM kernel module trace events
+Before=qemu-kvm.service
+
+[Service]
+Type=simple
+ExecStart=/usr/bin/kvm_stat -dtcz -s 10 -L /var/log/kvm_stat.csv
+ExecReload=/bin/kill -HUP $MAINPID
+Restart=always
+SyslogIdentifier=kvm_stat
+SyslogLevel=debug
+
+[Install]
+WantedBy=multi-user.target
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index a97ded2aedad..feaf46451e83 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -65,8 +65,10 @@ OPTIONS
run in batch mode for one second
-c::
---csv=<file>::
- log in csv format - requires option -l/--log
+--csv::
+ log in csv format. Requires option -l/--log or -L/--log-to-file.
+	log in csv format. Requires option -l/--log or -L/--log-to-file.
+	When used with option -L/--log-to-file, the header is only ever
+	written to the start of the file to preserve the format.
-d::
--debugfs::
@@ -92,6 +94,11 @@ OPTIONS
--log::
run in logging mode (like vmstat)
+
+-L<file>::
+--log-to-file=<file>::
+ like -l/--log, but logging to a file. Appends to existing files.
+
-p<pid>::
--pid=<pid>::
limit statistics to one virtual machine (pid)
@@ -104,6 +111,10 @@ OPTIONS
--tracepoints::
retrieve statistics from tracepoints
+-z::
+--skip-zero-records::
+ omit records with all zeros in logging mode
+
SEE ALSO
--------
'perf'(1), 'trace-cmd'(1)
diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c
index 58d44d5eee31..5e6cb9debe37 100644
--- a/tools/lib/api/fd/array.c
+++ b/tools/lib/api/fd/array.c
@@ -8,6 +8,7 @@
#include <poll.h>
#include <stdlib.h>
#include <unistd.h>
+#include <string.h>
void fdarray__init(struct fdarray *fda, int nr_autogrow)
{
@@ -19,7 +20,7 @@ void fdarray__init(struct fdarray *fda, int nr_autogrow)
int fdarray__grow(struct fdarray *fda, int nr)
{
- void *priv;
+ struct priv *priv;
int nr_alloc = fda->nr_alloc + nr;
size_t psize = sizeof(fda->priv[0]) * nr_alloc;
size_t size = sizeof(struct pollfd) * nr_alloc;
@@ -34,6 +35,9 @@ int fdarray__grow(struct fdarray *fda, int nr)
return -ENOMEM;
}
+ memset(&entries[fda->nr_alloc], 0, sizeof(struct pollfd) * nr);
+ memset(&priv[fda->nr_alloc], 0, sizeof(fda->priv[0]) * nr);
+
fda->nr_alloc = nr_alloc;
fda->entries = entries;
fda->priv = priv;
@@ -69,7 +73,7 @@ void fdarray__delete(struct fdarray *fda)
free(fda);
}
-int fdarray__add(struct fdarray *fda, int fd, short revents)
+int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags)
{
int pos = fda->nr;
@@ -79,6 +83,7 @@ int fdarray__add(struct fdarray *fda, int fd, short revents)
fda->entries[fda->nr].fd = fd;
fda->entries[fda->nr].events = revents;
+ fda->priv[fda->nr].flags = flags;
fda->nr++;
return pos;
}
@@ -93,22 +98,22 @@ int fdarray__filter(struct fdarray *fda, short revents,
return 0;
for (fd = 0; fd < fda->nr; ++fd) {
+ if (!fda->entries[fd].events)
+ continue;
+
if (fda->entries[fd].revents & revents) {
if (entry_destructor)
entry_destructor(fda, fd, arg);
+ fda->entries[fd].revents = fda->entries[fd].events = 0;
continue;
}
- if (fd != nr) {
- fda->entries[nr] = fda->entries[fd];
- fda->priv[nr] = fda->priv[fd];
- }
-
- ++nr;
+ if (!(fda->priv[fd].flags & fdarray_flag__nonfilterable))
+ ++nr;
}
- return fda->nr = nr;
+ return nr;
}
int fdarray__poll(struct fdarray *fda, int timeout)
diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h
index b39557d1a88f..7fcf21a33c0c 100644
--- a/tools/lib/api/fd/array.h
+++ b/tools/lib/api/fd/array.h
@@ -21,19 +21,27 @@ struct fdarray {
int nr_alloc;
int nr_autogrow;
struct pollfd *entries;
- union {
- int idx;
- void *ptr;
+ struct priv {
+ union {
+ int idx;
+ void *ptr;
+ };
+ unsigned int flags;
} *priv;
};
+enum fdarray_flags {
+ fdarray_flag__default = 0x00000000,
+ fdarray_flag__nonfilterable = 0x00000001
+};
+
void fdarray__init(struct fdarray *fda, int nr_autogrow);
void fdarray__exit(struct fdarray *fda);
struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow);
void fdarray__delete(struct fdarray *fda);
-int fdarray__add(struct fdarray *fda, int fd, short revents);
+int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags);
int fdarray__poll(struct fdarray *fda, int timeout);
int fdarray__filter(struct fdarray *fda, short revents,
void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 027b18f7ed8c..82f53d81a7a7 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -90,6 +90,7 @@ struct fs {
const char * const *mounts;
char path[PATH_MAX];
bool found;
+ bool checked;
long magic;
};
@@ -111,31 +112,37 @@ static struct fs fs__entries[] = {
.name = "sysfs",
.mounts = sysfs__fs_known_mountpoints,
.magic = SYSFS_MAGIC,
+ .checked = false,
},
[FS__PROCFS] = {
.name = "proc",
.mounts = procfs__known_mountpoints,
.magic = PROC_SUPER_MAGIC,
+ .checked = false,
},
[FS__DEBUGFS] = {
.name = "debugfs",
.mounts = debugfs__known_mountpoints,
.magic = DEBUGFS_MAGIC,
+ .checked = false,
},
[FS__TRACEFS] = {
.name = "tracefs",
.mounts = tracefs__known_mountpoints,
.magic = TRACEFS_MAGIC,
+ .checked = false,
},
[FS__HUGETLBFS] = {
.name = "hugetlbfs",
.mounts = hugetlbfs__known_mountpoints,
.magic = HUGETLBFS_MAGIC,
+ .checked = false,
},
[FS__BPF_FS] = {
.name = "bpf",
.mounts = bpf_fs__known_mountpoints,
.magic = BPF_FS_MAGIC,
+ .checked = false,
},
};
@@ -158,6 +165,7 @@ static bool fs__read_mounts(struct fs *fs)
}
fclose(fp);
+ fs->checked = true;
return fs->found = found;
}
@@ -220,6 +228,7 @@ static bool fs__env_override(struct fs *fs)
return false;
fs->found = true;
+ fs->checked = true;
strncpy(fs->path, override_path, sizeof(fs->path) - 1);
fs->path[sizeof(fs->path) - 1] = '\0';
return true;
@@ -246,6 +255,14 @@ static const char *fs__mountpoint(int idx)
if (fs->found)
return (const char *)fs->path;
+	/* The mount point was already looked up and did not exist, so
+	 * return NULL to avoid scanning again.
+ * This makes the found and not found paths cost equivalent
+ * in case of multiple calls.
+ */
+ if (fs->checked)
+ return NULL;
+
return fs__get_mountpoint(fs);
}
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 936edb95e1f3..aa222ca30311 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -18,6 +18,18 @@
const char *name##__mount(void); \
bool name##__configured(void); \
+/*
+ * The xxxx__mountpoint() entry points find the first match mount point for each
+ * filesystems listed below, where xxxx is the filesystem type.
+ *
+ * The interface is as follows:
+ *
+ * - If a mount point is found on first call, it is cached and used for all
+ * subsequent calls.
+ *
+ * - If a mount point is not found, NULL is returned on first call and all
+ * subsequent calls.
+ */
FS(sysfs)
FS(procfs)
FS(debugfs)
diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
new file mode 100644
index 000000000000..777c20f6b604
--- /dev/null
+++ b/tools/lib/api/io.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Lightweight buffered reading library.
+ *
+ * Copyright 2019 Google LLC.
+ */
+#ifndef __API_IO__
+#define __API_IO__
+
+#include <stdlib.h>
+#include <unistd.h>
+
+struct io {
+	/* File descriptor being read. */
+ int fd;
+ /* Size of the read buffer. */
+ unsigned int buf_len;
+ /* Pointer to storage for buffering read. */
+ char *buf;
+ /* End of the storage. */
+ char *end;
+ /* Currently accessed data pointer. */
+ char *data;
+	/* Set true when the end of file or a read error occurs. */
+ bool eof;
+};
+
+static inline void io__init(struct io *io, int fd,
+ char *buf, unsigned int buf_len)
+{
+ io->fd = fd;
+ io->buf_len = buf_len;
+ io->buf = buf;
+ io->end = buf;
+ io->data = buf;
+ io->eof = false;
+}
+
+/* Reads one character from the "io" file with similar semantics to fgetc. */
+static inline int io__get_char(struct io *io)
+{
+ char *ptr = io->data;
+
+ if (io->eof)
+ return -1;
+
+ if (ptr == io->end) {
+ ssize_t n = read(io->fd, io->buf, io->buf_len);
+
+ if (n <= 0) {
+ io->eof = true;
+ return -1;
+ }
+ ptr = &io->buf[0];
+ io->end = &io->buf[n];
+ }
+ io->data = ptr + 1;
+ return *ptr;
+}
+
+/* Read a hexadecimal value with no 0x prefix into the out argument hex. If the
+ * first character isn't hexadecimal, returns -2; on end of file, returns -1;
+ * otherwise returns the character after the hexadecimal value, which may be
+ * -1 at eof.
+ * If the read value is larger than a u64 the high-order bits will be dropped.
+ */
+static inline int io__get_hex(struct io *io, __u64 *hex)
+{
+ bool first_read = true;
+
+ *hex = 0;
+ while (true) {
+ int ch = io__get_char(io);
+
+ if (ch < 0)
+ return ch;
+ if (ch >= '0' && ch <= '9')
+ *hex = (*hex << 4) | (ch - '0');
+ else if (ch >= 'a' && ch <= 'f')
+ *hex = (*hex << 4) | (ch - 'a' + 10);
+ else if (ch >= 'A' && ch <= 'F')
+ *hex = (*hex << 4) | (ch - 'A' + 10);
+ else if (first_read)
+ return -2;
+ else
+ return ch;
+ first_read = false;
+ }
+}
+
+/* Read a positive decimal value into the out argument dec. If the first
+ * character isn't a decimal digit, returns -2; on end of file, returns -1;
+ * otherwise returns the character after the decimal value, which may be -1
+ * at eof. If the read value
+ * is larger than a u64 the high-order bits will be dropped.
+ */
+static inline int io__get_dec(struct io *io, __u64 *dec)
+{
+ bool first_read = true;
+
+ *dec = 0;
+ while (true) {
+ int ch = io__get_char(io);
+
+ if (ch < 0)
+ return ch;
+ if (ch >= '0' && ch <= '9')
+ *dec = (*dec * 10) + ch - '0';
+ else if (first_read)
+ return -2;
+ else
+ return ch;
+ first_read = false;
+ }
+}
+
+#endif /* __API_IO__ */
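A small caller sketch for the new buffered reader (the include path and input file are assumptions) showing the intended read loop:

/* Sketch: read the first decimal number from a file with struct io. */
#include <fcntl.h>
#include <stdbool.h>
#include <unistd.h>
#include <linux/types.h>
#include <api/io.h>		/* assumes -I tools/lib on the compile line */

static __u64 read_first_number(const char *path)
{
	char buf[128];
	struct io io;
	__u64 val = 0;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return 0;
	io__init(&io, fd, buf, sizeof(buf));
	/* Returns the delimiter after the number, -1 on eof, -2 if not a digit. */
	io__get_dec(&io, &val);
	close(fd);
	return val;
}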
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index e3962cfbc9a6..190366d05588 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,3 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
- btf_dump.o
+ btf_dump.o ringbuf.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index aee7f1a83c77..5f9abed3e226 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -1,6 +1,9 @@
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
# Most of this file is copied from tools/lib/traceevent/Makefile
+RM ?= rm
+srctree = $(abs_srctree)
+
LIBBPF_VERSION := $(shell \
grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \
sort -rV | head -n1 | cut -d'_' -f2)
@@ -56,10 +59,10 @@ ifndef VERBOSE
endif
FEATURE_USER = .libbpf
-FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray
+FEATURE_TESTS = libelf zlib bpf
FEATURE_DISPLAY = libelf zlib bpf
-INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
+INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi
FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
check_feat := 1
@@ -95,27 +98,18 @@ PC_FILE = libbpf.pc
ifdef EXTRA_CFLAGS
CFLAGS := $(EXTRA_CFLAGS)
else
- CFLAGS := -g -Wall
-endif
-
-ifeq ($(feature-libelf-mmap), 1)
- override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
-endif
-
-ifeq ($(feature-reallocarray), 0)
- override CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
+ CFLAGS := -g -O2
endif
# Append required CFLAGS
-override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum
override CFLAGS += -Werror -Wall
-override CFLAGS += -fPIC
override CFLAGS += $(INCLUDES)
override CFLAGS += -fvisibility=hidden
override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
# flags specific for shared library
-SHLIB_FLAGS := -DSHARED
+SHLIB_FLAGS := -DSHARED -fPIC
ifeq ($(VERBOSE),1)
Q =
@@ -151,7 +145,8 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
sed 's/\[.*\]//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
sort -u | wc -l)
-VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \
+VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
+ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
@@ -196,7 +191,7 @@ $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)
@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
$(OUTPUT)libbpf.a: $(BPF_IN_STATIC)
- $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
+ $(QUIET_LINK)$(RM) -f $@; $(AR) rcs $@ $^
$(OUTPUT)libbpf.pc:
$(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
@@ -218,7 +213,8 @@ check_abi: $(OUTPUT)libbpf.so
sed 's/\[.*\]//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
- readelf -s --wide $(OUTPUT)libbpf.so | \
+ readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
+ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \
sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \
diff -u $(OUTPUT)libbpf_global_syms.tmp \
@@ -264,15 +260,15 @@ install_pkgconfig: $(PC_FILE)
$(call QUIET_INSTALL, $(PC_FILE)) \
$(call do_install,$(PC_FILE),$(libdir_SQ)/pkgconfig,644)
-install: install_lib install_pkgconfig
+install: install_lib install_pkgconfig install_headers
### Cleaning rules
config-clean:
- $(call QUIET_CLEAN, config)
+ $(call QUIET_CLEAN, feature-detect)
$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
-clean:
+clean: config-clean
$(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \
*~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \
$(SHARED_OBJDIR) $(STATIC_OBJDIR) \
@@ -299,7 +295,7 @@ cscope:
cscope -b -q -I $(srctree)/include -f cscope.out
tags:
- rm -f TAGS tags
+ $(RM) -f TAGS tags
ls *.c *.h | xargs $(TAGS_PROG) -a
# Declare the contents of the .PHONY variable as phony. We keep that
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 5cc1b0785d18..d27e34133973 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -32,9 +32,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
/*
* When building perf, unistd.h is overridden. __NR_bpf is
* required to be defined explicitly.
@@ -589,19 +586,45 @@ int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
const struct bpf_link_create_opts *opts)
{
+ __u32 target_btf_id, iter_info_len;
union bpf_attr attr;
if (!OPTS_VALID(opts, bpf_link_create_opts))
return -EINVAL;
+ iter_info_len = OPTS_GET(opts, iter_info_len, 0);
+ target_btf_id = OPTS_GET(opts, target_btf_id, 0);
+
+ if (iter_info_len && target_btf_id)
+ return -EINVAL;
+
memset(&attr, 0, sizeof(attr));
attr.link_create.prog_fd = prog_fd;
attr.link_create.target_fd = target_fd;
attr.link_create.attach_type = attach_type;
+ attr.link_create.flags = OPTS_GET(opts, flags, 0);
+
+ if (iter_info_len) {
+ attr.link_create.iter_info =
+ ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
+ attr.link_create.iter_info_len = iter_info_len;
+ } else if (target_btf_id) {
+ attr.link_create.target_btf_id = target_btf_id;
+ }
return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
}
+int bpf_link_detach(int link_fd)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.link_detach.link_fd = link_fd;
+
+ return sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+}
+
int bpf_link_update(int link_fd, int new_prog_fd,
const struct bpf_link_update_opts *opts)
{
@@ -619,6 +642,16 @@ int bpf_link_update(int link_fd, int new_prog_fd,
return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
}
+int bpf_iter_create(int link_fd)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.iter_create.link_fd = link_fd;
+
+ return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+}
+
int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
__u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
{
@@ -691,6 +724,37 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
return ret;
}
+int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
+{
+ union bpf_attr attr;
+ int ret;
+
+ if (!OPTS_VALID(opts, bpf_test_run_opts))
+ return -EINVAL;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.test.prog_fd = prog_fd;
+ attr.test.cpu = OPTS_GET(opts, cpu, 0);
+ attr.test.flags = OPTS_GET(opts, flags, 0);
+ attr.test.repeat = OPTS_GET(opts, repeat, 0);
+ attr.test.duration = OPTS_GET(opts, duration, 0);
+ attr.test.ctx_size_in = OPTS_GET(opts, ctx_size_in, 0);
+ attr.test.ctx_size_out = OPTS_GET(opts, ctx_size_out, 0);
+ attr.test.data_size_in = OPTS_GET(opts, data_size_in, 0);
+ attr.test.data_size_out = OPTS_GET(opts, data_size_out, 0);
+ attr.test.ctx_in = ptr_to_u64(OPTS_GET(opts, ctx_in, NULL));
+ attr.test.ctx_out = ptr_to_u64(OPTS_GET(opts, ctx_out, NULL));
+ attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL));
+ attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL));
+
+ ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+ OPTS_SET(opts, data_size_out, attr.test.data_size_out);
+ OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out);
+ OPTS_SET(opts, duration, attr.test.duration);
+ OPTS_SET(opts, retval, attr.test.retval);
+ return ret;
+}
+
static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
{
union bpf_attr attr;
@@ -721,6 +785,11 @@ int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id)
return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID);
}
+int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
+{
+ return bpf_obj_get_next_id(start_id, next_id, BPF_LINK_GET_NEXT_ID);
+}
+
int bpf_prog_get_fd_by_id(__u32 id)
{
union bpf_attr attr;
@@ -751,13 +820,23 @@ int bpf_btf_get_fd_by_id(__u32 id)
return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
}
-int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
+int bpf_link_get_fd_by_id(__u32 id)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.link_id = id;
+
+ return sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
{
union bpf_attr attr;
int err;
memset(&attr, 0, sizeof(attr));
- attr.info.bpf_fd = prog_fd;
+ attr.info.bpf_fd = bpf_fd;
attr.info.info_len = *info_len;
attr.info.info = ptr_to_u64(info);
@@ -779,7 +858,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)
return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
}
-int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
+int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
bool do_log)
{
union bpf_attr attr = {};
@@ -826,3 +905,29 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
return err;
}
+
+int bpf_enable_stats(enum bpf_stats_type type)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.enable_stats.type = type;
+
+ return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+}
+
+int bpf_prog_bind_map(int prog_fd, int map_fd,
+ const struct bpf_prog_bind_opts *opts)
+{
+ union bpf_attr attr;
+
+ if (!OPTS_VALID(opts, bpf_prog_bind_opts))
+ return -EINVAL;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_bind_map.prog_fd = prog_fd;
+ attr.prog_bind_map.map_fd = map_fd;
+ attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0);
+
+ return sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+}
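
For context, a minimal usage sketch of the new wrappers (not part of the diff; assumes an already-attached link_fd and the BPF_STATS_RUN_TIME enum value from an up-to-date linux/bpf.h):

	/* run-time stats stay enabled for as long as stats_fd is kept open */
	int stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
	if (stats_fd < 0)
		return stats_fd;	/* old kernel or insufficient privileges */

	/* force-detach the BPF program from its hook without closing link_fd */
	int err = bpf_link_detach(link_fd);

	close(stats_fd);		/* stats collection stops here */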
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 46d47afdd887..875dde20d56e 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -168,15 +168,22 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
enum bpf_attach_type type);
+union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */
struct bpf_link_create_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags;
+ union bpf_iter_link_info *iter_info;
+ __u32 iter_info_len;
+ __u32 target_btf_id;
};
-#define bpf_link_create_opts__last_field sz
+#define bpf_link_create_opts__last_field target_btf_id
LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
const struct bpf_link_create_opts *opts);
+LIBBPF_API int bpf_link_detach(int link_fd);
+
struct bpf_link_update_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
__u32 flags; /* extra flags */
@@ -187,6 +194,8 @@ struct bpf_link_update_opts {
LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
const struct bpf_link_update_opts *opts);
+LIBBPF_API int bpf_iter_create(int link_fd);
+
struct bpf_prog_test_run_attr {
int prog_fd;
int repeat;
@@ -216,20 +225,59 @@ LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
-LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
+LIBBPF_API int bpf_link_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len);
LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
__u32 query_flags, __u32 *attach_flags,
__u32 *prog_ids, __u32 *prog_cnt);
LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
-LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
+LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf,
__u32 log_buf_size, bool do_log);
LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
__u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
__u64 *probe_offset, __u64 *probe_addr);
+enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */
+LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
+
+struct bpf_prog_bind_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags;
+};
+#define bpf_prog_bind_opts__last_field flags
+
+LIBBPF_API int bpf_prog_bind_map(int prog_fd, int map_fd,
+ const struct bpf_prog_bind_opts *opts);
+
+struct bpf_test_run_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ const void *data_in; /* optional */
+ void *data_out; /* optional */
+ __u32 data_size_in;
+ __u32 data_size_out; /* in: max length of data_out
+ * out: length of data_out
+ */
+ const void *ctx_in; /* optional */
+ void *ctx_out; /* optional */
+ __u32 ctx_size_in;
+ __u32 ctx_size_out; /* in: max length of ctx_out
+ * out: length of ctx_out
+ */
+ __u32 retval; /* out: return code of the BPF program */
+ int repeat;
+ __u32 duration; /* out: average per repetition in ns */
+ __u32 flags;
+ __u32 cpu;
+};
+#define bpf_test_run_opts__last_field cpu
+
+LIBBPF_API int bpf_prog_test_run_opts(int prog_fd,
+ struct bpf_test_run_opts *opts);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
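
A sketch of driving the new opts-based test-run API (not part of the diff; prog_fd and the pkt buffer are illustrative, and DECLARE_LIBBPF_OPTS() is libbpf's existing opts-declaration macro):

	char pkt[64] = {};	/* hypothetical input packet */

	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, topts,
		.data_in = pkt,
		.data_size_in = sizeof(pkt),
		.repeat = 100,
	);

	int err = bpf_prog_test_run_opts(prog_fd, &topts);
	if (!err)
		printf("retval=%u, avg duration=%uns\n",
		       topts.retval, topts.duration);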
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index 7009dc90e012..bbcefb3ff5a5 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -19,32 +19,52 @@ enum bpf_field_info_kind {
BPF_FIELD_RSHIFT_U64 = 5,
};
+/* second argument to __builtin_btf_type_id() built-in */
+enum bpf_type_id_kind {
+ BPF_TYPE_ID_LOCAL = 0, /* BTF type ID in local program */
+ BPF_TYPE_ID_TARGET = 1, /* BTF type ID in target kernel */
+};
+
+/* second argument to __builtin_preserve_type_info() built-in */
+enum bpf_type_info_kind {
+ BPF_TYPE_EXISTS = 0, /* type existence in target kernel */
+ BPF_TYPE_SIZE = 1, /* type size in target kernel */
+};
+
+/* second argument to __builtin_preserve_enum_value() built-in */
+enum bpf_enum_value_kind {
+ BPF_ENUMVAL_EXISTS = 0, /* enum value existence in kernel */
+	BPF_ENUMVAL_VALUE = 1,		/* enumerator's integer value */
+};
+
#define __CORE_RELO(src, field, info) \
__builtin_preserve_field_info((src)->field, BPF_FIELD_##info)
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
- bpf_probe_read((void *)dst, \
- __CORE_RELO(src, fld, BYTE_SIZE), \
- (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
+ bpf_probe_read_kernel( \
+ (void *)dst, \
+ __CORE_RELO(src, fld, BYTE_SIZE), \
+ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
#else
/* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so
* for big-endian we need to adjust destination pointer accordingly, based on
* field byte size
*/
#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
- bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \
- __CORE_RELO(src, fld, BYTE_SIZE), \
- (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
+ bpf_probe_read_kernel( \
+ (void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \
+ __CORE_RELO(src, fld, BYTE_SIZE), \
+ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
#endif
/*
* Extract bitfield, identified by s->field, and return its value as u64.
 * All this is done in a relocatable manner, so bitfield changes such as
 * signedness, bit size, or offset are handled automatically.
- * This version of macro is using bpf_probe_read() to read underlying integer
- * storage. Macro functions as an expression and its return type is
- * bpf_probe_read()'s return value: 0, on success, <0 on error.
+ * This version of macro is using bpf_probe_read_kernel() to read underlying
+ * integer storage. Macro functions as an expression and its return type is
+ * bpf_probe_read_kernel()'s return value: 0, on success, <0 on error.
*/
#define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \
unsigned long long val = 0; \
@@ -92,15 +112,75 @@ enum bpf_field_info_kind {
__builtin_preserve_field_info(field, BPF_FIELD_EXISTS)
/*
- * Convenience macro to get byte size of a field. Works for integers,
+ * Convenience macro to get the byte size of a field. Works for integers,
* struct/unions, pointers, arrays, and enums.
*/
#define bpf_core_field_size(field) \
__builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE)
/*
- * bpf_core_read() abstracts away bpf_probe_read() call and captures offset
- * relocation for source address using __builtin_preserve_access_index()
+ * Convenience macro to get the BTF type ID of a specified type, using local
+ * BTF information. Returns a 32-bit unsigned integer with the type ID from
+ * the program's own BTF. Always succeeds.
+ */
+#define bpf_core_type_id_local(type) \
+ __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL)
+
+/*
+ * Convenience macro to get BTF type ID of a target kernel's type that matches
+ * specified local type.
+ * Returns:
+ * - valid 32-bit unsigned type ID in kernel BTF;
+ * - 0, if no matching type was found in a target kernel BTF.
+ */
+#define bpf_core_type_id_kernel(type) \
+ __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET)
+
+/*
+ * Convenience macro to check that the provided named type
+ * (struct/union/enum/typedef) exists in a target kernel.
+ * Returns:
+ * 1, if such type is present in target kernel's BTF;
+ * 0, if no matching type is found.
+ */
+#define bpf_core_type_exists(type) \
+ __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS)
+
+/*
+ * Convenience macro to get the byte size of a provided named type
+ * (struct/union/enum/typedef) in a target kernel.
+ * Returns:
+ * >= 0 size (in bytes), if type is present in target kernel's BTF;
+ * 0, if no matching type is found.
+ */
+#define bpf_core_type_size(type) \
+ __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE)
+
+/*
+ * Convenience macro to check that the provided enumerator value is defined in
+ * a target kernel.
+ * Returns:
+ * 1, if specified enum type and its enumerator value are present in target
+ * kernel's BTF;
+ * 0, if no matching enum and/or enum value within that enum is found.
+ */
+#define bpf_core_enum_value_exists(enum_type, enum_value) \
+ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS)
+
+/*
+ * Convenience macro to get the integer value of an enumerator value in
+ * a target kernel.
+ * Returns:
+ * 64-bit value, if specified enum type and its enumerator value are
+ * present in target kernel's BTF;
+ * 0, if no matching enum and/or enum value within that enum is found.
+ */
+#define bpf_core_enum_value(enum_type, enum_value) \
+ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE)
+
+/*
+ * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures
+ * offset relocation for source address using __builtin_preserve_access_index()
* built-in, provided by Clang.
*
* __builtin_preserve_access_index() takes as an argument an expression of
@@ -115,8 +195,8 @@ enum bpf_field_info_kind {
* (local) BTF, used to record relocation.
*/
#define bpf_core_read(dst, sz, src) \
- bpf_probe_read(dst, sz, \
- (const void *)__builtin_preserve_access_index(src))
+ bpf_probe_read_kernel(dst, sz, \
+ (const void *)__builtin_preserve_access_index(src))
/*
* bpf_core_read_str() is a thin wrapper around bpf_probe_read_str()
@@ -124,8 +204,8 @@ enum bpf_field_info_kind {
* argument.
*/
#define bpf_core_read_str(dst, sz, src) \
- bpf_probe_read_str(dst, sz, \
- (const void *)__builtin_preserve_access_index(src))
+ bpf_probe_read_kernel_str(dst, sz, \
+ (const void *)__builtin_preserve_access_index(src))
#define ___concat(a, b) a ## b
#define ___apply(fn, n) ___concat(fn, n)
@@ -217,7 +297,7 @@ enum bpf_field_info_kind {
*/
#define BPF_CORE_READ_INTO(dst, src, a, ...) \
({ \
- ___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__) \
+ ___core_read(bpf_core_read, dst, (src), a, ##__VA_ARGS__) \
})
/*
@@ -227,7 +307,7 @@ enum bpf_field_info_kind {
*/
#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \
({ \
- ___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \
+ ___core_read(bpf_core_read_str, dst, (src), a, ##__VA_ARGS__)\
})
/*
@@ -239,23 +319,25 @@ enum bpf_field_info_kind {
* int x = BPF_CORE_READ(s, a.b.c, d.e, f, g);
*
* BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF
- * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to:
+ * CO-RE relocatable bpf_probe_read_kernel() wrapper) calls, logically
+ * equivalent to:
* 1. const void *__t = s->a.b.c;
* 2. __t = __t->d.e;
* 3. __t = __t->f;
* 4. return __t->g;
*
* Equivalence is logical, because there is a heavy type casting/preservation
- * involved, as well as all the reads are happening through bpf_probe_read()
- * calls using __builtin_preserve_access_index() to emit CO-RE relocations.
+ * involved, as well as all the reads are happening through
+ * bpf_probe_read_kernel() calls using __builtin_preserve_access_index() to
+ * emit CO-RE relocations.
*
* N.B. Only up to 9 "field accessors" are supported, which should be more
* than enough for any practical purpose.
*/
#define BPF_CORE_READ(src, a, ...) \
({ \
- ___type(src, a, ##__VA_ARGS__) __r; \
- BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__); \
+ ___type((src), a, ##__VA_ARGS__) __r; \
+ BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \
__r; \
})
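
A BPF-side sketch of the new type- and enum-level CO-RE macros (not part of the diff; assumes kernel types such as struct tcp_sock and enum pid_type are available via vmlinux.h):

	__u32 id = bpf_core_type_id_kernel(struct tcp_sock);
	__u64 sz = 0, val = 0;

	if (bpf_core_type_exists(struct tcp_sock))
		sz = bpf_core_type_size(struct tcp_sock);

	if (bpf_core_enum_value_exists(enum pid_type, PIDTYPE_PID))
		val = bpf_core_enum_value(enum pid_type, PIDTYPE_PID);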
diff --git a/tools/lib/bpf/bpf_endian.h b/tools/lib/bpf/bpf_endian.h
index fbe28008450f..ec9db4feca9f 100644
--- a/tools/lib/bpf/bpf_endian.h
+++ b/tools/lib/bpf/bpf_endian.h
@@ -2,8 +2,35 @@
#ifndef __BPF_ENDIAN__
#define __BPF_ENDIAN__
-#include <linux/stddef.h>
-#include <linux/swab.h>
+/*
+ * Isolate byte #n and put it into byte #m, for __u##b type.
+ * E.g., moving byte #6 (nnnnnnnn) into byte #1 (mmmmmmmm) for __u64:
+ * 1) xxxxxxxx nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx
+ * 2) nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 00000000
+ * 3) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn
+ * 4) 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 00000000
+ */
+#define ___bpf_mvb(x, b, n, m) ((__u##b)(x) << (b-(n+1)*8) >> (b-8) << (m*8))
+
+#define ___bpf_swab16(x) ((__u16)( \
+ ___bpf_mvb(x, 16, 0, 1) | \
+ ___bpf_mvb(x, 16, 1, 0)))
+
+#define ___bpf_swab32(x) ((__u32)( \
+ ___bpf_mvb(x, 32, 0, 3) | \
+ ___bpf_mvb(x, 32, 1, 2) | \
+ ___bpf_mvb(x, 32, 2, 1) | \
+ ___bpf_mvb(x, 32, 3, 0)))
+
+#define ___bpf_swab64(x) ((__u64)( \
+ ___bpf_mvb(x, 64, 0, 7) | \
+ ___bpf_mvb(x, 64, 1, 6) | \
+ ___bpf_mvb(x, 64, 2, 5) | \
+ ___bpf_mvb(x, 64, 3, 4) | \
+ ___bpf_mvb(x, 64, 4, 3) | \
+ ___bpf_mvb(x, 64, 5, 2) | \
+ ___bpf_mvb(x, 64, 6, 1) | \
+ ___bpf_mvb(x, 64, 7, 0)))
/* LLVM's BPF target selects the endianness of the CPU
* it compiles on, or the user specifies (bpfel/bpfeb),
@@ -23,16 +50,16 @@
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
# define __bpf_ntohs(x) __builtin_bswap16(x)
# define __bpf_htons(x) __builtin_bswap16(x)
-# define __bpf_constant_ntohs(x) ___constant_swab16(x)
-# define __bpf_constant_htons(x) ___constant_swab16(x)
+# define __bpf_constant_ntohs(x) ___bpf_swab16(x)
+# define __bpf_constant_htons(x) ___bpf_swab16(x)
# define __bpf_ntohl(x) __builtin_bswap32(x)
# define __bpf_htonl(x) __builtin_bswap32(x)
-# define __bpf_constant_ntohl(x) ___constant_swab32(x)
-# define __bpf_constant_htonl(x) ___constant_swab32(x)
+# define __bpf_constant_ntohl(x) ___bpf_swab32(x)
+# define __bpf_constant_htonl(x) ___bpf_swab32(x)
# define __bpf_be64_to_cpu(x) __builtin_bswap64(x)
# define __bpf_cpu_to_be64(x) __builtin_bswap64(x)
-# define __bpf_constant_be64_to_cpu(x) ___constant_swab64(x)
-# define __bpf_constant_cpu_to_be64(x) ___constant_swab64(x)
+# define __bpf_constant_be64_to_cpu(x) ___bpf_swab64(x)
+# define __bpf_constant_cpu_to_be64(x) ___bpf_swab64(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
# define __bpf_ntohs(x) (x)
# define __bpf_htons(x) (x)
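
For reference, the public wrappers (in unchanged context further down this header) dispatch roughly like this, so the new ___bpf_swab*() macros only ever see compile-time constants:

	#define bpf_htons(x)				\
		(__builtin_constant_p(x) ?		\
		 __bpf_constant_htons(x) : __bpf_htons(x))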
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index f69cc208778a..72b251110c4d 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -2,10 +2,17 @@
#ifndef __BPF_HELPERS__
#define __BPF_HELPERS__
+/*
+ * Note that BPF programs need to include either
+ * vmlinux.h (auto-generated from BTF) or linux/types.h
+ * beforehand, since bpf_helper_defs.h uses types
+ * such as __u64.
+ */
#include "bpf_helper_defs.h"
#define __uint(name, val) int (*name)[val]
#define __type(name, val) typeof(val) *name
+#define __array(name, val) typeof(val) *name[]
/* Helper macro to print out debug messages */
#define bpf_printk(fmt, ...) \
@@ -25,11 +32,76 @@
#ifndef __always_inline
#define __always_inline __attribute__((always_inline))
#endif
+#ifndef __noinline
+#define __noinline __attribute__((noinline))
+#endif
#ifndef __weak
#define __weak __attribute__((weak))
#endif
/*
+ * Helper macros to manipulate data structures
+ */
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER)
+#endif
+#ifndef container_of
+#define container_of(ptr, type, member) \
+ ({ \
+ void *__mptr = (void *)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); \
+ })
+#endif
+
+/*
+ * Helper macro to throw a compilation error if __bpf_unreachable() gets
+ * built into the resulting code. This works because the BPF back end does
+ * not implement __builtin_trap(). This is useful to assert that certain
+ * code paths are never used and hence eliminated by the compiler.
+ *
+ * For example, consider a switch statement that covers known cases used by
+ * the program. __bpf_unreachable() can then reside in the default case. If
+ * the program gets extended such that a case is not covered in the switch
+ * statement, then it will throw a build error due to the default case not
+ * being compiled out.
+ */
+#ifndef __bpf_unreachable
+# define __bpf_unreachable() __builtin_trap()
+#endif
+
+/*
+ * Helper function to perform a tail call with a constant/immediate map slot.
+ */
+#if __clang_major__ >= 8 && defined(__bpf__)
+static __always_inline void
+bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
+{
+ if (!__builtin_constant_p(slot))
+ __bpf_unreachable();
+
+ /*
+ * Provide a hard guarantee that LLVM won't optimize setting r2 (map
+ * pointer) and r3 (constant map index) from _different paths_ ending
+ * up at the _same_ call insn as otherwise we won't be able to use the
+ * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel
+ * given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key
+ * tracking for prog array pokes") for details on verifier tracking.
+ *
+ * Note on clobber list: we need to stay in-line with BPF calling
+ * convention, so even if we don't end up using r0, r4, r5, we need
+ * to mark them as clobber so that LLVM doesn't end up using them
+ * before / after the call.
+ */
+ asm volatile("r1 = %[ctx]\n\t"
+ "r2 = %[map]\n\t"
+ "r3 = %[slot]\n\t"
+ "call 12"
+ :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)
+ : "r0", "r1", "r2", "r3", "r4", "r5");
+}
+#endif
+
+/*
* Helper structure used by eBPF C program
* to describe BPF map attributes to libbpf loader
*/
@@ -54,5 +126,6 @@ enum libbpf_tristate {
};
#define __kconfig __attribute__((section(".kconfig")))
+#define __ksym __attribute__((section(".ksyms")))
#endif
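
A sketch of calling the new bpf_tail_call_static() from a BPF program (not part of the diff; the jmp_table map and the "tc" section name are illustrative):

	struct {
		__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
		__uint(max_entries, 2);
		__uint(key_size, sizeof(__u32));
		__uint(value_size, sizeof(__u32));
	} jmp_table SEC(".maps");

	SEC("tc")
	int entry(struct __sk_buff *skb)
	{
		/* the slot must be a compile-time constant, otherwise the
		 * __bpf_unreachable() check above fails the build
		 */
		bpf_tail_call_static(skb, &jmp_table, 0);
		return 0;
	}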
diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c
index bafca49cb1e6..3ed1a27b5f7c 100644
--- a/tools/lib/bpf/bpf_prog_linfo.c
+++ b/tools/lib/bpf/bpf_prog_linfo.c
@@ -8,9 +8,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
struct bpf_prog_linfo {
void *raw_linfo;
void *raw_jited_linfo;
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 48a9c7c69ef1..f9ef37707888 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -215,7 +215,7 @@ struct pt_regs;
#define PT_REGS_PARM5(x) ((x)->regs[8])
#define PT_REGS_RET(x) ((x)->regs[31])
#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_RC(x) ((x)->regs[1])
+#define PT_REGS_RC(x) ((x)->regs[2])
#define PT_REGS_SP(x) ((x)->regs[29])
#define PT_REGS_IP(x) ((x)->cp0_epc)
@@ -226,7 +226,7 @@ struct pt_regs;
#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8])
#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31])
#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[1])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[2])
#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29])
#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc)
@@ -289,9 +289,9 @@ struct pt_regs;
#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
#else
#define BPF_KPROBE_READ_RET_IP(ip, ctx) \
- ({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \
- ({ bpf_probe_read(&(ip), sizeof(ip), \
+ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), \
(void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
#endif
@@ -413,4 +413,20 @@ typeof(name(0)) name(struct pt_regs *ctx) \
} \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
+/*
+ * BPF_SEQ_PRINTF wraps bpf_seq_printf(): it gathers the to-be-printed
+ * values into an on-stack array and passes them to the helper.
+ */
+#define BPF_SEQ_PRINTF(seq, fmt, args...) \
+ ({ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[] = { args }; \
+ _Pragma("GCC diagnostic pop") \
+ int ___ret = bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
+ ___param, sizeof(___param)); \
+ ___ret; \
+ })
+
#endif
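
A sketch of BPF_SEQ_PRINTF in a task iterator program (not part of the diff; assumes the bpf_iter__task context type from an up-to-date vmlinux.h):

	SEC("iter/task")
	int dump_task(struct bpf_iter__task *ctx)
	{
		struct seq_file *seq = ctx->meta->seq;
		struct task_struct *task = ctx->task;

		if (!task)
			return 0;

		BPF_SEQ_PRINTF(seq, "%8d %16s\n", task->pid, task->comm);
		return 0;
	}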
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index bfef3d606b54..231b07203e3d 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2018 Facebook */
+#include <byteswap.h>
#include <endian.h>
#include <stdio.h>
#include <stdlib.h>
@@ -21,26 +22,79 @@
#include "libbpf_internal.h"
#include "hashmap.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
#define BTF_MAX_NR_TYPES 0x7fffffffU
#define BTF_MAX_STR_OFFSET 0x7fffffffU
static struct btf_type btf_void;
struct btf {
- union {
- struct btf_header *hdr;
- void *data;
- };
- struct btf_type **types;
- const char *strings;
- void *nohdr_data;
+ /* raw BTF data in native endianness */
+ void *raw_data;
+ /* raw BTF data in non-native endianness */
+ void *raw_data_swapped;
+ __u32 raw_size;
+ /* whether target endianness differs from the native one */
+ bool swapped_endian;
+
+ /*
+ * When BTF is loaded from an ELF or raw memory it is stored
+ * in a contiguous memory block. The hdr, types_data, and strs_data
+ * point inside that memory region to their respective parts of BTF
+ * representation:
+ *
+ * +--------------------------------+
+ * | Header | Types | Strings |
+ * +--------------------------------+
+ * ^ ^ ^
+ * | | |
+ * hdr | |
+ * types_data-+ |
+ * strs_data------------+
+ *
+ * If BTF data is later modified, e.g., due to types being added or
+ * removed, BTF deduplication being performed, etc., this contiguous
+ * representation is broken up into three independently allocated
+ * memory regions to be able to modify them independently.
+ * raw_data is nulled out at that point, but can be later allocated
+ * and cached again if user calls btf__get_raw_data(), at which point
+ * raw_data will contain a contiguous copy of header, types, and
+ * strings:
+ *
+ * +----------+ +---------+ +-----------+
+ * | Header | | Types | | Strings |
+ * +----------+ +---------+ +-----------+
+ * ^ ^ ^
+ * | | |
+ * hdr | |
+ * types_data----+ |
+ * strs_data------------------+
+ *
+ * +----------+---------+-----------+
+ * | Header | Types | Strings |
+ * raw_data----->+----------+---------+-----------+
+ */
+ struct btf_header *hdr;
+
+ void *types_data;
+ size_t types_data_cap; /* used size stored in hdr->type_len */
+
+ /* type ID to `struct btf_type *` lookup index */
+ __u32 *type_offs;
+ size_t type_offs_cap;
__u32 nr_types;
- __u32 types_size;
- __u32 data_size;
+
+ void *strs_data;
+ size_t strs_data_cap; /* used size stored in hdr->str_len */
+
+ /* lookup index for each unique string in strings section */
+ struct hashmap *strs_hash;
+ /* whether strings are already deduplicated */
+ bool strs_deduped;
+ /* BTF object FD, if loaded into kernel */
int fd;
+
+ /* Pointer size (in bytes) for a target architecture of this BTF */
+ int ptr_sz;
};
static inline __u64 ptr_to_u64(const void *ptr)
@@ -48,60 +102,114 @@ static inline __u64 ptr_to_u64(const void *ptr)
return (__u64) (unsigned long) ptr;
}
-static int btf_add_type(struct btf *btf, struct btf_type *t)
+/* Ensure given dynamically allocated memory region pointed to by *data* with
+ * capacity of *cap_cnt* elements each taking *elem_sz* bytes has enough
+ * memory to accommodate *add_cnt* new elements, assuming *cur_cnt* elements
+ * are already used. At most *max_cnt* elements can ever be allocated.
+ * If necessary, memory is reallocated and all existing data is copied over;
+ * the new pointer to the memory region is stored at *data*, and the new
+ * capacity (in number of elements) is stored in *cap_cnt*.
+ * On success, memory pointer to the beginning of unused memory is returned.
+ * On error, NULL is returned.
+ */
+void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+ size_t cur_cnt, size_t max_cnt, size_t add_cnt)
{
- if (btf->types_size - btf->nr_types < 2) {
- struct btf_type **new_types;
- __u32 expand_by, new_size;
+ size_t new_cnt;
+ void *new_data;
- if (btf->types_size == BTF_MAX_NR_TYPES)
- return -E2BIG;
+ if (cur_cnt + add_cnt <= *cap_cnt)
+ return *data + cur_cnt * elem_sz;
- expand_by = max(btf->types_size >> 2, 16U);
- new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by);
+ /* requested more than the set limit */
+ if (cur_cnt + add_cnt > max_cnt)
+ return NULL;
- new_types = realloc(btf->types, sizeof(*new_types) * new_size);
- if (!new_types)
- return -ENOMEM;
+ new_cnt = *cap_cnt;
+ new_cnt += new_cnt / 4; /* expand by 25% */
+ if (new_cnt < 16) /* but at least 16 elements */
+ new_cnt = 16;
+ if (new_cnt > max_cnt) /* but not exceeding a set limit */
+ new_cnt = max_cnt;
+ if (new_cnt < cur_cnt + add_cnt) /* also ensure we have enough memory */
+ new_cnt = cur_cnt + add_cnt;
+
+ new_data = libbpf_reallocarray(*data, new_cnt, elem_sz);
+ if (!new_data)
+ return NULL;
- if (btf->nr_types == 0)
- new_types[0] = &btf_void;
+ /* zero out newly allocated portion of memory */
+ memset(new_data + (*cap_cnt) * elem_sz, 0, (new_cnt - *cap_cnt) * elem_sz);
- btf->types = new_types;
- btf->types_size = new_size;
- }
+ *data = new_data;
+ *cap_cnt = new_cnt;
+ return new_data + cur_cnt * elem_sz;
+}
+
+/* Ensure given dynamically allocated memory region has enough allocated space
+ * to accommodate *need_cnt* elements of size *elem_sz* bytes each
+ */
+int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt)
+{
+ void *p;
+
+ if (need_cnt <= *cap_cnt)
+ return 0;
+
+ p = btf_add_mem(data, cap_cnt, elem_sz, *cap_cnt, SIZE_MAX, need_cnt - *cap_cnt);
+ if (!p)
+ return -ENOMEM;
+
+ return 0;
+}
- btf->types[++(btf->nr_types)] = t;
+static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off)
+{
+ __u32 *p;
+ p = btf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32),
+ btf->nr_types + 1, BTF_MAX_NR_TYPES, 1);
+ if (!p)
+ return -ENOMEM;
+
+ *p = type_off;
return 0;
}
+static void btf_bswap_hdr(struct btf_header *h)
+{
+ h->magic = bswap_16(h->magic);
+ h->hdr_len = bswap_32(h->hdr_len);
+ h->type_off = bswap_32(h->type_off);
+ h->type_len = bswap_32(h->type_len);
+ h->str_off = bswap_32(h->str_off);
+ h->str_len = bswap_32(h->str_len);
+}
+
static int btf_parse_hdr(struct btf *btf)
{
- const struct btf_header *hdr = btf->hdr;
+ struct btf_header *hdr = btf->hdr;
__u32 meta_left;
- if (btf->data_size < sizeof(struct btf_header)) {
+ if (btf->raw_size < sizeof(struct btf_header)) {
pr_debug("BTF header not found\n");
return -EINVAL;
}
- if (hdr->magic != BTF_MAGIC) {
+ if (hdr->magic == bswap_16(BTF_MAGIC)) {
+ btf->swapped_endian = true;
+ if (bswap_32(hdr->hdr_len) != sizeof(struct btf_header)) {
+ pr_warn("Can't load BTF with non-native endianness due to unsupported header length %u\n",
+ bswap_32(hdr->hdr_len));
+ return -ENOTSUP;
+ }
+ btf_bswap_hdr(hdr);
+ } else if (hdr->magic != BTF_MAGIC) {
pr_debug("Invalid BTF magic:%x\n", hdr->magic);
return -EINVAL;
}
- if (hdr->version != BTF_VERSION) {
- pr_debug("Unsupported BTF version:%u\n", hdr->version);
- return -ENOTSUP;
- }
-
- if (hdr->flags) {
- pr_debug("Unsupported BTF flags:%x\n", hdr->flags);
- return -ENOTSUP;
- }
-
- meta_left = btf->data_size - sizeof(*hdr);
+ meta_left = btf->raw_size - sizeof(*hdr);
if (!meta_left) {
pr_debug("BTF has no data\n");
return -EINVAL;
@@ -127,15 +235,13 @@ static int btf_parse_hdr(struct btf *btf)
return -EINVAL;
}
- btf->nohdr_data = btf->hdr + 1;
-
return 0;
}
static int btf_parse_str_sec(struct btf *btf)
{
const struct btf_header *hdr = btf->hdr;
- const char *start = btf->nohdr_data + hdr->str_off;
+ const char *start = btf->strs_data;
const char *end = start + btf->hdr->str_len;
if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET ||
@@ -144,14 +250,12 @@ static int btf_parse_str_sec(struct btf *btf)
return -EINVAL;
}
- btf->strings = start;
-
return 0;
}
-static int btf_type_size(struct btf_type *t)
+static int btf_type_size(const struct btf_type *t)
{
- int base_size = sizeof(struct btf_type);
+ const int base_size = sizeof(struct btf_type);
__u16 vlen = btf_vlen(t);
switch (btf_kind(t)) {
@@ -184,25 +288,120 @@ static int btf_type_size(struct btf_type *t)
}
}
+static void btf_bswap_type_base(struct btf_type *t)
+{
+ t->name_off = bswap_32(t->name_off);
+ t->info = bswap_32(t->info);
+ t->type = bswap_32(t->type);
+}
+
+static int btf_bswap_type_rest(struct btf_type *t)
+{
+ struct btf_var_secinfo *v;
+ struct btf_member *m;
+ struct btf_array *a;
+ struct btf_param *p;
+ struct btf_enum *e;
+ __u16 vlen = btf_vlen(t);
+ int i;
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_FWD:
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ return 0;
+ case BTF_KIND_INT:
+ *(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1));
+ return 0;
+ case BTF_KIND_ENUM:
+ for (i = 0, e = btf_enum(t); i < vlen; i++, e++) {
+ e->name_off = bswap_32(e->name_off);
+ e->val = bswap_32(e->val);
+ }
+ return 0;
+ case BTF_KIND_ARRAY:
+ a = btf_array(t);
+ a->type = bswap_32(a->type);
+ a->index_type = bswap_32(a->index_type);
+ a->nelems = bswap_32(a->nelems);
+ return 0;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ for (i = 0, m = btf_members(t); i < vlen; i++, m++) {
+ m->name_off = bswap_32(m->name_off);
+ m->type = bswap_32(m->type);
+ m->offset = bswap_32(m->offset);
+ }
+ return 0;
+ case BTF_KIND_FUNC_PROTO:
+ for (i = 0, p = btf_params(t); i < vlen; i++, p++) {
+ p->name_off = bswap_32(p->name_off);
+ p->type = bswap_32(p->type);
+ }
+ return 0;
+ case BTF_KIND_VAR:
+ btf_var(t)->linkage = bswap_32(btf_var(t)->linkage);
+ return 0;
+ case BTF_KIND_DATASEC:
+ for (i = 0, v = btf_var_secinfos(t); i < vlen; i++, v++) {
+ v->type = bswap_32(v->type);
+ v->offset = bswap_32(v->offset);
+ v->size = bswap_32(v->size);
+ }
+ return 0;
+ default:
+ pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
+ return -EINVAL;
+ }
+}
+
static int btf_parse_type_sec(struct btf *btf)
{
struct btf_header *hdr = btf->hdr;
- void *nohdr_data = btf->nohdr_data;
- void *next_type = nohdr_data + hdr->type_off;
- void *end_type = nohdr_data + hdr->str_off;
+ void *next_type = btf->types_data;
+ void *end_type = next_type + hdr->type_len;
+ int err, i = 0, type_size;
+
+	/* VOID (type_id == 0) is specially handled by btf__type_by_id(),
+	 * so set its offset in the index to a large value to ensure it
+	 * can never be used directly
+ */
+ err = btf_add_type_idx_entry(btf, UINT_MAX);
+ if (err)
+ return err;
+
+ while (next_type + sizeof(struct btf_type) <= end_type) {
+ i++;
- while (next_type < end_type) {
- struct btf_type *t = next_type;
- int type_size;
- int err;
+ if (btf->swapped_endian)
+ btf_bswap_type_base(next_type);
- type_size = btf_type_size(t);
+ type_size = btf_type_size(next_type);
if (type_size < 0)
return type_size;
- next_type += type_size;
- err = btf_add_type(btf, t);
+ if (next_type + type_size > end_type) {
+ pr_warn("BTF type [%d] is malformed\n", i);
+ return -EINVAL;
+ }
+
+ if (btf->swapped_endian && btf_bswap_type_rest(next_type))
+ return -EINVAL;
+
+ err = btf_add_type_idx_entry(btf, next_type - btf->types_data);
if (err)
return err;
+
+ next_type += type_size;
+ btf->nr_types++;
+ }
+
+ if (next_type != end_type) {
+ pr_warn("BTF types data is malformed\n");
+ return -EINVAL;
}
return 0;
@@ -213,12 +412,116 @@ __u32 btf__get_nr_types(const struct btf *btf)
return btf->nr_types;
}
+/* internal helper returning non-const pointer to a type */
+static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
+{
+ if (type_id == 0)
+ return &btf_void;
+
+ return btf->types_data + btf->type_offs[type_id];
+}
+
const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
{
if (type_id > btf->nr_types)
return NULL;
+ return btf_type_by_id((struct btf *)btf, type_id);
+}
+
+static int determine_ptr_size(const struct btf *btf)
+{
+ const struct btf_type *t;
+ const char *name;
+ int i;
+
+ for (i = 1; i <= btf->nr_types; i++) {
+ t = btf__type_by_id(btf, i);
+ if (!btf_is_int(t))
+ continue;
+
+ name = btf__name_by_offset(btf, t->name_off);
+ if (!name)
+ continue;
+
+ if (strcmp(name, "long int") == 0 ||
+ strcmp(name, "long unsigned int") == 0) {
+ if (t->size != 4 && t->size != 8)
+ continue;
+ return t->size;
+ }
+ }
+
+ return -1;
+}
+
+static size_t btf_ptr_sz(const struct btf *btf)
+{
+ if (!btf->ptr_sz)
+ ((struct btf *)btf)->ptr_sz = determine_ptr_size(btf);
+ return btf->ptr_sz < 0 ? sizeof(void *) : btf->ptr_sz;
+}
+
+/* Return the pointer size this BTF instance assumes. The size is
+ * heuristically determined by looking for a 'long' or 'unsigned long'
+ * integer type and recording its size in bytes. If BTF type information
+ * doesn't have any such type, this function returns 0. In that case, the
+ * native architecture's pointer size is assumed internally, i.e., 4 or 8
+ * bytes, depending on the architecture libbpf was compiled for. The guessed
+ * value can be overridden with the btf__set_pointer_size() API.
+ */
+size_t btf__pointer_size(const struct btf *btf)
+{
+ if (!btf->ptr_sz)
+ ((struct btf *)btf)->ptr_sz = determine_ptr_size(btf);
+
+ if (btf->ptr_sz < 0)
+ /* not enough BTF type info to guess */
+ return 0;
+
+ return btf->ptr_sz;
+}
- return btf->types[type_id];
+/* Override or set pointer size in bytes. Only values of 4 and 8 are
+ * supported.
+ */
+int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)
+{
+ if (ptr_sz != 4 && ptr_sz != 8)
+ return -EINVAL;
+ btf->ptr_sz = ptr_sz;
+ return 0;
+}
+
+static bool is_host_big_endian(void)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ return false;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ return true;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+}
+
+enum btf_endianness btf__endianness(const struct btf *btf)
+{
+ if (is_host_big_endian())
+ return btf->swapped_endian ? BTF_LITTLE_ENDIAN : BTF_BIG_ENDIAN;
+ else
+ return btf->swapped_endian ? BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN;
+}
+
+int btf__set_endianness(struct btf *btf, enum btf_endianness endian)
+{
+ if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN)
+ return -EINVAL;
+
+ btf->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN);
+ if (!btf->swapped_endian) {
+ free(btf->raw_data_swapped);
+ btf->raw_data_swapped = NULL;
+ }
+ return 0;
}
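
Together, btf__set_endianness() and btf__get_raw_data() can emit raw BTF for a foreign-endian target; a sketch (not part of the diff; assumes a valid btf object):

	const void *raw;
	__u32 size;

	/* on a little-endian host, request big-endian output */
	if (btf__set_endianness(btf, BTF_BIG_ENDIAN))
		return -EINVAL;

	raw = btf__get_raw_data(btf, &size);	/* byte-swapped copy is cached */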
static bool btf_type_is_void(const struct btf_type *t)
@@ -253,7 +556,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
size = t->size;
goto done;
case BTF_KIND_PTR:
- size = sizeof(void *);
+ size = btf_ptr_sz(btf);
goto done;
case BTF_KIND_TYPEDEF:
case BTF_KIND_VOLATILE:
@@ -293,9 +596,9 @@ int btf__align_of(const struct btf *btf, __u32 id)
switch (kind) {
case BTF_KIND_INT:
case BTF_KIND_ENUM:
- return min(sizeof(void *), (size_t)t->size);
+ return min(btf_ptr_sz(btf), (size_t)t->size);
case BTF_KIND_PTR:
- return sizeof(void *);
+ return btf_ptr_sz(btf);
case BTF_KIND_TYPEDEF:
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
@@ -352,7 +655,7 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
return 0;
for (i = 1; i <= btf->nr_types; i++) {
- const struct btf_type *t = btf->types[i];
+ const struct btf_type *t = btf__type_by_id(btf, i);
const char *name = btf__name_by_offset(btf, t->name_off);
if (name && !strcmp(type_name, name))
@@ -371,7 +674,7 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
return 0;
for (i = 1; i <= btf->nr_types; i++) {
- const struct btf_type *t = btf->types[i];
+ const struct btf_type *t = btf__type_by_id(btf, i);
const char *name;
if (btf_kind(t) != kind)
@@ -384,48 +687,99 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
return -ENOENT;
}
+static bool btf_is_modifiable(const struct btf *btf)
+{
+ return (void *)btf->hdr != btf->raw_data;
+}
+
void btf__free(struct btf *btf)
{
- if (!btf)
+ if (IS_ERR_OR_NULL(btf))
return;
- if (btf->fd != -1)
+ if (btf->fd >= 0)
close(btf->fd);
- free(btf->data);
- free(btf->types);
+ if (btf_is_modifiable(btf)) {
+ /* if BTF was modified after loading, it will have a split
+ * in-memory representation for header, types, and strings
+ * sections, so we need to free all of them individually. It
+ * might still have a cached contiguous raw data present,
+ * which will be unconditionally freed below.
+ */
+ free(btf->hdr);
+ free(btf->types_data);
+ free(btf->strs_data);
+ }
+ free(btf->raw_data);
+ free(btf->raw_data_swapped);
+ free(btf->type_offs);
free(btf);
}
-struct btf *btf__new(__u8 *data, __u32 size)
+struct btf *btf__new_empty(void)
{
struct btf *btf;
- int err;
- btf = calloc(1, sizeof(struct btf));
+ btf = calloc(1, sizeof(*btf));
if (!btf)
return ERR_PTR(-ENOMEM);
btf->fd = -1;
+ btf->ptr_sz = sizeof(void *);
+ btf->swapped_endian = false;
+
+ /* +1 for empty string at offset 0 */
+ btf->raw_size = sizeof(struct btf_header) + 1;
+ btf->raw_data = calloc(1, btf->raw_size);
+ if (!btf->raw_data) {
+ free(btf);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ btf->hdr = btf->raw_data;
+ btf->hdr->hdr_len = sizeof(struct btf_header);
+ btf->hdr->magic = BTF_MAGIC;
+ btf->hdr->version = BTF_VERSION;
+
+ btf->types_data = btf->raw_data + btf->hdr->hdr_len;
+ btf->strs_data = btf->raw_data + btf->hdr->hdr_len;
+ btf->hdr->str_len = 1; /* empty string at offset 0 */
+
+ return btf;
+}
+
+struct btf *btf__new(const void *data, __u32 size)
+{
+ struct btf *btf;
+ int err;
- btf->data = malloc(size);
- if (!btf->data) {
+ btf = calloc(1, sizeof(struct btf));
+ if (!btf)
+ return ERR_PTR(-ENOMEM);
+
+ btf->raw_data = malloc(size);
+ if (!btf->raw_data) {
err = -ENOMEM;
goto done;
}
+ memcpy(btf->raw_data, data, size);
+ btf->raw_size = size;
- memcpy(btf->data, data, size);
- btf->data_size = size;
-
+ btf->hdr = btf->raw_data;
err = btf_parse_hdr(btf);
if (err)
goto done;
+ btf->strs_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->str_off;
+ btf->types_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->type_off;
+
err = btf_parse_str_sec(btf);
+ err = err ?: btf_parse_type_sec(btf);
if (err)
goto done;
- err = btf_parse_type_sec(btf);
+ btf->fd = -1;
done:
if (err) {
@@ -436,17 +790,6 @@ done:
return btf;
}
-static bool btf_check_endianness(const GElf_Ehdr *ehdr)
-{
-#if __BYTE_ORDER == __LITTLE_ENDIAN
- return ehdr->e_ident[EI_DATA] == ELFDATA2LSB;
-#elif __BYTE_ORDER == __BIG_ENDIAN
- return ehdr->e_ident[EI_DATA] == ELFDATA2MSB;
-#else
-# error "Unrecognized __BYTE_ORDER__"
-#endif
-}
-
struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
{
Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
@@ -479,10 +822,6 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
pr_warn("failed to get EHDR from %s\n", path);
goto done;
}
- if (!btf_check_endianness(&ehdr)) {
- pr_warn("non-native ELF endianness is not supported\n");
- goto done;
- }
if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
pr_warn("failed to get e_shstrndx from %s\n", path);
goto done;
@@ -533,6 +872,18 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
if (IS_ERR(btf))
goto done;
+ switch (gelf_getclass(elf)) {
+ case ELFCLASS32:
+ btf__set_pointer_size(btf, 4);
+ break;
+ case ELFCLASS64:
+ btf__set_pointer_size(btf, 8);
+ break;
+ default:
+ pr_warn("failed to get ELF class (bitness) for %s\n", path);
+ break;
+ }
+
if (btf_ext && btf_ext_data) {
*btf_ext = btf_ext__new(btf_ext_data->d_buf,
btf_ext_data->d_size);
@@ -562,6 +913,83 @@ done:
return btf;
}
+struct btf *btf__parse_raw(const char *path)
+{
+ struct btf *btf = NULL;
+ void *data = NULL;
+ FILE *f = NULL;
+ __u16 magic;
+ int err = 0;
+ long sz;
+
+ f = fopen(path, "rb");
+ if (!f) {
+ err = -errno;
+ goto err_out;
+ }
+
+ /* check BTF magic */
+ if (fread(&magic, 1, sizeof(magic), f) < sizeof(magic)) {
+ err = -EIO;
+ goto err_out;
+ }
+ if (magic != BTF_MAGIC && magic != bswap_16(BTF_MAGIC)) {
+ /* definitely not a raw BTF */
+ err = -EPROTO;
+ goto err_out;
+ }
+
+ /* get file size */
+ if (fseek(f, 0, SEEK_END)) {
+ err = -errno;
+ goto err_out;
+ }
+ sz = ftell(f);
+ if (sz < 0) {
+ err = -errno;
+ goto err_out;
+ }
+ /* rewind to the start */
+ if (fseek(f, 0, SEEK_SET)) {
+ err = -errno;
+ goto err_out;
+ }
+
+ /* pre-alloc memory and read all of BTF data */
+ data = malloc(sz);
+ if (!data) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ if (fread(data, 1, sz, f) < sz) {
+ err = -EIO;
+ goto err_out;
+ }
+
+ /* finally parse BTF data */
+ btf = btf__new(data, sz);
+
+err_out:
+ free(data);
+ if (f)
+ fclose(f);
+ return err ? ERR_PTR(err) : btf;
+}
+
+struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
+{
+ struct btf *btf;
+
+ if (btf_ext)
+ *btf_ext = NULL;
+
+ btf = btf__parse_raw(path);
+ if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO)
+ return btf;
+
+ return btf__parse_elf(path, btf_ext);
+}
+
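+
With the -EPROTO convention above, btf__parse() transparently handles both raw and ELF-embedded BTF; a usage sketch (not part of the diff; uses libbpf's existing libbpf_get_error()):

	struct btf *btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);

	if (libbpf_get_error(btf))
		return -EINVAL;	/* neither raw BTF nor an ELF with .BTF */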
static int compare_vsi_off(const void *_a, const void *_b)
{
const struct btf_var_secinfo *a = _a;
@@ -638,7 +1066,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
__u32 i;
for (i = 1; i <= btf->nr_types; i++) {
- struct btf_type *t = btf->types[i];
+ struct btf_type *t = btf_type_by_id(btf, i);
/* Loader needs to fix up some of the things compiler
* couldn't get its hands on while emitting BTF. This
@@ -655,10 +1083,13 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
return err;
}
+static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
+
int btf__load(struct btf *btf)
{
- __u32 log_buf_size = 0;
+ __u32 log_buf_size = 0, raw_size;
char *log_buf = NULL;
+ void *raw_data;
int err = 0;
if (btf->fd >= 0)
@@ -673,8 +1104,16 @@ retry_load:
*log_buf = 0;
}
- btf->fd = bpf_load_btf(btf->data, btf->data_size,
- log_buf, log_buf_size, false);
+ raw_data = btf_get_raw_data(btf, &raw_size, false);
+ if (!raw_data) {
+ err = -ENOMEM;
+ goto done;
+ }
+ /* cache native raw data representation */
+ btf->raw_size = raw_size;
+ btf->raw_data = raw_data;
+
+ btf->fd = bpf_load_btf(raw_data, raw_size, log_buf, log_buf_size, false);
if (btf->fd < 0) {
if (!log_buf || errno == ENOSPC) {
log_buf_size = max((__u32)BPF_LOG_BUF_SIZE,
@@ -700,20 +1139,93 @@ int btf__fd(const struct btf *btf)
return btf->fd;
}
-const void *btf__get_raw_data(const struct btf *btf, __u32 *size)
+void btf__set_fd(struct btf *btf, int fd)
{
- *size = btf->data_size;
- return btf->data;
+ btf->fd = fd;
}
-const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
+static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian)
+{
+ struct btf_header *hdr = btf->hdr;
+ struct btf_type *t;
+ void *data, *p;
+ __u32 data_sz;
+ int i;
+
+ data = swap_endian ? btf->raw_data_swapped : btf->raw_data;
+ if (data) {
+ *size = btf->raw_size;
+ return data;
+ }
+
+ data_sz = hdr->hdr_len + hdr->type_len + hdr->str_len;
+ data = calloc(1, data_sz);
+ if (!data)
+ return NULL;
+ p = data;
+
+ memcpy(p, hdr, hdr->hdr_len);
+ if (swap_endian)
+ btf_bswap_hdr(p);
+ p += hdr->hdr_len;
+
+ memcpy(p, btf->types_data, hdr->type_len);
+ if (swap_endian) {
+ for (i = 1; i <= btf->nr_types; i++) {
+ t = p + btf->type_offs[i];
+ /* btf_bswap_type_rest() relies on native t->info, so
+ * we swap base type info after we swapped all the
+ * additional information
+ */
+ if (btf_bswap_type_rest(t))
+ goto err_out;
+ btf_bswap_type_base(t);
+ }
+ }
+ p += hdr->type_len;
+
+ memcpy(p, btf->strs_data, hdr->str_len);
+ p += hdr->str_len;
+
+ *size = data_sz;
+ return data;
+err_out:
+ free(data);
+ return NULL;
+}
+
+const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
+{
+ struct btf *btf = (struct btf *)btf_ro;
+ __u32 data_sz;
+ void *data;
+
+ data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian);
+ if (!data)
+ return NULL;
+
+ btf->raw_size = data_sz;
+ if (btf->swapped_endian)
+ btf->raw_data_swapped = data;
+ else
+ btf->raw_data = data;
+ *size = data_sz;
+ return data;
+}
+
+const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
{
if (offset < btf->hdr->str_len)
- return &btf->strings[offset];
+ return btf->strs_data + offset;
else
return NULL;
}
+const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
+{
+ return btf__str_by_offset(btf, offset);
+}
+
int btf__get_from_id(__u32 id, struct btf **btf)
{
struct bpf_btf_info btf_info = { 0 };
@@ -849,6 +1361,970 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
return 0;
}
+static size_t strs_hash_fn(const void *key, void *ctx)
+{
+ struct btf *btf = ctx;
+ const char *str = btf->strs_data + (long)key;
+
+ return str_hash(str);
+}
+
+static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx)
+{
+ struct btf *btf = ctx;
+ const char *str1 = btf->strs_data + (long)key1;
+ const char *str2 = btf->strs_data + (long)key2;
+
+ return strcmp(str1, str2) == 0;
+}
+
+static void btf_invalidate_raw_data(struct btf *btf)
+{
+ if (btf->raw_data) {
+ free(btf->raw_data);
+ btf->raw_data = NULL;
+ }
+ if (btf->raw_data_swapped) {
+ free(btf->raw_data_swapped);
+ btf->raw_data_swapped = NULL;
+ }
+}
+
+/* Ensure BTF is ready to be modified (by splitting into three memory
+ * regions for header, types, and strings). Also invalidate cached
+ * raw_data, if any.
+ */
+static int btf_ensure_modifiable(struct btf *btf)
+{
+ void *hdr, *types, *strs, *strs_end, *s;
+ struct hashmap *hash = NULL;
+ long off;
+ int err;
+
+ if (btf_is_modifiable(btf)) {
+ /* any BTF modification invalidates raw_data */
+ btf_invalidate_raw_data(btf);
+ return 0;
+ }
+
+ /* split raw data into three memory regions */
+ hdr = malloc(btf->hdr->hdr_len);
+ types = malloc(btf->hdr->type_len);
+ strs = malloc(btf->hdr->str_len);
+ if (!hdr || !types || !strs)
+ goto err_out;
+
+ memcpy(hdr, btf->hdr, btf->hdr->hdr_len);
+ memcpy(types, btf->types_data, btf->hdr->type_len);
+ memcpy(strs, btf->strs_data, btf->hdr->str_len);
+
+ /* build lookup index for all strings */
+ hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf);
+ if (IS_ERR(hash)) {
+ err = PTR_ERR(hash);
+ hash = NULL;
+ goto err_out;
+ }
+
+ strs_end = strs + btf->hdr->str_len;
+ for (off = 0, s = strs; s < strs_end; off += strlen(s) + 1, s = strs + off) {
+ /* hashmap__add() returns EEXIST if string with the same
+ * content already is in the hash map
+ */
+ err = hashmap__add(hash, (void *)off, (void *)off);
+ if (err == -EEXIST)
+ continue; /* duplicate */
+ if (err)
+ goto err_out;
+ }
+
+ /* only when everything was successful, update internal state */
+ btf->hdr = hdr;
+ btf->types_data = types;
+ btf->types_data_cap = btf->hdr->type_len;
+ btf->strs_data = strs;
+ btf->strs_data_cap = btf->hdr->str_len;
+ btf->strs_hash = hash;
+ /* if BTF was created from scratch, all strings are guaranteed to be
+ * unique and deduplicated
+ */
+ btf->strs_deduped = btf->hdr->str_len <= 1;
+
+ /* invalidate raw_data representation */
+ btf_invalidate_raw_data(btf);
+
+ return 0;
+
+err_out:
+ hashmap__free(hash);
+ free(hdr);
+ free(types);
+ free(strs);
+ return -ENOMEM;
+}
+
+static void *btf_add_str_mem(struct btf *btf, size_t add_sz)
+{
+ return btf_add_mem(&btf->strs_data, &btf->strs_data_cap, 1,
+ btf->hdr->str_len, BTF_MAX_STR_OFFSET, add_sz);
+}
+
+/* Find an offset in BTF string section that corresponds to a given string *s*.
+ * Returns:
+ * - >0 offset into string section, if string is found;
+ * - -ENOENT, if string is not in the string section;
+ * - <0, on any other error.
+ */
+int btf__find_str(struct btf *btf, const char *s)
+{
+ long old_off, new_off, len;
+ void *p;
+
+ /* BTF needs to be in a modifiable state to build string lookup index */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ /* see btf__add_str() for why we do this */
+ len = strlen(s) + 1;
+ p = btf_add_str_mem(btf, len);
+ if (!p)
+ return -ENOMEM;
+
+ new_off = btf->hdr->str_len;
+ memcpy(p, s, len);
+
+ if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off))
+ return old_off;
+
+ return -ENOENT;
+}
+
+/* Add a string s to the BTF string section.
+ * Returns:
+ * - > 0 offset into string section, on success;
+ * - < 0, on error.
+ */
+int btf__add_str(struct btf *btf, const char *s)
+{
+ long old_off, new_off, len;
+ void *p;
+ int err;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ /* Hashmap keys are always offsets within btf->strs_data, so to even
+ * look up some string from the "outside", we need to first append it
+ * at the end, so that it can be addressed with an offset. Luckily,
+ * until btf->hdr->str_len is incremented, that string is just a piece
+ * of garbage for the rest of BTF code, so no harm, no foul. On the
+ * other hand, if the string is unique, it's already appended and
+ * ready to be used, only a simple btf->hdr->str_len increment away.
+ */
+ len = strlen(s) + 1;
+ p = btf_add_str_mem(btf, len);
+ if (!p)
+ return -ENOMEM;
+
+ new_off = btf->hdr->str_len;
+ memcpy(p, s, len);
+
+ /* Now attempt to add the string, but only if the string with the same
+ * contents doesn't exist already (HASHMAP_ADD strategy). If such
+ * string exists, we'll get its offset in old_off (that's old_key).
+ */
+ err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off,
+ HASHMAP_ADD, (const void **)&old_off, NULL);
+ if (err == -EEXIST)
+ return old_off; /* duplicated string, return existing offset */
+ if (err)
+ return err;
+
+ btf->hdr->str_len += len; /* new unique string, adjust data length */
+ return new_off;
+}
+
+static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
+{
+ return btf_add_mem(&btf->types_data, &btf->types_data_cap, 1,
+ btf->hdr->type_len, UINT_MAX, add_sz);
+}
+
+static __u32 btf_type_info(int kind, int vlen, int kflag)
+{
+ return (kflag << 31) | (kind << 24) | vlen;
+}
+
+static void btf_type_inc_vlen(struct btf_type *t)
+{
+ t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t));
+}
+
+/*
+ * Append new BTF_KIND_INT type with:
+ * - *name* - non-empty, non-NULL type name;
+ * - *byte_sz* - power-of-2 (1, 2, 4, 8, or 16) size of the type, in bytes;
+ * - encoding is a combination of BTF_INT_SIGNED, BTF_INT_CHAR, BTF_INT_BOOL.
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding)
+{
+ struct btf_type *t;
+ int sz, err, name_off;
+
+ /* non-empty name */
+ if (!name || !name[0])
+ return -EINVAL;
+ /* byte_sz must be power of 2 */
+ if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16)
+ return -EINVAL;
+ if (encoding & ~(BTF_INT_SIGNED | BTF_INT_CHAR | BTF_INT_BOOL))
+ return -EINVAL;
+
+ /* deconstruct BTF, if necessary, and invalidate raw_data */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type) + sizeof(int);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ /* if something goes wrong later, we might end up with an extra string,
+ * but that shouldn't be a problem, because BTF can't be constructed
+ * completely anyway and will most probably be just discarded
+ */
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+
+ t->name_off = name_off;
+ t->info = btf_type_info(BTF_KIND_INT, 0, 0);
+ t->size = byte_sz;
+ /* set INT info, we don't allow setting legacy bit offset/size */
+ *(__u32 *)(t + 1) = (encoding << 24) | (byte_sz * 8);
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
+
+/* It's completely legal to append BTF types with type IDs pointing forward to
+ * types that haven't been appended yet, so we only make sure that the id looks
+ * sane; we can't guarantee that the ID will always be valid.
+ */
+static int validate_type_id(int id)
+{
+ if (id < 0 || id > BTF_MAX_NR_TYPES)
+ return -EINVAL;
+ return 0;
+}
+
+/* generic append function for PTR, TYPEDEF, CONST/VOLATILE/RESTRICT */
+static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id)
+{
+ struct btf_type *t;
+ int sz, name_off = 0, err;
+
+ if (validate_type_id(ref_type_id))
+ return -EINVAL;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ if (name && name[0]) {
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+ }
+
+ t->name_off = name_off;
+ t->info = btf_type_info(kind, 0, 0);
+ t->type = ref_type_id;
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
+
+/*
+ * Append new BTF_KIND_PTR type with:
+ * - *ref_type_id* - referenced type ID, which might not exist yet;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_ptr(struct btf *btf, int ref_type_id)
+{
+ return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_ARRAY type with:
+ * - *index_type_id* - type ID of the type describing array index;
+ * - *elem_type_id* - type ID of the type describing array element;
+ * - *nr_elems* - number of elements in the array;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 nr_elems)
+{
+ struct btf_type *t;
+ struct btf_array *a;
+ int sz, err;
+
+ if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
+ return -EINVAL;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type) + sizeof(struct btf_array);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ t->name_off = 0;
+ t->info = btf_type_info(BTF_KIND_ARRAY, 0, 0);
+ t->size = 0;
+
+ a = btf_array(t);
+ a->type = elem_type_id;
+ a->index_type = index_type_id;
+ a->nelems = nr_elems;
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
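Combining the reference-kind and array helpers above, a sketch of describing the C type "int *[16]" (hypothetical helper; error checks omitted for brevity):

	static int add_ptr_array(struct btf *btf)
	{
		int int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
		int ptr_id = btf__add_ptr(btf, int_id);		/* int *     */

		/* array of 16 elements of type "int *", indexed by "int" */
		return btf__add_array(btf, int_id, ptr_id, 16);	/* int *[16] */
	}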
+
+/* generic STRUCT/UNION append function */
+static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 bytes_sz)
+{
+ struct btf_type *t;
+ int sz, err, name_off = 0;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ if (name && name[0]) {
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+ }
+
+ /* start out with vlen=0 and no kflag; this will be adjusted when
+ * adding each member
+ */
+ t->name_off = name_off;
+ t->info = btf_type_info(kind, 0, 0);
+ t->size = bytes_sz;
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
+
+/*
+ * Append new BTF_KIND_STRUCT type with:
+ * - *name* - name of the struct, can be NULL or empty for anonymous structs;
+ * - *byte_sz* - size of the struct, in bytes;
+ *
+ * Struct initially has no fields in it. Fields can be added by
+ * btf__add_field() right after btf__add_struct() succeeds.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_struct(struct btf *btf, const char *name, __u32 byte_sz)
+{
+ return btf_add_composite(btf, BTF_KIND_STRUCT, name, byte_sz);
+}
+
+/*
+ * Append new BTF_KIND_UNION type with:
+ * - *name* - name of the union, can be NULL or empty for anonymous unions;
+ * - *byte_sz* - size of the union, in bytes;
+ *
+ * Union initially has no fields in it. Fields can be added by
+ * btf__add_field() right after btf__add_union() succeeds. All fields
+ * should have *bit_offset* of 0.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz)
+{
+ return btf_add_composite(btf, BTF_KIND_UNION, name, byte_sz);
+}
+
+/*
+ * Append new field for the current STRUCT/UNION type with:
+ * - *name* - name of the field, can be NULL or empty for an anonymous field;
+ * - *type_id* - type ID for the type describing field type;
+ * - *bit_offset* - bit offset of the start of the field within struct/union;
+ * - *bit_size* - bit size of a bitfield, 0 for non-bitfield fields;
+ * Returns:
+ * - 0, on success;
+ * - <0, on error.
+ */
+int btf__add_field(struct btf *btf, const char *name, int type_id,
+ __u32 bit_offset, __u32 bit_size)
+{
+ struct btf_type *t;
+ struct btf_member *m;
+ bool is_bitfield;
+ int sz, name_off = 0;
+
+ /* last type should be union/struct */
+ if (btf->nr_types == 0)
+ return -EINVAL;
+ t = btf_type_by_id(btf, btf->nr_types);
+ if (!btf_is_composite(t))
+ return -EINVAL;
+
+ if (validate_type_id(type_id))
+ return -EINVAL;
+ /* best-effort bit field offset/size enforcement */
+ is_bitfield = bit_size || (bit_offset % 8 != 0);
+ if (is_bitfield && (bit_size == 0 || bit_size > 255 || bit_offset > 0xffffff))
+ return -EINVAL;
+
+ /* only offset 0 is allowed for unions */
+ if (btf_is_union(t) && bit_offset)
+ return -EINVAL;
+
+ /* decompose and invalidate raw data */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_member);
+ m = btf_add_type_mem(btf, sz);
+ if (!m)
+ return -ENOMEM;
+
+ if (name && name[0]) {
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+ }
+
+ m->name_off = name_off;
+ m->type = type_id;
+ m->offset = bit_offset | (bit_size << 24);
+
+ /* btf_add_type_mem can invalidate t pointer */
+ t = btf_type_by_id(btf, btf->nr_types);
+ /* update parent type's vlen and kflag */
+ t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t));
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ return 0;
+}
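As a sketch, building BTF for "struct pair { int x; int y; };" with the struct/field APIs above (bit offsets are 0 and 32; a bit_size of 0 means the members are not bitfields; hypothetical helper, error checks omitted):

	static int add_struct_pair(struct btf *btf)
	{
		int int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
		int pair_id = btf__add_struct(btf, "pair", 8);	/* 8 bytes total */

		btf__add_field(btf, "x", int_id, 0, 0);
		btf__add_field(btf, "y", int_id, 32, 0);

		return pair_id;
	}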
+
+/*
+ * Append new BTF_KIND_ENUM type with:
+ * - *name* - name of the enum, can be NULL or empty for anonymous enums;
+ * - *byte_sz* - size of the enum, in bytes.
+ *
+ * Enum initially has no enum values in it (and corresponds to enum forward
+ * declaration). Enumerator values can be added by btf__add_enum_value()
+ * immediately after btf__add_enum() succeeds.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
+{
+ struct btf_type *t;
+ int sz, err, name_off = 0;
+
+ /* byte_sz must be power of 2 */
+ if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
+ return -EINVAL;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ if (name && name[0]) {
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+ }
+
+ /* start out with vlen=0; it will be adjusted when adding enum values */
+ t->name_off = name_off;
+ t->info = btf_type_info(BTF_KIND_ENUM, 0, 0);
+ t->size = byte_sz;
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
+
+/*
+ * Append new enum value for the current ENUM type with:
+ * - *name* - name of the enumerator value, can't be NULL or empty;
+ * - *value* - integer value corresponding to enum value *name*;
+ * Returns:
+ * - 0, on success;
+ * - <0, on error.
+ */
+int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
+{
+ struct btf_type *t;
+ struct btf_enum *v;
+ int sz, name_off;
+
+ /* last type should be BTF_KIND_ENUM */
+ if (btf->nr_types == 0)
+ return -EINVAL;
+ t = btf_type_by_id(btf, btf->nr_types);
+ if (!btf_is_enum(t))
+ return -EINVAL;
+
+ /* non-empty name */
+ if (!name || !name[0])
+ return -EINVAL;
+ if (value < INT_MIN || value > UINT_MAX)
+ return -E2BIG;
+
+ /* decompose and invalidate raw data */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_enum);
+ v = btf_add_type_mem(btf, sz);
+ if (!v)
+ return -ENOMEM;
+
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+
+ v->name_off = name_off;
+ v->val = value;
+
+ /* update parent type's vlen */
+ t = btf_type_by_id(btf, btf->nr_types);
+ btf_type_inc_vlen(t);
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ return 0;
+}
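A sketch of describing "enum state { STOPPED = 0, RUNNING = 1 };" with the enum APIs above (hypothetical helper, error checks omitted):

	static int add_enum_state(struct btf *btf)
	{
		int enum_id = btf__add_enum(btf, "state", 4);	/* 4-byte enum */

		btf__add_enum_value(btf, "STOPPED", 0);
		btf__add_enum_value(btf, "RUNNING", 1);

		return enum_id;
	}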
+
+/*
+ * Append new BTF_KIND_FWD type with:
+ * - *name*, non-empty/non-NULL name;
+ * - *fwd_kind*, kind of forward declaration, one of BTF_FWD_STRUCT,
+ * BTF_FWD_UNION, or BTF_FWD_ENUM;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
+{
+ if (!name || !name[0])
+ return -EINVAL;
+
+ switch (fwd_kind) {
+ case BTF_FWD_STRUCT:
+ case BTF_FWD_UNION: {
+ struct btf_type *t;
+ int id;
+
+ id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0);
+ if (id <= 0)
+ return id;
+ t = btf_type_by_id(btf, id);
+ t->info = btf_type_info(BTF_KIND_FWD, 0, fwd_kind == BTF_FWD_UNION);
+ return id;
+ }
+ case BTF_FWD_ENUM:
+ /* an enum forward in BTF is currently just an enum with no enum
+ * values; we also assume a standard 4-byte size for it
+ */
+ return btf__add_enum(btf, name, sizeof(int));
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Append new BTF_KIND_TYPEDEF type with:
+ * - *name*, non-empty/non-NULL name;
+ * - *ref_type_id* - referenced type ID, which might not exist yet;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
+{
+ if (!name || !name[0])
+ return -EINVAL;
+
+ return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id);
+}
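Forward declarations and typedefs compose naturally with the reference kinds; a sketch for "struct task_struct; typedef struct task_struct *task_ptr_t;" (hypothetical helper, error checks omitted):

	static int add_task_ptr_typedef(struct btf *btf)
	{
		int fwd_id = btf__add_fwd(btf, "task_struct", BTF_FWD_STRUCT);
		int ptr_id = btf__add_ptr(btf, fwd_id);

		return btf__add_typedef(btf, "task_ptr_t", ptr_id);
	}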
+
+/*
+ * Append new BTF_KIND_VOLATILE type with:
+ * - *ref_type_id* - referenced type ID, which might not exist yet;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_volatile(struct btf *btf, int ref_type_id)
+{
+ return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_CONST type with:
+ * - *ref_type_id* - referenced type ID, which might not exist yet;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_const(struct btf *btf, int ref_type_id)
+{
+ return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_RESTRICT type with:
+ * - *ref_type_id* - referenced type ID, which might not exist yet;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_restrict(struct btf *btf, int ref_type_id)
+{
+ return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_FUNC type with:
+ * - *name*, non-empty/non-NULL name;
+ * - *proto_type_id* - FUNC_PROTO's type ID, which might not exist yet;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_func(struct btf *btf, const char *name,
+ enum btf_func_linkage linkage, int proto_type_id)
+{
+ int id;
+
+ if (!name || !name[0])
+ return -EINVAL;
+ if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL &&
+ linkage != BTF_FUNC_EXTERN)
+ return -EINVAL;
+
+ id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id);
+ if (id > 0) {
+ struct btf_type *t = btf_type_by_id(btf, id);
+
+ t->info = btf_type_info(BTF_KIND_FUNC, linkage, 0);
+ }
+ return id;
+}
+
+/*
+ * Append new BTF_KIND_FUNC_PROTO with:
+ * - *ret_type_id* - type ID for return result of a function.
+ *
+ * Function prototype initially has no arguments, but they can be added by
+ * btf__add_func_param() one by one, immediately after
+ * btf__add_func_proto() succeeds.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_func_proto(struct btf *btf, int ret_type_id)
+{
+ struct btf_type *t;
+ int sz, err;
+
+ if (validate_type_id(ret_type_id))
+ return -EINVAL;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ /* start out with vlen=0; this will be adjusted when adding function
+ * parameters, if necessary
+ */
+ t->name_off = 0;
+ t->info = btf_type_info(BTF_KIND_FUNC_PROTO, 0, 0);
+ t->type = ret_type_id;
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
+
+/*
+ * Append new function parameter for current FUNC_PROTO type with:
+ * - *name* - parameter name, can be NULL or empty;
+ * - *type_id* - type ID describing the type of the parameter.
+ * Returns:
+ * - 0, on success;
+ * - <0, on error.
+ */
+int btf__add_func_param(struct btf *btf, const char *name, int type_id)
+{
+ struct btf_type *t;
+ struct btf_param *p;
+ int sz, name_off = 0;
+
+ if (validate_type_id(type_id))
+ return -EINVAL;
+
+ /* last type should be BTF_KIND_FUNC_PROTO */
+ if (btf->nr_types == 0)
+ return -EINVAL;
+ t = btf_type_by_id(btf, btf->nr_types);
+ if (!btf_is_func_proto(t))
+ return -EINVAL;
+
+ /* decompose and invalidate raw data */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_param);
+ p = btf_add_type_mem(btf, sz);
+ if (!p)
+ return -ENOMEM;
+
+ if (name && name[0]) {
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+ }
+
+ p->name_off = name_off;
+ p->type = type_id;
+
+ /* update parent type's vlen */
+ t = btf_type_by_id(btf, btf->nr_types);
+ btf_type_inc_vlen(t);
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ return 0;
+}
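Putting the FUNC_PROTO/FUNC pieces together, a sketch for "int sum(int a, int b);" with global linkage (parameters must be added while the FUNC_PROTO is still the last type; hypothetical helper, error checks omitted):

	static int add_sum_func(struct btf *btf)
	{
		int int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
		int proto_id = btf__add_func_proto(btf, int_id);

		btf__add_func_param(btf, "a", int_id);
		btf__add_func_param(btf, "b", int_id);

		return btf__add_func(btf, "sum", BTF_FUNC_GLOBAL, proto_id);
	}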
+
+/*
+ * Append new BTF_KIND_VAR type with:
+ * - *name* - non-empty/non-NULL name;
+ * - *linkage* - variable linkage, one of BTF_VAR_STATIC,
+ * BTF_VAR_GLOBAL_ALLOCATED, or BTF_VAR_GLOBAL_EXTERN;
+ * - *type_id* - type ID of the type describing the type of the variable.
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
+{
+ struct btf_type *t;
+ struct btf_var *v;
+ int sz, err, name_off;
+
+ /* non-empty name */
+ if (!name || !name[0])
+ return -EINVAL;
+ if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED &&
+ linkage != BTF_VAR_GLOBAL_EXTERN)
+ return -EINVAL;
+ if (validate_type_id(type_id))
+ return -EINVAL;
+
+ /* deconstruct BTF, if necessary, and invalidate raw_data */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type) + sizeof(struct btf_var);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+
+ t->name_off = name_off;
+ t->info = btf_type_info(BTF_KIND_VAR, 0, 0);
+ t->type = type_id;
+
+ v = btf_var(t);
+ v->linkage = linkage;
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
+
+/*
+ * Append new BTF_KIND_DATASEC type with:
+ * - *name* - non-empty/non-NULL name;
+ * - *byte_sz* - data section size, in bytes.
+ *
+ * The data section is initially empty. Variable info entries can be added
+ * with btf__add_datasec_var_info() calls after btf__add_datasec() succeeds.
+ *
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
+{
+ struct btf_type *t;
+ int sz, err, name_off;
+
+ /* non-empty name */
+ if (!name || !name[0])
+ return -EINVAL;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+
+ /* start with vlen=0, which will be updated as var_secinfos are added */
+ t->name_off = name_off;
+ t->info = btf_type_info(BTF_KIND_DATASEC, 0, 0);
+ t->size = byte_sz;
+
+ err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+ if (err)
+ return err;
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ btf->nr_types++;
+ return btf->nr_types;
+}
+
+/*
+ * Append new data section variable information entry for current DATASEC type:
+ * - *var_type_id* - type ID describing the type of the variable;
+ * - *offset* - variable offset within data section, in bytes;
+ * - *byte_sz* - variable size, in bytes.
+ *
+ * Returns:
+ * - 0, on success;
+ * - <0, on error.
+ */
+int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __u32 byte_sz)
+{
+ struct btf_type *t;
+ struct btf_var_secinfo *v;
+ int sz;
+
+ /* last type should be BTF_KIND_DATASEC */
+ if (btf->nr_types == 0)
+ return -EINVAL;
+ t = btf_type_by_id(btf, btf->nr_types);
+ if (!btf_is_datasec(t))
+ return -EINVAL;
+
+ if (validate_type_id(var_type_id))
+ return -EINVAL;
+
+ /* decompose and invalidate raw data */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_var_secinfo);
+ v = btf_add_type_mem(btf, sz);
+ if (!v)
+ return -ENOMEM;
+
+ v->type = var_type_id;
+ v->offset = offset;
+ v->size = byte_sz;
+
+ /* update parent type's vlen */
+ t = btf_type_by_id(btf, btf->nr_types);
+ btf_type_inc_vlen(t);
+
+ btf->hdr->type_len += sz;
+ btf->hdr->str_off += sz;
+ return 0;
+}
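A sketch of describing a ".data" section holding one 4-byte global variable with the VAR/DATASEC APIs above (the var info must be added while the DATASEC is still the last type; hypothetical helper, error checks omitted):

	static int add_data_section(struct btf *btf)
	{
		int int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
		int var_id = btf__add_var(btf, "cnt", BTF_VAR_GLOBAL_ALLOCATED, int_id);
		int sec_id = btf__add_datasec(btf, ".data", 4);

		btf__add_datasec_var_info(btf, var_id, 0 /* offset */, 4 /* size */);

		return sec_id;
	}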
+
struct btf_ext_sec_setup_param {
__u32 off;
__u32 len;
@@ -972,14 +2448,14 @@ static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
return btf_ext_setup_info(btf_ext, &param);
}
-static int btf_ext_setup_field_reloc(struct btf_ext *btf_ext)
+static int btf_ext_setup_core_relos(struct btf_ext *btf_ext)
{
struct btf_ext_sec_setup_param param = {
- .off = btf_ext->hdr->field_reloc_off,
- .len = btf_ext->hdr->field_reloc_len,
- .min_rec_size = sizeof(struct bpf_field_reloc),
- .ext_info = &btf_ext->field_reloc_info,
- .desc = "field_reloc",
+ .off = btf_ext->hdr->core_relo_off,
+ .len = btf_ext->hdr->core_relo_len,
+ .min_rec_size = sizeof(struct bpf_core_relo),
+ .ext_info = &btf_ext->core_relo_info,
+ .desc = "core_relo",
};
return btf_ext_setup_info(btf_ext, &param);
@@ -995,7 +2471,10 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
return -EINVAL;
}
- if (hdr->magic != BTF_MAGIC) {
+ if (hdr->magic == bswap_16(BTF_MAGIC)) {
+ pr_warn("BTF.ext in non-native endianness is not supported\n");
+ return -ENOTSUP;
+ } else if (hdr->magic != BTF_MAGIC) {
pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic);
return -EINVAL;
}
@@ -1020,7 +2499,7 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
void btf_ext__free(struct btf_ext *btf_ext)
{
- if (!btf_ext)
+ if (IS_ERR_OR_NULL(btf_ext))
return;
free(btf_ext->data);
free(btf_ext);
@@ -1058,10 +2537,9 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
if (err)
goto done;
- if (btf_ext->hdr->hdr_len <
- offsetofend(struct btf_ext_header, field_reloc_len))
+ if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
goto done;
- err = btf_ext_setup_field_reloc(btf_ext);
+ err = btf_ext_setup_core_relos(btf_ext);
if (err)
goto done;
@@ -1316,6 +2794,9 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
return -EINVAL;
}
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
err = btf_dedup_strings(d);
if (err < 0) {
pr_debug("btf_dedup_strings failed:%d\n", err);
@@ -1416,7 +2897,7 @@ static int btf_dedup_hypot_map_add(struct btf_dedup *d,
__u32 *new_list;
d->hypot_cap += max((size_t)16, d->hypot_cap / 2);
- new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap);
+ new_list = libbpf_reallocarray(d->hypot_list, d->hypot_cap, sizeof(__u32));
if (!new_list)
return -ENOMEM;
d->hypot_list = new_list;
@@ -1500,7 +2981,7 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
/* special BTF "void" type is made canonical immediately */
d->map[0] = 0;
for (i = 1; i <= btf->nr_types; i++) {
- struct btf_type *t = d->btf->types[i];
+ struct btf_type *t = btf_type_by_id(d->btf, i);
/* VAR and DATASEC are never deduped and are self-canonical */
if (btf_is_var(t) || btf_is_datasec(t))
@@ -1539,7 +3020,7 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
struct btf_type *t;
for (i = 1; i <= d->btf->nr_types; i++) {
- t = d->btf->types[i];
+ t = btf_type_by_id(d->btf, i);
r = fn(&t->name_off, ctx);
if (r)
return r;
@@ -1693,8 +3174,7 @@ static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
*/
static int btf_dedup_strings(struct btf_dedup *d)
{
- const struct btf_header *hdr = d->btf->hdr;
- char *start = (char *)d->btf->nohdr_data + hdr->str_off;
+ char *start = d->btf->strs_data;
char *end = start + d->btf->hdr->str_len;
char *p = start, *tmp_strs = NULL;
struct btf_str_ptrs strs = {
@@ -1706,14 +3186,16 @@ static int btf_dedup_strings(struct btf_dedup *d)
int i, j, err = 0, grp_idx;
bool grp_used;
+ if (d->btf->strs_deduped)
+ return 0;
+
/* build index of all strings */
while (p < end) {
if (strs.cnt + 1 > strs.cap) {
struct btf_str_ptr *new_ptrs;
strs.cap += max(strs.cnt / 2, 16U);
- new_ptrs = realloc(strs.ptrs,
- sizeof(strs.ptrs[0]) * strs.cap);
+ new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0]));
if (!new_ptrs) {
err = -ENOMEM;
goto done;
@@ -1799,6 +3281,7 @@ static int btf_dedup_strings(struct btf_dedup *d)
goto done;
d->btf->hdr->str_len = end - start;
+ d->btf->strs_deduped = true;
done:
free(tmp_strs);
@@ -2075,7 +3558,7 @@ static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
*/
static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
{
- struct btf_type *t = d->btf->types[type_id];
+ struct btf_type *t = btf_type_by_id(d->btf, type_id);
struct hashmap_entry *hash_entry;
struct btf_type *cand;
/* if we don't find equivalent type, then we are canonical */
@@ -2102,7 +3585,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_int(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
- cand = d->btf->types[cand_id];
+ cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_int(t, cand)) {
new_id = cand_id;
break;
@@ -2114,7 +3597,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_enum(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
- cand = d->btf->types[cand_id];
+ cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_enum(t, cand)) {
new_id = cand_id;
break;
@@ -2137,7 +3620,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_common(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
- cand = d->btf->types[cand_id];
+ cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_common(t, cand)) {
new_id = cand_id;
break;
@@ -2196,13 +3679,13 @@ static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id)
{
__u32 orig_type_id = type_id;
- if (!btf_is_fwd(d->btf->types[type_id]))
+ if (!btf_is_fwd(btf__type_by_id(d->btf, type_id)))
return type_id;
while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
type_id = d->map[type_id];
- if (!btf_is_fwd(d->btf->types[type_id]))
+ if (!btf_is_fwd(btf__type_by_id(d->btf, type_id)))
return type_id;
return orig_type_id;
@@ -2330,8 +3813,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
return -ENOMEM;
- cand_type = d->btf->types[cand_id];
- canon_type = d->btf->types[canon_id];
+ cand_type = btf_type_by_id(d->btf, cand_id);
+ canon_type = btf_type_by_id(d->btf, canon_id);
cand_kind = btf_kind(cand_type);
canon_kind = btf_kind(canon_type);
@@ -2482,8 +3965,8 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
targ_type_id = d->hypot_map[cand_type_id];
t_id = resolve_type_id(d, targ_type_id);
c_id = resolve_type_id(d, cand_type_id);
- t_kind = btf_kind(d->btf->types[t_id]);
- c_kind = btf_kind(d->btf->types[c_id]);
+ t_kind = btf_kind(btf__type_by_id(d->btf, t_id));
+ c_kind = btf_kind(btf__type_by_id(d->btf, c_id));
/*
* Resolve FWD into STRUCT/UNION.
* It's ok to resolve FWD into STRUCT/UNION that's not yet
@@ -2551,7 +4034,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
if (d->map[type_id] <= BTF_MAX_NR_TYPES)
return 0;
- t = d->btf->types[type_id];
+ t = btf_type_by_id(d->btf, type_id);
kind = btf_kind(t);
if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
@@ -2572,7 +4055,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
* creating a loop (FWD -> STRUCT and STRUCT -> FWD), because
* FWD and compatible STRUCT/UNION are considered equivalent.
*/
- cand_type = d->btf->types[cand_id];
+ cand_type = btf_type_by_id(d->btf, cand_id);
if (!btf_shallow_equal_struct(t, cand_type))
continue;
@@ -2644,7 +4127,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
if (d->map[type_id] <= BTF_MAX_NR_TYPES)
return resolve_type_id(d, type_id);
- t = d->btf->types[type_id];
+ t = btf_type_by_id(d->btf, type_id);
d->map[type_id] = BTF_IN_PROGRESS_ID;
switch (btf_kind(t)) {
@@ -2662,7 +4145,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_common(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
- cand = d->btf->types[cand_id];
+ cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_common(t, cand)) {
new_id = cand_id;
break;
@@ -2686,7 +4169,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_array(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
- cand = d->btf->types[cand_id];
+ cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_array(t, cand)) {
new_id = cand_id;
break;
@@ -2718,7 +4201,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
h = btf_hash_fnproto(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
- cand = d->btf->types[cand_id];
+ cand = btf_type_by_id(d->btf, cand_id);
if (btf_equal_fnproto(t, cand)) {
new_id = cand_id;
break;
@@ -2766,9 +4249,9 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
*/
static int btf_dedup_compact_types(struct btf_dedup *d)
{
- struct btf_type **new_types;
+ __u32 *new_offs;
__u32 next_type_id = 1;
- char *types_start, *p;
+ void *p;
int i, len;
/* we are going to reuse hypot_map to store compaction remapping */
@@ -2776,41 +4259,34 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
for (i = 1; i <= d->btf->nr_types; i++)
d->hypot_map[i] = BTF_UNPROCESSED_ID;
- types_start = d->btf->nohdr_data + d->btf->hdr->type_off;
- p = types_start;
+ p = d->btf->types_data;
for (i = 1; i <= d->btf->nr_types; i++) {
if (d->map[i] != i)
continue;
- len = btf_type_size(d->btf->types[i]);
+ len = btf_type_size(btf__type_by_id(d->btf, i));
if (len < 0)
return len;
- memmove(p, d->btf->types[i], len);
+ memmove(p, btf__type_by_id(d->btf, i), len);
d->hypot_map[i] = next_type_id;
- d->btf->types[next_type_id] = (struct btf_type *)p;
+ d->btf->type_offs[next_type_id] = p - d->btf->types_data;
p += len;
next_type_id++;
}
/* shrink struct btf's internal types index and update btf_header */
d->btf->nr_types = next_type_id - 1;
- d->btf->types_size = d->btf->nr_types;
- d->btf->hdr->type_len = p - types_start;
- new_types = realloc(d->btf->types,
- (1 + d->btf->nr_types) * sizeof(struct btf_type *));
- if (!new_types)
+ d->btf->type_offs_cap = d->btf->nr_types + 1;
+ d->btf->hdr->type_len = p - d->btf->types_data;
+ new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap,
+ sizeof(*new_offs));
+ if (!new_offs)
return -ENOMEM;
- d->btf->types = new_types;
-
- /* make sure string section follows type information without gaps */
- d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data;
- memmove(p, d->btf->strings, d->btf->hdr->str_len);
- d->btf->strings = p;
- p += d->btf->hdr->str_len;
-
- d->btf->data_size = p - (char *)d->btf->data;
+ d->btf->type_offs = new_offs;
+ d->btf->hdr->str_off = d->btf->hdr->type_len;
+ d->btf->raw_size = d->btf->hdr->hdr_len + d->btf->hdr->type_len + d->btf->hdr->str_len;
return 0;
}
@@ -2843,7 +4319,7 @@ static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
*/
static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
{
- struct btf_type *t = d->btf->types[type_id];
+ struct btf_type *t = btf_type_by_id(d->btf, type_id);
int i, r;
switch (btf_kind(t)) {
@@ -2946,41 +4422,6 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
return 0;
}
-static struct btf *btf_load_raw(const char *path)
-{
- struct btf *btf;
- size_t read_cnt;
- struct stat st;
- void *data;
- FILE *f;
-
- if (stat(path, &st))
- return ERR_PTR(-errno);
-
- data = malloc(st.st_size);
- if (!data)
- return ERR_PTR(-ENOMEM);
-
- f = fopen(path, "rb");
- if (!f) {
- btf = ERR_PTR(-errno);
- goto cleanup;
- }
-
- read_cnt = fread(data, 1, st.st_size, f);
- fclose(f);
- if (read_cnt < st.st_size) {
- btf = ERR_PTR(-EBADF);
- goto cleanup;
- }
-
- btf = btf__new(data, read_cnt);
-
-cleanup:
- free(data);
- return btf;
-}
-
/*
* Probe few well-known locations for vmlinux kernel image and try to load BTF
* data out of it to use for target BTF.
@@ -3016,7 +4457,7 @@ struct btf *libbpf_find_kernel_btf(void)
continue;
if (locations[i].raw_btf)
- btf = btf_load_raw(path);
+ btf = btf__parse_raw(path);
else
btf = btf__parse_elf(path, NULL);
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 70c1b7ec2bd0..57247240a20a 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -5,6 +5,7 @@
#define __LIBBPF_BTF_H
#include <stdarg.h>
+#include <stdbool.h>
#include <linux/btf.h>
#include <linux/types.h>
@@ -24,48 +25,17 @@ struct btf_type;
struct bpf_object;
-/*
- * The .BTF.ext ELF section layout defined as
- * struct btf_ext_header
- * func_info subsection
- *
- * The func_info subsection layout:
- * record size for struct bpf_func_info in the func_info subsection
- * struct btf_sec_func_info for section #1
- * a list of bpf_func_info records for section #1
- * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h
- * but may not be identical
- * struct btf_sec_func_info for section #2
- * a list of bpf_func_info records for section #2
- * ......
- *
- * Note that the bpf_func_info record size in .BTF.ext may not
- * be the same as the one defined in include/uapi/linux/bpf.h.
- * The loader should ensure that record_size meets minimum
- * requirement and pass the record as is to the kernel. The
- * kernel will handle the func_info properly based on its contents.
- */
-struct btf_ext_header {
- __u16 magic;
- __u8 version;
- __u8 flags;
- __u32 hdr_len;
-
- /* All offsets are in bytes relative to the end of this header */
- __u32 func_info_off;
- __u32 func_info_len;
- __u32 line_info_off;
- __u32 line_info_len;
-
- /* optional part of .BTF.ext header */
- __u32 field_reloc_off;
- __u32 field_reloc_len;
+enum btf_endianness {
+ BTF_LITTLE_ENDIAN = 0,
+ BTF_BIG_ENDIAN = 1,
};
LIBBPF_API void btf__free(struct btf *btf);
-LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size);
-LIBBPF_API struct btf *btf__parse_elf(const char *path,
- struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__new(const void *data, __u32 size);
+LIBBPF_API struct btf *btf__new_empty(void);
+LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_raw(const char *path);
LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
LIBBPF_API int btf__load(struct btf *btf);
LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
@@ -75,12 +45,18 @@ LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
__u32 id);
+LIBBPF_API size_t btf__pointer_size(const struct btf *btf);
+LIBBPF_API int btf__set_pointer_size(struct btf *btf, size_t ptr_sz);
+LIBBPF_API enum btf_endianness btf__endianness(const struct btf *btf);
+LIBBPF_API int btf__set_endianness(struct btf *btf, enum btf_endianness endian);
LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id);
LIBBPF_API int btf__fd(const struct btf *btf);
+LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
+LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
__u32 expected_key_size,
@@ -91,19 +67,62 @@ LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,
__u32 *size);
-LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const char *sec_name, __u32 insns_cnt,
- void **func_info, __u32 *cnt);
-LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const char *sec_name, __u32 insns_cnt,
- void **line_info, __u32 *cnt);
+LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
+int btf_ext__reloc_func_info(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const char *sec_name, __u32 insns_cnt,
+ void **func_info, __u32 *cnt);
+LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
+int btf_ext__reloc_line_info(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const char *sec_name, __u32 insns_cnt,
+ void **line_info, __u32 *cnt);
LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
+LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
+LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
+
+LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding);
+LIBBPF_API int btf__add_ptr(struct btf *btf, int ref_type_id);
+LIBBPF_API int btf__add_array(struct btf *btf,
+ int index_type_id, int elem_type_id, __u32 nr_elems);
+/* struct/union construction APIs */
+LIBBPF_API int btf__add_struct(struct btf *btf, const char *name, __u32 sz);
+LIBBPF_API int btf__add_union(struct btf *btf, const char *name, __u32 sz);
+LIBBPF_API int btf__add_field(struct btf *btf, const char *name, int field_type_id,
+ __u32 bit_offset, __u32 bit_size);
+
+/* enum construction APIs */
+LIBBPF_API int btf__add_enum(struct btf *btf, const char *name, __u32 bytes_sz);
+LIBBPF_API int btf__add_enum_value(struct btf *btf, const char *name, __s64 value);
+
+enum btf_fwd_kind {
+ BTF_FWD_STRUCT = 0,
+ BTF_FWD_UNION = 1,
+ BTF_FWD_ENUM = 2,
+};
+
+LIBBPF_API int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind);
+LIBBPF_API int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id);
+LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id);
+LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id);
+LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id);
+
+/* func and func_proto construction APIs */
+LIBBPF_API int btf__add_func(struct btf *btf, const char *name,
+ enum btf_func_linkage linkage, int proto_type_id);
+LIBBPF_API int btf__add_func_proto(struct btf *btf, int ret_type_id);
+LIBBPF_API int btf__add_func_param(struct btf *btf, const char *name, int type_id);
+
+/* var & datasec construction APIs */
+LIBBPF_API int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id);
+LIBBPF_API int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz);
+LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id,
+ __u32 offset, __u32 byte_sz);
+
struct btf_dedup_opts {
unsigned int dedup_table_size;
bool dont_resolve_fwds;
@@ -143,8 +162,10 @@ struct btf_dump_emit_type_decl_opts {
* necessary indentation already
*/
int indent_level;
+ /* strip all the const/volatile/restrict mods */
+ bool strip_mods;
};
-#define btf_dump_emit_type_decl_opts__last_field indent_level
+#define btf_dump_emit_type_decl_opts__last_field strip_mods
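A sketch of how the new strip_mods option might be used (assuming DECLARE_LIBBPF_OPTS from libbpf_common.h, an existing struct btf_dump *d, and a hypothetical type_id/field name):

	static int emit_stripped_decl(struct btf_dump *d, __u32 type_id)
	{
		DECLARE_LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts,
			.field_name = "val",
			.indent_level = 1,
			.strip_mods = true,	/* drop const/volatile/restrict */
		);

		return btf_dump__emit_type_decl(d, type_id, &opts);
	}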
LIBBPF_API int
btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
@@ -168,6 +189,11 @@ static inline bool btf_kflag(const struct btf_type *t)
return BTF_INFO_KFLAG(t->info);
}
+static inline bool btf_is_void(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_UNKN;
+}
+
static inline bool btf_is_int(const struct btf_type *t)
{
return btf_kind(t) == BTF_KIND_INT;
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 0c28ee82834b..2f9d685bd522 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -13,14 +13,12 @@
#include <errno.h>
#include <linux/err.h>
#include <linux/btf.h>
+#include <linux/kernel.h>
#include "btf.h"
#include "hashmap.h"
#include "libbpf.h"
#include "libbpf_internal.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
@@ -60,11 +58,16 @@ struct btf_dump {
const struct btf_ext *btf_ext;
btf_dump_printf_fn_t printf_fn;
struct btf_dump_opts opts;
+ int ptr_sz;
+ bool strip_mods;
+ int last_id;
/* per-type auxiliary state */
struct btf_dump_type_aux_state *type_states;
+ size_t type_states_cap;
/* per-type optional cached unique name, must be freed, if present */
const char **cached_names;
+ size_t cached_names_cap;
/* topo-sorted list of dependent type definitions */
__u32 *emit_queue;
@@ -90,14 +93,7 @@ struct btf_dump {
static size_t str_hash_fn(const void *key, void *ctx)
{
- const char *s = key;
- size_t h = 0;
-
- while (*s) {
- h = h * 31 + *s;
- s++;
- }
- return h;
+ return str_hash(key);
}
static bool str_equal_fn(const void *a, const void *b, void *ctx)
@@ -120,6 +116,7 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
}
static int btf_dump_mark_referenced(struct btf_dump *d);
+static int btf_dump_resize(struct btf_dump *d);
struct btf_dump *btf_dump__new(const struct btf *btf,
const struct btf_ext *btf_ext,
@@ -137,6 +134,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
d->btf_ext = btf_ext;
d->printf_fn = printf_fn;
d->opts.ctx = opts ? opts->ctx : NULL;
+ d->ptr_sz = btf__pointer_size(btf) ? : sizeof(void *);
d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
if (IS_ERR(d->type_names)) {
@@ -150,25 +148,8 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
d->ident_names = NULL;
goto err;
}
- d->type_states = calloc(1 + btf__get_nr_types(d->btf),
- sizeof(d->type_states[0]));
- if (!d->type_states) {
- err = -ENOMEM;
- goto err;
- }
- d->cached_names = calloc(1 + btf__get_nr_types(d->btf),
- sizeof(d->cached_names[0]));
- if (!d->cached_names) {
- err = -ENOMEM;
- goto err;
- }
- /* VOID is special */
- d->type_states[0].order_state = ORDERED;
- d->type_states[0].emit_state = EMITTED;
-
- /* eagerly determine referenced types for anon enums */
- err = btf_dump_mark_referenced(d);
+ err = btf_dump_resize(d);
if (err)
goto err;
@@ -178,17 +159,46 @@ err:
return ERR_PTR(err);
}
+static int btf_dump_resize(struct btf_dump *d)
+{
+ int err, last_id = btf__get_nr_types(d->btf);
+
+ if (last_id <= d->last_id)
+ return 0;
+
+ if (btf_ensure_mem((void **)&d->type_states, &d->type_states_cap,
+ sizeof(*d->type_states), last_id + 1))
+ return -ENOMEM;
+ if (btf_ensure_mem((void **)&d->cached_names, &d->cached_names_cap,
+ sizeof(*d->cached_names), last_id + 1))
+ return -ENOMEM;
+
+ if (d->last_id == 0) {
+ /* VOID is special */
+ d->type_states[0].order_state = ORDERED;
+ d->type_states[0].emit_state = EMITTED;
+ }
+
+ /* eagerly determine referenced types for anon enums */
+ err = btf_dump_mark_referenced(d);
+ if (err)
+ return err;
+
+ d->last_id = last_id;
+ return 0;
+}
+
void btf_dump__free(struct btf_dump *d)
{
- int i, cnt;
+ int i;
- if (!d)
+ if (IS_ERR_OR_NULL(d))
return;
free(d->type_states);
if (d->cached_names) {
/* any set cached name is owned by us and should be freed */
- for (i = 0, cnt = btf__get_nr_types(d->btf); i <= cnt; i++) {
+ for (i = 0; i <= d->last_id; i++) {
if (d->cached_names[i])
free((void *)d->cached_names[i]);
}
@@ -228,6 +238,10 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
if (id > btf__get_nr_types(d->btf))
return -EINVAL;
+ err = btf_dump_resize(d);
+ if (err)
+ return err;
+
d->emit_queue_cnt = 0;
err = btf_dump_order_type(d, id, false);
if (err < 0)
@@ -257,7 +271,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
const struct btf_type *t;
__u16 vlen;
- for (i = 1; i <= n; i++) {
+ for (i = d->last_id + 1; i <= n; i++) {
t = btf__type_by_id(d->btf, i);
vlen = btf_vlen(t);
@@ -312,6 +326,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
}
return 0;
}
+
static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
{
__u32 *new_queue;
@@ -319,8 +334,7 @@ static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
if (d->emit_queue_cnt >= d->emit_queue_cap) {
new_cap = max(16, d->emit_queue_cap * 3 / 2);
- new_queue = realloc(d->emit_queue,
- new_cap * sizeof(new_queue[0]));
+ new_queue = libbpf_reallocarray(d->emit_queue, new_cap, sizeof(new_queue[0]));
if (!new_queue)
return -ENOMEM;
d->emit_queue = new_queue;
@@ -548,6 +562,9 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
}
}
+static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id,
+ const struct btf_type *t);
+
static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
const struct btf_type *t);
static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id,
@@ -658,7 +675,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
if (!btf_dump_is_blacklisted(d, id)) {
btf_dump_emit_typedef_def(d, id, t, 0);
btf_dump_printf(d, ";\n\n");
- };
+ }
tstate->fwd_emitted = 1;
break;
default:
@@ -670,6 +687,9 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
switch (kind) {
case BTF_KIND_INT:
+ /* Emit type alias definitions if necessary */
+ btf_dump_emit_missing_aliases(d, id, t);
+
tstate->emit_state = EMITTED;
break;
case BTF_KIND_ENUM:
@@ -796,7 +816,7 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d,
int align, int lvl)
{
int off_diff = m_off - cur_off;
- int ptr_bits = sizeof(void *) * 8;
+ int ptr_bits = d->ptr_sz * 8;
if (off_diff <= 0)
/* no gap */
@@ -869,7 +889,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
btf_dump_printf(d, ": %d", m_sz);
off = m_off + m_sz;
} else {
- m_sz = max(0, btf__resolve_size(d->btf, m->type));
+ m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type));
off = m_off + m_sz * 8;
}
btf_dump_printf(d, ";");
@@ -889,6 +909,32 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
btf_dump_printf(d, " __attribute__((packed))");
}
+static const char *missing_base_types[][2] = {
+ /*
+ * GCC emits typedefs to its internal __PolyX_t types when compiling Arm
+ * SIMD intrinsics. Alias them to standard base types.
+ */
+ { "__Poly8_t", "unsigned char" },
+ { "__Poly16_t", "unsigned short" },
+ { "__Poly64_t", "unsigned long long" },
+ { "__Poly128_t", "unsigned __int128" },
+};
+
+static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id,
+ const struct btf_type *t)
+{
+ const char *name = btf_dump_type_name(d, id);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(missing_base_types); i++) {
+ if (strcmp(name, missing_base_types[i][0]) == 0) {
+ btf_dump_printf(d, "typedef %s %s;\n\n",
+ missing_base_types[i][1], name);
+ break;
+ }
+ }
+}
+
static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
const struct btf_type *t)
{
@@ -967,8 +1013,7 @@ static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)
if (d->decl_stack_cnt >= d->decl_stack_cap) {
new_cap = max(16, d->decl_stack_cap * 3 / 2);
- new_stack = realloc(d->decl_stack,
- new_cap * sizeof(new_stack[0]));
+ new_stack = libbpf_reallocarray(d->decl_stack, new_cap, sizeof(new_stack[0]));
if (!new_stack)
return -ENOMEM;
d->decl_stack = new_stack;
@@ -1025,14 +1070,20 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
const struct btf_dump_emit_type_decl_opts *opts)
{
const char *fname;
- int lvl;
+ int lvl, err;
if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
return -EINVAL;
+ err = btf_dump_resize(d);
+ if (err)
+ return -EINVAL;
+
fname = OPTS_GET(opts, field_name, "");
lvl = OPTS_GET(opts, indent_level, 0);
+ d->strip_mods = OPTS_GET(opts, strip_mods, false);
btf_dump_emit_type_decl(d, id, fname, lvl);
+ d->strip_mods = false;
return 0;
}
@@ -1045,6 +1096,10 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
stack_start = d->decl_stack_cnt;
for (;;) {
+ t = btf__type_by_id(d->btf, id);
+ if (d->strip_mods && btf_is_mod(t))
+ goto skip_mod;
+
err = btf_dump_push_decl_stack_id(d, id);
if (err < 0) {
/*
@@ -1056,12 +1111,11 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
d->decl_stack_cnt = stack_start;
return;
}
-
+skip_mod:
/* VOID */
if (id == 0)
break;
- t = btf__type_by_id(d->btf, id);
switch (btf_kind(t)) {
case BTF_KIND_PTR:
case BTF_KIND_VOLATILE:
@@ -1137,6 +1191,20 @@ static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack)
}
}
+static void btf_dump_drop_mods(struct btf_dump *d, struct id_stack *decl_stack)
+{
+ const struct btf_type *t;
+ __u32 id;
+
+ while (decl_stack->cnt) {
+ id = decl_stack->ids[decl_stack->cnt - 1];
+ t = btf__type_by_id(d->btf, id);
+ if (!btf_is_mod(t))
+ return;
+ decl_stack->cnt--;
+ }
+}
+
static void btf_dump_emit_name(const struct btf_dump *d,
const char *name, bool last_was_ptr)
{
@@ -1235,14 +1303,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
* a const/volatile modifier for array, so we are
* going to silently skip them here.
*/
- while (decls->cnt) {
- next_id = decls->ids[decls->cnt - 1];
- next_t = btf__type_by_id(d->btf, next_id);
- if (btf_is_mod(next_t))
- decls->cnt--;
- else
- break;
- }
+ btf_dump_drop_mods(d, decls);
if (decls->cnt == 0) {
btf_dump_emit_name(d, fname, last_was_ptr);
@@ -1270,7 +1331,15 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
__u16 vlen = btf_vlen(t);
int i;
- btf_dump_emit_mods(d, decls);
+ /*
+ * GCC emits extra volatile qualifier for
+ * __attribute__((noreturn)) function pointers. Clang
+ * doesn't do it. It's a GCC quirk for backwards
+ * compatibility with code written for GCC <2.5. So,
+ * similarly to extra qualifiers for array, just drop
+ * them, instead of handling them.
+ */
+ btf_dump_drop_mods(d, decls);
if (decls->cnt) {
btf_dump_printf(d, " (");
btf_dump_emit_type_chain(d, decls, fname, lvl);
diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c
index 54c30c802070..3c20b126d60d 100644
--- a/tools/lib/bpf/hashmap.c
+++ b/tools/lib/bpf/hashmap.c
@@ -15,6 +15,9 @@
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+/* prevent accidental re-addition of reallocarray() */
+#pragma GCC poison reallocarray
+
/* start with 4 buckets */
#define HASHMAP_MIN_CAP_BITS 2
@@ -59,7 +62,14 @@ struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
void hashmap__clear(struct hashmap *map)
{
+ struct hashmap_entry *cur, *tmp;
+ size_t bkt;
+
+ hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+ free(cur);
+ }
free(map->buckets);
+ map->buckets = NULL;
map->cap = map->cap_bits = map->sz = 0;
}
@@ -93,8 +103,7 @@ static int hashmap_grow(struct hashmap *map)
struct hashmap_entry **new_buckets;
struct hashmap_entry *cur, *tmp;
size_t new_cap_bits, new_cap;
- size_t h;
- int bkt;
+ size_t h, bkt;
new_cap_bits = map->cap_bits + 1;
if (new_cap_bits < HASHMAP_MIN_CAP_BITS)
diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
index bae8879cdf58..d9b385fe808c 100644
--- a/tools/lib/bpf/hashmap.h
+++ b/tools/lib/bpf/hashmap.h
@@ -10,17 +10,31 @@
#include <stdbool.h>
#include <stddef.h>
-#ifdef __GLIBC__
-#include <bits/wordsize.h>
-#else
-#include <bits/reg.h>
-#endif
-#include "libbpf_internal.h"
+#include <limits.h>
static inline size_t hash_bits(size_t h, int bits)
{
/* shuffle bits and return requested number of upper bits */
- return (h * 11400714819323198485llu) >> (__WORDSIZE - bits);
+#if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
+ /* LP64 case */
+ return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
+#elif (__SIZEOF_SIZE_T__ <= __SIZEOF_LONG__)
+ return (h * 2654435769lu) >> (__SIZEOF_LONG__ * 8 - bits);
+#else
+# error "Unsupported size_t size"
+#endif
+}
+
+/* generic C-string hashing function */
+static inline size_t str_hash(const char *s)
+{
+ size_t h = 0;
+
+ while (*s) {
+ h = h * 31 + *s;
+ s++;
+ }
+ return h;
}
typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8f480e29a6b0..313034117070 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -44,7 +44,6 @@
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
-#include <tools/libc_compat.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>
@@ -56,9 +55,6 @@
#include "libbpf_internal.h"
#include "hashmap.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
#ifndef EM_BPF
#define EM_BPF 247
#endif
@@ -67,6 +63,8 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
+#define BPF_INSN_SZ (sizeof(struct bpf_insn))
+
/* vsprintf() in __base_pr() uses nonliteral format string. It may break
* compilation if user enables corresponding warning. Disable it explicitly.
*/
@@ -75,8 +73,6 @@
#define __printf(a, b) __attribute__((format(printf, a, b)))
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
-static struct bpf_program *bpf_object__find_prog_by_idx(struct bpf_object *obj,
- int idx);
static const struct btf_type *
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
@@ -154,34 +150,37 @@ static void pr_perm_msg(int err)
___err; })
#endif
-#ifdef HAVE_LIBELF_MMAP_SUPPORT
-# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP
-#else
-# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ
-#endif
-
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
}
-struct bpf_capabilities {
+enum kern_feature_id {
/* v4.14: kernel support for program & map names. */
- __u32 name:1;
+ FEAT_PROG_NAME,
/* v5.2: kernel support for global data sections. */
- __u32 global_data:1;
+ FEAT_GLOBAL_DATA,
+ /* BTF support */
+ FEAT_BTF,
/* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
- __u32 btf_func:1;
+ FEAT_BTF_FUNC,
/* BTF_KIND_VAR and BTF_KIND_DATASEC support */
- __u32 btf_datasec:1;
- /* BPF_F_MMAPABLE is supported for arrays */
- __u32 array_mmap:1;
+ FEAT_BTF_DATASEC,
/* BTF_FUNC_GLOBAL is supported */
- __u32 btf_func_global:1;
+ FEAT_BTF_GLOBAL_FUNC,
+ /* BPF_F_MMAPABLE is supported for arrays */
+ FEAT_ARRAY_MMAP,
/* kernel support for expected_attach_type in BPF_PROG_LOAD */
- __u32 exp_attach_type:1;
+ FEAT_EXP_ATTACH_TYPE,
+ /* bpf_probe_read_{kernel,user}[_str] helpers */
+ FEAT_PROBE_READ_KERN,
+ /* BPF_PROG_BIND_MAP is supported */
+ FEAT_PROG_BIND_MAP,
+ __FEAT_CNT,
};
+static bool kernel_supports(enum kern_feature_id feat_id);
+
enum reloc_type {
RELO_LD64,
RELO_CALL,
@@ -194,6 +193,7 @@ struct reloc_desc {
int insn_idx;
int map_idx;
int sym_off;
+ bool processed;
};
struct bpf_sec_def;
@@ -209,6 +209,7 @@ struct bpf_sec_def {
bool is_exp_attach_type_optional;
bool is_attachable;
bool is_attach_btf;
+ bool is_sleepable;
attach_fn_t attach_fn;
};
@@ -217,19 +218,45 @@ struct bpf_sec_def {
* linux/filter.h.
*/
struct bpf_program {
- /* Index in elf obj file, for relocation use. */
- int idx;
- char *name;
- int prog_ifindex;
- char *section_name;
const struct bpf_sec_def *sec_def;
- /* section_name with / replaced by _; makes recursive pinning
+ char *sec_name;
+ size_t sec_idx;
+ /* this program's instruction offset (in number of instructions)
+ * within its containing ELF section
+ */
+ size_t sec_insn_off;
+ /* number of original instructions in ELF section belonging to this
+ * program, not taking into account subprogram instructions possibly
+ * appended later during relocation
+ */
+ size_t sec_insn_cnt;
+ /* Offset (in number of instructions) of the first instruction
+ * belonging to this BPF program within its containing main BPF
+ * program. For the entry-point (main) BPF program, this is always
+ * zero. For a sub-program, this gets reset before each main BPF
+ * program is processed and relocated, and is used to determine
+ * whether the sub-program was already appended to the main program
+ * and, if so, at which instruction offset.
+ */
+ size_t sub_insn_off;
+
+ char *name;
+ /* sec_name with / replaced by _; makes recursive pinning
* in bpf_object__pin_programs easier
*/
char *pin_name;
+
+ /* instructions that belong to this BPF program; insns[0] corresponds
+ * to instruction sec_insn_off within its ELF section, so to map an
+ * ELF-file instruction index to a local one, subtract sec_insn_off
+ * (and vice versa).
+ */
struct bpf_insn *insns;
- size_t insns_cnt, main_prog_cnt;
- enum bpf_prog_type type;
+ /* actual number of instructions in this BPF program's image; for
+ * entry-point BPF programs this includes the main program itself
+ * plus all the sub-programs it uses, appended at the end
+ */
+ size_t insns_cnt;
struct reloc_desc *reloc_desc;
int nr_reloc;
@@ -245,15 +272,16 @@ struct bpf_program {
void *priv;
bpf_program_clear_priv_t clear_priv;
+ bool load;
+ enum bpf_prog_type type;
enum bpf_attach_type expected_attach_type;
+ int prog_ifindex;
__u32 attach_btf_id;
__u32 attach_prog_fd;
void *func_info;
__u32 func_info_rec_size;
__u32 func_info_cnt;
- struct bpf_capabilities *caps;
-
void *line_info;
__u32 line_info_rec_size;
__u32 line_info_cnt;
@@ -285,6 +313,7 @@ struct bpf_struct_ops {
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
+#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
enum libbpf_map_type {
@@ -310,6 +339,8 @@ struct bpf_map {
int map_ifindex;
int inner_map_fd;
struct bpf_map_def def;
+ __u32 numa_node;
+ __u32 btf_var_idx;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
__u32 btf_vmlinux_value_type_id;
@@ -318,6 +349,9 @@ struct bpf_map {
enum libbpf_map_type libbpf_type;
void *mmaped;
struct bpf_struct_ops *st_ops;
+ struct bpf_map *inner_map;
+ void **init_slots;
+ int init_slots_sz;
char *pin_path;
bool pinned;
bool reused;
@@ -325,24 +359,45 @@ struct bpf_map {
enum extern_type {
EXT_UNKNOWN,
- EXT_CHAR,
- EXT_BOOL,
- EXT_INT,
- EXT_TRISTATE,
- EXT_CHAR_ARR,
+ EXT_KCFG,
+ EXT_KSYM,
+};
+
+enum kcfg_type {
+ KCFG_UNKNOWN,
+ KCFG_CHAR,
+ KCFG_BOOL,
+ KCFG_INT,
+ KCFG_TRISTATE,
+ KCFG_CHAR_ARR,
};
struct extern_desc {
- const char *name;
+ enum extern_type type;
int sym_idx;
int btf_id;
- enum extern_type type;
- int sz;
- int align;
- int data_off;
- bool is_signed;
- bool is_weak;
+ int sec_btf_id;
+ const char *name;
bool is_set;
+ bool is_weak;
+ union {
+ struct {
+ enum kcfg_type type;
+ int sz;
+ int align;
+ int data_off;
+ bool is_signed;
+ } kcfg;
+ struct {
+ unsigned long long addr;
+
+ /* target btf_id of the corresponding kernel var. */
+ int vmlinux_btf_id;
+
+ /* local btf_id of the ksym extern's type. */
+ __u32 type_id;
+ } ksym;
+ };
};
static LIST_HEAD(bpf_objects_list);
@@ -362,9 +417,10 @@ struct bpf_object {
struct extern_desc *externs;
int nr_extern;
int kconfig_map_idx;
+ int rodata_map_idx;
bool loaded;
- bool has_pseudo_calls;
+ bool has_subcalls;
/*
* Information when doing elf related work. Only valid if fd
@@ -381,6 +437,7 @@ struct bpf_object {
Elf_Data *rodata;
Elf_Data *bss;
Elf_Data *st_ops_data;
+ size_t shstrndx; /* section index for section name strings */
size_t strtabidx;
struct {
GElf_Shdr shdr;
@@ -389,6 +446,7 @@ struct bpf_object {
int nr_reloc_sects;
int maps_shndx;
int btf_maps_shndx;
+ __u32 btf_maps_sec_btf_id;
int text_shndx;
int symbols_shndx;
int data_shndx;
@@ -413,12 +471,20 @@ struct bpf_object {
void *priv;
bpf_object_clear_priv_t clear_priv;
- struct bpf_capabilities caps;
-
char path[];
};
#define obj_elf_valid(o) ((o)->efile.elf)
+static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
+static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
+static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
+static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
+static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
+static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
+static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
+static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
+ size_t off, __u32 sym_type, GElf_Sym *sym);
+
void bpf_program__unload(struct bpf_program *prog)
{
int i;
@@ -458,158 +524,160 @@ static void bpf_program__exit(struct bpf_program *prog)
bpf_program__unload(prog);
zfree(&prog->name);
- zfree(&prog->section_name);
+ zfree(&prog->sec_name);
zfree(&prog->pin_name);
zfree(&prog->insns);
zfree(&prog->reloc_desc);
prog->nr_reloc = 0;
prog->insns_cnt = 0;
- prog->idx = -1;
+ prog->sec_idx = -1;
}
static char *__bpf_program__pin_name(struct bpf_program *prog)
{
char *name, *p;
- name = p = strdup(prog->section_name);
+ name = p = strdup(prog->sec_name);
while ((p = strchr(p, '/')))
*p = '_';
return name;
}
+static bool insn_is_subprog_call(const struct bpf_insn *insn)
+{
+ return BPF_CLASS(insn->code) == BPF_JMP &&
+ BPF_OP(insn->code) == BPF_CALL &&
+ BPF_SRC(insn->code) == BPF_K &&
+ insn->src_reg == BPF_PSEUDO_CALL &&
+ insn->dst_reg == 0 &&
+ insn->off == 0;
+}
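For orientation (an illustrative sketch, not part of this patch): the encoding that insn_is_subprog_call() matches is the one clang emits for BPF-to-BPF calls, where BPF_PSEUDO_CALL in src_reg distinguishes a call to another BPF function from a helper call. A minimal sketch, with a hypothetical callee offset:

#include <linux/bpf.h>

/* a BPF-to-BPF call; for a helper call, src_reg would be 0 and imm would
 * hold the helper ID instead of an instruction delta to the callee
 */
struct bpf_insn subprog_call = {
	.code    = BPF_JMP | BPF_CALL,
	.src_reg = BPF_PSEUDO_CALL,
	.dst_reg = 0,
	.off     = 0,
	.imm     = 7, /* hypothetical: insn delta from the next insn to the callee */
};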
+
static int
-bpf_program__init(void *data, size_t size, char *section_name, int idx,
- struct bpf_program *prog)
+bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
+ const char *name, size_t sec_idx, const char *sec_name,
+ size_t sec_off, void *insn_data, size_t insn_data_sz)
{
- const size_t bpf_insn_sz = sizeof(struct bpf_insn);
+ int i;
- if (size == 0 || size % bpf_insn_sz) {
- pr_warn("corrupted section '%s', size: %zu\n",
- section_name, size);
+ if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
+ pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
+ sec_name, name, sec_off, insn_data_sz);
return -EINVAL;
}
memset(prog, 0, sizeof(*prog));
+ prog->obj = obj;
+
+ prog->sec_idx = sec_idx;
+ prog->sec_insn_off = sec_off / BPF_INSN_SZ;
+ prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
+ /* insns_cnt can later be increased by appending used subprograms */
+ prog->insns_cnt = prog->sec_insn_cnt;
- prog->section_name = strdup(section_name);
- if (!prog->section_name) {
- pr_warn("failed to alloc name for prog under section(%d) %s\n",
- idx, section_name);
+ prog->type = BPF_PROG_TYPE_UNSPEC;
+ prog->load = true;
+
+ prog->instances.fds = NULL;
+ prog->instances.nr = -1;
+
+ prog->sec_name = strdup(sec_name);
+ if (!prog->sec_name)
+ goto errout;
+
+ prog->name = strdup(name);
+ if (!prog->name)
goto errout;
- }
prog->pin_name = __bpf_program__pin_name(prog);
- if (!prog->pin_name) {
- pr_warn("failed to alloc pin name for prog under section(%d) %s\n",
- idx, section_name);
+ if (!prog->pin_name)
goto errout;
- }
- prog->insns = malloc(size);
- if (!prog->insns) {
- pr_warn("failed to alloc insns for prog under section %s\n",
- section_name);
+ prog->insns = malloc(insn_data_sz);
+ if (!prog->insns)
goto errout;
+ memcpy(prog->insns, insn_data, insn_data_sz);
+
+ for (i = 0; i < prog->insns_cnt; i++) {
+ if (insn_is_subprog_call(&prog->insns[i])) {
+ obj->has_subcalls = true;
+ break;
+ }
}
- prog->insns_cnt = size / bpf_insn_sz;
- memcpy(prog->insns, data, size);
- prog->idx = idx;
- prog->instances.fds = NULL;
- prog->instances.nr = -1;
- prog->type = BPF_PROG_TYPE_UNSPEC;
return 0;
errout:
+ pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
bpf_program__exit(prog);
return -ENOMEM;
}
static int
-bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
- char *section_name, int idx)
+bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
+ const char *sec_name, int sec_idx)
{
- struct bpf_program prog, *progs;
+ struct bpf_program *prog, *progs;
+ void *data = sec_data->d_buf;
+ size_t sec_sz = sec_data->d_size, sec_off, prog_sz;
int nr_progs, err;
+ const char *name;
+ GElf_Sym sym;
- err = bpf_program__init(data, size, section_name, idx, &prog);
- if (err)
- return err;
-
- prog.caps = &obj->caps;
progs = obj->programs;
nr_progs = obj->nr_programs;
+ sec_off = 0;
- progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0]));
- if (!progs) {
- /*
- * In this case the original obj->programs
- * is still valid, so don't need special treat for
- * bpf_close_object().
- */
- pr_warn("failed to alloc a new program under section '%s'\n",
- section_name);
- bpf_program__exit(&prog);
- return -ENOMEM;
- }
-
- pr_debug("found program %s\n", prog.section_name);
- obj->programs = progs;
- obj->nr_programs = nr_progs + 1;
- prog.obj = obj;
- progs[nr_progs] = prog;
- return 0;
-}
-
-static int
-bpf_object__init_prog_names(struct bpf_object *obj)
-{
- Elf_Data *symbols = obj->efile.symbols;
- struct bpf_program *prog;
- size_t pi, si;
+ while (sec_off < sec_sz) {
+ if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) {
+ pr_warn("sec '%s': failed to find program symbol at offset %zu\n",
+ sec_name, sec_off);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
- for (pi = 0; pi < obj->nr_programs; pi++) {
- const char *name = NULL;
+ prog_sz = sym.st_size;
- prog = &obj->programs[pi];
+ name = elf_sym_str(obj, sym.st_name);
+ if (!name) {
+ pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
+ sec_name, sec_off);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
- for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
- si++) {
- GElf_Sym sym;
+ if (sec_off + prog_sz > sec_sz) {
+ pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
+ sec_name, sec_off);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
- if (!gelf_getsym(symbols, si, &sym))
- continue;
- if (sym.st_shndx != prog->idx)
- continue;
- if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL)
- continue;
+ pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
+ sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
- name = elf_strptr(obj->efile.elf,
- obj->efile.strtabidx,
- sym.st_name);
- if (!name) {
- pr_warn("failed to get sym name string for prog %s\n",
- prog->section_name);
- return -LIBBPF_ERRNO__LIBELF;
- }
+ progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
+ if (!progs) {
+ /*
+ * In this case the original obj->programs
+ * array is still valid, so no special treatment is
+ * needed in bpf_close_object().
+ */
+ pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
+ sec_name, name);
+ return -ENOMEM;
}
+ obj->programs = progs;
- if (!name && prog->idx == obj->efile.text_shndx)
- name = ".text";
+ prog = &progs[nr_progs];
- if (!name) {
- pr_warn("failed to find sym for prog %s\n",
- prog->section_name);
- return -EINVAL;
- }
+ err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
+ sec_off, data + sec_off, prog_sz);
+ if (err)
+ return err;
- prog->name = strdup(name);
- if (!prog->name) {
- pr_warn("failed to allocate memory for prog sym %s\n",
- name);
- return -ENOMEM;
- }
+ nr_progs++;
+ obj->nr_programs = nr_progs;
+
+ sec_off += prog_sz;
}
return 0;
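To illustrate what the per-symbol discovery above handles (a sketch, not part of this patch; section and function names are made up): several BPF programs can now share one ELF section, and bpf_object__add_programs() creates one bpf_program per STT_FUNC symbol it finds there.

#include <bpf/bpf_helpers.h>

/* both programs land in the same "tp/syscalls/sys_enter_openat" ELF
 * section; handle_a gets sec_insn_off 0, handle_b starts right after
 * handle_a's last instruction
 */
SEC("tp/syscalls/sys_enter_openat")
int handle_a(void *ctx)
{
	return 0;
}

SEC("tp/syscalls/sys_enter_openat")
int handle_b(void *ctx)
{
	return 0;
}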
@@ -1011,6 +1079,7 @@ static struct bpf_object *bpf_object__new(const char *path,
obj->efile.bss_shndx = -1;
obj->efile.st_ops_shndx = -1;
obj->kconfig_map_idx = -1;
+ obj->rodata_map_idx = -1;
obj->kern_version = get_kernel_version();
obj->loaded = false;
@@ -1042,13 +1111,18 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
obj->efile.obj_buf_sz = 0;
}
+/* if libelf is old and doesn't support mmap(), fall back to read() */
+#ifndef ELF_C_READ_MMAP
+#define ELF_C_READ_MMAP ELF_C_READ
+#endif
+
static int bpf_object__elf_init(struct bpf_object *obj)
{
int err = 0;
GElf_Ehdr *ep;
if (obj_elf_valid(obj)) {
- pr_warn("elf init: internal error\n");
+ pr_warn("elf: init internal error\n");
return -LIBBPF_ERRNO__LIBELF;
}
@@ -1066,31 +1140,44 @@ static int bpf_object__elf_init(struct bpf_object *obj)
err = -errno;
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
- pr_warn("failed to open %s: %s\n", obj->path, cp);
+ pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
return err;
}
- obj->efile.elf = elf_begin(obj->efile.fd,
- LIBBPF_ELF_C_READ_MMAP, NULL);
+ obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
}
if (!obj->efile.elf) {
- pr_warn("failed to open %s as ELF file\n", obj->path);
+ pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
err = -LIBBPF_ERRNO__LIBELF;
goto errout;
}
if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
- pr_warn("failed to get EHDR from %s\n", obj->path);
+ pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
err = -LIBBPF_ERRNO__FORMAT;
goto errout;
}
ep = &obj->efile.ehdr;
+ if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) {
+ pr_warn("elf: failed to get section names section index for %s: %s\n",
+ obj->path, elf_errmsg(-1));
+ err = -LIBBPF_ERRNO__FORMAT;
+ goto errout;
+ }
+
+ /* Elf is corrupted/truncated, avoid calling elf_strptr. */
+ if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) {
+ pr_warn("elf: failed to get section names strings from %s: %s\n",
+ obj->path, elf_errmsg(-1));
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+
/* Old LLVM set e_machine to EM_NONE */
if (ep->e_type != ET_REL ||
(ep->e_machine && ep->e_machine != EM_BPF)) {
- pr_warn("%s is not an eBPF object file\n", obj->path);
+ pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
err = -LIBBPF_ERRNO__FORMAT;
goto errout;
}
@@ -1112,7 +1199,7 @@ static int bpf_object__check_endianness(struct bpf_object *obj)
#else
# error "Unrecognized __BYTE_ORDER__"
#endif
- pr_warn("endianness mismatch.\n");
+ pr_warn("elf: endianness mismatch in %s.\n", obj->path);
return -LIBBPF_ERRNO__ENDIAN;
}
@@ -1147,55 +1234,10 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
return false;
}
-static int bpf_object_search_section_size(const struct bpf_object *obj,
- const char *name, size_t *d_size)
-{
- const GElf_Ehdr *ep = &obj->efile.ehdr;
- Elf *elf = obj->efile.elf;
- Elf_Scn *scn = NULL;
- int idx = 0;
-
- while ((scn = elf_nextscn(elf, scn)) != NULL) {
- const char *sec_name;
- Elf_Data *data;
- GElf_Shdr sh;
-
- idx++;
- if (gelf_getshdr(scn, &sh) != &sh) {
- pr_warn("failed to get section(%d) header from %s\n",
- idx, obj->path);
- return -EIO;
- }
-
- sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
- if (!sec_name) {
- pr_warn("failed to get section(%d) name from %s\n",
- idx, obj->path);
- return -EIO;
- }
-
- if (strcmp(name, sec_name))
- continue;
-
- data = elf_getdata(scn, 0);
- if (!data) {
- pr_warn("failed to get section(%d) data from %s(%s)\n",
- idx, name, obj->path);
- return -EIO;
- }
-
- *d_size = data->d_size;
- return 0;
- }
-
- return -ENOENT;
-}
-
int bpf_object__section_size(const struct bpf_object *obj, const char *name,
__u32 *size)
{
int ret = -ENOENT;
- size_t d_size;
*size = 0;
if (!name) {
@@ -1213,9 +1255,13 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
if (obj->efile.st_ops_data)
*size = obj->efile.st_ops_data->d_size;
} else {
- ret = bpf_object_search_section_size(obj, name, &d_size);
- if (!ret)
- *size = d_size;
+ Elf_Scn *scn = elf_sec_by_name(obj, name);
+ Elf_Data *data = elf_sec_data(obj, scn);
+
+ if (data) {
+ ret = 0; /* found it */
+ *size = data->d_size;
+ }
}
return *size ? 0 : ret;
@@ -1240,8 +1286,7 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
continue;
- sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name);
+ sname = elf_sym_str(obj, sym.st_name);
if (!sname) {
pr_warn("failed to get sym name string for var %s\n",
name);
@@ -1266,7 +1311,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
return &obj->maps[obj->nr_maps++];
new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
- new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps));
+ new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
if (!new_maps) {
pr_warn("alloc maps for object failed\n");
return ERR_PTR(-ENOMEM);
@@ -1393,6 +1438,8 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
obj->efile.rodata->d_size);
if (err)
return err;
+
+ obj->rodata_map_idx = obj->nr_maps - 1;
}
if (obj->efile.bss_shndx >= 0) {
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
@@ -1418,19 +1465,19 @@ static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
return NULL;
}
-static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
- char value)
+static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
+ char value)
{
- switch (ext->type) {
- case EXT_BOOL:
+ switch (ext->kcfg.type) {
+ case KCFG_BOOL:
if (value == 'm') {
- pr_warn("extern %s=%c should be tristate or char\n",
+ pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
ext->name, value);
return -EINVAL;
}
*(bool *)ext_val = value == 'y' ? true : false;
break;
- case EXT_TRISTATE:
+ case KCFG_TRISTATE:
if (value == 'y')
*(enum libbpf_tristate *)ext_val = TRI_YES;
else if (value == 'm')
@@ -1438,14 +1485,14 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
else /* value == 'n' */
*(enum libbpf_tristate *)ext_val = TRI_NO;
break;
- case EXT_CHAR:
+ case KCFG_CHAR:
*(char *)ext_val = value;
break;
- case EXT_UNKNOWN:
- case EXT_INT:
- case EXT_CHAR_ARR:
+ case KCFG_UNKNOWN:
+ case KCFG_INT:
+ case KCFG_CHAR_ARR:
default:
- pr_warn("extern %s=%c should be bool, tristate, or char\n",
+ pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
ext->name, value);
return -EINVAL;
}
@@ -1453,29 +1500,29 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
return 0;
}
-static int set_ext_value_str(struct extern_desc *ext, char *ext_val,
- const char *value)
+static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
+ const char *value)
{
size_t len;
- if (ext->type != EXT_CHAR_ARR) {
- pr_warn("extern %s=%s should char array\n", ext->name, value);
+ if (ext->kcfg.type != KCFG_CHAR_ARR) {
+ pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
return -EINVAL;
}
len = strlen(value);
if (value[len - 1] != '"') {
- pr_warn("extern '%s': invalid string config '%s'\n",
+ pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
ext->name, value);
return -EINVAL;
}
/* strip quotes */
len -= 2;
- if (len >= ext->sz) {
- pr_warn("extern '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
- ext->name, value, len, ext->sz - 1);
- len = ext->sz - 1;
+ if (len >= ext->kcfg.sz) {
+ pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
+ ext->name, value, len, ext->kcfg.sz - 1);
+ len = ext->kcfg.sz - 1;
}
memcpy(ext_val, value + 1, len);
ext_val[len] = '\0';
@@ -1502,11 +1549,11 @@ static int parse_u64(const char *value, __u64 *res)
return 0;
}
-static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v)
+static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
{
- int bit_sz = ext->sz * 8;
+ int bit_sz = ext->kcfg.sz * 8;
- if (ext->sz == 8)
+ if (ext->kcfg.sz == 8)
return true;
/* Validate that value stored in u64 fits in integer of `ext->sz`
@@ -1521,26 +1568,26 @@ static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v)
* For unsigned target integer, check that all the (64 - Y) bits are
* zero.
*/
- if (ext->is_signed)
+ if (ext->kcfg.is_signed)
return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
else
return (v >> bit_sz) == 0;
}
-static int set_ext_value_num(struct extern_desc *ext, void *ext_val,
- __u64 value)
+static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
+ __u64 value)
{
- if (ext->type != EXT_INT && ext->type != EXT_CHAR) {
- pr_warn("extern %s=%llu should be integer\n",
+ if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
+ pr_warn("extern (kcfg) %s=%llu should be integer\n",
ext->name, (unsigned long long)value);
return -EINVAL;
}
- if (!is_ext_value_in_range(ext, value)) {
- pr_warn("extern %s=%llu value doesn't fit in %d bytes\n",
- ext->name, (unsigned long long)value, ext->sz);
+ if (!is_kcfg_value_in_range(ext, value)) {
+ pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
+ ext->name, (unsigned long long)value, ext->kcfg.sz);
return -ERANGE;
}
- switch (ext->sz) {
+ switch (ext->kcfg.sz) {
case 1: *(__u8 *)ext_val = value; break;
case 2: *(__u16 *)ext_val = value; break;
case 4: *(__u32 *)ext_val = value; break;
@@ -1586,30 +1633,30 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj,
if (!ext || ext->is_set)
return 0;
- ext_val = data + ext->data_off;
+ ext_val = data + ext->kcfg.data_off;
value = sep + 1;
switch (*value) {
case 'y': case 'n': case 'm':
- err = set_ext_value_tri(ext, ext_val, *value);
+ err = set_kcfg_value_tri(ext, ext_val, *value);
break;
case '"':
- err = set_ext_value_str(ext, ext_val, value);
+ err = set_kcfg_value_str(ext, ext_val, value);
break;
default:
/* assume integer */
err = parse_u64(value, &num);
if (err) {
- pr_warn("extern %s=%s should be integer\n",
+ pr_warn("extern (kcfg) %s=%s should be integer\n",
ext->name, value);
return err;
}
- err = set_ext_value_num(ext, ext_val, num);
+ err = set_kcfg_value_num(ext, ext_val, num);
break;
}
if (err)
return err;
- pr_debug("extern %s=%s\n", ext->name, value);
+ pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
return 0;
}
@@ -1680,16 +1727,20 @@ static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
static int bpf_object__init_kconfig_map(struct bpf_object *obj)
{
- struct extern_desc *last_ext;
+ struct extern_desc *last_ext = NULL, *ext;
size_t map_sz;
- int err;
+ int i, err;
- if (obj->nr_extern == 0)
- return 0;
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (ext->type == EXT_KCFG)
+ last_ext = ext;
+ }
- last_ext = &obj->externs[obj->nr_extern - 1];
- map_sz = last_ext->data_off + last_ext->sz;
+ if (!last_ext)
+ return 0;
+ map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
obj->efile.symbols_shndx,
NULL, map_sz);
@@ -1714,12 +1765,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
if (!symbols)
return -EINVAL;
- scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
- if (scn)
- data = elf_getdata(scn, NULL);
+
+ scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
+ data = elf_sec_data(obj, scn);
if (!scn || !data) {
- pr_warn("failed to get Elf_Data from map section %d\n",
- obj->efile.maps_shndx);
+ pr_warn("elf: failed to get legacy map definitions for %s\n",
+ obj->path);
return -EINVAL;
}
@@ -1741,12 +1792,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
nr_maps++;
}
/* Assume equally sized map definitions */
- pr_debug("maps in %s: %d maps in %zd bytes\n",
- obj->path, nr_maps, data->d_size);
+ pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n",
+ nr_maps, data->d_size, obj->path);
if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
- pr_warn("unable to determine map definition size section %s, %d maps in %zd bytes\n",
- obj->path, nr_maps, data->d_size);
+ pr_warn("elf: unable to determine legacy map definition size in %s\n",
+ obj->path);
return -EINVAL;
}
map_def_sz = data->d_size / nr_maps;
@@ -1767,8 +1818,7 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
if (IS_ERR(map))
return PTR_ERR(map);
- map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name);
+ map_name = elf_sym_str(obj, sym.st_name);
if (!map_name) {
pr_warn("failed to get map #%d name sym string for obj %s\n",
i, obj->path);
@@ -1856,6 +1906,29 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
return btf_is_func_proto(t) ? t : NULL;
}
+static const char *btf_kind_str(const struct btf_type *t)
+{
+ switch (btf_kind(t)) {
+ case BTF_KIND_UNKN: return "void";
+ case BTF_KIND_INT: return "int";
+ case BTF_KIND_PTR: return "ptr";
+ case BTF_KIND_ARRAY: return "array";
+ case BTF_KIND_STRUCT: return "struct";
+ case BTF_KIND_UNION: return "union";
+ case BTF_KIND_ENUM: return "enum";
+ case BTF_KIND_FWD: return "fwd";
+ case BTF_KIND_TYPEDEF: return "typedef";
+ case BTF_KIND_VOLATILE: return "volatile";
+ case BTF_KIND_CONST: return "const";
+ case BTF_KIND_RESTRICT: return "restrict";
+ case BTF_KIND_FUNC: return "func";
+ case BTF_KIND_FUNC_PROTO: return "func_proto";
+ case BTF_KIND_VAR: return "var";
+ case BTF_KIND_DATASEC: return "datasec";
+ default: return "unknown";
+ }
+}
+
/*
* Fetch integer attribute of BTF map definition. Such attributes are
* represented using a pointer to an array, in which dimensionality of array
@@ -1872,8 +1945,8 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
const struct btf_type *arr_t;
if (!btf_is_ptr(t)) {
- pr_warn("map '%s': attr '%s': expected PTR, got %u.\n",
- map_name, name, btf_kind(t));
+ pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
+ map_name, name, btf_kind_str(t));
return false;
}
@@ -1884,8 +1957,8 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
return false;
}
if (!btf_is_array(arr_t)) {
- pr_warn("map '%s': attr '%s': expected ARRAY, got %u.\n",
- map_name, name, btf_kind(arr_t));
+ pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
+ map_name, name, btf_kind_str(arr_t));
return false;
}
arr_info = btf_array(arr_t);
@@ -1896,7 +1969,7 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
static int build_map_pin_path(struct bpf_map *map, const char *path)
{
char buf[PATH_MAX];
- int err, len;
+ int len;
if (!path)
path = "/sys/fs/bpf";
@@ -1907,116 +1980,61 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
else if (len >= PATH_MAX)
return -ENAMETOOLONG;
- err = bpf_map__set_pin_path(map, buf);
- if (err)
- return err;
-
- return 0;
+ return bpf_map__set_pin_path(map, buf);
}
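As background for the attribute parsing below (a sketch under the usual bpf_helpers.h conventions; values hypothetical): the pointer-to-array encoding that get_map_field_int() decodes is produced by the __uint()/__type() macros, where the dimensionality of the pointed-to array carries the integer value.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* __uint(name, val) expands to roughly: int (*name)[val], so the integer
 * attribute is recovered from the pointed-to array's nelems
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);  /* int (*type)[BPF_MAP_TYPE_HASH] */
	__uint(max_entries, 128);         /* int (*max_entries)[128] */
	__type(key, __u32);               /* __u32 *key */
	__type(value, __u64);             /* __u64 *value */
} my_map SEC(".maps");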
-static int bpf_object__init_user_btf_map(struct bpf_object *obj,
- const struct btf_type *sec,
- int var_idx, int sec_idx,
- const Elf_Data *data, bool strict,
- const char *pin_root_path)
+
+static int parse_btf_map_def(struct bpf_object *obj,
+ struct bpf_map *map,
+ const struct btf_type *def,
+ bool strict, bool is_inner,
+ const char *pin_root_path)
{
- const struct btf_type *var, *def, *t;
- const struct btf_var_secinfo *vi;
- const struct btf_var *var_extra;
+ const struct btf_type *t;
const struct btf_member *m;
- const char *map_name;
- struct bpf_map *map;
int vlen, i;
- vi = btf_var_secinfos(sec) + var_idx;
- var = btf__type_by_id(obj->btf, vi->type);
- var_extra = btf_var(var);
- map_name = btf__name_by_offset(obj->btf, var->name_off);
- vlen = btf_vlen(var);
-
- if (map_name == NULL || map_name[0] == '\0') {
- pr_warn("map #%d: empty name.\n", var_idx);
- return -EINVAL;
- }
- if ((__u64)vi->offset + vi->size > data->d_size) {
- pr_warn("map '%s' BTF data is corrupted.\n", map_name);
- return -EINVAL;
- }
- if (!btf_is_var(var)) {
- pr_warn("map '%s': unexpected var kind %u.\n",
- map_name, btf_kind(var));
- return -EINVAL;
- }
- if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
- var_extra->linkage != BTF_VAR_STATIC) {
- pr_warn("map '%s': unsupported var linkage %u.\n",
- map_name, var_extra->linkage);
- return -EOPNOTSUPP;
- }
-
- def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
- if (!btf_is_struct(def)) {
- pr_warn("map '%s': unexpected def kind %u.\n",
- map_name, btf_kind(var));
- return -EINVAL;
- }
- if (def->size > vi->size) {
- pr_warn("map '%s': invalid def size.\n", map_name);
- return -EINVAL;
- }
-
- map = bpf_object__add_map(obj);
- if (IS_ERR(map))
- return PTR_ERR(map);
- map->name = strdup(map_name);
- if (!map->name) {
- pr_warn("map '%s': failed to alloc map name.\n", map_name);
- return -ENOMEM;
- }
- map->libbpf_type = LIBBPF_MAP_UNSPEC;
- map->def.type = BPF_MAP_TYPE_UNSPEC;
- map->sec_idx = sec_idx;
- map->sec_offset = vi->offset;
- pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
- map_name, map->sec_idx, map->sec_offset);
-
vlen = btf_vlen(def);
m = btf_members(def);
for (i = 0; i < vlen; i++, m++) {
const char *name = btf__name_by_offset(obj->btf, m->name_off);
if (!name) {
- pr_warn("map '%s': invalid field #%d.\n", map_name, i);
+ pr_warn("map '%s': invalid field #%d.\n", map->name, i);
return -EINVAL;
}
if (strcmp(name, "type") == 0) {
- if (!get_map_field_int(map_name, obj->btf, m,
+ if (!get_map_field_int(map->name, obj->btf, m,
&map->def.type))
return -EINVAL;
pr_debug("map '%s': found type = %u.\n",
- map_name, map->def.type);
+ map->name, map->def.type);
} else if (strcmp(name, "max_entries") == 0) {
- if (!get_map_field_int(map_name, obj->btf, m,
+ if (!get_map_field_int(map->name, obj->btf, m,
&map->def.max_entries))
return -EINVAL;
pr_debug("map '%s': found max_entries = %u.\n",
- map_name, map->def.max_entries);
+ map->name, map->def.max_entries);
} else if (strcmp(name, "map_flags") == 0) {
- if (!get_map_field_int(map_name, obj->btf, m,
+ if (!get_map_field_int(map->name, obj->btf, m,
&map->def.map_flags))
return -EINVAL;
pr_debug("map '%s': found map_flags = %u.\n",
- map_name, map->def.map_flags);
+ map->name, map->def.map_flags);
+ } else if (strcmp(name, "numa_node") == 0) {
+ if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
+ return -EINVAL;
+ pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
} else if (strcmp(name, "key_size") == 0) {
__u32 sz;
- if (!get_map_field_int(map_name, obj->btf, m, &sz))
+ if (!get_map_field_int(map->name, obj->btf, m, &sz))
return -EINVAL;
pr_debug("map '%s': found key_size = %u.\n",
- map_name, sz);
+ map->name, sz);
if (map->def.key_size && map->def.key_size != sz) {
pr_warn("map '%s': conflicting key size %u != %u.\n",
- map_name, map->def.key_size, sz);
+ map->name, map->def.key_size, sz);
return -EINVAL;
}
map->def.key_size = sz;
@@ -2026,25 +2044,25 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
t = btf__type_by_id(obj->btf, m->type);
if (!t) {
pr_warn("map '%s': key type [%d] not found.\n",
- map_name, m->type);
+ map->name, m->type);
return -EINVAL;
}
if (!btf_is_ptr(t)) {
- pr_warn("map '%s': key spec is not PTR: %u.\n",
- map_name, btf_kind(t));
+ pr_warn("map '%s': key spec is not PTR: %s.\n",
+ map->name, btf_kind_str(t));
return -EINVAL;
}
sz = btf__resolve_size(obj->btf, t->type);
if (sz < 0) {
pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
- map_name, t->type, (ssize_t)sz);
+ map->name, t->type, (ssize_t)sz);
return sz;
}
pr_debug("map '%s': found key [%u], sz = %zd.\n",
- map_name, t->type, (ssize_t)sz);
+ map->name, t->type, (ssize_t)sz);
if (map->def.key_size && map->def.key_size != sz) {
pr_warn("map '%s': conflicting key size %u != %zd.\n",
- map_name, map->def.key_size, (ssize_t)sz);
+ map->name, map->def.key_size, (ssize_t)sz);
return -EINVAL;
}
map->def.key_size = sz;
@@ -2052,13 +2070,13 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
} else if (strcmp(name, "value_size") == 0) {
__u32 sz;
- if (!get_map_field_int(map_name, obj->btf, m, &sz))
+ if (!get_map_field_int(map->name, obj->btf, m, &sz))
return -EINVAL;
pr_debug("map '%s': found value_size = %u.\n",
- map_name, sz);
+ map->name, sz);
if (map->def.value_size && map->def.value_size != sz) {
pr_warn("map '%s': conflicting value size %u != %u.\n",
- map_name, map->def.value_size, sz);
+ map->name, map->def.value_size, sz);
return -EINVAL;
}
map->def.value_size = sz;
@@ -2068,71 +2086,207 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
t = btf__type_by_id(obj->btf, m->type);
if (!t) {
pr_warn("map '%s': value type [%d] not found.\n",
- map_name, m->type);
+ map->name, m->type);
return -EINVAL;
}
if (!btf_is_ptr(t)) {
- pr_warn("map '%s': value spec is not PTR: %u.\n",
- map_name, btf_kind(t));
+ pr_warn("map '%s': value spec is not PTR: %s.\n",
+ map->name, btf_kind_str(t));
return -EINVAL;
}
sz = btf__resolve_size(obj->btf, t->type);
if (sz < 0) {
pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
- map_name, t->type, (ssize_t)sz);
+ map->name, t->type, (ssize_t)sz);
return sz;
}
pr_debug("map '%s': found value [%u], sz = %zd.\n",
- map_name, t->type, (ssize_t)sz);
+ map->name, t->type, (ssize_t)sz);
if (map->def.value_size && map->def.value_size != sz) {
pr_warn("map '%s': conflicting value size %u != %zd.\n",
- map_name, map->def.value_size, (ssize_t)sz);
+ map->name, map->def.value_size, (ssize_t)sz);
return -EINVAL;
}
map->def.value_size = sz;
map->btf_value_type_id = t->type;
+ } else if (strcmp(name, "values") == 0) {
+ int err;
+
+ if (is_inner) {
+ pr_warn("map '%s': multi-level inner maps not supported.\n",
+ map->name);
+ return -ENOTSUP;
+ }
+ if (i != vlen - 1) {
+ pr_warn("map '%s': '%s' member should be last.\n",
+ map->name, name);
+ return -EINVAL;
+ }
+ if (!bpf_map_type__is_map_in_map(map->def.type)) {
+ pr_warn("map '%s': should be map-in-map.\n",
+ map->name);
+ return -ENOTSUP;
+ }
+ if (map->def.value_size && map->def.value_size != 4) {
+ pr_warn("map '%s': conflicting value size %u != 4.\n",
+ map->name, map->def.value_size);
+ return -EINVAL;
+ }
+ map->def.value_size = 4;
+ t = btf__type_by_id(obj->btf, m->type);
+ if (!t) {
+ pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
+ map->name, m->type);
+ return -EINVAL;
+ }
+ if (!btf_is_array(t) || btf_array(t)->nelems) {
+ pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
+ map->name);
+ return -EINVAL;
+ }
+ t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
+ NULL);
+ if (!btf_is_ptr(t)) {
+ pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
+ map->name, btf_kind_str(t));
+ return -EINVAL;
+ }
+ t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+ if (!btf_is_struct(t)) {
+ pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
+ map->name, btf_kind_str(t));
+ return -EINVAL;
+ }
+
+ map->inner_map = calloc(1, sizeof(*map->inner_map));
+ if (!map->inner_map)
+ return -ENOMEM;
+ map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
+ map->inner_map->name = malloc(strlen(map->name) +
+ sizeof(".inner") + 1);
+ if (!map->inner_map->name)
+ return -ENOMEM;
+ sprintf(map->inner_map->name, "%s.inner", map->name);
+
+ err = parse_btf_map_def(obj, map->inner_map, t, strict,
+ true /* is_inner */, NULL);
+ if (err)
+ return err;
} else if (strcmp(name, "pinning") == 0) {
__u32 val;
int err;
- if (!get_map_field_int(map_name, obj->btf, m, &val))
+ if (is_inner) {
+ pr_debug("map '%s': inner def can't be pinned.\n",
+ map->name);
+ return -EINVAL;
+ }
+ if (!get_map_field_int(map->name, obj->btf, m, &val))
return -EINVAL;
pr_debug("map '%s': found pinning = %u.\n",
- map_name, val);
+ map->name, val);
if (val != LIBBPF_PIN_NONE &&
val != LIBBPF_PIN_BY_NAME) {
pr_warn("map '%s': invalid pinning value %u.\n",
- map_name, val);
+ map->name, val);
return -EINVAL;
}
if (val == LIBBPF_PIN_BY_NAME) {
err = build_map_pin_path(map, pin_root_path);
if (err) {
pr_warn("map '%s': couldn't build pin path.\n",
- map_name);
+ map->name);
return err;
}
}
} else {
if (strict) {
pr_warn("map '%s': unknown field '%s'.\n",
- map_name, name);
+ map->name, name);
return -ENOTSUP;
}
pr_debug("map '%s': ignoring unknown field '%s'.\n",
- map_name, name);
+ map->name, name);
}
}
if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
- pr_warn("map '%s': map type isn't specified.\n", map_name);
+ pr_warn("map '%s': map type isn't specified.\n", map->name);
return -EINVAL;
}
return 0;
}
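A hedged example of the BTF-defined map-in-map declaration that the new 'values' branch of parse_btf_map_def() accepts (map names hypothetical):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct inner_map {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, int);
} inner_map1 SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, 8);
	__uint(key_size, sizeof(int));
	/* zero-sized array of pointers to the inner definition; this is
	 * what the 'values' handling walks to build map->inner_map
	 */
	__array(values, struct inner_map);
} outer_map SEC(".maps") = {
	.values = { [0] = &inner_map1 },
};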
+static int bpf_object__init_user_btf_map(struct bpf_object *obj,
+ const struct btf_type *sec,
+ int var_idx, int sec_idx,
+ const Elf_Data *data, bool strict,
+ const char *pin_root_path)
+{
+ const struct btf_type *var, *def;
+ const struct btf_var_secinfo *vi;
+ const struct btf_var *var_extra;
+ const char *map_name;
+ struct bpf_map *map;
+
+ vi = btf_var_secinfos(sec) + var_idx;
+ var = btf__type_by_id(obj->btf, vi->type);
+ var_extra = btf_var(var);
+ map_name = btf__name_by_offset(obj->btf, var->name_off);
+
+ if (map_name == NULL || map_name[0] == '\0') {
+ pr_warn("map #%d: empty name.\n", var_idx);
+ return -EINVAL;
+ }
+ if ((__u64)vi->offset + vi->size > data->d_size) {
+ pr_warn("map '%s' BTF data is corrupted.\n", map_name);
+ return -EINVAL;
+ }
+ if (!btf_is_var(var)) {
+ pr_warn("map '%s': unexpected var kind %s.\n",
+ map_name, btf_kind_str(var));
+ return -EINVAL;
+ }
+ if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
+ var_extra->linkage != BTF_VAR_STATIC) {
+ pr_warn("map '%s': unsupported var linkage %u.\n",
+ map_name, var_extra->linkage);
+ return -EOPNOTSUPP;
+ }
+
+ def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
+ if (!btf_is_struct(def)) {
+ pr_warn("map '%s': unexpected def kind %s.\n",
+ map_name, btf_kind_str(var));
+ return -EINVAL;
+ }
+ if (def->size > vi->size) {
+ pr_warn("map '%s': invalid def size.\n", map_name);
+ return -EINVAL;
+ }
+
+ map = bpf_object__add_map(obj);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+ map->name = strdup(map_name);
+ if (!map->name) {
+ pr_warn("map '%s': failed to alloc map name.\n", map_name);
+ return -ENOMEM;
+ }
+ map->libbpf_type = LIBBPF_MAP_UNSPEC;
+ map->def.type = BPF_MAP_TYPE_UNSPEC;
+ map->sec_idx = sec_idx;
+ map->sec_offset = vi->offset;
+ map->btf_var_idx = var_idx;
+ pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
+ map_name, map->sec_idx, map->sec_offset);
+
+ return parse_btf_map_def(obj, map, def, strict, false, pin_root_path);
+}
+
static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
const char *pin_root_path)
{
@@ -2146,12 +2300,11 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
if (obj->efile.btf_maps_shndx < 0)
return 0;
- scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx);
- if (scn)
- data = elf_getdata(scn, NULL);
+ scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
+ data = elf_sec_data(obj, scn);
if (!scn || !data) {
- pr_warn("failed to get Elf_Data from map section %d (%s)\n",
- obj->efile.maps_shndx, MAPS_ELF_SEC);
+ pr_warn("elf: failed to get %s map definitions for %s\n",
+ MAPS_ELF_SEC, obj->path);
return -EINVAL;
}
@@ -2163,6 +2316,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
name = btf__name_by_offset(obj->btf, t->name_off);
if (strcmp(name, MAPS_ELF_SEC) == 0) {
sec = t;
+ obj->efile.btf_maps_sec_btf_id = i;
break;
}
}
@@ -2208,34 +2362,31 @@ static int bpf_object__init_maps(struct bpf_object *obj,
static bool section_have_execinstr(struct bpf_object *obj, int idx)
{
- Elf_Scn *scn;
GElf_Shdr sh;
- scn = elf_getscn(obj->efile.elf, idx);
- if (!scn)
+ if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh))
return false;
- if (gelf_getshdr(scn, &sh) != &sh)
- return false;
+ return sh.sh_flags & SHF_EXECINSTR;
+}
- if (sh.sh_flags & SHF_EXECINSTR)
- return true;
+static bool btf_needs_sanitization(struct bpf_object *obj)
+{
+ bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
+ bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+ bool has_func = kernel_supports(FEAT_BTF_FUNC);
- return false;
+ return !has_func || !has_datasec || !has_func_global;
}
-static void bpf_object__sanitize_btf(struct bpf_object *obj)
+static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
{
- bool has_func_global = obj->caps.btf_func_global;
- bool has_datasec = obj->caps.btf_datasec;
- bool has_func = obj->caps.btf_func;
- struct btf *btf = obj->btf;
+ bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
+ bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+ bool has_func = kernel_supports(FEAT_BTF_FUNC);
struct btf_type *t;
int i, j, vlen;
- if (!obj->btf || (has_func && has_datasec && has_func_global))
- return;
-
for (i = 1; i <= btf__get_nr_types(btf); i++) {
t = (struct btf_type *)btf__type_by_id(btf, i);
@@ -2288,17 +2439,6 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj)
}
}
-static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
-{
- if (!obj->btf_ext)
- return;
-
- if (!obj->caps.btf_func) {
- btf_ext__free(obj->btf_ext);
- obj->btf_ext = NULL;
- }
-}
-
static bool libbpf_needs_btf(const struct bpf_object *obj)
{
return obj->efile.btf_maps_shndx >= 0 ||
@@ -2326,6 +2466,8 @@ static int bpf_object__init_btf(struct bpf_object *obj,
BTF_ELF_SEC, err);
goto out;
}
+ /* enforce 8-byte pointers for BPF-targeted BTFs */
+ btf__set_pointer_size(obj->btf, 8);
err = 0;
}
if (btf_ext_data) {
@@ -2359,19 +2501,11 @@ static int bpf_object__finalize_btf(struct bpf_object *obj)
return 0;
err = btf__finalize_data(obj, obj->btf);
- if (!err)
- return 0;
-
- pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
- btf__free(obj->btf);
- obj->btf = NULL;
- btf_ext__free(obj->btf_ext);
- obj->btf_ext = NULL;
-
- if (libbpf_needs_btf(obj)) {
- pr_warn("BTF is required, but is missing or corrupted.\n");
- return -ENOENT;
+ if (err) {
+ pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
+ return err;
}
+
return 0;
}
@@ -2392,108 +2526,351 @@ static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
{
+ bool need_vmlinux_btf = false;
struct bpf_program *prog;
- int err;
+ int i, err;
+
+ /* CO-RE relocations need kernel BTF */
+ if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
+ need_vmlinux_btf = true;
+
+ /* Support for typed ksyms needs kernel BTF */
+ for (i = 0; i < obj->nr_extern; i++) {
+ const struct extern_desc *ext;
+
+ ext = &obj->externs[i];
+ if (ext->type == EXT_KSYM && ext->ksym.type_id) {
+ need_vmlinux_btf = true;
+ break;
+ }
+ }
bpf_object__for_each_program(prog, obj) {
+ if (!prog->load)
+ continue;
if (libbpf_prog_needs_vmlinux_btf(prog)) {
- obj->btf_vmlinux = libbpf_find_kernel_btf();
- if (IS_ERR(obj->btf_vmlinux)) {
- err = PTR_ERR(obj->btf_vmlinux);
- pr_warn("Error loading vmlinux BTF: %d\n", err);
- obj->btf_vmlinux = NULL;
- return err;
- }
- return 0;
+ need_vmlinux_btf = true;
+ break;
}
}
+ if (!need_vmlinux_btf)
+ return 0;
+
+ obj->btf_vmlinux = libbpf_find_kernel_btf();
+ if (IS_ERR(obj->btf_vmlinux)) {
+ err = PTR_ERR(obj->btf_vmlinux);
+ pr_warn("Error loading vmlinux BTF: %d\n", err);
+ obj->btf_vmlinux = NULL;
+ return err;
+ }
return 0;
}
static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
{
+ struct btf *kern_btf = obj->btf;
+ bool btf_mandatory, sanitize;
int err = 0;
if (!obj->btf)
return 0;
- bpf_object__sanitize_btf(obj);
- bpf_object__sanitize_btf_ext(obj);
+ if (!kernel_supports(FEAT_BTF)) {
+ if (kernel_needs_btf(obj)) {
+ err = -EOPNOTSUPP;
+ goto report;
+ }
+ pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
+ return 0;
+ }
+
+ sanitize = btf_needs_sanitization(obj);
+ if (sanitize) {
+ const void *raw_data;
+ __u32 sz;
- err = btf__load(obj->btf);
- if (err) {
- pr_warn("Error loading %s into kernel: %d.\n",
- BTF_ELF_SEC, err);
- btf__free(obj->btf);
- obj->btf = NULL;
- /* btf_ext can't exist without btf, so free it as well */
- if (obj->btf_ext) {
- btf_ext__free(obj->btf_ext);
- obj->btf_ext = NULL;
+ /* clone BTF to sanitize a copy and leave the original intact */
+ raw_data = btf__get_raw_data(obj->btf, &sz);
+ kern_btf = btf__new(raw_data, sz);
+ if (IS_ERR(kern_btf))
+ return PTR_ERR(kern_btf);
+
+ /* enforce 8-byte pointers for BPF-targeted BTFs */
+ btf__set_pointer_size(obj->btf, 8);
+ bpf_object__sanitize_btf(obj, kern_btf);
+ }
+
+ err = btf__load(kern_btf);
+ if (sanitize) {
+ if (!err) {
+ /* move fd to libbpf's BTF */
+ btf__set_fd(obj->btf, btf__fd(kern_btf));
+ btf__set_fd(kern_btf, -1);
}
+ btf__free(kern_btf);
+ }
+report:
+ if (err) {
+ btf_mandatory = kernel_needs_btf(obj);
+ pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
+ btf_mandatory ? "BTF is mandatory, can't proceed."
+ : "BTF is optional, ignoring.");
+ if (!btf_mandatory)
+ err = 0;
+ }
+ return err;
+}
- if (kernel_needs_btf(obj))
- return err;
+static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
+{
+ const char *name;
+
+ name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
+ if (!name) {
+ pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
+ off, obj->path, elf_errmsg(-1));
+ return NULL;
+ }
+
+ return name;
+}
+
+static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
+{
+ const char *name;
+
+ name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
+ if (!name) {
+ pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
+ off, obj->path, elf_errmsg(-1));
+ return NULL;
+ }
+
+ return name;
+}
+
+static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
+{
+ Elf_Scn *scn;
+
+ scn = elf_getscn(obj->efile.elf, idx);
+ if (!scn) {
+ pr_warn("elf: failed to get section(%zu) from %s: %s\n",
+ idx, obj->path, elf_errmsg(-1));
+ return NULL;
+ }
+ return scn;
+}
+
+static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
+{
+ Elf_Scn *scn = NULL;
+ Elf *elf = obj->efile.elf;
+ const char *sec_name;
+
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ sec_name = elf_sec_name(obj, scn);
+ if (!sec_name)
+ return NULL;
+
+ if (strcmp(sec_name, name) != 0)
+ continue;
+
+ return scn;
+ }
+ return NULL;
+}
+
+static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr)
+{
+ if (!scn)
+ return -EINVAL;
+
+ if (gelf_getshdr(scn, hdr) != hdr) {
+ pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
+ elf_ndxscn(scn), obj->path, elf_errmsg(-1));
+ return -EINVAL;
}
+
return 0;
}
+static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
+{
+ const char *name;
+ GElf_Shdr sh;
+
+ if (!scn)
+ return NULL;
+
+ if (elf_sec_hdr(obj, scn, &sh))
+ return NULL;
+
+ name = elf_sec_str(obj, sh.sh_name);
+ if (!name) {
+ pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
+ elf_ndxscn(scn), obj->path, elf_errmsg(-1));
+ return NULL;
+ }
+
+ return name;
+}
+
+static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
+{
+ Elf_Data *data;
+
+ if (!scn)
+ return NULL;
+
+ data = elf_getdata(scn, 0);
+ if (!data) {
+ pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
+ elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
+ obj->path, elf_errmsg(-1));
+ return NULL;
+ }
+
+ return data;
+}
+
+static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
+ size_t off, __u32 sym_type, GElf_Sym *sym)
+{
+ Elf_Data *symbols = obj->efile.symbols;
+ size_t n = symbols->d_size / sizeof(GElf_Sym);
+ int i;
+
+ for (i = 0; i < n; i++) {
+ if (!gelf_getsym(symbols, i, sym))
+ continue;
+ if (sym->st_shndx != sec_idx || sym->st_value != off)
+ continue;
+ if (GELF_ST_TYPE(sym->st_info) != sym_type)
+ continue;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static bool is_sec_name_dwarf(const char *name)
+{
+ /* approximation, but the actual list is too long */
+ return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
+}
+
+static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
+{
+ /* no special handling of .strtab */
+ if (hdr->sh_type == SHT_STRTAB)
+ return true;
+
+ /* ignore .llvm_addrsig section as well */
+ if (hdr->sh_type == 0x6FFF4C03 /* SHT_LLVM_ADDRSIG */)
+ return true;
+
+ /* no subprograms will lead to an empty .text section, ignore it */
+ if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
+ strcmp(name, ".text") == 0)
+ return true;
+
+ /* DWARF sections */
+ if (is_sec_name_dwarf(name))
+ return true;
+
+ if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
+ name += sizeof(".rel") - 1;
+ /* DWARF section relocations */
+ if (is_sec_name_dwarf(name))
+ return true;
+
+ /* .BTF and .BTF.ext don't need relocations */
+ if (strcmp(name, BTF_ELF_SEC) == 0 ||
+ strcmp(name, BTF_EXT_ELF_SEC) == 0)
+ return true;
+ }
+
+ return false;
+}
+
+static int cmp_progs(const void *_a, const void *_b)
+{
+ const struct bpf_program *a = _a;
+ const struct bpf_program *b = _b;
+
+ if (a->sec_idx != b->sec_idx)
+ return a->sec_idx < b->sec_idx ? -1 : 1;
+
+ /* sec_insn_off can't be the same within the section */
+ return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
+}
+
static int bpf_object__elf_collect(struct bpf_object *obj)
{
Elf *elf = obj->efile.elf;
- GElf_Ehdr *ep = &obj->efile.ehdr;
Elf_Data *btf_ext_data = NULL;
Elf_Data *btf_data = NULL;
- Elf_Scn *scn = NULL;
int idx = 0, err = 0;
+ const char *name;
+ Elf_Data *data;
+ Elf_Scn *scn;
+ GElf_Shdr sh;
- /* Elf is corrupted/truncated, avoid calling elf_strptr. */
- if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
- pr_warn("failed to get e_shstrndx from %s\n", obj->path);
- return -LIBBPF_ERRNO__FORMAT;
+ /* a bunch of ELF parsing functionality depends on processing symbols,
+ * so do the first pass and find the symbol table
+ */
+ scn = NULL;
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ if (elf_sec_hdr(obj, scn, &sh))
+ return -LIBBPF_ERRNO__FORMAT;
+
+ if (sh.sh_type == SHT_SYMTAB) {
+ if (obj->efile.symbols) {
+ pr_warn("elf: multiple symbol tables in %s\n", obj->path);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+
+ data = elf_sec_data(obj, scn);
+ if (!data)
+ return -LIBBPF_ERRNO__FORMAT;
+
+ obj->efile.symbols = data;
+ obj->efile.symbols_shndx = elf_ndxscn(scn);
+ obj->efile.strtabidx = sh.sh_link;
+ }
}
+ scn = NULL;
while ((scn = elf_nextscn(elf, scn)) != NULL) {
- char *name;
- GElf_Shdr sh;
- Elf_Data *data;
-
idx++;
- if (gelf_getshdr(scn, &sh) != &sh) {
- pr_warn("failed to get section(%d) header from %s\n",
- idx, obj->path);
+
+ if (elf_sec_hdr(obj, scn, &sh))
return -LIBBPF_ERRNO__FORMAT;
- }
- name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
- if (!name) {
- pr_warn("failed to get section(%d) name from %s\n",
- idx, obj->path);
+ name = elf_sec_str(obj, sh.sh_name);
+ if (!name)
return -LIBBPF_ERRNO__FORMAT;
- }
- data = elf_getdata(scn, 0);
- if (!data) {
- pr_warn("failed to get section(%d) data from %s(%s)\n",
- idx, name, obj->path);
+ if (ignore_elf_section(&sh, name))
+ continue;
+
+ data = elf_sec_data(obj, scn);
+ if (!data)
return -LIBBPF_ERRNO__FORMAT;
- }
- pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
+
+ pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
idx, name, (unsigned long)data->d_size,
(int)sh.sh_link, (unsigned long)sh.sh_flags,
(int)sh.sh_type);
if (strcmp(name, "license") == 0) {
- err = bpf_object__init_license(obj,
- data->d_buf,
- data->d_size);
+ err = bpf_object__init_license(obj, data->d_buf, data->d_size);
if (err)
return err;
} else if (strcmp(name, "version") == 0) {
- err = bpf_object__init_kversion(obj,
- data->d_buf,
- data->d_size);
+ err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
if (err)
return err;
} else if (strcmp(name, "maps") == 0) {
@@ -2505,31 +2882,14 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
btf_ext_data = data;
} else if (sh.sh_type == SHT_SYMTAB) {
- if (obj->efile.symbols) {
- pr_warn("bpf: multiple SYMTAB in %s\n",
- obj->path);
- return -LIBBPF_ERRNO__FORMAT;
- }
- obj->efile.symbols = data;
- obj->efile.symbols_shndx = idx;
- obj->efile.strtabidx = sh.sh_link;
+ /* already processed during the first pass above */
} else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
if (sh.sh_flags & SHF_EXECINSTR) {
if (strcmp(name, ".text") == 0)
obj->efile.text_shndx = idx;
- err = bpf_object__add_program(obj, data->d_buf,
- data->d_size,
- name, idx);
- if (err) {
- char errmsg[STRERR_BUFSIZE];
- char *cp;
-
- cp = libbpf_strerror_r(-err, errmsg,
- sizeof(errmsg));
- pr_warn("failed to alloc program %s (%s): %s",
- name, obj->path, cp);
+ err = bpf_object__add_programs(obj, data, name, idx);
+ if (err)
return err;
- }
} else if (strcmp(name, DATA_SEC) == 0) {
obj->efile.data = data;
obj->efile.data_shndx = idx;
@@ -2540,7 +2900,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
obj->efile.st_ops_data = data;
obj->efile.st_ops_shndx = idx;
} else {
- pr_debug("skip section(%d) %s\n", idx, name);
+ pr_info("elf: skipping unrecognized data section(%d) %s\n",
+ idx, name);
}
} else if (sh.sh_type == SHT_REL) {
int nr_sects = obj->efile.nr_reloc_sects;
@@ -2549,37 +2910,42 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
/* Only do relo for section with exec instructions */
if (!section_have_execinstr(obj, sec) &&
- strcmp(name, ".rel" STRUCT_OPS_SEC)) {
- pr_debug("skip relo %s(%d) for section(%d)\n",
- name, idx, sec);
+ strcmp(name, ".rel" STRUCT_OPS_SEC) &&
+ strcmp(name, ".rel" MAPS_ELF_SEC)) {
+ pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
+ idx, name, sec,
+ elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: "<?>");
continue;
}
- sects = reallocarray(sects, nr_sects + 1,
- sizeof(*obj->efile.reloc_sects));
- if (!sects) {
- pr_warn("reloc_sects realloc failed\n");
+ sects = libbpf_reallocarray(sects, nr_sects + 1,
+ sizeof(*obj->efile.reloc_sects));
+ if (!sects)
return -ENOMEM;
- }
obj->efile.reloc_sects = sects;
obj->efile.nr_reloc_sects++;
obj->efile.reloc_sects[nr_sects].shdr = sh;
obj->efile.reloc_sects[nr_sects].data = data;
- } else if (sh.sh_type == SHT_NOBITS &&
- strcmp(name, BSS_SEC) == 0) {
+ } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
obj->efile.bss = data;
obj->efile.bss_shndx = idx;
} else {
- pr_debug("skip section(%d) %s\n", idx, name);
+ pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
+ (size_t)sh.sh_size);
}
}
if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
- pr_warn("Corrupted ELF file: index of strtab invalid\n");
+ pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
return -LIBBPF_ERRNO__FORMAT;
}
+
+ /* sort BPF programs by section index and in-section instruction offset
+ * for faster search */
+ qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
+
return bpf_object__init_btf(obj, btf_data, btf_ext_data);
}
@@ -2621,8 +2987,33 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
return -ENOENT;
}
-static enum extern_type find_extern_type(const struct btf *btf, int id,
- bool *is_signed)
+static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
+{
+ const struct btf_var_secinfo *vs;
+ const struct btf_type *t;
+ int i, j, n;
+
+ if (!btf)
+ return -ESRCH;
+
+ n = btf__get_nr_types(btf);
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(btf, i);
+
+ if (!btf_is_datasec(t))
+ continue;
+
+ vs = btf_var_secinfos(t);
+ for (j = 0; j < btf_vlen(t); j++, vs++) {
+ if (vs->type == ext_btf_id)
+ return i;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
+ bool *is_signed)
{
const struct btf_type *t;
const char *name;
@@ -2637,29 +3028,29 @@ static enum extern_type find_extern_type(const struct btf *btf, int id,
int enc = btf_int_encoding(t);
if (enc & BTF_INT_BOOL)
- return t->size == 1 ? EXT_BOOL : EXT_UNKNOWN;
+ return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
if (is_signed)
*is_signed = enc & BTF_INT_SIGNED;
if (t->size == 1)
- return EXT_CHAR;
+ return KCFG_CHAR;
if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
- return EXT_UNKNOWN;
- return EXT_INT;
+ return KCFG_UNKNOWN;
+ return KCFG_INT;
}
case BTF_KIND_ENUM:
if (t->size != 4)
- return EXT_UNKNOWN;
+ return KCFG_UNKNOWN;
if (strcmp(name, "libbpf_tristate"))
- return EXT_UNKNOWN;
- return EXT_TRISTATE;
+ return KCFG_UNKNOWN;
+ return KCFG_TRISTATE;
case BTF_KIND_ARRAY:
if (btf_array(t)->nelems == 0)
- return EXT_UNKNOWN;
- if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR)
- return EXT_UNKNOWN;
- return EXT_CHAR_ARR;
+ return KCFG_UNKNOWN;
+ if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
+ return KCFG_UNKNOWN;
+ return KCFG_CHAR_ARR;
default:
- return EXT_UNKNOWN;
+ return KCFG_UNKNOWN;
}
}
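The kcfg classification above corresponds to extern declarations like the following in BPF C (a sketch; names are illustrative, and __kconfig is the bpf_helpers.h attribute that places them in the .kconfig section libbpf fills before load):

#include <bpf/bpf_helpers.h>

extern int LINUX_KERNEL_VERSION __kconfig;                   /* KCFG_INT */
extern enum libbpf_tristate CONFIG_BPF_LSM __kconfig __weak; /* KCFG_TRISTATE */
extern char CONFIG_LOCALVERSION[32] __kconfig __weak;        /* KCFG_CHAR_ARR */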
@@ -2668,37 +3059,58 @@ static int cmp_externs(const void *_a, const void *_b)
const struct extern_desc *a = _a;
const struct extern_desc *b = _b;
- /* descending order by alignment requirements */
- if (a->align != b->align)
- return a->align > b->align ? -1 : 1;
- /* ascending order by size, within same alignment class */
- if (a->sz != b->sz)
- return a->sz < b->sz ? -1 : 1;
+ if (a->type != b->type)
+ return a->type < b->type ? -1 : 1;
+
+ if (a->type == EXT_KCFG) {
+ /* descending order by alignment requirements */
+ if (a->kcfg.align != b->kcfg.align)
+ return a->kcfg.align > b->kcfg.align ? -1 : 1;
+ /* ascending order by size, within same alignment class */
+ if (a->kcfg.sz != b->kcfg.sz)
+ return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
+ }
+
/* resolve ties by name */
return strcmp(a->name, b->name);
}
+static int find_int_btf_id(const struct btf *btf)
+{
+ const struct btf_type *t;
+ int i, n;
+
+ n = btf__get_nr_types(btf);
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(btf, i);
+
+ if (btf_is_int(t) && btf_int_bits(t) == 32)
+ return i;
+ }
+
+ return 0;
+}
+
static int bpf_object__collect_externs(struct bpf_object *obj)
{
+ struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
const struct btf_type *t;
struct extern_desc *ext;
- int i, n, off, btf_id;
- struct btf_type *sec;
- const char *ext_name;
+ int i, n, off;
+ const char *ext_name, *sec_name;
Elf_Scn *scn;
GElf_Shdr sh;
if (!obj->efile.symbols)
return 0;
- scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx);
- if (!scn)
- return -LIBBPF_ERRNO__FORMAT;
- if (gelf_getshdr(scn, &sh) != &sh)
+ scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
+ if (elf_sec_hdr(obj, scn, &sh))
return -LIBBPF_ERRNO__FORMAT;
- n = sh.sh_size / sh.sh_entsize;
+ n = sh.sh_size / sh.sh_entsize;
pr_debug("looking for externs among %d symbols...\n", n);
+
for (i = 0; i < n; i++) {
GElf_Sym sym;
@@ -2706,13 +3118,12 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return -LIBBPF_ERRNO__FORMAT;
if (!sym_is_extern(&sym))
continue;
- ext_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name);
+ ext_name = elf_sym_str(obj, sym.st_name);
if (!ext_name || !ext_name[0])
continue;
ext = obj->externs;
- ext = reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
+ ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
if (!ext)
return -ENOMEM;
obj->externs = ext;
@@ -2730,22 +3141,44 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
ext->name = btf__name_by_offset(obj->btf, t->name_off);
ext->sym_idx = i;
ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
- ext->sz = btf__resolve_size(obj->btf, t->type);
- if (ext->sz <= 0) {
- pr_warn("failed to resolve size of extern '%s': %d\n",
- ext_name, ext->sz);
- return ext->sz;
- }
- ext->align = btf__align_of(obj->btf, t->type);
- if (ext->align <= 0) {
- pr_warn("failed to determine alignment of extern '%s': %d\n",
- ext_name, ext->align);
- return -EINVAL;
- }
- ext->type = find_extern_type(obj->btf, t->type,
- &ext->is_signed);
- if (ext->type == EXT_UNKNOWN) {
- pr_warn("extern '%s' type is unsupported\n", ext_name);
+
+ ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
+ if (ext->sec_btf_id <= 0) {
+ pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
+ ext_name, ext->btf_id, ext->sec_btf_id);
+ return ext->sec_btf_id;
+ }
+ sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
+ sec_name = btf__name_by_offset(obj->btf, sec->name_off);
+
+ if (strcmp(sec_name, KCONFIG_SEC) == 0) {
+ kcfg_sec = sec;
+ ext->type = EXT_KCFG;
+ ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
+ if (ext->kcfg.sz <= 0) {
+ pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
+ ext_name, ext->kcfg.sz);
+ return ext->kcfg.sz;
+ }
+ ext->kcfg.align = btf__align_of(obj->btf, t->type);
+ if (ext->kcfg.align <= 0) {
+ pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
+ ext_name, ext->kcfg.align);
+ return -EINVAL;
+ }
+ ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
+ &ext->kcfg.is_signed);
+ if (ext->kcfg.type == KCFG_UNKNOWN) {
+ pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
+ return -ENOTSUP;
+ }
+ } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
+ ksym_sec = sec;
+ ext->type = EXT_KSYM;
+ skip_mods_and_typedefs(obj->btf, t->type,
+ &ext->ksym.type_id);
+ } else {
+ pr_warn("unrecognized extern section '%s'\n", sec_name);
return -ENOTSUP;
}
}
@@ -2754,57 +3187,81 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
if (!obj->nr_extern)
return 0;
- /* sort externs by (alignment, size, name) and calculate their offsets
- * within a map */
+ /* sort externs by type, for kcfg ones also by (align, size, name) */
qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
- off = 0;
- for (i = 0; i < obj->nr_extern; i++) {
- ext = &obj->externs[i];
- ext->data_off = roundup(off, ext->align);
- off = ext->data_off + ext->sz;
- pr_debug("extern #%d: symbol %d, off %u, name %s\n",
- i, ext->sym_idx, ext->data_off, ext->name);
- }
- btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC);
- if (btf_id <= 0) {
- pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC);
- return -ESRCH;
- }
+ /* for .ksyms section, we need to turn all externs into allocated
+ * variables in BTF to pass kernel verification; we do this by
+ * pretending that each extern is an int-sized variable
+ */
+ if (ksym_sec) {
+ /* find existing 4-byte integer type in BTF to use for fake
+ * extern variables in DATASEC
+ */
+ int int_btf_id = find_int_btf_id(obj->btf);
- sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id);
- sec->size = off;
- n = btf_vlen(sec);
- for (i = 0; i < n; i++) {
- struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
-
- t = btf__type_by_id(obj->btf, vs->type);
- ext_name = btf__name_by_offset(obj->btf, t->name_off);
- ext = find_extern_by_name(obj, ext_name);
- if (!ext) {
- pr_warn("failed to find extern definition for BTF var '%s'\n",
- ext_name);
- return -ESRCH;
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (ext->type != EXT_KSYM)
+ continue;
+ pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
+ i, ext->sym_idx, ext->name);
}
- vs->offset = ext->data_off;
- btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
- }
- return 0;
-}
+ sec = ksym_sec;
+ n = btf_vlen(sec);
+ for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
+ struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
+ struct btf_type *vt;
-static struct bpf_program *
-bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
-{
- struct bpf_program *prog;
- size_t i;
+ vt = (void *)btf__type_by_id(obj->btf, vs->type);
+ ext_name = btf__name_by_offset(obj->btf, vt->name_off);
+ ext = find_extern_by_name(obj, ext_name);
+ if (!ext) {
+ pr_warn("failed to find extern definition for BTF var '%s'\n",
+ ext_name);
+ return -ESRCH;
+ }
+ btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ vt->type = int_btf_id;
+ vs->offset = off;
+ vs->size = sizeof(int);
+ }
+ sec->size = off;
+ }
- for (i = 0; i < obj->nr_programs; i++) {
- prog = &obj->programs[i];
- if (prog->idx == idx)
- return prog;
+ if (kcfg_sec) {
+ sec = kcfg_sec;
+ /* for kcfg externs calculate their offsets within a .kconfig map */
+ off = 0;
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (ext->type != EXT_KCFG)
+ continue;
+
+ ext->kcfg.data_off = roundup(off, ext->kcfg.align);
+ off = ext->kcfg.data_off + ext->kcfg.sz;
+ pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
+ i, ext->sym_idx, ext->kcfg.data_off, ext->name);
+ }
+ sec->size = off;
+ n = btf_vlen(sec);
+ for (i = 0; i < n; i++) {
+ struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
+
+ t = btf__type_by_id(obj->btf, vs->type);
+ ext_name = btf__name_by_offset(obj->btf, t->name_off);
+ ext = find_extern_by_name(obj, ext_name);
+ if (!ext) {
+ pr_warn("failed to find extern definition for BTF var '%s'\n",
+ ext_name);
+ return -ESRCH;
+ }
+ btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ vs->offset = ext->kcfg.data_off;
+ }
}
- return NULL;
+ return 0;
}
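For reference, the two extern flavors classified above map to BPF-side declarations roughly like the following minimal sketch (identifiers are illustrative; the __kconfig/__ksym attribute macros are assumed to come from bpf_helpers.h and place the externs into the ".kconfig" and ".ksyms" sections, respectively):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	extern int LINUX_KERNEL_VERSION __kconfig;	/* EXT_KCFG, backed by the .kconfig map */
	extern unsigned int CONFIG_HZ __kconfig;	/* EXT_KCFG */
	extern const void bpf_link_fops __ksym;		/* EXT_KSYM, resolved against kallsyms */

	SEC("raw_tp/sys_enter")
	int check_externs(void *ctx)
	{
		/* use all three externs so the compiler keeps them around */
		return LINUX_KERNEL_VERSION > 0 && CONFIG_HZ > 0 &&
		       (unsigned long)&bpf_link_fops != 0;
	}

	char LICENSE[] SEC("license") = "GPL";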
struct bpf_program *
@@ -2814,12 +3271,18 @@ bpf_object__find_program_by_title(const struct bpf_object *obj,
struct bpf_program *pos;
bpf_object__for_each_program(pos, obj) {
- if (pos->section_name && !strcmp(pos->section_name, title))
+ if (pos->sec_name && !strcmp(pos->sec_name, title))
return pos;
}
return NULL;
}
+static bool prog_is_subprog(const struct bpf_object *obj,
+ const struct bpf_program *prog)
+{
+ return prog->sec_idx == obj->efile.text_shndx && obj->has_subcalls;
+}
+
struct bpf_program *
bpf_object__find_program_by_name(const struct bpf_object *obj,
const char *name)
@@ -2827,6 +3290,8 @@ bpf_object__find_program_by_name(const struct bpf_object *obj,
struct bpf_program *prog;
bpf_object__for_each_program(prog, obj) {
+ if (prog_is_subprog(obj, prog))
+ continue;
if (!strcmp(prog->name, name))
return prog;
}
@@ -2865,7 +3330,7 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
static int bpf_program__record_reloc(struct bpf_program *prog,
struct reloc_desc *reloc_desc,
- __u32 insn_idx, const char *name,
+ __u32 insn_idx, const char *sym_name,
const GElf_Sym *sym, const GElf_Rel *rel)
{
struct bpf_insn *insn = &prog->insns[insn_idx];
@@ -2873,34 +3338,38 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
struct bpf_object *obj = prog->obj;
__u32 shdr_idx = sym->st_shndx;
enum libbpf_map_type type;
+ const char *sym_sec_name;
struct bpf_map *map;
+ reloc_desc->processed = false;
+
/* sub-program call relocation */
if (insn->code == (BPF_JMP | BPF_CALL)) {
if (insn->src_reg != BPF_PSEUDO_CALL) {
- pr_warn("incorrect bpf_call opcode\n");
+ pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
return -LIBBPF_ERRNO__RELOC;
}
/* text_shndx can be 0, if no default "main" program exists */
if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
- pr_warn("bad call relo against section %u\n", shdr_idx);
+ sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
+ pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
+ prog->name, sym_name, sym_sec_name);
return -LIBBPF_ERRNO__RELOC;
}
- if (sym->st_value % 8) {
- pr_warn("bad call relo offset: %zu\n",
- (size_t)sym->st_value);
+ if (sym->st_value % BPF_INSN_SZ) {
+ pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
+ prog->name, sym_name, (size_t)sym->st_value);
return -LIBBPF_ERRNO__RELOC;
}
reloc_desc->type = RELO_CALL;
reloc_desc->insn_idx = insn_idx;
reloc_desc->sym_off = sym->st_value;
- obj->has_pseudo_calls = true;
return 0;
}
if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
- pr_warn("invalid relo for insns[%d].code 0x%x\n",
- insn_idx, insn->code);
+ pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
+ prog->name, sym_name, insn_idx, insn->code);
return -LIBBPF_ERRNO__RELOC;
}
@@ -2915,31 +3384,32 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
break;
}
if (i >= n) {
- pr_warn("extern relo failed to find extern for sym %d\n",
- sym_idx);
+ pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
+ prog->name, sym_name, sym_idx);
return -LIBBPF_ERRNO__RELOC;
}
- pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n",
- i, ext->name, ext->sym_idx, ext->data_off, insn_idx);
+ pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
+ prog->name, i, ext->name, ext->sym_idx, insn_idx);
reloc_desc->type = RELO_EXTERN;
reloc_desc->insn_idx = insn_idx;
- reloc_desc->sym_off = ext->data_off;
+ reloc_desc->sym_off = i; /* sym_off stores extern index */
return 0;
}
if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
- pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n",
- name, shdr_idx);
+ pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
+ prog->name, sym_name, shdr_idx);
return -LIBBPF_ERRNO__RELOC;
}
type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
+ sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
/* generic map reference relocation */
if (type == LIBBPF_MAP_UNSPEC) {
if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
- pr_warn("bad map relo against section %u\n",
- shdr_idx);
+ pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
+ prog->name, sym_name, sym_sec_name);
return -LIBBPF_ERRNO__RELOC;
}
for (map_idx = 0; map_idx < nr_maps; map_idx++) {
@@ -2948,14 +3418,14 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
map->sec_idx != sym->st_shndx ||
map->sec_offset != sym->st_value)
continue;
- pr_debug("found map %zd (%s, sec %d, off %zu) for insn %u\n",
- map_idx, map->name, map->sec_idx,
+ pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
+ prog->name, map_idx, map->name, map->sec_idx,
map->sec_offset, insn_idx);
break;
}
if (map_idx >= nr_maps) {
- pr_warn("map relo failed to find map for sec %u, off %zu\n",
- shdr_idx, (size_t)sym->st_value);
+ pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
+ prog->name, sym_sec_name, (size_t)sym->st_value);
return -LIBBPF_ERRNO__RELOC;
}
reloc_desc->type = RELO_LD64;
@@ -2967,21 +3437,22 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
/* global data map relocation */
if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
- pr_warn("bad data relo against section %u\n", shdr_idx);
+ pr_warn("prog '%s': bad data relo against section '%s'\n",
+ prog->name, sym_sec_name);
return -LIBBPF_ERRNO__RELOC;
}
for (map_idx = 0; map_idx < nr_maps; map_idx++) {
map = &obj->maps[map_idx];
if (map->libbpf_type != type)
continue;
- pr_debug("found data map %zd (%s, sec %d, off %zu) for insn %u\n",
- map_idx, map->name, map->sec_idx, map->sec_offset,
- insn_idx);
+ pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
+ prog->name, map_idx, map->name, map->sec_idx,
+ map->sec_offset, insn_idx);
break;
}
if (map_idx >= nr_maps) {
- pr_warn("data relo failed to find map for sec %u\n",
- shdr_idx);
+ pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
+ prog->name, sym_sec_name);
return -LIBBPF_ERRNO__RELOC;
}
@@ -2992,55 +3463,113 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
return 0;
}
+static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
+{
+ return insn_idx >= prog->sec_insn_off &&
+ insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
+}
+
+static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
+ size_t sec_idx, size_t insn_idx)
+{
+ int l = 0, r = obj->nr_programs - 1, m;
+ struct bpf_program *prog;
+
+ while (l < r) {
+ m = l + (r - l + 1) / 2;
+ prog = &obj->programs[m];
+
+ if (prog->sec_idx < sec_idx ||
+ (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
+ l = m;
+ else
+ r = m - 1;
+ }
+ /* matching program could be at index l, but it still might be the
+ * wrong one, so we need to double-check the conditions one last time
+ */
+ prog = &obj->programs[l];
+ if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
+ return prog;
+ return NULL;
+}
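find_prog_by_sec_insn() is a "rightmost element <= key" binary search; the upward-biased midpoint is what keeps "l = m" from looping forever. A standalone sketch of the same pattern over a plain sorted array (illustrative only):

	static int find_rightmost_le(const int *arr, int n, int key)
	{
		int l = 0, r = n - 1, m;

		if (n == 0)
			return -1;
		while (l < r) {
			m = l + (r - l + 1) / 2;	/* bias up so "l = m" makes progress */
			if (arr[m] <= key)
				l = m;
			else
				r = m - 1;
		}
		/* the loop narrows to one slot but never proves arr[l] <= key */
		return arr[l] <= key ? l : -1;
	}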
+
static int
-bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
- Elf_Data *data, struct bpf_object *obj)
+bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data)
{
Elf_Data *symbols = obj->efile.symbols;
+ const char *relo_sec_name, *sec_name;
+ size_t sec_idx = shdr->sh_info;
+ struct bpf_program *prog;
+ struct reloc_desc *relos;
int err, i, nrels;
+ const char *sym_name;
+ __u32 insn_idx;
+ GElf_Sym sym;
+ GElf_Rel rel;
- pr_debug("collecting relocating info for: '%s'\n", prog->section_name);
- nrels = shdr->sh_size / shdr->sh_entsize;
+ relo_sec_name = elf_sec_str(obj, shdr->sh_name);
+ sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ if (!relo_sec_name || !sec_name)
+ return -EINVAL;
- prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
- if (!prog->reloc_desc) {
- pr_warn("failed to alloc memory in relocation\n");
- return -ENOMEM;
- }
- prog->nr_reloc = nrels;
+ pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
+ relo_sec_name, sec_idx, sec_name);
+ nrels = shdr->sh_size / shdr->sh_entsize;
for (i = 0; i < nrels; i++) {
- const char *name;
- __u32 insn_idx;
- GElf_Sym sym;
- GElf_Rel rel;
-
if (!gelf_getrel(data, i, &rel)) {
- pr_warn("relocation: failed to get %d reloc\n", i);
+ pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
return -LIBBPF_ERRNO__FORMAT;
}
if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
- pr_warn("relocation: symbol %"PRIx64" not found\n",
- GELF_R_SYM(rel.r_info));
+ pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n",
+ relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
return -LIBBPF_ERRNO__FORMAT;
}
- if (rel.r_offset % sizeof(struct bpf_insn))
+ if (rel.r_offset % BPF_INSN_SZ) {
+ pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
+ relo_sec_name, (size_t)rel.r_offset, i);
return -LIBBPF_ERRNO__FORMAT;
+ }
- insn_idx = rel.r_offset / sizeof(struct bpf_insn);
- name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name) ? : "<?>";
+ insn_idx = rel.r_offset / BPF_INSN_SZ;
+ /* relocations against static functions are recorded as
+ * relocations against the section that contains a function;
+ * in such a case, the symbol will be STT_SECTION and sym.st_name
+ * will point to the empty string (0), so fetch the section name
+ * instead
+ */
+ if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0)
+ sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx));
+ else
+ sym_name = elf_sym_str(obj, sym.st_name);
+ sym_name = sym_name ?: "<?";
- pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n",
- (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info),
- (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info),
- GELF_ST_BIND(sym.st_info), sym.st_name, name,
- insn_idx);
+ pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
+ relo_sec_name, i, insn_idx, sym_name);
- err = bpf_program__record_reloc(prog, &prog->reloc_desc[i],
- insn_idx, name, &sym, &rel);
+ prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
+ if (!prog) {
+ pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n",
+ relo_sec_name, i, sec_name, insn_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ relos = libbpf_reallocarray(prog->reloc_desc,
+ prog->nr_reloc + 1, sizeof(*relos));
+ if (!relos)
+ return -ENOMEM;
+ prog->reloc_desc = relos;
+
+ /* adjust insn_idx to local BPF program frame of reference */
+ insn_idx -= prog->sec_insn_off;
+ err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
+ insn_idx, sym_name, &sym, &rel);
if (err)
return err;
+
+ prog->nr_reloc++;
}
return 0;
}
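A small worked example of the frame-of-reference adjustment above (numbers are made up): with two programs carved out of one ELF section,

	/* prog A: sec_insn_off = 0, sec_insn_cnt = 5  -> section insns 0..4  */
	/* prog B: sec_insn_off = 5, sec_insn_cnt = 10 -> section insns 5..14 */

a relocation at section insn #7 resolves to prog B via find_prog_by_sec_insn() and is recorded with the program-local index 7 - 5 = 2.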
@@ -3134,22 +3663,29 @@ err_free_new_name:
return err;
}
-int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
+__u32 bpf_map__max_entries(const struct bpf_map *map)
{
- if (!map || !max_entries)
- return -EINVAL;
+ return map->def.max_entries;
+}
- /* If map already created, its attributes can't be changed. */
+int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
+{
if (map->fd >= 0)
return -EBUSY;
-
map->def.max_entries = max_entries;
-
return 0;
}
+int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
+{
+ if (!map || !max_entries)
+ return -EINVAL;
+
+ return bpf_map__set_max_entries(map, max_entries);
+}
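Caller-side usage of the new getter/setter pair, sketched with a hypothetical object handle and map name ("obj", "my_map"); bpf_map__set_max_entries() keeps returning -EBUSY once the map has actually been created:

	struct bpf_map *map = bpf_object__find_map_by_name(obj, "my_map");
	int err = 0;

	if (map && bpf_map__max_entries(map) < 1024)
		err = bpf_map__set_max_entries(map, 1024);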
+
static int
-bpf_object__probe_name(struct bpf_object *obj)
+bpf_object__probe_loading(struct bpf_object *obj)
{
struct bpf_load_program_attr attr;
char *cp, errmsg[STRERR_BUFSIZE];
@@ -3169,27 +3705,48 @@ bpf_object__probe_name(struct bpf_object *obj)
ret = bpf_load_program_xattr(&attr, NULL, 0);
if (ret < 0) {
- cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
- pr_warn("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n",
- __func__, cp, errno);
- return -errno;
+ ret = errno;
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
+ "program. Make sure your kernel supports BPF "
+ "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
+ "set to big enough value.\n", __func__, cp, ret);
+ return -ret;
}
close(ret);
- /* now try the same program, but with the name */
+ return 0;
+}
+
+static int probe_fd(int fd)
+{
+ if (fd >= 0)
+ close(fd);
+ return fd >= 0;
+}
+static int probe_kern_prog_name(void)
+{
+ struct bpf_load_program_attr attr;
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int ret;
+
+ /* make sure loading with name works */
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insns = insns;
+ attr.insns_cnt = ARRAY_SIZE(insns);
+ attr.license = "GPL";
attr.name = "test";
ret = bpf_load_program_xattr(&attr, NULL, 0);
- if (ret >= 0) {
- obj->caps.name = 1;
- close(ret);
- }
-
- return 0;
+ return probe_fd(ret);
}
-static int
-bpf_object__probe_global_data(struct bpf_object *obj)
+static int probe_kern_global_data(void)
{
struct bpf_load_program_attr prg_attr;
struct bpf_create_map_attr map_attr;
@@ -3210,10 +3767,11 @@ bpf_object__probe_global_data(struct bpf_object *obj)
map = bpf_create_map_xattr(&map_attr);
if (map < 0) {
- cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ ret = -errno;
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
- __func__, cp, errno);
- return -errno;
+ __func__, cp, -ret);
+ return ret;
}
insns[0].imm = map;
@@ -3225,16 +3783,23 @@ bpf_object__probe_global_data(struct bpf_object *obj)
prg_attr.license = "GPL";
ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
- if (ret >= 0) {
- obj->caps.global_data = 1;
- close(ret);
- }
-
close(map);
- return 0;
+ return probe_fd(ret);
}
-static int bpf_object__probe_btf_func(struct bpf_object *obj)
+static int probe_kern_btf(void)
+{
+ static const char strs[] = "\0int";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
+}
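The types[] blob above is a hand-encoded single BTF_KIND_INT type. As a rough decoding sketch (assuming the type layout from <linux/btf.h>; this is not the exact macro expansion):

	struct btf_type int_type = {
		.name_off = 1,			/* offset of "int" in strs[] */
		.info = BTF_KIND_INT << 24,	/* kind INT, vlen 0, no kind_flag */
		.size = 4,
	};
	__u32 int_data = (BTF_INT_SIGNED << 24) | (0 << 16) | 32; /* signed, bit offset 0, 32 bits */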
+
+static int probe_kern_btf_func(void)
{
static const char strs[] = "\0int\0x\0a";
/* void x(int a) {} */
@@ -3247,20 +3812,12 @@ static int bpf_object__probe_btf_func(struct bpf_object *obj)
/* FUNC x */ /* [3] */
BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
};
- int btf_fd;
-
- btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs));
- if (btf_fd >= 0) {
- obj->caps.btf_func = 1;
- close(btf_fd);
- return 1;
- }
- return 0;
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
}
-static int bpf_object__probe_btf_func_global(struct bpf_object *obj)
+static int probe_kern_btf_func_global(void)
{
static const char strs[] = "\0int\0x\0a";
/* static void x(int a) {} */
@@ -3273,20 +3830,12 @@ static int bpf_object__probe_btf_func_global(struct bpf_object *obj)
/* FUNC x BTF_FUNC_GLOBAL */ /* [3] */
BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
};
- int btf_fd;
- btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs));
- if (btf_fd >= 0) {
- obj->caps.btf_func_global = 1;
- close(btf_fd);
- return 1;
- }
-
- return 0;
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
}
-static int bpf_object__probe_btf_datasec(struct bpf_object *obj)
+static int probe_kern_btf_datasec(void)
{
static const char strs[] = "\0x\0.data";
/* static int a; */
@@ -3300,20 +3849,12 @@ static int bpf_object__probe_btf_datasec(struct bpf_object *obj)
BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
BTF_VAR_SECINFO_ENC(2, 0, 4),
};
- int btf_fd;
- btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs));
- if (btf_fd >= 0) {
- obj->caps.btf_datasec = 1;
- close(btf_fd);
- return 1;
- }
-
- return 0;
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
}
-static int bpf_object__probe_array_mmap(struct bpf_object *obj)
+static int probe_kern_array_mmap(void)
{
struct bpf_create_map_attr attr = {
.map_type = BPF_MAP_TYPE_ARRAY,
@@ -3322,27 +3863,17 @@ static int bpf_object__probe_array_mmap(struct bpf_object *obj)
.value_size = sizeof(int),
.max_entries = 1,
};
- int fd;
- fd = bpf_create_map_xattr(&attr);
- if (fd >= 0) {
- obj->caps.array_mmap = 1;
- close(fd);
- return 1;
- }
-
- return 0;
+ return probe_fd(bpf_create_map_xattr(&attr));
}
-static int
-bpf_object__probe_exp_attach_type(struct bpf_object *obj)
+static int probe_kern_exp_attach_type(void)
{
struct bpf_load_program_attr attr;
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
- int fd;
memset(&attr, 0, sizeof(attr));
/* use any valid combination of program type and (optional)
@@ -3356,36 +3887,140 @@ bpf_object__probe_exp_attach_type(struct bpf_object *obj)
attr.insns_cnt = ARRAY_SIZE(insns);
attr.license = "GPL";
- fd = bpf_load_program_xattr(&attr, NULL, 0);
- if (fd >= 0) {
- obj->caps.exp_attach_type = 1;
- close(fd);
- return 1;
- }
- return 0;
+ return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
}
-static int
-bpf_object__probe_caps(struct bpf_object *obj)
-{
- int (*probe_fn[])(struct bpf_object *obj) = {
- bpf_object__probe_name,
- bpf_object__probe_global_data,
- bpf_object__probe_btf_func,
- bpf_object__probe_btf_func_global,
- bpf_object__probe_btf_datasec,
- bpf_object__probe_array_mmap,
- bpf_object__probe_exp_attach_type,
+static int probe_kern_probe_read_kernel(void)
+{
+ struct bpf_load_program_attr attr;
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
+ BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */
+ BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
+ BPF_EXIT_INSN(),
+ };
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_KPROBE;
+ attr.insns = insns;
+ attr.insns_cnt = ARRAY_SIZE(insns);
+ attr.license = "GPL";
+
+ return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
+}
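Roughly, the hand-assembled instructions above correspond to the following BPF C sketch (section name and the NULL source pointer are only there to exercise bpf_probe_read_kernel(); the probe cares about whether the program loads, not about the read result):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("kprobe/dummy")
	int probe_read_kernel_test(void *ctx)
	{
		__u64 buf;

		/* mirrors: r1 = fp - 8, r2 = 8, r3 = NULL, call bpf_probe_read_kernel */
		bpf_probe_read_kernel(&buf, sizeof(buf), NULL);
		return 0;
	}

	char LICENSE[] SEC("license") = "GPL";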
+
+static int probe_prog_bind_map(void)
+{
+ struct bpf_load_program_attr prg_attr;
+ struct bpf_create_map_attr map_attr;
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
};
- int i, ret;
+ int ret, map, prog;
- for (i = 0; i < ARRAY_SIZE(probe_fn); i++) {
- ret = probe_fn[i](obj);
- if (ret < 0)
- pr_debug("Probe #%d failed with %d.\n", i, ret);
+ memset(&map_attr, 0, sizeof(map_attr));
+ map_attr.map_type = BPF_MAP_TYPE_ARRAY;
+ map_attr.key_size = sizeof(int);
+ map_attr.value_size = 32;
+ map_attr.max_entries = 1;
+
+ map = bpf_create_map_xattr(&map_attr);
+ if (map < 0) {
+ ret = -errno;
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+ __func__, cp, -ret);
+ return ret;
}
- return 0;
+ memset(&prg_attr, 0, sizeof(prg_attr));
+ prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ prg_attr.insns = insns;
+ prg_attr.insns_cnt = ARRAY_SIZE(insns);
+ prg_attr.license = "GPL";
+
+ prog = bpf_load_program_xattr(&prg_attr, NULL, 0);
+ if (prog < 0) {
+ close(map);
+ return 0;
+ }
+
+ ret = bpf_prog_bind_map(prog, map, NULL);
+
+ close(map);
+ close(prog);
+
+ return ret >= 0;
+}
+
+enum kern_feature_result {
+ FEAT_UNKNOWN = 0,
+ FEAT_SUPPORTED = 1,
+ FEAT_MISSING = 2,
+};
+
+typedef int (*feature_probe_fn)(void);
+
+static struct kern_feature_desc {
+ const char *desc;
+ feature_probe_fn probe;
+ enum kern_feature_result res;
+} feature_probes[__FEAT_CNT] = {
+ [FEAT_PROG_NAME] = {
+ "BPF program name", probe_kern_prog_name,
+ },
+ [FEAT_GLOBAL_DATA] = {
+ "global variables", probe_kern_global_data,
+ },
+ [FEAT_BTF] = {
+ "minimal BTF", probe_kern_btf,
+ },
+ [FEAT_BTF_FUNC] = {
+ "BTF functions", probe_kern_btf_func,
+ },
+ [FEAT_BTF_GLOBAL_FUNC] = {
+ "BTF global function", probe_kern_btf_func_global,
+ },
+ [FEAT_BTF_DATASEC] = {
+ "BTF data section and variable", probe_kern_btf_datasec,
+ },
+ [FEAT_ARRAY_MMAP] = {
+ "ARRAY map mmap()", probe_kern_array_mmap,
+ },
+ [FEAT_EXP_ATTACH_TYPE] = {
+ "BPF_PROG_LOAD expected_attach_type attribute",
+ probe_kern_exp_attach_type,
+ },
+ [FEAT_PROBE_READ_KERN] = {
+ "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
+ },
+ [FEAT_PROG_BIND_MAP] = {
+ "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
+ }
+};
+
+static bool kernel_supports(enum kern_feature_id feat_id)
+{
+ struct kern_feature_desc *feat = &feature_probes[feat_id];
+ int ret;
+
+ if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
+ ret = feat->probe();
+ if (ret > 0) {
+ WRITE_ONCE(feat->res, FEAT_SUPPORTED);
+ } else if (ret == 0) {
+ WRITE_ONCE(feat->res, FEAT_MISSING);
+ } else {
+ pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
+ WRITE_ONCE(feat->res, FEAT_MISSING);
+ }
+ }
+
+ return READ_ONCE(feat->res) == FEAT_SUPPORTED;
}
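The probe result is computed lazily and cached for the lifetime of the process, so kernel_supports() is cheap to call at every decision point. Call sites are then expected to degrade gracefully rather than fail, along these lines (a fragment, mirroring how optional program attributes are gated elsewhere in this file):

	if (kernel_supports(FEAT_PROG_NAME))
		load_attr.name = prog->name;
	if (kernel_supports(FEAT_EXP_ATTACH_TYPE))
		load_attr.expected_attach_type = prog->expected_attach_type;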
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
@@ -3455,10 +4090,6 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
char *cp, errmsg[STRERR_BUFSIZE];
int err, zero = 0;
- /* kernel already zero-initializes .bss map. */
- if (map_type == LIBBPF_MAP_BSS)
- return 0;
-
err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
if (err) {
err = -errno;
@@ -3482,193 +4113,194 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
return 0;
}
-static int
-bpf_object__create_maps(struct bpf_object *obj)
-{
- struct bpf_create_map_attr create_attr = {};
- int nr_cpus = 0;
- unsigned int i;
- int err;
-
- for (i = 0; i < obj->nr_maps; i++) {
- struct bpf_map *map = &obj->maps[i];
- struct bpf_map_def *def = &map->def;
- char *cp, errmsg[STRERR_BUFSIZE];
- int *pfd = &map->fd;
+static void bpf_map__destroy(struct bpf_map *map);
- if (map->pin_path) {
- err = bpf_object__reuse_map(map);
- if (err) {
- pr_warn("error reusing pinned map %s\n",
- map->name);
- return err;
- }
- }
+static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
+{
+ struct bpf_create_map_attr create_attr;
+ struct bpf_map_def *def = &map->def;
- if (map->fd >= 0) {
- pr_debug("skip map create (preset) %s: fd=%d\n",
- map->name, map->fd);
- continue;
- }
+ memset(&create_attr, 0, sizeof(create_attr));
- if (obj->caps.name)
- create_attr.name = map->name;
- create_attr.map_ifindex = map->map_ifindex;
- create_attr.map_type = def->type;
- create_attr.map_flags = def->map_flags;
- create_attr.key_size = def->key_size;
- create_attr.value_size = def->value_size;
- if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
- !def->max_entries) {
- if (!nr_cpus)
- nr_cpus = libbpf_num_possible_cpus();
- if (nr_cpus < 0) {
- pr_warn("failed to determine number of system CPUs: %d\n",
- nr_cpus);
- err = nr_cpus;
- goto err_out;
- }
- pr_debug("map '%s': setting size to %d\n",
- map->name, nr_cpus);
- create_attr.max_entries = nr_cpus;
- } else {
- create_attr.max_entries = def->max_entries;
- }
- create_attr.btf_fd = 0;
- create_attr.btf_key_type_id = 0;
- create_attr.btf_value_type_id = 0;
- if (bpf_map_type__is_map_in_map(def->type) &&
- map->inner_map_fd >= 0)
- create_attr.inner_map_fd = map->inner_map_fd;
- if (bpf_map__is_struct_ops(map))
- create_attr.btf_vmlinux_value_type_id =
- map->btf_vmlinux_value_type_id;
+ if (kernel_supports(FEAT_PROG_NAME))
+ create_attr.name = map->name;
+ create_attr.map_ifindex = map->map_ifindex;
+ create_attr.map_type = def->type;
+ create_attr.map_flags = def->map_flags;
+ create_attr.key_size = def->key_size;
+ create_attr.value_size = def->value_size;
+ create_attr.numa_node = map->numa_node;
- if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
- create_attr.btf_fd = btf__fd(obj->btf);
- create_attr.btf_key_type_id = map->btf_key_type_id;
- create_attr.btf_value_type_id = map->btf_value_type_id;
- }
+ if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
+ int nr_cpus;
- *pfd = bpf_create_map_xattr(&create_attr);
- if (*pfd < 0 && (create_attr.btf_key_type_id ||
- create_attr.btf_value_type_id)) {
- err = -errno;
- cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
- pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
- map->name, cp, err);
- create_attr.btf_fd = 0;
- create_attr.btf_key_type_id = 0;
- create_attr.btf_value_type_id = 0;
- map->btf_key_type_id = 0;
- map->btf_value_type_id = 0;
- *pfd = bpf_create_map_xattr(&create_attr);
+ nr_cpus = libbpf_num_possible_cpus();
+ if (nr_cpus < 0) {
+ pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
+ map->name, nr_cpus);
+ return nr_cpus;
}
+ pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
+ create_attr.max_entries = nr_cpus;
+ } else {
+ create_attr.max_entries = def->max_entries;
+ }
- if (*pfd < 0) {
- size_t j;
+ if (bpf_map__is_struct_ops(map))
+ create_attr.btf_vmlinux_value_type_id =
+ map->btf_vmlinux_value_type_id;
- err = -errno;
-err_out:
- cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
- pr_warn("failed to create map (name: '%s'): %s(%d)\n",
- map->name, cp, err);
- pr_perm_msg(err);
- for (j = 0; j < i; j++)
- zclose(obj->maps[j].fd);
- return err;
- }
+ create_attr.btf_fd = 0;
+ create_attr.btf_key_type_id = 0;
+ create_attr.btf_value_type_id = 0;
+ if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
+ create_attr.btf_fd = btf__fd(obj->btf);
+ create_attr.btf_key_type_id = map->btf_key_type_id;
+ create_attr.btf_value_type_id = map->btf_value_type_id;
+ }
- if (bpf_map__is_internal(map)) {
- err = bpf_object__populate_internal_map(obj, map);
- if (err < 0) {
- zclose(*pfd);
- goto err_out;
- }
- }
+ if (bpf_map_type__is_map_in_map(def->type)) {
+ if (map->inner_map) {
+ int err;
- if (map->pin_path && !map->pinned) {
- err = bpf_map__pin(map, NULL);
+ err = bpf_object__create_map(obj, map->inner_map);
if (err) {
- pr_warn("failed to auto-pin map name '%s' at '%s'\n",
- map->name, map->pin_path);
+ pr_warn("map '%s': failed to create inner map: %d\n",
+ map->name, err);
return err;
}
+ map->inner_map_fd = bpf_map__fd(map->inner_map);
}
+ if (map->inner_map_fd >= 0)
+ create_attr.inner_map_fd = map->inner_map_fd;
+ }
+
+ map->fd = bpf_create_map_xattr(&create_attr);
+ if (map->fd < 0 && (create_attr.btf_key_type_id ||
+ create_attr.btf_value_type_id)) {
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int err = -errno;
+
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+ pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
+ map->name, cp, err);
+ create_attr.btf_fd = 0;
+ create_attr.btf_key_type_id = 0;
+ create_attr.btf_value_type_id = 0;
+ map->btf_key_type_id = 0;
+ map->btf_value_type_id = 0;
+ map->fd = bpf_create_map_xattr(&create_attr);
+ }
- pr_debug("created map %s: fd=%d\n", map->name, *pfd);
+ if (map->fd < 0)
+ return -errno;
+
+ if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
+ bpf_map__destroy(map->inner_map);
+ zfree(&map->inner_map);
}
return 0;
}
-static int
-check_btf_ext_reloc_err(struct bpf_program *prog, int err,
- void *btf_prog_info, const char *info_name)
+static int init_map_slots(struct bpf_map *map)
{
- if (err != -ENOENT) {
- pr_warn("Error in loading %s for sec %s.\n",
- info_name, prog->section_name);
- return err;
- }
+ const struct bpf_map *targ_map;
+ unsigned int i;
+ int fd, err;
- /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */
+ for (i = 0; i < map->init_slots_sz; i++) {
+ if (!map->init_slots[i])
+ continue;
- if (btf_prog_info) {
- /*
- * Some info has already been found but has problem
- * in the last btf_ext reloc. Must have to error out.
- */
- pr_warn("Error in relocating %s for sec %s.\n",
- info_name, prog->section_name);
- return err;
+ targ_map = map->init_slots[i];
+ fd = bpf_map__fd(targ_map);
+ err = bpf_map_update_elem(map->fd, &i, &fd, 0);
+ if (err) {
+ err = -errno;
+ pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
+ map->name, i, targ_map->name,
+ fd, err);
+ return err;
+ }
+ pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
+ map->name, i, targ_map->name, fd);
}
- /* Have problem loading the very first info. Ignore the rest. */
- pr_warn("Cannot find %s for main program sec %s. Ignore all %s.\n",
- info_name, prog->section_name, info_name);
+ zfree(&map->init_slots);
+ map->init_slots_sz = 0;
+
return 0;
}
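init_slots[] comes from declarative map-in-map initialization on the BPF side; a sketch of a producing declaration (map names are hypothetical, the map definition macros come from bpf_helpers.h):

	struct inner_map {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(max_entries, 1);
		__type(key, int);
		__type(value, int);
	} inner_map1 SEC(".maps"),
	  inner_map2 SEC(".maps");

	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
		__uint(max_entries, 4);
		__uint(key_size, sizeof(int));
		__uint(value_size, sizeof(int));
		__array(values, struct inner_map);
	} outer_map SEC(".maps") = {
		.values = { [0] = &inner_map1, [2] = &inner_map2 },	/* -> init_slots[0] and [2] */
	};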
static int
-bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj,
- const char *section_name, __u32 insn_offset)
+bpf_object__create_maps(struct bpf_object *obj)
{
+ struct bpf_map *map;
+ char *cp, errmsg[STRERR_BUFSIZE];
+ unsigned int i, j;
int err;
- if (!insn_offset || prog->func_info) {
- /*
- * !insn_offset => main program
- *
- * For sub prog, the main program's func_info has to
- * be loaded first (i.e. prog->func_info != NULL)
- */
- err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext,
- section_name, insn_offset,
- &prog->func_info,
- &prog->func_info_cnt);
- if (err)
- return check_btf_ext_reloc_err(prog, err,
- prog->func_info,
- "bpf_func_info");
+ for (i = 0; i < obj->nr_maps; i++) {
+ map = &obj->maps[i];
- prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext);
- }
+ if (map->pin_path) {
+ err = bpf_object__reuse_map(map);
+ if (err) {
+ pr_warn("map '%s': error reusing pinned map\n",
+ map->name);
+ goto err_out;
+ }
+ }
- if (!insn_offset || prog->line_info) {
- err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext,
- section_name, insn_offset,
- &prog->line_info,
- &prog->line_info_cnt);
- if (err)
- return check_btf_ext_reloc_err(prog, err,
- prog->line_info,
- "bpf_line_info");
+ if (map->fd >= 0) {
+ pr_debug("map '%s': skipping creation (preset fd=%d)\n",
+ map->name, map->fd);
+ } else {
+ err = bpf_object__create_map(obj, map);
+ if (err)
+ goto err_out;
- prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext);
+ pr_debug("map '%s': created successfully, fd=%d\n",
+ map->name, map->fd);
+
+ if (bpf_map__is_internal(map)) {
+ err = bpf_object__populate_internal_map(obj, map);
+ if (err < 0) {
+ zclose(map->fd);
+ goto err_out;
+ }
+ }
+
+ if (map->init_slots_sz) {
+ err = init_map_slots(map);
+ if (err < 0) {
+ zclose(map->fd);
+ goto err_out;
+ }
+ }
+ }
+
+ if (map->pin_path && !map->pinned) {
+ err = bpf_map__pin(map, NULL);
+ if (err) {
+ pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
+ map->name, map->pin_path, err);
+ zclose(map->fd);
+ goto err_out;
+ }
+ }
}
return 0;
+
+err_out:
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+ pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
+ pr_perm_msg(err);
+ for (j = 0; j < i; j++)
+ zclose(obj->maps[j].fd);
+ return err;
}
#define BPF_CORE_SPEC_MAX_LEN 64
@@ -3684,6 +4316,10 @@ struct bpf_core_spec {
const struct btf *btf;
/* high-level spec: named fields and array indices only */
struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+ /* original unresolved (no skip_mods_or_typedefs) root type ID */
+ __u32 root_type_id;
+ /* CO-RE relocation kind */
+ enum bpf_core_relo_kind relo_kind;
/* high-level spec length */
int len;
/* raw, low-level spec: 1-to-1 with accessor spec string */
@@ -3714,8 +4350,66 @@ static bool is_flex_arr(const struct btf *btf,
return acc->idx == btf_vlen(t) - 1;
}
+static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_FIELD_BYTE_OFFSET: return "byte_off";
+ case BPF_FIELD_BYTE_SIZE: return "byte_sz";
+ case BPF_FIELD_EXISTS: return "field_exists";
+ case BPF_FIELD_SIGNED: return "signed";
+ case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
+ case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
+ case BPF_TYPE_ID_LOCAL: return "local_type_id";
+ case BPF_TYPE_ID_TARGET: return "target_type_id";
+ case BPF_TYPE_EXISTS: return "type_exists";
+ case BPF_TYPE_SIZE: return "type_size";
+ case BPF_ENUMVAL_EXISTS: return "enumval_exists";
+ case BPF_ENUMVAL_VALUE: return "enumval_value";
+ default: return "unknown";
+ }
+}
+
+static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_FIELD_BYTE_OFFSET:
+ case BPF_FIELD_BYTE_SIZE:
+ case BPF_FIELD_EXISTS:
+ case BPF_FIELD_SIGNED:
+ case BPF_FIELD_LSHIFT_U64:
+ case BPF_FIELD_RSHIFT_U64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_TYPE_ID_LOCAL:
+ case BPF_TYPE_ID_TARGET:
+ case BPF_TYPE_EXISTS:
+ case BPF_TYPE_SIZE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_ENUMVAL_EXISTS:
+ case BPF_ENUMVAL_VALUE:
+ return true;
+ default:
+ return false;
+ }
+}
+
/*
- * Turn bpf_field_reloc into a low- and high-level spec representation,
+ * Turn bpf_core_relo into a low- and high-level spec representation,
* validating correctness along the way, as well as calculating resulting
* field bit offset, specified by accessor string. Low-level spec captures
* every single level of nestedness, including traversing anonymous
@@ -3744,10 +4438,17 @@ static bool is_flex_arr(const struct btf *btf,
* - field 'a' access (corresponds to '2' in low-level spec);
* - array element #3 access (corresponds to '3' in low-level spec).
*
+ * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
+ * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
+ * spec and raw_spec are kept empty.
+ *
+ * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use the access
+ * string to specify the index of the enumerator whose value needs to be relocated.
*/
-static int bpf_core_spec_parse(const struct btf *btf,
+static int bpf_core_parse_spec(const struct btf *btf,
__u32 type_id,
const char *spec_str,
+ enum bpf_core_relo_kind relo_kind,
struct bpf_core_spec *spec)
{
int access_idx, parsed_len, i;
@@ -3762,6 +4463,15 @@ static int bpf_core_spec_parse(const struct btf *btf,
memset(spec, 0, sizeof(*spec));
spec->btf = btf;
+ spec->root_type_id = type_id;
+ spec->relo_kind = relo_kind;
+
+ /* type-based relocations don't have a field access string */
+ if (core_relo_is_type_based(relo_kind)) {
+ if (strcmp(spec_str, "0"))
+ return -EINVAL;
+ return 0;
+ }
/* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
while (*spec_str) {
@@ -3778,16 +4488,28 @@ static int bpf_core_spec_parse(const struct btf *btf,
if (spec->raw_len == 0)
return -EINVAL;
- /* first spec value is always reloc type array index */
t = skip_mods_and_typedefs(btf, type_id, &id);
if (!t)
return -EINVAL;
access_idx = spec->raw_spec[0];
- spec->spec[0].type_id = id;
- spec->spec[0].idx = access_idx;
+ acc = &spec->spec[0];
+ acc->type_id = id;
+ acc->idx = access_idx;
spec->len++;
+ if (core_relo_is_enumval_based(relo_kind)) {
+ if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
+ return -EINVAL;
+
+ /* record enumerator name in a first accessor */
+ acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
+ return 0;
+ }
+
+ if (!core_relo_is_field_based(relo_kind))
+ return -EINVAL;
+
sz = btf__resolve_size(btf, id);
if (sz < 0)
return sz;
@@ -3845,8 +4567,8 @@ static int bpf_core_spec_parse(const struct btf *btf,
return sz;
spec->bit_offset += access_idx * sz * 8;
} else {
- pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n",
- type_id, spec_str, i, id, btf_kind(t));
+ pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
+ type_id, spec_str, i, id, btf_kind_str(t));
return -EINVAL;
}
}
@@ -3896,16 +4618,16 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
{
size_t local_essent_len, targ_essent_len;
const char *local_name, *targ_name;
- const struct btf_type *t;
+ const struct btf_type *t, *local_t;
struct ids_vec *cand_ids;
__u32 *new_ids;
int i, err, n;
- t = btf__type_by_id(local_btf, local_type_id);
- if (!t)
+ local_t = btf__type_by_id(local_btf, local_type_id);
+ if (!local_t)
return ERR_PTR(-EINVAL);
- local_name = btf__name_by_offset(local_btf, t->name_off);
+ local_name = btf__name_by_offset(local_btf, local_t->name_off);
if (str_is_empty(local_name))
return ERR_PTR(-EINVAL);
local_essent_len = bpf_core_essential_name_len(local_name);
@@ -3917,12 +4639,11 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
n = btf__get_nr_types(targ_btf);
for (i = 1; i <= n; i++) {
t = btf__type_by_id(targ_btf, i);
- targ_name = btf__name_by_offset(targ_btf, t->name_off);
- if (str_is_empty(targ_name))
+ if (btf_kind(t) != btf_kind(local_t))
continue;
- t = skip_mods_and_typedefs(targ_btf, i, NULL);
- if (!btf_is_composite(t) && !btf_is_array(t))
+ targ_name = btf__name_by_offset(targ_btf, t->name_off);
+ if (str_is_empty(targ_name))
continue;
targ_essent_len = bpf_core_essential_name_len(targ_name);
@@ -3930,11 +4651,12 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
continue;
if (strncmp(local_name, targ_name, local_essent_len) == 0) {
- pr_debug("[%d] %s: found candidate [%d] %s\n",
- local_type_id, local_name, i, targ_name);
- new_ids = reallocarray(cand_ids->data,
- cand_ids->len + 1,
- sizeof(*cand_ids->data));
+ pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n",
+ local_type_id, btf_kind_str(local_t),
+ local_name, i, btf_kind_str(t), targ_name);
+ new_ids = libbpf_reallocarray(cand_ids->data,
+ cand_ids->len + 1,
+ sizeof(*cand_ids->data));
if (!new_ids) {
err = -ENOMEM;
goto err_out;
@@ -3949,8 +4671,9 @@ err_out:
return ERR_PTR(err);
}
-/* Check two types for compatibility, skipping const/volatile/restrict and
- * typedefs, to ensure we are relocating compatible entities:
+/* Check two types for compatibility for the purpose of field access
+ * relocation. const/volatile/restrict and typedefs are skipped to ensure we
+ * are relocating semantically compatible entities:
* - any two STRUCTs/UNIONs are compatible and can be mixed;
* - any two FWDs are compatible, if their names match (modulo flavor suffix);
* - any two PTRs are always compatible;
@@ -4105,6 +4828,100 @@ static int bpf_core_match_member(const struct btf *local_btf,
return 0;
}
+/* Check local and target types for compatibility. This check is used for
+ * type-based CO-RE relocations and follows slightly different rules than
+ * field-based relocations. This function assumes that root types were already
+ * checked for name match. Beyond that initial root-level name check, names
+ * are completely ignored. Compatibility rules are as follows:
+ * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
+ * kind should match for local and target types (i.e., STRUCT is not
+ * compatible with UNION);
+ * - for ENUMs, the size is ignored;
+ * - for INT, size and signedness are ignored;
+ * - for ARRAY, dimensionality is ignored, element types are checked for
+ * compatibility recursively;
+ * - CONST/VOLATILE/RESTRICT modifiers are ignored;
+ * - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
+ * - FUNC_PROTOs are compatible if they have compatible signature: same
+ * number of input args and compatible return and argument types.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+ int depth = 32; /* max recursion depth */
+
+ /* caller made sure that names match (ignoring flavor suffix) */
+ local_type = btf__type_by_id(local_btf, local_id);
+ targ_type = btf__type_by_id(targ_btf, targ_id);
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+
+recur:
+ depth--;
+ if (depth < 0)
+ return -EINVAL;
+
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+
+ switch (btf_kind(local_type)) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_FWD:
+ return 1;
+ case BTF_KIND_INT:
+ /* just reject deprecated bitfield-like integers; all other
+ * integers are by default compatible with each other
+ */
+ return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
+ case BTF_KIND_PTR:
+ local_id = local_type->type;
+ targ_id = targ_type->type;
+ goto recur;
+ case BTF_KIND_ARRAY:
+ local_id = btf_array(local_type)->type;
+ targ_id = btf_array(targ_type)->type;
+ goto recur;
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *local_p = btf_params(local_type);
+ struct btf_param *targ_p = btf_params(targ_type);
+ __u16 local_vlen = btf_vlen(local_type);
+ __u16 targ_vlen = btf_vlen(targ_type);
+ int i, err;
+
+ if (local_vlen != targ_vlen)
+ return 0;
+
+ for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+ skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
+ skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
+ err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
+ if (err <= 0)
+ return err;
+ }
+
+ /* tail recurse for return type check */
+ skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
+ skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
+ goto recur;
+ }
+ default:
+ pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+ btf_kind_str(local_type), local_id, targ_id);
+ return 0;
+ }
+}
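Illustrative local/target pairs under the rules above (hypothetical type names; "local" is what the BPF object was compiled against, "target" is what kernel BTF carries):

	typedef int local_pid_t;		/* local: typedef of int; plain 'int' target -> compatible */

	struct local_arr { int a[4]; };		/* local  */
	struct targ_arr  { int a[16]; };	/* target -> compatible: array dimensionality is ignored */

	struct local_s { int x; };		/* local  */
	union  targ_s  { int x; };		/* target -> NOT compatible: STRUCT vs UNION kind mismatch */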
+
/*
* Try to match local spec to a target type and, if successful, produce full
* target spec (high-level, low-level + bit offset).
@@ -4120,10 +4937,51 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
memset(targ_spec, 0, sizeof(*targ_spec));
targ_spec->btf = targ_btf;
+ targ_spec->root_type_id = targ_id;
+ targ_spec->relo_kind = local_spec->relo_kind;
+
+ if (core_relo_is_type_based(local_spec->relo_kind)) {
+ return bpf_core_types_are_compat(local_spec->btf,
+ local_spec->root_type_id,
+ targ_btf, targ_id);
+ }
local_acc = &local_spec->spec[0];
targ_acc = &targ_spec->spec[0];
+ if (core_relo_is_enumval_based(local_spec->relo_kind)) {
+ size_t local_essent_len, targ_essent_len;
+ const struct btf_enum *e;
+ const char *targ_name;
+
+ /* has to resolve to an enum */
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
+ if (!btf_is_enum(targ_type))
+ return 0;
+
+ local_essent_len = bpf_core_essential_name_len(local_acc->name);
+
+ for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
+ targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
+ targ_essent_len = bpf_core_essential_name_len(targ_name);
+ if (targ_essent_len != local_essent_len)
+ continue;
+ if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = i;
+ targ_acc->name = targ_name;
+ targ_spec->len++;
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+ targ_spec->raw_len++;
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ if (!core_relo_is_field_based(local_spec->relo_kind))
+ return -EINVAL;
+
for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
&targ_id);
@@ -4180,22 +5038,42 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
}
static int bpf_core_calc_field_relo(const struct bpf_program *prog,
- const struct bpf_field_reloc *relo,
+ const struct bpf_core_relo *relo,
const struct bpf_core_spec *spec,
- __u32 *val, bool *validate)
+ __u32 *val, __u32 *field_sz, __u32 *type_id,
+ bool *validate)
{
- const struct bpf_core_accessor *acc = &spec->spec[spec->len - 1];
- const struct btf_type *t = btf__type_by_id(spec->btf, acc->type_id);
- __u32 byte_off, byte_sz, bit_off, bit_sz;
+ const struct bpf_core_accessor *acc;
+ const struct btf_type *t;
+ __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
const struct btf_member *m;
const struct btf_type *mt;
bool bitfield;
__s64 sz;
+ *field_sz = 0;
+
+ if (relo->kind == BPF_FIELD_EXISTS) {
+ *val = spec ? 1 : 0;
+ return 0;
+ }
+
+ if (!spec)
+ return -EUCLEAN; /* request instruction poisoning */
+
+ acc = &spec->spec[spec->len - 1];
+ t = btf__type_by_id(spec->btf, acc->type_id);
+
/* a[n] accessor needs special handling */
if (!acc->name) {
if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
*val = spec->bit_offset / 8;
+ /* remember field size for load/store mem size */
+ sz = btf__resolve_size(spec->btf, acc->type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *field_sz = sz;
+ *type_id = acc->type_id;
} else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
sz = btf__resolve_size(spec->btf, acc->type_id);
if (sz < 0)
@@ -4203,8 +5081,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
*val = sz;
} else {
pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
- bpf_program__title(prog, false),
- relo->kind, relo->insn_off / 8);
+ prog->name, relo->kind, relo->insn_off / 8);
return -EINVAL;
}
if (validate)
@@ -4213,7 +5090,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
}
m = btf_members(t) + acc->idx;
- mt = skip_mods_and_typedefs(spec->btf, m->type, NULL);
+ mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
bit_off = spec->bit_offset;
bit_sz = btf_member_bitfield_size(t, acc->idx);
@@ -4226,15 +5103,14 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
if (byte_sz >= 8) {
/* bitfield can't be read with 64-bit read */
pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
- bpf_program__title(prog, false),
- relo->kind, relo->insn_off / 8);
+ prog->name, relo->kind, relo->insn_off / 8);
return -E2BIG;
}
byte_sz *= 2;
byte_off = bit_off / 8 / byte_sz * byte_sz;
}
} else {
- sz = btf__resolve_size(spec->btf, m->type);
+ sz = btf__resolve_size(spec->btf, field_type_id);
if (sz < 0)
return -EINVAL;
byte_sz = sz;
@@ -4252,6 +5128,10 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
switch (relo->kind) {
case BPF_FIELD_BYTE_OFFSET:
*val = byte_off;
+ if (!bitfield) {
+ *field_sz = byte_sz;
+ *type_id = field_type_id;
+ }
break;
case BPF_FIELD_BYTE_SIZE:
*val = byte_sz;
@@ -4277,117 +5157,384 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
break;
case BPF_FIELD_EXISTS:
default:
- pr_warn("prog '%s': unknown relo %d at insn #%d\n",
- bpf_program__title(prog, false),
- relo->kind, relo->insn_off / 8);
- return -EINVAL;
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u32 *val)
+{
+ __s64 sz;
+
+ /* type-based relos return zero when target type is not found */
+ if (!spec) {
+ *val = 0;
+ return 0;
+ }
+
+ switch (relo->kind) {
+ case BPF_TYPE_ID_TARGET:
+ *val = spec->root_type_id;
+ break;
+ case BPF_TYPE_EXISTS:
+ *val = 1;
+ break;
+ case BPF_TYPE_SIZE:
+ sz = btf__resolve_size(spec->btf, spec->root_type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *val = sz;
+ break;
+ case BPF_TYPE_ID_LOCAL:
+ /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u32 *val)
+{
+ const struct btf_type *t;
+ const struct btf_enum *e;
+
+ switch (relo->kind) {
+ case BPF_ENUMVAL_EXISTS:
+ *val = spec ? 1 : 0;
+ break;
+ case BPF_ENUMVAL_VALUE:
+ if (!spec)
+ return -EUCLEAN; /* request instruction poisoning */
+ t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
+ e = btf_enum(t) + spec->spec[0].idx;
+ *val = e->val;
+ break;
+ default:
+ return -EOPNOTSUPP;
}
return 0;
}
+struct bpf_core_relo_res
+{
+ /* expected value in the instruction, unless validate == false */
+ __u32 orig_val;
+ /* new value that needs to be patched up to */
+ __u32 new_val;
+ /* relocation unsuccessful, poison instruction, but don't fail load */
+ bool poison;
+ /* some relocations can't be validated against orig_val */
+ bool validate;
+ /* for field byte offset relocations or the forms:
+ * *(T *)(rX + <off>) = rY
+ * rX = *(T *)(rY + <off>),
+ * we remember original and resolved field size to adjust direct
+ * memory loads of pointers and integers; this is necessary for 32-bit
+ * host kernel architectures, but also allows fields that were
+ * resized from, e.g., u32 to u64, to be relocated automatically.
+ */
+ bool fail_memsz_adjust;
+ __u32 orig_sz;
+ __u32 orig_type_id;
+ __u32 new_sz;
+ __u32 new_type_id;
+};
+
+/* Calculate original and target relocation values, given local and target
+ * specs and relocation kind. These values are calculated for each candidate.
+ * If there are multiple candidates, resulting values should all be consistent
+ * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
+ * If the instruction has to be poisoned, res->poison will be set to true.
+ */
+static int bpf_core_calc_relo(const struct bpf_program *prog,
+ const struct bpf_core_relo *relo,
+ int relo_idx,
+ const struct bpf_core_spec *local_spec,
+ const struct bpf_core_spec *targ_spec,
+ struct bpf_core_relo_res *res)
+{
+ int err = -EOPNOTSUPP;
+
+ res->orig_val = 0;
+ res->new_val = 0;
+ res->poison = false;
+ res->validate = true;
+ res->fail_memsz_adjust = false;
+ res->orig_sz = res->new_sz = 0;
+ res->orig_type_id = res->new_type_id = 0;
+
+ if (core_relo_is_field_based(relo->kind)) {
+ err = bpf_core_calc_field_relo(prog, relo, local_spec,
+ &res->orig_val, &res->orig_sz,
+ &res->orig_type_id, &res->validate);
+ err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
+ &res->new_val, &res->new_sz,
+ &res->new_type_id, NULL);
+ if (err)
+ goto done;
+ /* Validate if it's safe to adjust load/store memory size.
+ * Adjustments are performed only if original and new memory
+ * sizes differ.
+ */
+ res->fail_memsz_adjust = false;
+ if (res->orig_sz != res->new_sz) {
+ const struct btf_type *orig_t, *new_t;
+
+ orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
+ new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
+
+ /* There are two use cases in which it's safe to
+ * adjust load/store's mem size:
+ * - reading a 32-bit kernel pointer, while on the BPF
+ * side pointers are always 64-bit; in this case
+ * it's safe to "downsize" the instruction, since the
+ * pointer is treated as an unsigned integer with
+ * zero-extended upper 32 bits;
+ * - reading unsigned integers, where zero-extension
+ * again preserves the value correctly.
+ *
+ * In all other cases it's incorrect to attempt to
+ * load/store the field, because the read value would be
+ * wrong, so we poison the relocated instruction.
+ */
+ if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
+ goto done;
+ if (btf_is_int(orig_t) && btf_is_int(new_t) &&
+ btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
+ btf_int_encoding(new_t) != BTF_INT_SIGNED)
+ goto done;
+
+ /* mark as invalid mem size adjustment, but this will
+ * only be checked for LDX/STX/ST insns
+ */
+ res->fail_memsz_adjust = true;
+ }
+ } else if (core_relo_is_type_based(relo->kind)) {
+ err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
+ err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
+ } else if (core_relo_is_enumval_based(relo->kind)) {
+ err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
+ err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
+ }
+
+done:
+ if (err == -EUCLEAN) {
+ /* EUCLEAN is used to signal instruction poisoning request */
+ res->poison = true;
+ err = 0;
+ } else if (err == -EOPNOTSUPP) {
+ /* EOPNOTSUPP means unknown/unsupported relocation */
+ pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
+ prog->name, relo_idx, core_relo_kind_str(relo->kind),
+ relo->kind, relo->insn_off / 8);
+ }
+
+ return err;
+}
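A sketch of the load/store size-adjustment case handled above (hypothetical layouts): the object was built against the "local" definition while kernel BTF carries the "target" one; since both fields are unsigned integers, the 4-byte load can safely be widened to 8 bytes, otherwise the instruction is poisoned:

	struct local_ver  { unsigned int       refcnt; };	/* compiled-against layout */
	struct target_ver { unsigned long long refcnt; };	/* layout in kernel BTF    */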
+
+/*
+ * Turn an instruction for which CO-RE relocation failed into an invalid one
+ * with a distinct signature.
+ */
+static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
+ int insn_idx, struct bpf_insn *insn)
+{
+ pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
+ prog->name, relo_idx, insn_idx);
+ insn->code = BPF_JMP | BPF_CALL;
+ insn->dst_reg = 0;
+ insn->src_reg = 0;
+ insn->off = 0;
+ /* if this instruction is reachable (not dead code), the
+ * verifier will complain with the following message:
+ * invalid func unknown#195896080
+ */
+ insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+}
+
+static bool is_ldimm64(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
+static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
+{
+ switch (BPF_SIZE(insn->code)) {
+ case BPF_DW: return 8;
+ case BPF_W: return 4;
+ case BPF_H: return 2;
+ case BPF_B: return 1;
+ default: return -1;
+ }
+}
+
+static int insn_bytes_to_bpf_size(__u32 sz)
+{
+ switch (sz) {
+ case 8: return BPF_DW;
+ case 4: return BPF_W;
+ case 2: return BPF_H;
+ case 1: return BPF_B;
+ default: return -1;
+ }
+}
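A hedged sketch of how these converters can be combined to rewrite only the BPF_SIZE bits of a memory-access instruction while preserving its class and mode (illustrative only, not necessarily the exact patching code further below):

	static int set_mem_access_size(struct bpf_insn *insn, __u32 new_bytes)
	{
		int new_sz = insn_bytes_to_bpf_size(new_bytes);

		if (new_sz < 0)
			return -EINVAL;
		/* keep class (LDX/ST/STX) and mode (MEM), replace only the size bits */
		insn->code = BPF_CLASS(insn->code) | BPF_MODE(insn->code) | new_sz;
		return 0;
	}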
+
/*
* Patch relocatable BPF instruction.
*
* Patched value is determined by relocation kind and target specification.
- * For field existence relocation target spec will be NULL if field is not
- * found.
+ * For existence relocations, the target spec will be NULL if the field/type is not found.
* Expected insn->imm value is determined using relocation kind and local
* spec, and is checked before patching instruction. If actual insn->imm value
* is wrong, bail out with error.
*
- * Currently three kinds of BPF instructions are supported:
+ * Currently supported classes of BPF instruction are:
* 1. rX = <imm> (assignment with immediate operand);
* 2. rX += <imm> (arithmetic operations with immediate operand);
+ * 3. rX = <imm64> (load with 64-bit immediate value);
+ * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
+ * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
+ * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
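+ *
+ * For example (illustrative), a field-based relocation on an insn of
+ * class 4, "rX = *(u32 *)(rY + 8)", may end up patching both the byte
+ * offset (8 -> the field's offset in the target kernel struct) and,
+ * if the target field size differs, the memory size (u32 -> u16).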
*/
-static int bpf_core_reloc_insn(struct bpf_program *prog,
- const struct bpf_field_reloc *relo,
+static int bpf_core_patch_insn(struct bpf_program *prog,
+ const struct bpf_core_relo *relo,
int relo_idx,
- const struct bpf_core_spec *local_spec,
- const struct bpf_core_spec *targ_spec)
+ const struct bpf_core_relo_res *res)
{
__u32 orig_val, new_val;
struct bpf_insn *insn;
- bool validate = true;
- int insn_idx, err;
+ int insn_idx;
__u8 class;
- if (relo->insn_off % sizeof(struct bpf_insn))
+ if (relo->insn_off % BPF_INSN_SZ)
return -EINVAL;
- insn_idx = relo->insn_off / sizeof(struct bpf_insn);
+ insn_idx = relo->insn_off / BPF_INSN_SZ;
+ /* adjust insn_idx from section frame of reference to the local
+ * program's frame of reference; (sub-)program code is not yet
+ * relocated, so it's enough to just subtract in-section offset
+ */
+ insn_idx = insn_idx - prog->sec_insn_off;
insn = &prog->insns[insn_idx];
class = BPF_CLASS(insn->code);
- if (relo->kind == BPF_FIELD_EXISTS) {
- orig_val = 1; /* can't generate EXISTS relo w/o local field */
- new_val = targ_spec ? 1 : 0;
- } else if (!targ_spec) {
- pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
- bpf_program__title(prog, false), relo_idx, insn_idx);
- insn->code = BPF_JMP | BPF_CALL;
- insn->dst_reg = 0;
- insn->src_reg = 0;
- insn->off = 0;
- /* if this instruction is reachable (not a dead code),
- * verifier will complain with the following message:
- * invalid func unknown#195896080
+ if (res->poison) {
+poison:
+ /* poison second part of ldimm64 to avoid confusing error from
+ * verifier about "unknown opcode 00"
*/
- insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+ if (is_ldimm64(insn))
+ bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
+ bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
return 0;
- } else {
- err = bpf_core_calc_field_relo(prog, relo, local_spec,
- &orig_val, &validate);
- if (err)
- return err;
- err = bpf_core_calc_field_relo(prog, relo, targ_spec,
- &new_val, NULL);
- if (err)
- return err;
}
+ orig_val = res->orig_val;
+ new_val = res->new_val;
+
switch (class) {
case BPF_ALU:
case BPF_ALU64:
if (BPF_SRC(insn->code) != BPF_K)
return -EINVAL;
- if (validate && insn->imm != orig_val) {
+ if (res->validate && insn->imm != orig_val) {
pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
- bpf_program__title(prog, false), relo_idx,
+ prog->name, relo_idx,
insn_idx, insn->imm, orig_val, new_val);
return -EINVAL;
}
orig_val = insn->imm;
insn->imm = new_val;
pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
- bpf_program__title(prog, false), relo_idx, insn_idx,
+ prog->name, relo_idx, insn_idx,
orig_val, new_val);
break;
case BPF_LDX:
case BPF_ST:
case BPF_STX:
- if (validate && insn->off != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n",
- bpf_program__title(prog, false), relo_idx,
- insn_idx, insn->off, orig_val, new_val);
+ if (res->validate && insn->off != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
+ prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
return -EINVAL;
}
if (new_val > SHRT_MAX) {
pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
- bpf_program__title(prog, false), relo_idx,
- insn_idx, new_val);
+ prog->name, relo_idx, insn_idx, new_val);
return -ERANGE;
}
+ if (res->fail_memsz_adjust) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
+ "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
+ prog->name, relo_idx, insn_idx);
+ goto poison;
+ }
+
orig_val = insn->off;
insn->off = new_val;
pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
- bpf_program__title(prog, false), relo_idx, insn_idx,
- orig_val, new_val);
+ prog->name, relo_idx, insn_idx, orig_val, new_val);
+
+ if (res->new_sz != res->orig_sz) {
+ int insn_bytes_sz, insn_bpf_sz;
+
+ insn_bytes_sz = insn_bpf_size_to_bytes(insn);
+ if (insn_bytes_sz != res->orig_sz) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
+ prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
+ return -EINVAL;
+ }
+
+ insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
+ if (insn_bpf_sz < 0) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
+ prog->name, relo_idx, insn_idx, res->new_sz);
+ return -EINVAL;
+ }
+
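+ /* rebuild the opcode: keep the mode and class bits, replace
+ * only the size bits with the new memory size
+ */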
+ insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
+ prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
+ }
break;
+ case BPF_LD: {
+ __u64 imm;
+
+ if (!is_ldimm64(insn) ||
+ insn[0].src_reg != 0 || insn[0].off != 0 ||
+ insn_idx + 1 >= prog->insns_cnt ||
+ insn[1].code != 0 || insn[1].dst_reg != 0 ||
+ insn[1].src_reg != 0 || insn[1].off != 0) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
+ prog->name, relo_idx, insn_idx);
+ return -EINVAL;
+ }
+
+ imm = insn[0].imm + ((__u64)insn[1].imm << 32);
+ if (res->validate && imm != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
+ prog->name, relo_idx,
+ insn_idx, (unsigned long long)imm,
+ orig_val, new_val);
+ return -EINVAL;
+ }
+
+ insn[0].imm = new_val;
+ insn[1].imm = 0; /* currently only 32-bit values are supported */
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
+ prog->name, relo_idx, insn_idx,
+ (unsigned long long)imm, new_val);
+ break;
+ }
default:
- pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
- bpf_program__title(prog, false), relo_idx,
- insn_idx, insn->code, insn->src_reg, insn->dst_reg,
- insn->off, insn->imm);
+ pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
+ prog->name, relo_idx, insn_idx, insn->code,
+ insn->src_reg, insn->dst_reg, insn->off, insn->imm);
return -EINVAL;
}
@@ -4401,29 +5548,48 @@ static int bpf_core_reloc_insn(struct bpf_program *prog,
static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
{
const struct btf_type *t;
+ const struct btf_enum *e;
const char *s;
__u32 type_id;
int i;
- type_id = spec->spec[0].type_id;
+ type_id = spec->root_type_id;
t = btf__type_by_id(spec->btf, type_id);
s = btf__name_by_offset(spec->btf, t->name_off);
- libbpf_print(level, "[%u] %s + ", type_id, s);
- for (i = 0; i < spec->raw_len; i++)
- libbpf_print(level, "%d%s", spec->raw_spec[i],
- i == spec->raw_len - 1 ? " => " : ":");
+ libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
+
+ if (core_relo_is_type_based(spec->relo_kind))
+ return;
- libbpf_print(level, "%u.%u @ &x",
- spec->bit_offset / 8, spec->bit_offset % 8);
+ if (core_relo_is_enumval_based(spec->relo_kind)) {
+ t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
+ e = btf_enum(t) + spec->raw_spec[0];
+ s = btf__name_by_offset(spec->btf, e->name_off);
- for (i = 0; i < spec->len; i++) {
- if (spec->spec[i].name)
- libbpf_print(level, ".%s", spec->spec[i].name);
- else
- libbpf_print(level, "[%u]", spec->spec[i].idx);
+ libbpf_print(level, "::%s = %u", s, e->val);
+ return;
}
+ if (core_relo_is_field_based(spec->relo_kind)) {
+ for (i = 0; i < spec->len; i++) {
+ if (spec->spec[i].name)
+ libbpf_print(level, ".%s", spec->spec[i].name);
+ else if (i > 0 || spec->spec[i].idx > 0)
+ libbpf_print(level, "[%u]", spec->spec[i].idx);
+ }
+
+ libbpf_print(level, " (");
+ for (i = 0; i < spec->raw_len; i++)
+ libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
+
+ if (spec->bit_offset % 8)
+ libbpf_print(level, " @ offset %u.%u)",
+ spec->bit_offset / 8, spec->bit_offset % 8);
+ else
+ libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
+ return;
+ }
}
static size_t bpf_core_hash_fn(const void *key, void *ctx)
@@ -4487,22 +5653,22 @@ static void *u32_as_hash_key(__u32 x)
* CPU-wise compared to prebuilding a map from all local type names to
* a list of candidate type names. It's also sped up by caching resolved
* list of matching candidates per each local "root" type ID, that has at
- * least one bpf_field_reloc associated with it. This list is shared
+ * least one bpf_core_relo associated with it. This list is shared
* between multiple relocations for the same type ID and is updated as some
* of the candidates are pruned due to structural incompatibility.
*/
-static int bpf_core_reloc_field(struct bpf_program *prog,
- const struct bpf_field_reloc *relo,
- int relo_idx,
- const struct btf *local_btf,
- const struct btf *targ_btf,
- struct hashmap *cand_cache)
+static int bpf_core_apply_relo(struct bpf_program *prog,
+ const struct bpf_core_relo *relo,
+ int relo_idx,
+ const struct btf *local_btf,
+ const struct btf *targ_btf,
+ struct hashmap *cand_cache)
{
- const char *prog_name = bpf_program__title(prog, false);
- struct bpf_core_spec local_spec, cand_spec, targ_spec;
+ struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
const void *type_key = u32_as_hash_key(relo->type_id);
- const struct btf_type *local_type, *cand_type;
- const char *local_name, *cand_name;
+ struct bpf_core_relo_res cand_res, targ_res;
+ const struct btf_type *local_type;
+ const char *local_name;
struct ids_vec *cand_ids;
__u32 local_id, cand_id;
const char *spec_str;
@@ -4514,32 +5680,49 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
return -EINVAL;
local_name = btf__name_by_offset(local_btf, local_type->name_off);
- if (str_is_empty(local_name))
+ if (!local_name)
return -EINVAL;
spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
if (str_is_empty(spec_str))
return -EINVAL;
- err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec);
+ err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
if (err) {
- pr_warn("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n",
- prog_name, relo_idx, local_id, local_name, spec_str,
- err);
+ pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
+ prog->name, relo_idx, local_id, btf_kind_str(local_type),
+ str_is_empty(local_name) ? "<anon>" : local_name,
+ spec_str, err);
return -EINVAL;
}
- pr_debug("prog '%s': relo #%d: kind %d, spec is ", prog_name, relo_idx,
- relo->kind);
+ pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
+ relo_idx, core_relo_kind_str(relo->kind), relo->kind);
bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
libbpf_print(LIBBPF_DEBUG, "\n");
+ /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
+ if (relo->kind == BPF_TYPE_ID_LOCAL) {
+ targ_res.validate = true;
+ targ_res.poison = false;
+ targ_res.orig_val = local_spec.root_type_id;
+ targ_res.new_val = local_spec.root_type_id;
+ goto patch_insn;
+ }
+
+ /* libbpf doesn't support candidate search for anonymous types */
+ if (str_is_empty(local_name)) {
+ pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
+ prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+ return -EOPNOTSUPP;
+ }
+
if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
if (IS_ERR(cand_ids)) {
- pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld",
- prog_name, relo_idx, local_id, local_name,
- PTR_ERR(cand_ids));
+ pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld",
+ prog->name, relo_idx, local_id, btf_kind_str(local_type),
+ local_name, PTR_ERR(cand_ids));
return PTR_ERR(cand_ids);
}
err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
@@ -4551,36 +5734,51 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
for (i = 0, j = 0; i < cand_ids->len; i++) {
cand_id = cand_ids->data[i];
- cand_type = btf__type_by_id(targ_btf, cand_id);
- cand_name = btf__name_by_offset(targ_btf, cand_type->name_off);
-
- err = bpf_core_spec_match(&local_spec, targ_btf,
- cand_id, &cand_spec);
- pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ",
- prog_name, relo_idx, i, cand_name);
- bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
- libbpf_print(LIBBPF_DEBUG, ": %d\n", err);
+ err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec);
if (err < 0) {
- pr_warn("prog '%s': relo #%d: matching error: %d\n",
- prog_name, relo_idx, err);
+ pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
+ prog->name, relo_idx, i);
+ bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
+ libbpf_print(LIBBPF_WARN, ": %d\n", err);
return err;
}
+
+ pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
+ relo_idx, err == 0 ? "non-matching" : "matching", i);
+ bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
+ libbpf_print(LIBBPF_DEBUG, "\n");
+
if (err == 0)
continue;
+ err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
+ if (err)
+ return err;
+
if (j == 0) {
+ targ_res = cand_res;
targ_spec = cand_spec;
} else if (cand_spec.bit_offset != targ_spec.bit_offset) {
- /* if there are many candidates, they should all
- * resolve to the same bit offset
+ /* if there are many field relo candidates, they
+ * should all resolve to the same bit offset
*/
- pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n",
- prog_name, relo_idx, cand_spec.bit_offset,
+ pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
+ prog->name, relo_idx, cand_spec.bit_offset,
targ_spec.bit_offset);
return -EINVAL;
+ } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
+ /* all candidates should result in the same relocation
+ * decision and value, otherwise it's dangerous to
+ * proceed due to ambiguity
+ */
+ pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
+ prog->name, relo_idx,
+ cand_res.poison ? "failure" : "success", cand_res.new_val,
+ targ_res.poison ? "failure" : "success", targ_res.new_val);
+ return -EINVAL;
}
- cand_ids->data[j++] = cand_spec.spec[0].type_id;
+ cand_ids->data[j++] = cand_spec.root_type_id;
}
/*
@@ -4599,22 +5797,28 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
* as well as expected case, depending whether instruction w/
* relocation is guarded in some way that makes it unreachable (dead
* code) if relocation can't be resolved. This is handled in
- * bpf_core_reloc_insn() uniformly by replacing that instruction with
+ * bpf_core_patch_insn() uniformly by replacing that instruction with
* BPF helper call insn (using invalid helper ID). If that instruction
* is indeed unreachable, then it will be ignored and eliminated by
* verifier. If it was an error, then verifier will complain and point
* to a specific instruction number in its log.
*/
- if (j == 0)
- pr_debug("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n",
- prog_name, relo_idx, local_id, local_name, spec_str);
+ if (j == 0) {
+ pr_debug("prog '%s': relo #%d: no matching targets found\n",
+ prog->name, relo_idx);
+
+ /* calculate single target relo result explicitly */
+ err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
+ if (err)
+ return err;
+ }
- /* bpf_core_reloc_insn should know how to handle missing targ_spec */
- err = bpf_core_reloc_insn(prog, relo, relo_idx, &local_spec,
- j ? &targ_spec : NULL);
+patch_insn:
+ /* bpf_core_patch_insn() should know how to handle missing targ_spec */
+ err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
if (err) {
pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
- prog_name, relo_idx, relo->insn_off, err);
+ prog->name, relo_idx, relo->insn_off, err);
return -EINVAL;
}
@@ -4622,23 +5826,26 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
}
static int
-bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
+bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
{
const struct btf_ext_info_sec *sec;
- const struct bpf_field_reloc *rec;
+ const struct bpf_core_relo *rec;
const struct btf_ext_info *seg;
struct hashmap_entry *entry;
struct hashmap *cand_cache = NULL;
struct bpf_program *prog;
struct btf *targ_btf;
const char *sec_name;
- int i, err = 0;
+ int i, err = 0, insn_idx, sec_idx;
+
+ if (obj->btf_ext->core_relo_info.len == 0)
+ return 0;
if (targ_btf_path)
- targ_btf = btf__parse_elf(targ_btf_path, NULL);
+ targ_btf = btf__parse(targ_btf_path, NULL);
else
- targ_btf = libbpf_find_kernel_btf();
- if (IS_ERR(targ_btf)) {
+ targ_btf = obj->btf_vmlinux;
+ if (IS_ERR_OR_NULL(targ_btf)) {
pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
return PTR_ERR(targ_btf);
}
@@ -4649,37 +5856,63 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
goto out;
}
- seg = &obj->btf_ext->field_reloc_info;
+ seg = &obj->btf_ext->core_relo_info;
for_each_btf_ext_sec(seg, sec) {
sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
if (str_is_empty(sec_name)) {
err = -EINVAL;
goto out;
}
- prog = bpf_object__find_program_by_title(obj, sec_name);
+ /* bpf_object's ELF is gone by now so it's not easy to find
+ * section index by section name, but we can find *any*
+ * bpf_program within desired section name and use its
+ * prog->sec_idx to do a proper search by section index and
+ * instruction offset
+ */
+ prog = NULL;
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+ if (strcmp(prog->sec_name, sec_name) == 0)
+ break;
+ }
if (!prog) {
- pr_warn("failed to find program '%s' for CO-RE offset relocation\n",
- sec_name);
- err = -EINVAL;
- goto out;
+ pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
+ return -ENOENT;
}
+ sec_idx = prog->sec_idx;
- pr_debug("prog '%s': performing %d CO-RE offset relocs\n",
+ pr_debug("sec '%s': found %d CO-RE relocations\n",
sec_name, sec->num_info);
for_each_btf_ext_rec(seg, sec, i, rec) {
- err = bpf_core_reloc_field(prog, rec, i, obj->btf,
- targ_btf, cand_cache);
+ insn_idx = rec->insn_off / BPF_INSN_SZ;
+ prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
+ if (!prog) {
+ pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n",
+ sec_name, insn_idx, i);
+ err = -EINVAL;
+ goto out;
+ }
+ /* no need to apply CO-RE relocation if the program is
+ * not going to be loaded
+ */
+ if (!prog->load)
+ continue;
+
+ err = bpf_core_apply_relo(prog, rec, i, obj->btf,
+ targ_btf, cand_cache);
if (err) {
pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
- sec_name, i, err);
+ prog->name, i, err);
goto out;
}
}
}
out:
- btf__free(targ_btf);
+ /* obj->btf_vmlinux is freed at the end of object load phase */
+ if (targ_btf != obj->btf_vmlinux)
+ btf__free(targ_btf);
if (!IS_ERR_OR_NULL(cand_cache)) {
hashmap__for_each_entry(cand_cache, entry, i) {
bpf_core_free_cands(entry->value);
@@ -4689,118 +5922,432 @@ out:
return err;
}
+/* Relocate data references within program code:
+ * - map references;
+ * - global variable references;
+ * - extern references.
+ */
static int
-bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
+bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
{
- int err = 0;
+ int i;
- if (obj->btf_ext->field_reloc_info.len)
- err = bpf_core_reloc_fields(obj, targ_btf_path);
+ for (i = 0; i < prog->nr_reloc; i++) {
+ struct reloc_desc *relo = &prog->reloc_desc[i];
+ struct bpf_insn *insn = &prog->insns[relo->insn_idx];
+ struct extern_desc *ext;
- return err;
+ switch (relo->type) {
+ case RELO_LD64:
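+ /* ld_imm64 referencing a map: src_reg marks it as a map FD
+ * reference and imm carries the map's FD
+ */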
+ insn[0].src_reg = BPF_PSEUDO_MAP_FD;
+ insn[0].imm = obj->maps[relo->map_idx].fd;
+ relo->processed = true;
+ break;
+ case RELO_DATA:
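+ /* ld_imm64 referencing a map value (global data): first imm
+ * becomes the map FD, second imm the offset within the value
+ */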
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[1].imm = insn[0].imm + relo->sym_off;
+ insn[0].imm = obj->maps[relo->map_idx].fd;
+ relo->processed = true;
+ break;
+ case RELO_EXTERN:
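+ /* kconfig externs resolve to an offset inside the internal
+ * kconfig map; ksym externs resolve to either a vmlinux BTF
+ * ID (typed) or a raw kernel address (typeless)
+ */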
+ ext = &obj->externs[relo->sym_off];
+ if (ext->type == EXT_KCFG) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+ insn[1].imm = ext->kcfg.data_off;
+ } else /* EXT_KSYM */ {
+ if (ext->ksym.type_id) { /* typed ksyms */
+ insn[0].src_reg = BPF_PSEUDO_BTF_ID;
+ insn[0].imm = ext->ksym.vmlinux_btf_id;
+ } else { /* typeless ksyms */
+ insn[0].imm = (__u32)ext->ksym.addr;
+ insn[1].imm = ext->ksym.addr >> 32;
+ }
+ }
+ relo->processed = true;
+ break;
+ case RELO_CALL:
+ /* will be handled as a follow up pass */
+ break;
+ default:
+ pr_warn("prog '%s': relo #%d: bad relo type %d\n",
+ prog->name, i, relo->type);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
}
-static int
-bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
- struct reloc_desc *relo)
+static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
+ const struct bpf_program *prog,
+ const struct btf_ext_info *ext_info,
+ void **prog_info, __u32 *prog_rec_cnt,
+ __u32 *prog_rec_sz)
{
- struct bpf_insn *insn, *new_insn;
- struct bpf_program *text;
- size_t new_cnt;
- int err;
+ void *copy_start = NULL, *copy_end = NULL;
+ void *rec, *rec_end, *new_prog_info;
+ const struct btf_ext_info_sec *sec;
+ size_t old_sz, new_sz;
+ const char *sec_name;
+ int i, off_adj;
- if (prog->idx != obj->efile.text_shndx && prog->main_prog_cnt == 0) {
- text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
- if (!text) {
- pr_warn("no .text section found yet relo into text exist\n");
- return -LIBBPF_ERRNO__RELOC;
+ for_each_btf_ext_sec(ext_info, sec) {
+ sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
+ if (!sec_name)
+ return -EINVAL;
+ if (strcmp(sec_name, prog->sec_name) != 0)
+ continue;
+
+ for_each_btf_ext_rec(ext_info, sec, i, rec) {
+ __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
+
+ if (insn_off < prog->sec_insn_off)
+ continue;
+ if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
+ break;
+
+ if (!copy_start)
+ copy_start = rec;
+ copy_end = rec + ext_info->rec_size;
}
- new_cnt = prog->insns_cnt + text->insns_cnt;
- new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn));
- if (!new_insn) {
- pr_warn("oom in prog realloc\n");
+
+ if (!copy_start)
+ return -ENOENT;
+
+ /* append func/line info of a given (sub-)program to the main
+ * program func/line info
+ */
+ old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
+ new_sz = old_sz + (copy_end - copy_start);
+ new_prog_info = realloc(*prog_info, new_sz);
+ if (!new_prog_info)
return -ENOMEM;
- }
- prog->insns = new_insn;
+ *prog_info = new_prog_info;
+ *prog_rec_cnt = new_sz / ext_info->rec_size;
+ memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
+
+ /* Kernel instruction offsets are in units of 8-byte
+ * instructions, while .BTF.ext instruction offsets generated
+ * by Clang are in units of bytes. So convert Clang offsets
+ * into kernel offsets and adjust offset according to program
+ * relocated position.
+ */
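+ /* Illustrative example: a record at byte offset 96 within the
+ * section is insn #12; if this subprog starts at section insn
+ * #10 and was appended at main-prog insn #50, off_adj is 40 and
+ * the rewritten offset becomes 12 + 40 = 52.
+ */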
+ off_adj = prog->sub_insn_off - prog->sec_insn_off;
+ rec = new_prog_info + old_sz;
+ rec_end = new_prog_info + new_sz;
+ for (; rec < rec_end; rec += ext_info->rec_size) {
+ __u32 *insn_off = rec;
- if (obj->btf_ext) {
- err = bpf_program_reloc_btf_ext(prog, obj,
- text->section_name,
- prog->insns_cnt);
- if (err)
- return err;
+ *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
}
-
- memcpy(new_insn + prog->insns_cnt, text->insns,
- text->insns_cnt * sizeof(*insn));
- prog->main_prog_cnt = prog->insns_cnt;
- prog->insns_cnt = new_cnt;
- pr_debug("added %zd insn from %s to prog %s\n",
- text->insns_cnt, text->section_name,
- prog->section_name);
+ *prog_rec_sz = ext_info->rec_size;
+ return 0;
}
- insn = &prog->insns[relo->insn_idx];
- insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx;
- return 0;
+ return -ENOENT;
}
static int
-bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
+reloc_prog_func_and_line_info(const struct bpf_object *obj,
+ struct bpf_program *main_prog,
+ const struct bpf_program *prog)
{
- int i, err;
+ int err;
- if (!prog)
+ /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
+ * support func/line info
+ */
+ if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC))
return 0;
- if (obj->btf_ext) {
- err = bpf_program_reloc_btf_ext(prog, obj,
- prog->section_name, 0);
- if (err)
+ /* only attempt func info relocation if main program's func_info
+ * relocation was successful
+ */
+ if (main_prog != prog && !main_prog->func_info)
+ goto line_info;
+
+ err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
+ &main_prog->func_info,
+ &main_prog->func_info_cnt,
+ &main_prog->func_info_rec_size);
+ if (err) {
+ if (err != -ENOENT) {
+ pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
+ prog->name, err);
return err;
+ }
+ if (main_prog->func_info) {
+ /*
+ * Some info has already been found, but the last
+ * btf_ext reloc failed, so we must error out.
+ */
+ pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
+ return err;
+ }
+ /* We failed to load the very first info; ignore the rest. */
+ pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
+ prog->name);
}
- if (!prog->reloc_desc)
+line_info:
+ /* don't relocate line info if main program's relocation failed */
+ if (main_prog != prog && !main_prog->line_info)
return 0;
- for (i = 0; i < prog->nr_reloc; i++) {
- struct reloc_desc *relo = &prog->reloc_desc[i];
- struct bpf_insn *insn = &prog->insns[relo->insn_idx];
+ err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
+ &main_prog->line_info,
+ &main_prog->line_info_cnt,
+ &main_prog->line_info_rec_size);
+ if (err) {
+ if (err != -ENOENT) {
+ pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
+ prog->name, err);
+ return err;
+ }
+ if (main_prog->line_info) {
+ /*
+ * Some info has already been found, but the last
+ * btf_ext reloc failed, so we must error out.
+ */
+ pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
+ return err;
+ }
+ /* We failed to load the very first info; ignore the rest. */
+ pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
+ prog->name);
+ }
+ return 0;
+}
+
+static int cmp_relo_by_insn_idx(const void *key, const void *elem)
+{
+ size_t insn_idx = *(const size_t *)key;
+ const struct reloc_desc *relo = elem;
+
+ if (insn_idx == relo->insn_idx)
+ return 0;
+ return insn_idx < relo->insn_idx ? -1 : 1;
+}
+
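+/* relocations are sorted by insn_idx in bpf_object__collect_relos(), so a
+ * binary search by instruction index is sufficient here
+ */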
+static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
+{
+ return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
+ sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
+}
+
+static int
+bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
+ struct bpf_program *prog)
+{
+ size_t sub_insn_idx, insn_idx, new_cnt;
+ struct bpf_program *subprog;
+ struct bpf_insn *insns, *insn;
+ struct reloc_desc *relo;
+ int err;
+
+ err = reloc_prog_func_and_line_info(obj, main_prog, prog);
+ if (err)
+ return err;
+
+ for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
+ insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
+ if (!insn_is_subprog_call(insn))
+ continue;
- if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
- pr_warn("relocation out of range: '%s'\n",
- prog->section_name);
+ relo = find_prog_insn_relo(prog, insn_idx);
+ if (relo && relo->type != RELO_CALL) {
+ pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
+ prog->name, insn_idx, relo->type);
return -LIBBPF_ERRNO__RELOC;
}
+ if (relo) {
+ /* sub-program instruction index is a combination of
+ * an offset of a symbol pointed to by relocation and
+ * call instruction's imm field; for global functions,
+ * call always has imm = -1, but for static functions
+ * relocation is against STT_SECTION and insn->imm
+ * points to a start of a static function
+ */
+ sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
+ } else {
+ /* if subprogram call is to a static function within
+ * the same ELF section, there won't be any relocation
+ * emitted, but it also means there is no additional
+ * offset necessary; insn->imm is relative to the
+ * instruction's original position within the section
+ */
+ sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
+ }
- switch (relo->type) {
- case RELO_LD64:
- insn[0].src_reg = BPF_PSEUDO_MAP_FD;
- insn[0].imm = obj->maps[relo->map_idx].fd;
- break;
- case RELO_DATA:
- insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
- insn[1].imm = insn[0].imm + relo->sym_off;
- insn[0].imm = obj->maps[relo->map_idx].fd;
- break;
- case RELO_EXTERN:
- insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
- insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
- insn[1].imm = relo->sym_off;
- break;
- case RELO_CALL:
- err = bpf_program__reloc_text(prog, obj, relo);
+ /* we enforce that sub-programs should be in .text section */
+ subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
+ if (!subprog) {
+ pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
+ prog->name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ /* if it's the first call instruction calling into this
+ * subprogram (meaning this subprog hasn't been processed
+ * yet) within the context of current main program:
+ * - append it at the end of the main program's instruction array;
+ * - process it recursively, while the current program is put on hold;
+ * - if that subprogram calls some other not-yet-processed
+ * subprogram, the same thing happens recursively until
+ * there are no more unprocessed subprograms left to append
+ * and relocate.
+ */
+ if (subprog->sub_insn_off == 0) {
+ subprog->sub_insn_off = main_prog->insns_cnt;
+
+ new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
+ insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
+ if (!insns) {
+ pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
+ return -ENOMEM;
+ }
+ main_prog->insns = insns;
+ main_prog->insns_cnt = new_cnt;
+
+ memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
+ subprog->insns_cnt * sizeof(*insns));
+
+ pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
+ main_prog->name, subprog->insns_cnt, subprog->name);
+
+ err = bpf_object__reloc_code(obj, main_prog, subprog);
if (err)
return err;
- break;
- default:
- pr_warn("relo #%d: bad relo type %d\n", i, relo->type);
- return -EINVAL;
}
+
+ /* main_prog->insns memory could have been re-allocated, so
+ * calculate pointer again
+ */
+ insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
+ /* calculate correct instruction position within current main
+ * prog; each main prog can have a different set of
+ * subprograms appended (potentially in different order as
+ * well), so position of any subprog can be different for
+ * different main programs */
+ insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
+
+ if (relo)
+ relo->processed = true;
+
+ pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
+ prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
}
- zfree(&prog->reloc_desc);
- prog->nr_reloc = 0;
+ return 0;
+}
+
+/*
+ * Relocate sub-program calls.
+ *
+ * Algorithm operates as follows. Each entry-point BPF program (referred to as
+ * main prog) is processed separately. Each subprog (a non-entry function
+ * that can be called from either entry progs or other subprogs) gets its
+ * sub_insn_off reset to zero. This serves as an indicator that this subprogram
+ * hasn't yet been appended and relocated within the current main prog. Once it's
+ * relocated, sub_insn_off will point at the position within the current main prog
+ * where the given subprog was appended. This is then used to relocate all
+ * the call instructions jumping into this subprog.
+ *
+ * We start with main program and process all call instructions. If the call
+ * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
+ * is zero), subprog instructions are appended at the end of main program's
+ * instruction array. Then main program is "put on hold" while we recursively
+ * process newly appended subprogram. If that subprogram calls into another
+ * subprogram that hasn't been appended, new subprogram is appended again to
+ * the *main* prog's instructions (subprog's instructions are always left
+ * untouched, as they need to be in unmodified state for subsequent main progs
+ * and subprog instructions are always sent only as part of a main prog) and
+ * the process continues recursively. Once all the subprogs called from a main
+ * prog or any of its subprogs are appended (and relocated), all their
+ * positions within finalized instructions array are known, so it's easy to
+ * rewrite call instructions with correct relative offsets, corresponding to
+ * desired target subprog.
+ *
+ * It's important to realize that some subprogs might not be called from some
+ * main prog and any of its called/used subprogs. Those will keep their
+ * subprog->sub_insn_off as zero at all times and won't be appended to current
+ * main prog and won't be relocated within the context of current main prog.
+ * They might still be used from other main progs later.
+ *
+ * Visually this process can be shown as below. Suppose we have two main
+ * programs mainA and mainB and BPF object contains three subprogs: subA,
+ * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
+ * subC both call subB:
+ *
+ * +--------+ +-------+
+ * | v v |
+ * +--+---+ +--+-+-+ +---+--+
+ * | subA | | subB | | subC |
+ * +--+---+ +------+ +---+--+
+ * ^ ^
+ * | |
+ * +---+-------+ +------+----+
+ * | mainA | | mainB |
+ * +-----------+ +-----------+
+ *
+ * We'll start relocating mainA, will find subA, append it and start
+ * processing subA recursively:
+ *
+ * +-----------+------+
+ * | mainA | subA |
+ * +-----------+------+
+ *
+ * At this point we notice that subB is used from subA, so we append it and
+ * relocate (there are no further subcalls from subB):
+ *
+ * +-----------+------+------+
+ * | mainA | subA | subB |
+ * +-----------+------+------+
+ *
+ * At this point, we relocate subA calls, then go one level up and finish with
+ * relocating mainA calls. mainA is done.
+ *
+ * For mainB the process is similar but results in a different order. We start with
+ * mainB and skip subA and subB, as mainB never calls them (at least
+ * directly), but we see subC is needed, so we append and start processing it:
+ *
+ * +-----------+------+
+ * | mainB | subC |
+ * +-----------+------+
+ *
+ * Now we see subC needs subB, so we go back to it, append and relocate it:
+ *
+ * +-----------+------+------+
+ * | mainB | subC | subB |
+ * +-----------+------+------+
+ *
+ * At this point we unwind recursion, relocate calls in subC, then in mainB.
+ */
+static int
+bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
+{
+ struct bpf_program *subprog;
+ int i, j, err;
+
+ /* mark all subprogs as not relocated (yet) within the context of
+ * current main program
+ */
+ for (i = 0; i < obj->nr_programs; i++) {
+ subprog = &obj->programs[i];
+ if (!prog_is_subprog(obj, subprog))
+ continue;
+
+ subprog->sub_insn_off = 0;
+ for (j = 0; j < subprog->nr_reloc; j++)
+ if (subprog->reloc_desc[j].type == RELO_CALL)
+ subprog->reloc_desc[j].processed = false;
+ }
+
+ err = bpf_object__reloc_code(obj, prog, prog);
+ if (err)
+ return err;
+
return 0;
}
@@ -4819,80 +6366,255 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
return err;
}
}
- /* ensure .text is relocated first, as it's going to be copied as-is
- * later for sub-program calls
+ /* relocate data references first for all programs and sub-programs,
+ * as they don't change relative to code locations, so subsequent
+ * subprogram processing won't need to re-calculate any of them
*/
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- if (prog->idx != obj->efile.text_shndx)
- continue;
-
- err = bpf_program__relocate(prog, obj);
+ err = bpf_object__relocate_data(obj, prog);
if (err) {
- pr_warn("failed to relocate '%s'\n", prog->section_name);
+ pr_warn("prog '%s': failed to relocate data references: %d\n",
+ prog->name, err);
return err;
}
- break;
}
- /* now relocate everything but .text, which by now is relocated
- * properly, so we can copy raw sub-program instructions as is safely
+ /* now relocate subprogram calls and append used subprograms to main
+ * programs; each copy of subprogram code needs to be relocated
+ * differently for each main program, because its code location might
+ * have changed
*/
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- if (prog->idx == obj->efile.text_shndx)
+ /* sub-program's sub-calls are relocated within the context of
+ * its main program only
+ */
+ if (prog_is_subprog(obj, prog))
continue;
- err = bpf_program__relocate(prog, obj);
+ err = bpf_object__relocate_calls(obj, prog);
if (err) {
- pr_warn("failed to relocate '%s'\n", prog->section_name);
+ pr_warn("prog '%s': failed to relocate calls: %d\n",
+ prog->name, err);
return err;
}
}
+ /* free up relocation descriptors */
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+ zfree(&prog->reloc_desc);
+ prog->nr_reloc = 0;
+ }
return 0;
}
-static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj,
- GElf_Shdr *shdr,
- Elf_Data *data);
+static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
+ GElf_Shdr *shdr, Elf_Data *data);
-static int bpf_object__collect_reloc(struct bpf_object *obj)
+static int bpf_object__collect_map_relos(struct bpf_object *obj,
+ GElf_Shdr *shdr, Elf_Data *data)
{
- int i, err;
+ const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
+ int i, j, nrels, new_sz;
+ const struct btf_var_secinfo *vi = NULL;
+ const struct btf_type *sec, *var, *def;
+ struct bpf_map *map = NULL, *targ_map;
+ const struct btf_member *member;
+ const char *name, *mname;
+ Elf_Data *symbols;
+ unsigned int moff;
+ GElf_Sym sym;
+ GElf_Rel rel;
+ void *tmp;
+
+ if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
+ return -EINVAL;
+ sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
+ if (!sec)
+ return -EINVAL;
+
+ symbols = obj->efile.symbols;
+ nrels = shdr->sh_size / shdr->sh_entsize;
+ for (i = 0; i < nrels; i++) {
+ if (!gelf_getrel(data, i, &rel)) {
+ pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
+ pr_warn(".maps relo #%d: symbol %zx not found\n",
+ i, (size_t)GELF_R_SYM(rel.r_info));
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ name = elf_sym_str(obj, sym.st_name) ?: "<?>";
+ if (sym.st_shndx != obj->efile.btf_maps_shndx) {
+ pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
+ i, name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n",
+ i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value,
+ (size_t)rel.r_offset, sym.st_name, name);
+
+ for (j = 0; j < obj->nr_maps; j++) {
+ map = &obj->maps[j];
+ if (map->sec_idx != obj->efile.btf_maps_shndx)
+ continue;
- if (!obj_elf_valid(obj)) {
- pr_warn("Internal error: elf object is closed\n");
- return -LIBBPF_ERRNO__INTERNAL;
+ vi = btf_var_secinfos(sec) + map->btf_var_idx;
+ if (vi->offset <= rel.r_offset &&
+ rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size)
+ break;
+ }
+ if (j == obj->nr_maps) {
+ pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n",
+ i, name, (size_t)rel.r_offset);
+ return -EINVAL;
+ }
+
+ if (!bpf_map_type__is_map_in_map(map->def.type))
+ return -EINVAL;
+ if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
+ map->def.key_size != sizeof(int)) {
+ pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
+ i, map->name, sizeof(int));
+ return -EINVAL;
+ }
+
+ targ_map = bpf_object__find_map_by_name(obj, name);
+ if (!targ_map)
+ return -ESRCH;
+
+ var = btf__type_by_id(obj->btf, vi->type);
+ def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
+ if (btf_vlen(def) == 0)
+ return -EINVAL;
+ member = btf_members(def) + btf_vlen(def) - 1;
+ mname = btf__name_by_offset(obj->btf, member->name_off);
+ if (strcmp(mname, "values"))
+ return -EINVAL;
+
+ moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
+ if (rel.r_offset - vi->offset < moff)
+ return -EINVAL;
+
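+ /* turn the relocation offset into a slot index within the
+ * 'values' array: subtract the array's byte offset within the
+ * map definition, then divide by the BPF pointer size (each
+ * slot holds one inner map pointer)
+ */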
+ moff = rel.r_offset - vi->offset - moff;
+ /* here we use BPF pointer size, which is always 64 bit, as we
+ * are parsing ELF that was built for BPF target
+ */
+ if (moff % bpf_ptr_sz)
+ return -EINVAL;
+ moff /= bpf_ptr_sz;
+ if (moff >= map->init_slots_sz) {
+ new_sz = moff + 1;
+ tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
+ if (!tmp)
+ return -ENOMEM;
+ map->init_slots = tmp;
+ memset(map->init_slots + map->init_slots_sz, 0,
+ (new_sz - map->init_slots_sz) * host_ptr_sz);
+ map->init_slots_sz = new_sz;
+ }
+ map->init_slots[moff] = targ_map;
+
+ pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n",
+ i, map->name, moff, name);
}
+ return 0;
+}
+
+static int cmp_relocs(const void *_a, const void *_b)
+{
+ const struct reloc_desc *a = _a;
+ const struct reloc_desc *b = _b;
+
+ if (a->insn_idx != b->insn_idx)
+ return a->insn_idx < b->insn_idx ? -1 : 1;
+
+ /* no two relocations should have the same insn_idx, but ... */
+ if (a->type != b->type)
+ return a->type < b->type ? -1 : 1;
+
+ return 0;
+}
+
+static int bpf_object__collect_relos(struct bpf_object *obj)
+{
+ int i, err;
+
for (i = 0; i < obj->efile.nr_reloc_sects; i++) {
GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;
Elf_Data *data = obj->efile.reloc_sects[i].data;
int idx = shdr->sh_info;
- struct bpf_program *prog;
if (shdr->sh_type != SHT_REL) {
pr_warn("internal error at %d\n", __LINE__);
return -LIBBPF_ERRNO__INTERNAL;
}
- if (idx == obj->efile.st_ops_shndx) {
- err = bpf_object__collect_struct_ops_map_reloc(obj,
- shdr,
- data);
- if (err)
- return err;
+ if (idx == obj->efile.st_ops_shndx)
+ err = bpf_object__collect_st_ops_relos(obj, shdr, data);
+ else if (idx == obj->efile.btf_maps_shndx)
+ err = bpf_object__collect_map_relos(obj, shdr, data);
+ else
+ err = bpf_object__collect_prog_relos(obj, shdr, data);
+ if (err)
+ return err;
+ }
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ struct bpf_program *p = &obj->programs[i];
+
+ if (!p->nr_reloc)
continue;
- }
- prog = bpf_object__find_prog_by_idx(obj, idx);
- if (!prog) {
- pr_warn("relocation failed: no section(%d)\n", idx);
- return -LIBBPF_ERRNO__RELOC;
- }
+ qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
+ }
+ return 0;
+}
- err = bpf_program__collect_reloc(prog, shdr, data, obj);
- if (err)
- return err;
+static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
+{
+ if (BPF_CLASS(insn->code) == BPF_JMP &&
+ BPF_OP(insn->code) == BPF_CALL &&
+ BPF_SRC(insn->code) == BPF_K &&
+ insn->src_reg == 0 &&
+ insn->dst_reg == 0) {
+ *func_id = insn->imm;
+ return true;
+ }
+ return false;
+}
+
+static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
+{
+ struct bpf_insn *insn = prog->insns;
+ enum bpf_func_id func_id;
+ int i;
+
+ for (i = 0; i < prog->insns_cnt; i++, insn++) {
+ if (!insn_is_helper_call(insn, &func_id))
+ continue;
+
+ /* on kernels that don't yet support
+ * bpf_probe_read_{kernel,user}[_str] helpers, fall back
+ * to bpf_probe_read() which works well for old kernels
+ */
+ switch (func_id) {
+ case BPF_FUNC_probe_read_kernel:
+ case BPF_FUNC_probe_read_user:
+ if (!kernel_supports(FEAT_PROBE_READ_KERN))
+ insn->imm = BPF_FUNC_probe_read;
+ break;
+ case BPF_FUNC_probe_read_kernel_str:
+ case BPF_FUNC_probe_read_user_str:
+ if (!kernel_supports(FEAT_PROBE_READ_KERN))
+ insn->imm = BPF_FUNC_probe_read_str;
+ break;
+ default:
+ break;
+ }
}
return 0;
}
@@ -4913,12 +6635,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
load_attr.prog_type = prog->type;
/* old kernels might not support specifying expected_attach_type */
- if (!prog->caps->exp_attach_type && prog->sec_def &&
+ if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
prog->sec_def->is_exp_attach_type_optional)
load_attr.expected_attach_type = 0;
else
load_attr.expected_attach_type = prog->expected_attach_type;
- if (prog->caps->name)
+ if (kernel_supports(FEAT_PROG_NAME))
load_attr.name = prog->name;
load_attr.insns = insns;
load_attr.insns_cnt = insns_cnt;
@@ -4934,18 +6656,17 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.kern_version = kern_version;
load_attr.prog_ifindex = prog->prog_ifindex;
}
- /* if .BTF.ext was loaded, kernel supports associated BTF for prog */
- if (prog->obj->btf_ext)
- btf_fd = bpf_object__btf_fd(prog->obj);
- else
- btf_fd = -1;
- load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0;
- load_attr.func_info = prog->func_info;
- load_attr.func_info_rec_size = prog->func_info_rec_size;
- load_attr.func_info_cnt = prog->func_info_cnt;
- load_attr.line_info = prog->line_info;
- load_attr.line_info_rec_size = prog->line_info_rec_size;
- load_attr.line_info_cnt = prog->line_info_cnt;
+ /* specify func_info/line_info only if kernel supports them */
+ btf_fd = bpf_object__btf_fd(prog->obj);
+ if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
+ load_attr.prog_btf_fd = btf_fd;
+ load_attr.func_info = prog->func_info;
+ load_attr.func_info_rec_size = prog->func_info_rec_size;
+ load_attr.func_info_cnt = prog->func_info_cnt;
+ load_attr.line_info = prog->line_info;
+ load_attr.line_info_rec_size = prog->line_info_rec_size;
+ load_attr.line_info_cnt = prog->line_info_cnt;
+ }
load_attr.log_level = prog->log_level;
load_attr.prog_flags = prog->prog_flags;
@@ -4963,6 +6684,20 @@ retry_load:
if (ret >= 0) {
if (log_buf && load_attr.log_level)
pr_debug("verifier log:\n%s", log_buf);
+
+ if (prog->obj->rodata_map_idx >= 0 &&
+ kernel_supports(FEAT_PROG_BIND_MAP)) {
+ struct bpf_map *rodata_map =
+ &prog->obj->maps[prog->obj->rodata_map_idx];
+
+ if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) {
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warn("prog '%s': failed to bind .rodata map: %s\n",
+ prog->name, cp);
+ /* Don't fail hard if can't bind rodata. */
+ }
+ }
+
*pfd = ret;
ret = 0;
goto out;
@@ -4975,7 +6710,7 @@ retry_load:
free(log_buf);
goto retry_load;
}
- ret = -errno;
+ ret = errno ? -errno : -LIBBPF_ERRNO__LOAD;
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
pr_warn("load bpf program failed: %s\n", cp);
pr_perm_msg(ret);
@@ -5014,6 +6749,11 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
{
int err = 0, fd, i, btf_id;
+ if (prog->obj->loaded) {
+ pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
+ return -EINVAL;
+ }
+
if ((prog->type == BPF_PROG_TYPE_TRACING ||
prog->type == BPF_PROG_TYPE_LSM ||
prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
@@ -5026,7 +6766,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
if (prog->instances.nr < 0 || !prog->instances.fds) {
if (prog->preprocessor) {
pr_warn("Internal error: can't load program '%s'\n",
- prog->section_name);
+ prog->name);
return -LIBBPF_ERRNO__INTERNAL;
}
@@ -5041,8 +6781,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
if (!prog->preprocessor) {
if (prog->instances.nr != 1) {
- pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n",
- prog->section_name, prog->instances.nr);
+ pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
+ prog->name, prog->instances.nr);
}
err = load_program(prog, prog->insns, prog->insns_cnt,
license, kern_ver, &fd);
@@ -5060,13 +6800,13 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
prog->insns_cnt, &result);
if (err) {
pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
- i, prog->section_name);
+ i, prog->name);
goto out;
}
if (!result.new_insn_ptr || !result.new_insn_cnt) {
pr_debug("Skip loading the %dth instance of program '%s'\n",
- i, prog->section_name);
+ i, prog->name);
prog->instances.fds[i] = -1;
if (result.pfd)
*result.pfd = -1;
@@ -5077,7 +6817,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
result.new_insn_cnt, license, kern_ver, &fd);
if (err) {
pr_warn("Loading the %dth instance of program '%s' failed\n",
- i, prog->section_name);
+ i, prog->name);
goto out;
}
@@ -5087,31 +6827,36 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
}
out:
if (err)
- pr_warn("failed to load program '%s'\n", prog->section_name);
+ pr_warn("failed to load program '%s'\n", prog->name);
zfree(&prog->insns);
prog->insns_cnt = 0;
return err;
}
-static bool bpf_program__is_function_storage(const struct bpf_program *prog,
- const struct bpf_object *obj)
-{
- return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
-}
-
static int
bpf_object__load_progs(struct bpf_object *obj, int log_level)
{
+ struct bpf_program *prog;
size_t i;
int err;
for (i = 0; i < obj->nr_programs; i++) {
- if (bpf_program__is_function_storage(&obj->programs[i], obj))
+ prog = &obj->programs[i];
+ err = bpf_object__sanitize_prog(obj, prog);
+ if (err)
+ return err;
+ }
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+ if (prog_is_subprog(obj, prog))
+ continue;
+ if (!prog->load) {
+ pr_debug("prog '%s': skipped loading\n", prog->name);
continue;
- obj->programs[i].log_level |= log_level;
- err = bpf_program__load(&obj->programs[i],
- obj->license,
- obj->kern_version);
+ }
+ prog->log_level |= log_level;
+ err = bpf_program__load(prog, obj->license, obj->kern_version);
if (err)
return err;
}
@@ -5168,18 +6913,19 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
err = err ? : bpf_object__collect_externs(obj);
err = err ? : bpf_object__finalize_btf(obj);
err = err ? : bpf_object__init_maps(obj, opts);
- err = err ? : bpf_object__init_prog_names(obj);
- err = err ? : bpf_object__collect_reloc(obj);
+ err = err ? : bpf_object__collect_relos(obj);
if (err)
goto out;
bpf_object__elf_finish(obj);
bpf_object__for_each_program(prog, obj) {
- prog->sec_def = find_sec_def(prog->section_name);
+ prog->sec_def = find_sec_def(prog->sec_name);
if (!prog->sec_def)
/* couldn't guess, but user might manually specify */
continue;
+ if (prog->sec_def->is_sleepable)
+ prog->prog_flags |= BPF_F_SLEEPABLE;
bpf_program__set_type(prog, prog->sec_def->prog_type);
bpf_program__set_expected_attach_type(prog,
prog->sec_def->expected_attach_type);
@@ -5289,67 +7035,195 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
bpf_object__for_each_map(m, obj) {
if (!bpf_map__is_internal(m))
continue;
- if (!obj->caps.global_data) {
+ if (!kernel_supports(FEAT_GLOBAL_DATA)) {
pr_warn("kernel doesn't support global data\n");
return -ENOTSUP;
}
- if (!obj->caps.array_mmap)
+ if (!kernel_supports(FEAT_ARRAY_MMAP))
m->def.map_flags ^= BPF_F_MMAPABLE;
}
return 0;
}
+static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
+{
+ char sym_type, sym_name[500];
+ unsigned long long sym_addr;
+ struct extern_desc *ext;
+ int ret, err = 0;
+ FILE *f;
+
+ f = fopen("/proc/kallsyms", "r");
+ if (!f) {
+ err = -errno;
+ pr_warn("failed to open /proc/kallsyms: %d\n", err);
+ return err;
+ }
+
+ while (true) {
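+ /* each /proc/kallsyms line has the form
+ * "<addr> <type> <name> [module]"; the optional module part is
+ * discarded by the %*[^\n] conversion below
+ */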
+ ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
+ &sym_addr, &sym_type, sym_name);
+ if (ret == EOF && feof(f))
+ break;
+ if (ret != 3) {
+ pr_warn("failed to read kallsyms entry: %d\n", ret);
+ err = -EINVAL;
+ goto out;
+ }
+
+ ext = find_extern_by_name(obj, sym_name);
+ if (!ext || ext->type != EXT_KSYM)
+ continue;
+
+ if (ext->is_set && ext->ksym.addr != sym_addr) {
+ pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
+ sym_name, ext->ksym.addr, sym_addr);
+ err = -EINVAL;
+ goto out;
+ }
+ if (!ext->is_set) {
+ ext->is_set = true;
+ ext->ksym.addr = sym_addr;
+ pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
+ }
+ }
+
+out:
+ fclose(f);
+ return err;
+}
+
+static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
+{
+ struct extern_desc *ext;
+ int i, id;
+
+ for (i = 0; i < obj->nr_extern; i++) {
+ const struct btf_type *targ_var, *targ_type;
+ __u32 targ_type_id, local_type_id;
+ const char *targ_var_name;
+ int ret;
+
+ ext = &obj->externs[i];
+ if (ext->type != EXT_KSYM || !ext->ksym.type_id)
+ continue;
+
+ id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name,
+ BTF_KIND_VAR);
+ if (id <= 0) {
+ pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n",
+ ext->name);
+ return -ESRCH;
+ }
+
+ /* find local type_id */
+ local_type_id = ext->ksym.type_id;
+
+ /* find target type_id */
+ targ_var = btf__type_by_id(obj->btf_vmlinux, id);
+ targ_var_name = btf__name_by_offset(obj->btf_vmlinux,
+ targ_var->name_off);
+ targ_type = skip_mods_and_typedefs(obj->btf_vmlinux,
+ targ_var->type,
+ &targ_type_id);
+
+ ret = bpf_core_types_are_compat(obj->btf, local_type_id,
+ obj->btf_vmlinux, targ_type_id);
+ if (ret <= 0) {
+ const struct btf_type *local_type;
+ const char *targ_name, *local_name;
+
+ local_type = btf__type_by_id(obj->btf, local_type_id);
+ local_name = btf__name_by_offset(obj->btf,
+ local_type->name_off);
+ targ_name = btf__name_by_offset(obj->btf_vmlinux,
+ targ_type->name_off);
+
+ pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
+ ext->name, local_type_id,
+ btf_kind_str(local_type), local_name, targ_type_id,
+ btf_kind_str(targ_type), targ_name);
+ return -EINVAL;
+ }
+
+ ext->is_set = true;
+ ext->ksym.vmlinux_btf_id = id;
+ pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n",
+ ext->name, id, btf_kind_str(targ_var), targ_var_name);
+ }
+ return 0;
+}
+
static int bpf_object__resolve_externs(struct bpf_object *obj,
const char *extra_kconfig)
{
- bool need_config = false;
+ bool need_config = false, need_kallsyms = false;
+ bool need_vmlinux_btf = false;
struct extern_desc *ext;
+ void *kcfg_data = NULL;
int err, i;
- void *data;
if (obj->nr_extern == 0)
return 0;
- data = obj->maps[obj->kconfig_map_idx].mmaped;
+ if (obj->kconfig_map_idx >= 0)
+ kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
for (i = 0; i < obj->nr_extern; i++) {
ext = &obj->externs[i];
- if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
- void *ext_val = data + ext->data_off;
+ if (ext->type == EXT_KCFG &&
+ strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
+ void *ext_val = kcfg_data + ext->kcfg.data_off;
__u32 kver = get_kernel_version();
if (!kver) {
pr_warn("failed to get kernel version\n");
return -EINVAL;
}
- err = set_ext_value_num(ext, ext_val, kver);
+ err = set_kcfg_value_num(ext, ext_val, kver);
if (err)
return err;
- pr_debug("extern %s=0x%x\n", ext->name, kver);
- } else if (strncmp(ext->name, "CONFIG_", 7) == 0) {
+ pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
+ } else if (ext->type == EXT_KCFG &&
+ strncmp(ext->name, "CONFIG_", 7) == 0) {
need_config = true;
+ } else if (ext->type == EXT_KSYM) {
+ if (ext->ksym.type_id)
+ need_vmlinux_btf = true;
+ else
+ need_kallsyms = true;
} else {
pr_warn("unrecognized extern '%s'\n", ext->name);
return -EINVAL;
}
}
if (need_config && extra_kconfig) {
- err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data);
+ err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
if (err)
return -EINVAL;
need_config = false;
for (i = 0; i < obj->nr_extern; i++) {
ext = &obj->externs[i];
- if (!ext->is_set) {
+ if (ext->type == EXT_KCFG && !ext->is_set) {
need_config = true;
break;
}
}
}
if (need_config) {
- err = bpf_object__read_kconfig_file(obj, data);
+ err = bpf_object__read_kconfig_file(obj, kcfg_data);
+ if (err)
+ return -EINVAL;
+ }
+ if (need_kallsyms) {
+ err = bpf_object__read_kallsyms_file(obj);
+ if (err)
+ return -EINVAL;
+ }
+ if (need_vmlinux_btf) {
+ err = bpf_object__resolve_ksyms_btf_id(obj);
if (err)
return -EINVAL;
}
@@ -5380,17 +7254,15 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
return -EINVAL;
if (obj->loaded) {
- pr_warn("object should not be loaded twice\n");
+ pr_warn("object '%s': load can't be attempted twice\n", obj->name);
return -EINVAL;
}
- obj->loaded = true;
-
- err = bpf_object__probe_caps(obj);
+ err = bpf_object__probe_loading(obj);
+ err = err ? : bpf_object__load_vmlinux_btf(obj);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
err = err ? : bpf_object__sanitize_and_load_btf(obj);
err = err ? : bpf_object__sanitize_maps(obj);
- err = err ? : bpf_object__load_vmlinux_btf(obj);
err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
err = err ? : bpf_object__create_maps(obj);
err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
@@ -5399,6 +7271,8 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
btf__free(obj->btf_vmlinux);
obj->btf_vmlinux = NULL;
+	obj->loaded = true; /* doesn't matter whether it succeeded or not */
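[editor note: the flag is now set unconditionally so that later setters such as bpf_program__set_autoload() can reject changes after a load attempt, successful or not.]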
+
if (err)
goto out;
@@ -5496,14 +7370,15 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
if (instance < 0 || instance >= prog->instances.nr) {
pr_warn("invalid prog instance %d of prog %s (max %d)\n",
- instance, prog->section_name, prog->instances.nr);
+ instance, prog->name, prog->instances.nr);
return -EINVAL;
}
if (bpf_obj_pin(prog->instances.fds[instance], path)) {
- cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ err = -errno;
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
pr_warn("failed to pin program: %s\n", cp);
- return -errno;
+ return err;
}
pr_debug("pinned program '%s'\n", path);
@@ -5526,7 +7401,7 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
if (instance < 0 || instance >= prog->instances.nr) {
pr_warn("invalid prog instance %d of prog %s (max %d)\n",
- instance, prog->section_name, prog->instances.nr);
+ instance, prog->name, prog->instances.nr);
return -EINVAL;
}
@@ -5556,8 +7431,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
}
if (prog->instances.nr <= 0) {
- pr_warn("no instances of prog %s to pin\n",
- prog->section_name);
+ pr_warn("no instances of prog %s to pin\n", prog->name);
return -EINVAL;
}
@@ -5619,8 +7493,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
}
if (prog->instances.nr <= 0) {
- pr_warn("no instances of prog %s to pin\n",
- prog->section_name);
+ pr_warn("no instances of prog %s to pin\n", prog->name);
return -EINVAL;
}
@@ -5955,11 +7828,45 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
return 0;
}
+static void bpf_map__destroy(struct bpf_map *map)
+{
+ if (map->clear_priv)
+ map->clear_priv(map, map->priv);
+ map->priv = NULL;
+ map->clear_priv = NULL;
+
+ if (map->inner_map) {
+ bpf_map__destroy(map->inner_map);
+ zfree(&map->inner_map);
+ }
+
+ zfree(&map->init_slots);
+ map->init_slots_sz = 0;
+
+ if (map->mmaped) {
+ munmap(map->mmaped, bpf_map_mmap_sz(map));
+ map->mmaped = NULL;
+ }
+
+ if (map->st_ops) {
+ zfree(&map->st_ops->data);
+ zfree(&map->st_ops->progs);
+ zfree(&map->st_ops->kern_func_off);
+ zfree(&map->st_ops);
+ }
+
+ zfree(&map->name);
+ zfree(&map->pin_path);
+
+ if (map->fd >= 0)
+ zclose(map->fd);
+}
+
void bpf_object__close(struct bpf_object *obj)
{
size_t i;
- if (!obj)
+ if (IS_ERR_OR_NULL(obj))
return;
if (obj->clear_priv)
@@ -5970,29 +7877,8 @@ void bpf_object__close(struct bpf_object *obj)
btf__free(obj->btf);
btf_ext__free(obj->btf_ext);
- for (i = 0; i < obj->nr_maps; i++) {
- struct bpf_map *map = &obj->maps[i];
-
- if (map->clear_priv)
- map->clear_priv(map, map->priv);
- map->priv = NULL;
- map->clear_priv = NULL;
-
- if (map->mmaped) {
- munmap(map->mmaped, bpf_map_mmap_sz(map));
- map->mmaped = NULL;
- }
-
- if (map->st_ops) {
- zfree(&map->st_ops->data);
- zfree(&map->st_ops->progs);
- zfree(&map->st_ops->kern_func_off);
- zfree(&map->st_ops);
- }
-
- zfree(&map->name);
- zfree(&map->pin_path);
- }
+ for (i = 0; i < obj->nr_maps; i++)
+ bpf_map__destroy(&obj->maps[i]);
zfree(&obj->kconfig);
zfree(&obj->externs);
@@ -6099,7 +7985,7 @@ bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
do {
prog = __bpf_program__iter(prog, obj, true);
- } while (prog && bpf_program__is_function_storage(prog, obj));
+ } while (prog && prog_is_subprog(obj, prog));
return prog;
}
@@ -6111,7 +7997,7 @@ bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
do {
prog = __bpf_program__iter(prog, obj, false);
- } while (prog && bpf_program__is_function_storage(prog, obj));
+ } while (prog && prog_is_subprog(obj, prog));
return prog;
}
@@ -6142,11 +8028,16 @@ const char *bpf_program__name(const struct bpf_program *prog)
return prog->name;
}
+const char *bpf_program__section_name(const struct bpf_program *prog)
+{
+ return prog->sec_name;
+}
+
const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
{
const char *title;
- title = prog->section_name;
+ title = prog->sec_name;
if (needs_copy) {
title = strdup(title);
if (!title) {
@@ -6158,6 +8049,20 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
return title;
}
+bool bpf_program__autoload(const struct bpf_program *prog)
+{
+ return prog->load;
+}
+
+int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
+{
+ if (prog->obj->loaded)
+ return -EINVAL;
+
+ prog->load = autoload;
+ return 0;
+}
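[editor note] A hedged userspace sketch of the new autoload knob (object and program names are hypothetical):

	struct bpf_object *obj;
	struct bpf_program *prog;
	int err;

	obj = bpf_object__open_file("prog.bpf.o", NULL);
	if (libbpf_get_error(obj))
		return -1;

	/* skip loading an optional program; must be done before bpf_object__load() */
	prog = bpf_object__find_program_by_name(obj, "optional_prog");
	if (prog)
		bpf_program__set_autoload(prog, false);

	err = bpf_object__load(obj);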
+
int bpf_program__fd(const struct bpf_program *prog)
{
return bpf_program__nth_fd(prog, 0);
@@ -6165,7 +8070,7 @@ int bpf_program__fd(const struct bpf_program *prog)
size_t bpf_program__size(const struct bpf_program *prog)
{
- return prog->insns_cnt * sizeof(struct bpf_insn);
+ return prog->insns_cnt * BPF_INSN_SZ;
}
int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
@@ -6205,14 +8110,14 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n)
if (n >= prog->instances.nr || n < 0) {
pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
- n, prog->section_name, prog->instances.nr);
+ n, prog->name, prog->instances.nr);
return -EINVAL;
}
fd = prog->instances.fds[n];
if (fd < 0) {
pr_warn("%dth instance of program '%s' is invalid\n",
- n, prog->section_name);
+ n, prog->name);
return -ENOENT;
}
@@ -6261,6 +8166,7 @@ BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
+BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
enum bpf_attach_type
bpf_program__get_expected_attach_type(struct bpf_program *prog)
@@ -6323,6 +8229,8 @@ static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
struct bpf_program *prog);
static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
struct bpf_program *prog);
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
static const struct bpf_sec_def section_defs[] = {
BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
@@ -6359,6 +8267,21 @@ static const struct bpf_sec_def section_defs[] = {
.expected_attach_type = BPF_TRACE_FEXIT,
.is_attach_btf = true,
.attach_fn = attach_trace),
+ SEC_DEF("fentry.s/", TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .is_attach_btf = true,
+ .is_sleepable = true,
+ .attach_fn = attach_trace),
+ SEC_DEF("fmod_ret.s/", TRACING,
+ .expected_attach_type = BPF_MODIFY_RETURN,
+ .is_attach_btf = true,
+ .is_sleepable = true,
+ .attach_fn = attach_trace),
+ SEC_DEF("fexit.s/", TRACING,
+ .expected_attach_type = BPF_TRACE_FEXIT,
+ .is_attach_btf = true,
+ .is_sleepable = true,
+ .attach_fn = attach_trace),
SEC_DEF("freplace/", EXT,
.is_attach_btf = true,
.attach_fn = attach_trace),
@@ -6366,7 +8289,21 @@ static const struct bpf_sec_def section_defs[] = {
.is_attach_btf = true,
.expected_attach_type = BPF_LSM_MAC,
.attach_fn = attach_lsm),
- BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
+ SEC_DEF("lsm.s/", LSM,
+ .is_attach_btf = true,
+ .is_sleepable = true,
+ .expected_attach_type = BPF_LSM_MAC,
+ .attach_fn = attach_lsm),
+ SEC_DEF("iter/", TRACING,
+ .expected_attach_type = BPF_TRACE_ITER,
+ .is_attach_btf = true,
+ .attach_fn = attach_iter),
+ BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP,
+ BPF_XDP_DEVMAP),
+ BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP,
+ BPF_XDP_CPUMAP),
+ BPF_APROG_SEC("xdp", BPF_PROG_TYPE_XDP,
+ BPF_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT),
@@ -6377,6 +8314,10 @@ static const struct bpf_sec_def section_defs[] = {
BPF_APROG_SEC("cgroup_skb/egress", BPF_PROG_TYPE_CGROUP_SKB,
BPF_CGROUP_INET_EGRESS),
BPF_APROG_COMPAT("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB),
+ BPF_EAPROG_SEC("cgroup/sock_create", BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_CGROUP_INET_SOCK_CREATE),
+ BPF_EAPROG_SEC("cgroup/sock_release", BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_CGROUP_INET_SOCK_RELEASE),
BPF_APROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK,
BPF_CGROUP_INET_SOCK_CREATE),
BPF_EAPROG_SEC("cgroup/post_bind4", BPF_PROG_TYPE_CGROUP_SOCK,
@@ -6414,6 +8355,14 @@ static const struct bpf_sec_def section_defs[] = {
BPF_CGROUP_UDP4_RECVMSG),
BPF_EAPROG_SEC("cgroup/recvmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
BPF_CGROUP_UDP6_RECVMSG),
+ BPF_EAPROG_SEC("cgroup/getpeername4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET4_GETPEERNAME),
+ BPF_EAPROG_SEC("cgroup/getpeername6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET6_GETPEERNAME),
+ BPF_EAPROG_SEC("cgroup/getsockname4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET4_GETSOCKNAME),
+ BPF_EAPROG_SEC("cgroup/getsockname6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET6_GETSOCKNAME),
BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL,
BPF_CGROUP_SYSCTL),
BPF_EAPROG_SEC("cgroup/getsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
@@ -6421,6 +8370,8 @@ static const struct bpf_sec_def section_defs[] = {
BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
BPF_CGROUP_SETSOCKOPT),
BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS),
+ BPF_EAPROG_SEC("sk_lookup/", BPF_PROG_TYPE_SK_LOOKUP,
+ BPF_SK_LOOKUP),
};
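[editor note] A hedged BPF-side sketch of section names matching the new entries above (hook and program names are illustrative; relies on bpf_helpers.h/bpf_tracing.h macros):

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	SEC("fentry.s/security_file_open")     /* sleepable fentry */
	int BPF_PROG(trace_open, struct file *file)
	{
		return 0;
	}

	SEC("lsm.s/file_open")                 /* sleepable LSM hook */
	int BPF_PROG(lsm_open, struct file *file)
	{
		return 0;
	}

	SEC("iter/task")                       /* BPF iterator, attached via bpf_program__attach_iter() */
	int dump_task(struct bpf_iter__task *ctx)
	{
		return 0;
	}

	SEC("sk_lookup/echo")                  /* matched by the "sk_lookup/" prefix above */
	int echo_lookup(struct bpf_sk_lookup *ctx)
	{
		return SK_PASS;
	}

	char LICENSE[] SEC("license") = "GPL";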
#undef BPF_PROG_SEC_IMPL
@@ -6516,9 +8467,8 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
}
/* Collect the reloc from ELF and populate the st_ops->progs[] */
-static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj,
- GElf_Shdr *shdr,
- Elf_Data *data)
+static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
+ GElf_Shdr *shdr, Elf_Data *data)
{
const struct btf_member *member;
struct bpf_struct_ops *st_ops;
@@ -6527,7 +8477,7 @@ static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj,
const struct btf *btf;
struct bpf_map *map;
Elf_Data *symbols;
- unsigned int moff;
+ unsigned int moff, insn_idx;
const char *name;
__u32 member_idx;
GElf_Sym sym;
@@ -6549,8 +8499,7 @@ static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj,
return -LIBBPF_ERRNO__FORMAT;
}
- name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
- sym.st_name) ? : "<?>";
+ name = elf_sym_str(obj, sym.st_name) ?: "<?>";
map = find_struct_ops_map_by_offset(obj, rel.r_offset);
if (!map) {
pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n",
@@ -6573,6 +8522,12 @@ static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj,
map->name, (size_t)rel.r_offset, shdr_idx);
return -LIBBPF_ERRNO__RELOC;
}
+ if (sym.st_value % BPF_INSN_SZ) {
+ pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
+ map->name, (unsigned long long)sym.st_value);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ insn_idx = sym.st_value / BPF_INSN_SZ;
member = find_member_by_offset(st_ops->type, moff * 8);
if (!member) {
@@ -6589,7 +8544,7 @@ static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj,
return -EINVAL;
}
- prog = bpf_object__find_prog_by_idx(obj, shdr_idx);
+ prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
if (!prog) {
pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
map->name, shdr_idx, name);
@@ -6599,7 +8554,7 @@ static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj,
if (prog->type == BPF_PROG_TYPE_UNSPEC) {
const struct bpf_sec_def *sec_def;
- sec_def = find_sec_def(prog->section_name);
+ sec_def = find_sec_def(prog->sec_name);
if (sec_def &&
sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {
/* for pr_warn */
@@ -6622,13 +8577,14 @@ static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj,
invalid_prog:
pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
- map->name, prog->name, prog->section_name, prog->type,
+ map->name, prog->name, prog->sec_name, prog->type,
prog->attach_btf_id, prog->expected_attach_type, name);
return -EINVAL;
}
#define BTF_TRACE_PREFIX "btf_trace_"
#define BTF_LSM_PREFIX "bpf_lsm_"
+#define BTF_ITER_PREFIX "bpf_iter_"
#define BTF_MAX_NAME_SIZE 128
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
@@ -6659,6 +8615,9 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
else if (attach_type == BPF_LSM_MAC)
err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
BTF_KIND_FUNC);
+ else if (attach_type == BPF_TRACE_ITER)
+ err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
+ BTF_KIND_FUNC);
else
err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
@@ -6672,6 +8631,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
enum bpf_attach_type attach_type)
{
struct btf *btf;
+ int err;
btf = libbpf_find_kernel_btf();
if (IS_ERR(btf)) {
@@ -6679,7 +8639,9 @@ int libbpf_find_vmlinux_btf_id(const char *name,
return -EINVAL;
}
- return __find_vmlinux_btf_id(btf, name, attach_type);
+ err = __find_vmlinux_btf_id(btf, name, attach_type);
+ btf__free(btf);
+ return err;
}
static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
@@ -6719,7 +8681,7 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog)
{
enum bpf_attach_type attach_type = prog->expected_attach_type;
__u32 attach_prog_fd = prog->attach_prog_fd;
- const char *name = prog->section_name;
+ const char *name = prog->sec_name;
int i, err;
if (!name)
@@ -6785,6 +8747,71 @@ const char *bpf_map__name(const struct bpf_map *map)
return map ? map->name : NULL;
}
+enum bpf_map_type bpf_map__type(const struct bpf_map *map)
+{
+ return map->def.type;
+}
+
+int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
+ map->def.type = type;
+ return 0;
+}
+
+__u32 bpf_map__map_flags(const struct bpf_map *map)
+{
+ return map->def.map_flags;
+}
+
+int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
+ map->def.map_flags = flags;
+ return 0;
+}
+
+__u32 bpf_map__numa_node(const struct bpf_map *map)
+{
+ return map->numa_node;
+}
+
+int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
+ map->numa_node = numa_node;
+ return 0;
+}
+
+__u32 bpf_map__key_size(const struct bpf_map *map)
+{
+ return map->def.key_size;
+}
+
+int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
+ map->def.key_size = size;
+ return 0;
+}
+
+__u32 bpf_map__value_size(const struct bpf_map *map)
+{
+ return map->def.value_size;
+}
+
+int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
+ map->def.value_size = size;
+ return 0;
+}
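[editor note] A hedged userspace sketch of the new map getters/setters (map name is hypothetical); every setter returns -EBUSY once the map has been created in the kernel:

	struct bpf_map *map = bpf_object__find_map_by_name(obj, "events");

	/* only valid before bpf_object__load() creates the map */
	bpf_map__set_type(map, BPF_MAP_TYPE_LRU_HASH);
	bpf_map__set_key_size(map, sizeof(__u64));
	bpf_map__set_value_size(map, 64);
	bpf_map__set_map_flags(map, BPF_F_NO_COMMON_LRU);
	bpf_map__set_numa_node(map, 0);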
+
__u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
{
return map ? map->btf_key_type_id : 0;
@@ -6837,9 +8864,17 @@ bool bpf_map__is_internal(const struct bpf_map *map)
return map->libbpf_type != LIBBPF_MAP_UNSPEC;
}
-void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
+__u32 bpf_map__ifindex(const struct bpf_map *map)
{
+ return map->map_ifindex;
+}
+
+int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
map->map_ifindex = ifindex;
+ return 0;
}
int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
@@ -6987,7 +9022,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
prog->prog_ifindex = attr->ifindex;
prog->log_level = attr->log_level;
- prog->prog_flags = attr->prog_flags;
+ prog->prog_flags |= attr->prog_flags;
if (!first_prog)
first_prog = prog;
}
@@ -7006,7 +9041,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
err = bpf_object__load(obj);
if (err) {
bpf_object__close(obj);
- return -EINVAL;
+ return err;
}
*pobj = obj;
@@ -7047,7 +9082,7 @@ int bpf_link__destroy(struct bpf_link *link)
{
int err = 0;
- if (!link)
+ if (IS_ERR_OR_NULL(link))
return 0;
if (!link->disconnected && link->detach)
@@ -7105,6 +9140,11 @@ struct bpf_link *bpf_link__open(const char *path)
return link;
}
+int bpf_link__detach(struct bpf_link *link)
+{
+ return bpf_link_detach(link->fd) ? -errno : 0;
+}
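[editor note] A hedged sketch of how bpf_link__detach() differs from bpf_link__destroy():

	struct bpf_link *link = bpf_program__attach(prog);
	int err;

	if (libbpf_get_error(link))
		return -1;

	/* sever the attachment in the kernel but keep the link object and its FD */
	err = bpf_link__detach(link);

	/* later: close the FD and free the object */
	bpf_link__destroy(link);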
+
int bpf_link__pin(struct bpf_link *link, const char *path)
{
int err;
@@ -7168,14 +9208,14 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
int prog_fd, err;
if (pfd < 0) {
- pr_warn("program '%s': invalid perf event FD %d\n",
- bpf_program__title(prog, false), pfd);
+ pr_warn("prog '%s': invalid perf event FD %d\n",
+ prog->name, pfd);
return ERR_PTR(-EINVAL);
}
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
- pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n",
- bpf_program__title(prog, false));
+ pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
+ prog->name);
return ERR_PTR(-EINVAL);
}
@@ -7188,17 +9228,18 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
err = -errno;
free(link);
- pr_warn("program '%s': failed to attach to pfd %d: %s\n",
- bpf_program__title(prog, false), pfd,
- libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
+ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ if (err == -EPROTO)
+			pr_warn("prog '%s': consider adding PERF_SAMPLE_CALLCHAIN to, or removing exclude_callchain_[kernel|user] from, pfd %d\n",
+ prog->name, pfd);
return ERR_PTR(err);
}
if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
err = -errno;
free(link);
- pr_warn("program '%s': failed to enable pfd %d: %s\n",
- bpf_program__title(prog, false), pfd,
- libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ pr_warn("prog '%s': failed to enable pfd %d: %s\n",
+ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return ERR_PTR(err);
}
return link;
@@ -7320,9 +9361,8 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
0 /* offset */, -1 /* pid */);
if (pfd < 0) {
- pr_warn("program '%s': failed to create %s '%s' perf event: %s\n",
- bpf_program__title(prog, false),
- retprobe ? "kretprobe" : "kprobe", func_name,
+ pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
+ prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
}
@@ -7330,9 +9370,8 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
if (IS_ERR(link)) {
close(pfd);
err = PTR_ERR(link);
- pr_warn("program '%s': failed to attach to %s '%s': %s\n",
- bpf_program__title(prog, false),
- retprobe ? "kretprobe" : "kprobe", func_name,
+ pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
+ prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return link;
}
@@ -7345,7 +9384,7 @@ static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
const char *func_name;
bool retprobe;
- func_name = bpf_program__title(prog, false) + sec->len;
+ func_name = prog->sec_name + sec->len;
retprobe = strcmp(sec->sec, "kretprobe/") == 0;
return bpf_program__attach_kprobe(prog, retprobe, func_name);
@@ -7363,9 +9402,8 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
pfd = perf_event_open_probe(true /* uprobe */, retprobe,
binary_path, func_offset, pid);
if (pfd < 0) {
- pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
- bpf_program__title(prog, false),
- retprobe ? "uretprobe" : "uprobe",
+ pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
+ prog->name, retprobe ? "uretprobe" : "uprobe",
binary_path, func_offset,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
@@ -7374,9 +9412,8 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
if (IS_ERR(link)) {
close(pfd);
err = PTR_ERR(link);
- pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
- bpf_program__title(prog, false),
- retprobe ? "uretprobe" : "uprobe",
+ pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
+ prog->name, retprobe ? "uretprobe" : "uprobe",
binary_path, func_offset,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return link;
@@ -7444,9 +9481,8 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
pfd = perf_event_open_tracepoint(tp_category, tp_name);
if (pfd < 0) {
- pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
- bpf_program__title(prog, false),
- tp_category, tp_name,
+ pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
+ prog->name, tp_category, tp_name,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
}
@@ -7454,9 +9490,8 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
if (IS_ERR(link)) {
close(pfd);
err = PTR_ERR(link);
- pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n",
- bpf_program__title(prog, false),
- tp_category, tp_name,
+ pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
+ prog->name, tp_category, tp_name,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return link;
}
@@ -7469,7 +9504,7 @@ static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
char *sec_name, *tp_cat, *tp_name;
struct bpf_link *link;
- sec_name = strdup(bpf_program__title(prog, false));
+ sec_name = strdup(prog->sec_name);
if (!sec_name)
return ERR_PTR(-ENOMEM);
@@ -7498,8 +9533,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
- pr_warn("program '%s': can't attach before loaded\n",
- bpf_program__title(prog, false));
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
return ERR_PTR(-EINVAL);
}
@@ -7512,9 +9546,8 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
if (pfd < 0) {
pfd = -errno;
free(link);
- pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n",
- bpf_program__title(prog, false), tp_name,
- libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+ pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
+ prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
}
link->fd = pfd;
@@ -7524,7 +9557,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
struct bpf_program *prog)
{
- const char *tp_name = bpf_program__title(prog, false) + sec->len;
+ const char *tp_name = prog->sec_name + sec->len;
return bpf_program__attach_raw_tracepoint(prog, tp_name);
}
@@ -7538,8 +9571,7 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
- pr_warn("program '%s': can't attach before loaded\n",
- bpf_program__title(prog, false));
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
return ERR_PTR(-EINVAL);
}
@@ -7552,9 +9584,8 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
if (pfd < 0) {
pfd = -errno;
free(link);
- pr_warn("program '%s': failed to attach: %s\n",
- bpf_program__title(prog, false),
- libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+ pr_warn("prog '%s': failed to attach: %s\n",
+ prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
}
link->fd = pfd;
@@ -7583,9 +9614,18 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
return bpf_program__attach_lsm(prog);
}
-struct bpf_link *
-bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
+ struct bpf_program *prog)
{
+ return bpf_program__attach_iter(prog, NULL);
+}
+
+static struct bpf_link *
+bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
+ const char *target_name)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
+ .target_btf_id = btf_id);
enum bpf_attach_type attach_type;
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
@@ -7593,8 +9633,7 @@ bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
- pr_warn("program '%s': can't attach before loaded\n",
- bpf_program__title(prog, false));
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
return ERR_PTR(-EINVAL);
}
@@ -7604,12 +9643,12 @@ bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
link->detach = &bpf_link__detach_fd;
attach_type = bpf_program__get_expected_attach_type(prog);
- link_fd = bpf_link_create(prog_fd, cgroup_fd, attach_type, NULL);
+ link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
if (link_fd < 0) {
link_fd = -errno;
free(link);
- pr_warn("program '%s': failed to attach to cgroup: %s\n",
- bpf_program__title(prog, false),
+ pr_warn("prog '%s': failed to attach to %s: %s\n",
+ prog->name, target_name,
libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
return ERR_PTR(link_fd);
}
@@ -7617,11 +9656,101 @@ bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
return link;
}
+struct bpf_link *
+bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
+{
+ return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
+}
+
+struct bpf_link *
+bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
+{
+ return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
+}
+
+struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
+{
+ /* target_fd/target_ifindex use the same field in LINK_CREATE */
+ return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
+}
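[editor note] Usage sketch (the interface name is illustrative):

	#include <net/if.h>

	int ifindex = if_nametoindex("eth0");
	struct bpf_link *link;

	link = bpf_program__attach_xdp(prog, ifindex);
	if (libbpf_get_error(link))
		return -1;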
+
+struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
+ int target_fd,
+ const char *attach_func_name)
+{
+ int btf_id;
+
+ if (!!target_fd != !!attach_func_name) {
+ pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
+ prog->name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (prog->type != BPF_PROG_TYPE_EXT) {
+		pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
+ prog->name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (target_fd) {
+ btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
+ if (btf_id < 0)
+ return ERR_PTR(btf_id);
+
+ return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
+ } else {
+ /* no target, so use raw_tracepoint_open for compatibility
+ * with old kernels
+ */
+ return bpf_program__attach_trace(prog);
+ }
+}
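[editor note] A hedged usage sketch (the target program and attach function name are hypothetical):

	/* prog must be BPF_PROG_TYPE_EXT, e.g. opened from a SEC("freplace/...") program */
	int target_fd = bpf_program__fd(target_prog);
	struct bpf_link *link;

	link = bpf_program__attach_freplace(prog, target_fd, "xdp_dispatcher_func");
	if (libbpf_get_error(link))
		return -1;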
+
+struct bpf_link *
+bpf_program__attach_iter(struct bpf_program *prog,
+ const struct bpf_iter_attach_opts *opts)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
+ char errmsg[STRERR_BUFSIZE];
+ struct bpf_link *link;
+ int prog_fd, link_fd;
+ __u32 target_fd = 0;
+
+ if (!OPTS_VALID(opts, bpf_iter_attach_opts))
+ return ERR_PTR(-EINVAL);
+
+ link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
+ link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ link = calloc(1, sizeof(*link));
+ if (!link)
+ return ERR_PTR(-ENOMEM);
+ link->detach = &bpf_link__detach_fd;
+
+ link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
+ &link_create_opts);
+ if (link_fd < 0) {
+ link_fd = -errno;
+ free(link);
+ pr_warn("prog '%s': failed to attach to iterator: %s\n",
+ prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
+ return ERR_PTR(link_fd);
+ }
+ link->fd = link_fd;
+ return link;
+}
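[editor note] A hedged userspace sketch of attaching and reading an iterator; the map FD and the use of a map-element iterator are assumptions for illustration:

	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
	union bpf_iter_link_info linfo;
	struct bpf_link *link;
	int iter_fd;
	char buf[256];

	memset(&linfo, 0, sizeof(linfo));
	linfo.map.map_fd = map_fd;           /* only needed for map element iterators */
	opts.link_info = &linfo;
	opts.link_info_len = sizeof(linfo);

	link = bpf_program__attach_iter(prog, &opts);
	if (libbpf_get_error(link))
		return -1;

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	while (read(iter_fd, buf, sizeof(buf)) > 0)
		;                            /* iterator output is produced via bpf_seq_* helpers */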
+
struct bpf_link *bpf_program__attach(struct bpf_program *prog)
{
const struct bpf_sec_def *sec_def;
- sec_def = find_sec_def(bpf_program__title(prog, false));
+ sec_def = find_sec_def(prog->sec_name);
if (!sec_def || !sec_def->attach_fn)
return ERR_PTR(-ESRCH);
@@ -7787,12 +9916,15 @@ void perf_buffer__free(struct perf_buffer *pb)
{
int i;
- if (!pb)
+ if (IS_ERR_OR_NULL(pb))
return;
if (pb->cpu_bufs) {
- for (i = 0; i < pb->cpu_cnt && pb->cpu_bufs[i]; i++) {
+ for (i = 0; i < pb->cpu_cnt; i++) {
struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
+ if (!cpu_buf)
+ continue;
+
bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
perf_buffer__free_cpu_buf(pb, cpu_buf);
}
@@ -7863,7 +9995,7 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
struct perf_buffer_params p = {};
struct perf_event_attr attr = { 0, };
- attr.config = PERF_COUNT_SW_BPF_OUTPUT,
+ attr.config = PERF_COUNT_SW_BPF_OUTPUT;
attr.type = PERF_TYPE_SOFTWARE;
attr.sample_type = PERF_SAMPLE_RAW;
attr.sample_period = 1;
@@ -7897,7 +10029,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
struct perf_buffer_params *p)
{
const char *online_cpus_file = "/sys/devices/system/cpu/online";
- struct bpf_map_info map = {};
+ struct bpf_map_info map;
char msg[STRERR_BUFSIZE];
struct perf_buffer *pb;
bool *online = NULL;
@@ -7910,19 +10042,28 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
return ERR_PTR(-EINVAL);
}
+ /* best-effort sanity checks */
+ memset(&map, 0, sizeof(map));
map_info_len = sizeof(map);
err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
if (err) {
err = -errno;
- pr_warn("failed to get map info for map FD %d: %s\n",
- map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
- return ERR_PTR(err);
- }
-
- if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
- pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
- map.name);
- return ERR_PTR(-EINVAL);
+ /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
+ * -EBADFD, -EFAULT, or -E2BIG on real error
+ */
+ if (err != -EINVAL) {
+ pr_warn("failed to get map info for map FD %d: %s\n",
+ map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
+ return ERR_PTR(err);
+ }
+ pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
+ map_fd);
+ } else {
+ if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+ pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+ map.name);
+ return ERR_PTR(-EINVAL);
+ }
}
pb = calloc(1, sizeof(*pb));
@@ -7954,7 +10095,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
err = pb->cpu_cnt;
goto error;
}
- if (map.max_entries < pb->cpu_cnt)
+ if (map.max_entries && map.max_entries < pb->cpu_cnt)
pb->cpu_cnt = map.max_entries;
}
@@ -8035,7 +10176,7 @@ error:
struct perf_sample_raw {
struct perf_event_header header;
uint32_t size;
- char data[0];
+ char data[];
};
struct perf_sample_lost {
@@ -8092,6 +10233,11 @@ static int perf_buffer__process_records(struct perf_buffer *pb,
return 0;
}
+int perf_buffer__epoll_fd(const struct perf_buffer *pb)
+{
+ return pb->epoll_fd;
+}
+
int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
{
int i, cnt, err;
@@ -8109,6 +10255,74 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
return cnt < 0 ? -errno : cnt;
}
+/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
+ * manager.
+ */
+size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
+{
+ return pb->cpu_cnt;
+}
+
+/*
+ * Return perf_event FD of a ring buffer in *buf_idx* slot of
+ * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
+ * select()/poll()/epoll() Linux syscalls.
+ */
+int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
+{
+ struct perf_cpu_buf *cpu_buf;
+
+ if (buf_idx >= pb->cpu_cnt)
+ return -EINVAL;
+
+ cpu_buf = pb->cpu_bufs[buf_idx];
+ if (!cpu_buf)
+ return -ENOENT;
+
+ return cpu_buf->fd;
+}
+
+/*
+ * Consume data from perf ring buffer corresponding to slot *buf_idx* in
+ * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
+ * consume, do nothing and return success.
+ * Returns:
+ * - 0 on success;
+ * - <0 on failure.
+ */
+int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
+{
+ struct perf_cpu_buf *cpu_buf;
+
+ if (buf_idx >= pb->cpu_cnt)
+ return -EINVAL;
+
+ cpu_buf = pb->cpu_bufs[buf_idx];
+ if (!cpu_buf)
+ return -ENOENT;
+
+ return perf_buffer__process_records(pb, cpu_buf);
+}
+
+int perf_buffer__consume(struct perf_buffer *pb)
+{
+ int i, err;
+
+ for (i = 0; i < pb->cpu_cnt; i++) {
+ struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
+
+ if (!cpu_buf)
+ continue;
+
+ err = perf_buffer__process_records(pb, cpu_buf);
+ if (err) {
+ pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
+ return err;
+ }
+ }
+ return 0;
+}
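[editor note] Usage sketch of the new perf_buffer accessors:

	/* plug the perf_buffer into an existing event loop via its epoll FD */
	int epfd = perf_buffer__epoll_fd(pb);

	/* or drain all per-CPU buffers without waiting */
	perf_buffer__consume(pb);

	/* or drain a single slot, e.g. after polling perf_buffer__buffer_fd(pb, 0) */
	perf_buffer__consume_buffer(pb, 0);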
+
struct bpf_prog_info_array_desc {
int array_offset; /* e.g. offset of jited_prog_insns */
int count_offset; /* e.g. offset of jited_prog_len */
@@ -8370,9 +10584,8 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
btf_id = libbpf_find_prog_btf_id(attach_func_name,
attach_prog_fd);
else
- btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
- attach_func_name,
- prog->expected_attach_type);
+ btf_id = libbpf_find_vmlinux_btf_id(attach_func_name,
+ prog->expected_attach_type);
if (btf_id < 0)
return btf_id;
@@ -8606,9 +10819,11 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
struct bpf_program *prog = *s->progs[i].prog;
struct bpf_link **link = s->progs[i].link;
const struct bpf_sec_def *sec_def;
- const char *sec_name = bpf_program__title(prog, false);
- sec_def = find_sec_def(sec_name);
+ if (!prog->load)
+ continue;
+
+ sec_def = find_sec_def(prog->sec_name);
if (!sec_def || !sec_def->attach_fn)
continue;
@@ -8630,8 +10845,7 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
for (i = 0; i < s->prog_cnt; i++) {
struct bpf_link **link = s->progs[i].link;
- if (!IS_ERR_OR_NULL(*link))
- bpf_link__destroy(*link);
+ bpf_link__destroy(*link);
*link = NULL;
}
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index f1dacecb1619..6909ee81113a 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -198,8 +198,11 @@ LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
__u32 ifindex);
LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
-LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
- bool needs_copy);
+LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog);
+LIBBPF_API LIBBPF_DEPRECATED("BPF program title is confusing term; please use bpf_program__section_name() instead")
+const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy);
+LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload);
/* returns program size in bytes */
LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog);
@@ -227,6 +230,7 @@ LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
LIBBPF_API int bpf_link__update_program(struct bpf_link *link,
struct bpf_program *prog);
LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
+LIBBPF_API int bpf_link__detach(struct bpf_link *link);
LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
LIBBPF_API struct bpf_link *
@@ -253,11 +257,29 @@ LIBBPF_API struct bpf_link *
bpf_program__attach_lsm(struct bpf_program *prog);
LIBBPF_API struct bpf_link *
bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_netns(struct bpf_program *prog, int netns_fd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_xdp(struct bpf_program *prog, int ifindex);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_freplace(struct bpf_program *prog,
+ int target_fd, const char *attach_func_name);
struct bpf_map;
LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
+struct bpf_iter_attach_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ union bpf_iter_link_info *link_info;
+ __u32 link_info_len;
+};
+#define bpf_iter_attach_opts__last_field link_info_len
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_iter(struct bpf_program *prog,
+ const struct bpf_iter_attach_opts *opts);
+
struct bpf_insn;
/*
@@ -337,6 +359,7 @@ LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
@@ -364,6 +387,7 @@ LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog);
/*
* No need for __attribute__((packed)), all members of 'bpf_map_def'
@@ -407,11 +431,38 @@ bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);
LIBBPF_API struct bpf_map *
bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj);
+/* get/set map FD */
LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
+LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
+/* get map definition */
LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+/* get map name */
LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
+/* get/set map type */
+LIBBPF_API enum bpf_map_type bpf_map__type(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type);
+/* get/set map size (max_entries) */
+LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries);
+LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
+/* get/set map flags */
+LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags);
+/* get/set map NUMA node */
+LIBBPF_API __u32 bpf_map__numa_node(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node);
+/* get/set map key size */
+LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size);
+/* get/set map value size */
+LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size);
+/* get map key/value BTF type IDs */
LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
+/* get/set map if_index */
+LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
@@ -419,11 +470,8 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size);
-LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
-LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
-LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
@@ -469,6 +517,27 @@ LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
size_t info_size, __u32 flags);
+/* Ring buffer APIs */
+struct ring_buffer;
+
+typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
+
+struct ring_buffer_opts {
+	size_t sz; /* size of this struct, for forward/backward compatibility */
+};
+
+#define ring_buffer_opts__last_field sz
+
+LIBBPF_API struct ring_buffer *
+ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
+ const struct ring_buffer_opts *opts);
+LIBBPF_API void ring_buffer__free(struct ring_buffer *rb);
+LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd,
+ ring_buffer_sample_fn sample_cb, void *ctx);
+LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
+LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
+
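+/* [editor note] A hedged usage sketch for the new ring buffer API; map_fd is
+ * assumed to refer to a BPF_MAP_TYPE_RINGBUF map:
+ *
+ *	static int handle_event(void *ctx, void *data, size_t size)
+ *	{
+ *		// process one sample submitted via bpf_ringbuf_submit()
+ *		return 0;
+ *	}
+ *
+ *	struct ring_buffer *rb = ring_buffer__new(map_fd, handle_event, NULL, NULL);
+ *
+ *	if (libbpf_get_error(rb))
+ *		return -1;
+ *	while (!exiting)
+ *		ring_buffer__poll(rb, 100 / * timeout, ms * /);
+ *	ring_buffer__free(rb);
+ */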
+/* Perf buffer APIs */
struct perf_buffer;
typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu,
@@ -523,7 +592,12 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
const struct perf_buffer_raw_opts *opts);
LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb);
LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
+LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx);
+LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx);
typedef enum bpf_perf_event_ret
(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index bb8831605b25..4ebfadf45b47 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -254,3 +254,86 @@ LIBBPF_0.0.8 {
bpf_program__set_lsm;
bpf_set_link_xdp_fd_opts;
} LIBBPF_0.0.7;
+
+LIBBPF_0.0.9 {
+ global:
+ bpf_enable_stats;
+ bpf_iter_create;
+ bpf_link_get_fd_by_id;
+ bpf_link_get_next_id;
+ bpf_program__attach_iter;
+ bpf_program__attach_netns;
+ perf_buffer__consume;
+ ring_buffer__add;
+ ring_buffer__consume;
+ ring_buffer__free;
+ ring_buffer__new;
+ ring_buffer__poll;
+} LIBBPF_0.0.8;
+
+LIBBPF_0.1.0 {
+ global:
+ bpf_link__detach;
+ bpf_link_detach;
+ bpf_map__ifindex;
+ bpf_map__key_size;
+ bpf_map__map_flags;
+ bpf_map__max_entries;
+ bpf_map__numa_node;
+ bpf_map__set_key_size;
+ bpf_map__set_map_flags;
+ bpf_map__set_max_entries;
+ bpf_map__set_numa_node;
+ bpf_map__set_type;
+ bpf_map__set_value_size;
+ bpf_map__type;
+ bpf_map__value_size;
+ bpf_program__attach_xdp;
+ bpf_program__autoload;
+ bpf_program__is_sk_lookup;
+ bpf_program__set_autoload;
+ bpf_program__set_sk_lookup;
+ btf__parse;
+ btf__parse_raw;
+ btf__pointer_size;
+ btf__set_fd;
+ btf__set_pointer_size;
+} LIBBPF_0.0.9;
+
+LIBBPF_0.2.0 {
+ global:
+ bpf_prog_bind_map;
+ bpf_prog_test_run_opts;
+ bpf_program__attach_freplace;
+ bpf_program__section_name;
+ btf__add_array;
+ btf__add_const;
+ btf__add_enum;
+ btf__add_enum_value;
+ btf__add_datasec;
+ btf__add_datasec_var_info;
+ btf__add_field;
+ btf__add_func;
+ btf__add_func_param;
+ btf__add_func_proto;
+ btf__add_fwd;
+ btf__add_int;
+ btf__add_ptr;
+ btf__add_restrict;
+ btf__add_str;
+ btf__add_struct;
+ btf__add_typedef;
+ btf__add_union;
+ btf__add_var;
+ btf__add_volatile;
+ btf__endianness;
+ btf__find_str;
+ btf__new_empty;
+ btf__set_endianness;
+ btf__str_by_offset;
+ perf_buffer__buffer_cnt;
+ perf_buffer__buffer_fd;
+ perf_buffer__epoll_fd;
+ perf_buffer__consume_buffer;
+ xsk_socket__create_shared;
+} LIBBPF_0.1.0;
diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
index a23ae1ac27eb..947d8bd8a7bb 100644
--- a/tools/lib/bpf/libbpf_common.h
+++ b/tools/lib/bpf/libbpf_common.h
@@ -15,6 +15,8 @@
#define LIBBPF_API __attribute__((visibility("default")))
#endif
+#define LIBBPF_DEPRECATED(msg) __attribute__((deprecated(msg)))
+
/* Helper macro to declare and initialize libbpf options struct
*
* This dance with uninitialized declaration, followed by memset to zero,
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 8c3afbd97747..d99bc847bf84 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -9,6 +9,15 @@
#ifndef __LIBBPF_LIBBPF_INTERNAL_H
#define __LIBBPF_LIBBPF_INTERNAL_H
+#include <stdlib.h>
+#include <limits.h>
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+/* prevent accidental re-addition of reallocarray() */
+#pragma GCC poison reallocarray
+
#include "libbpf.h"
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
@@ -23,6 +32,12 @@
#define BTF_PARAM_ENC(name, type) (name), (type)
#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
+#ifndef likely
+#define likely(x) __builtin_expect(!!(x), 1)
+#endif
+#ifndef unlikely
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
#ifndef min
# define min(x, y) ((x) < (y) ? (x) : (y))
#endif
@@ -63,6 +78,37 @@ do { \
#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__)
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+/*
+ * Re-implement glibc's reallocarray() for libbpf internal-only use.
+ * reallocarray(), unfortunately, is not available in all versions of glibc,
+ * so requires extra feature detection and using reallocarray() stub from
+ * <tools/libc_compat.h> and COMPAT_NEED_REALLOCARRAY. All this complicates
+ * build of libbpf unnecessarily and is just a maintenance burden. Instead,
+ * it's trivial to implement libbpf-specific internal version and use it
+ * throughout libbpf.
+ */
+static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size)
+{
+ size_t total;
+
+#if __has_builtin(__builtin_mul_overflow)
+ if (unlikely(__builtin_mul_overflow(nmemb, size, &total)))
+ return NULL;
+#else
+ if (size == 0 || nmemb > ULONG_MAX / size)
+ return NULL;
+ total = nmemb * size;
+#endif
+ return realloc(ptr, total);
+}
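[editor note] A hedged sketch of the intended usage pattern inside libbpf:

	struct item { int x; };
	struct item *arr = NULL, *tmp;
	size_t cnt = 0;

	/* grow by one element; multiplication overflow yields NULL instead of UB */
	tmp = libbpf_reallocarray(arr, cnt + 1, sizeof(*arr));
	if (!tmp)
		return -ENOMEM;
	arr = tmp;
	arr[cnt++].x = 42;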
+
+void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+ size_t cur_cnt, size_t max_cnt, size_t add_cnt);
+int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
+
static inline bool libbpf_validate_opts(const char *opts,
size_t opts_sz, size_t user_sz,
const char *type_name)
@@ -94,6 +140,11 @@ static inline bool libbpf_validate_opts(const char *opts,
((opts) && opts->sz >= offsetofend(typeof(*(opts)), field))
#define OPTS_GET(opts, field, fallback_value) \
(OPTS_HAS(opts, field) ? (opts)->field : fallback_value)
+#define OPTS_SET(opts, field, value) \
+ do { \
+ if (OPTS_HAS(opts, field)) \
+ (opts)->field = value; \
+ } while (0)
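[editor note] A hedged sketch of how libbpf code consumes caller-provided opts with these macros; shown with struct bpf_iter_attach_opts purely as an example:

	/* inside a libbpf API taking 'struct bpf_iter_attach_opts *opts' from the caller */
	__u32 info_len;

	if (!OPTS_VALID(opts, bpf_iter_attach_opts))
		return -EINVAL;

	/* fallback value is used when the caller's (older) struct lacks the field */
	info_len = OPTS_GET(opts, link_info_len, 0);

	/* writes only if the caller's struct is new enough to contain the field */
	OPTS_SET(opts, link_info_len, info_len);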
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
@@ -105,18 +156,6 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
__u32 *off);
-struct nlattr;
-typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
-int libbpf_netlink_open(unsigned int *nl_pid);
-int libbpf_nl_get_link(int sock, unsigned int nl_pid,
- libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
-int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
- libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie);
-int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
- libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie);
-int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
- libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie);
-
struct btf_ext_info {
/*
* info points to the individual info section (e.g. func_info and
@@ -138,6 +177,44 @@ struct btf_ext_info {
i < (sec)->num_info; \
i++, rec = (void *)rec + (seg)->rec_size)
+/*
+ * The .BTF.ext ELF section layout defined as
+ * struct btf_ext_header
+ * func_info subsection
+ *
+ * The func_info subsection layout:
+ * record size for struct bpf_func_info in the func_info subsection
+ * struct btf_sec_func_info for section #1
+ * a list of bpf_func_info records for section #1
+ * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h
+ * but may not be identical
+ * struct btf_sec_func_info for section #2
+ * a list of bpf_func_info records for section #2
+ * ......
+ *
+ * Note that the bpf_func_info record size in .BTF.ext may not
+ * be the same as the one defined in include/uapi/linux/bpf.h.
+ * The loader should ensure that record_size meets minimum
+ * requirement and pass the record as is to the kernel. The
+ * kernel will handle the func_info properly based on its contents.
+ */
+struct btf_ext_header {
+ __u16 magic;
+ __u8 version;
+ __u8 flags;
+ __u32 hdr_len;
+
+ /* All offsets are in bytes relative to the end of this header */
+ __u32 func_info_off;
+ __u32 func_info_len;
+ __u32 line_info_off;
+ __u32 line_info_len;
+
+ /* optional part of .BTF.ext header */
+ __u32 core_relo_off;
+ __u32 core_relo_len;
+};
+
struct btf_ext {
union {
struct btf_ext_header *hdr;
@@ -145,7 +222,7 @@ struct btf_ext {
};
struct btf_ext_info func_info;
struct btf_ext_info line_info;
- struct btf_ext_info field_reloc_info;
+ struct btf_ext_info core_relo_info;
__u32 data_size;
};
@@ -153,7 +230,7 @@ struct btf_ext_info_sec {
__u32 sec_name_off;
__u32 num_info;
/* Followed by num_info * record_size number of bytes */
- __u8 data[0];
+ __u8 data[];
};
/* The minimum bpf_func_info checked by the loader */
@@ -170,32 +247,40 @@ struct bpf_line_info_min {
__u32 line_col;
};
-/* bpf_field_info_kind encodes which aspect of captured field has to be
- * adjusted by relocations. Currently supported values are:
- * - BPF_FIELD_BYTE_OFFSET: field offset (in bytes);
- * - BPF_FIELD_EXISTS: field existence (1, if field exists; 0, otherwise);
+/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
+ * has to be adjusted by relocations.
*/
-enum bpf_field_info_kind {
+enum bpf_core_relo_kind {
BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
- BPF_FIELD_BYTE_SIZE = 1,
+ BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */
BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
- BPF_FIELD_SIGNED = 3,
- BPF_FIELD_LSHIFT_U64 = 4,
- BPF_FIELD_RSHIFT_U64 = 5,
+ BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
+ BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
+ BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
+ BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
+ BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */
+ BPF_TYPE_EXISTS = 8, /* type existence in target kernel */
+ BPF_TYPE_SIZE = 9, /* type size in bytes */
+ BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
+ BPF_ENUMVAL_VALUE = 11, /* enum value integer value */
};
-/* The minimum bpf_field_reloc checked by the loader
+/* The minimum bpf_core_relo checked by the loader
*
- * Field relocation captures the following data:
+ * CO-RE relocation captures the following data:
* - insn_off - instruction offset (in bytes) within a BPF program that needs
* its insn->imm field to be relocated with actual field info;
* - type_id - BTF type ID of the "root" (containing) entity of a relocatable
- * field;
+ * type or field;
* - access_str_off - offset into corresponding .BTF string section. String
- * itself encodes an accessed field using a sequence of field and array
- * indicies, separated by colon (:). It's conceptually very close to LLVM's
- * getelementptr ([0]) instruction's arguments for identifying offset to
- * a field.
+ * interpretation depends on specific relocation kind:
+ * - for field-based relocations, string encodes an accessed field using
+ * a sequence of field and array indices, separated by colon (:). It's
+ * conceptually very close to LLVM's getelementptr ([0]) instruction's
+ * arguments for identifying offset to a field.
+ *   - for type-based relocations, string is expected to be just "0";
+ * - for enum value-based relocations, string contains an index of enum
+ * value within its enum type;
*
* Example to provide a better feel.
*
@@ -226,11 +311,11 @@ enum bpf_field_info_kind {
*
* [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
*/
-struct bpf_field_reloc {
+struct bpf_core_relo {
__u32 insn_off;
__u32 type_id;
__u32 access_str_off;
- enum bpf_field_info_kind kind;
+ enum bpf_core_relo_kind kind;
};
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 2c92059c0c90..5482a9b7ae2d 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -17,9 +17,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
static bool grep(const char *buffer, const char *pattern)
{
return !!strstr(buffer, pattern);
@@ -78,6 +75,9 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
break;
+ case BPF_PROG_TYPE_SK_LOOKUP:
+ xattr.expected_attach_type = BPF_SK_LOOKUP;
+ break;
case BPF_PROG_TYPE_KPROBE:
xattr.kern_version = get_kernel_version();
break;
@@ -170,7 +170,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
return btf_fd;
}
-static int load_sk_storage_btf(void)
+static int load_local_storage_btf(void)
{
const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l";
/* struct bpf_spin_lock {
@@ -229,15 +229,21 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
key_size = 0;
break;
case BPF_MAP_TYPE_SK_STORAGE:
+ case BPF_MAP_TYPE_INODE_STORAGE:
btf_key_type_id = 1;
btf_value_type_id = 3;
value_size = 8;
max_entries = 0;
map_flags = BPF_F_NO_PREALLOC;
- btf_fd = load_sk_storage_btf();
+ btf_fd = load_local_storage_btf();
if (btf_fd < 0)
return false;
break;
+ case BPF_MAP_TYPE_RINGBUF:
+ key_size = 0;
+ value_size = 0;
+ max_entries = 4096;
+ break;
case BPF_MAP_TYPE_UNSPEC:
case BPF_MAP_TYPE_HASH:
case BPF_MAP_TYPE_ARRAY:
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 312f887570b2..4dd73de00b6f 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -15,13 +15,12 @@
#include "libbpf_internal.h"
#include "nlattr.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif
+typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+
typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
void *cookie);
@@ -31,7 +30,7 @@ struct xdp_id_md {
struct xdp_link_info info;
};
-int libbpf_netlink_open(__u32 *nl_pid)
+static int libbpf_netlink_open(__u32 *nl_pid)
{
struct sockaddr_nl sa;
socklen_t addrlen;
@@ -283,6 +282,9 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
return 0;
}
+static int libbpf_nl_get_link(int sock, unsigned int nl_pid,
+ libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
+
int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
size_t info_size, __u32 flags)
{
@@ -368,121 +370,3 @@ int libbpf_nl_get_link(int sock, unsigned int nl_pid,
return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
dump_link_nlmsg, cookie);
}
-
-static int __dump_class_nlmsg(struct nlmsghdr *nlh,
- libbpf_dump_nlmsg_t dump_class_nlmsg,
- void *cookie)
-{
- struct nlattr *tb[TCA_MAX + 1], *attr;
- struct tcmsg *t = NLMSG_DATA(nlh);
- int len;
-
- len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
- attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
- if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
- return -LIBBPF_ERRNO__NLPARSE;
-
- return dump_class_nlmsg(cookie, t, tb);
-}
-
-int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
- libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie)
-{
- struct {
- struct nlmsghdr nlh;
- struct tcmsg t;
- } req = {
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
- .nlh.nlmsg_type = RTM_GETTCLASS,
- .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
- .t.tcm_family = AF_UNSPEC,
- .t.tcm_ifindex = ifindex,
- };
- int seq = time(NULL);
-
- req.nlh.nlmsg_seq = seq;
- if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
- return -errno;
-
- return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg,
- dump_class_nlmsg, cookie);
-}
-
-static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh,
- libbpf_dump_nlmsg_t dump_qdisc_nlmsg,
- void *cookie)
-{
- struct nlattr *tb[TCA_MAX + 1], *attr;
- struct tcmsg *t = NLMSG_DATA(nlh);
- int len;
-
- len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
- attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
- if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
- return -LIBBPF_ERRNO__NLPARSE;
-
- return dump_qdisc_nlmsg(cookie, t, tb);
-}
-
-int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
- libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie)
-{
- struct {
- struct nlmsghdr nlh;
- struct tcmsg t;
- } req = {
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
- .nlh.nlmsg_type = RTM_GETQDISC,
- .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
- .t.tcm_family = AF_UNSPEC,
- .t.tcm_ifindex = ifindex,
- };
- int seq = time(NULL);
-
- req.nlh.nlmsg_seq = seq;
- if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
- return -errno;
-
- return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg,
- dump_qdisc_nlmsg, cookie);
-}
-
-static int __dump_filter_nlmsg(struct nlmsghdr *nlh,
- libbpf_dump_nlmsg_t dump_filter_nlmsg,
- void *cookie)
-{
- struct nlattr *tb[TCA_MAX + 1], *attr;
- struct tcmsg *t = NLMSG_DATA(nlh);
- int len;
-
- len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
- attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
- if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
- return -LIBBPF_ERRNO__NLPARSE;
-
- return dump_filter_nlmsg(cookie, t, tb);
-}
-
-int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
- libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie)
-{
- struct {
- struct nlmsghdr nlh;
- struct tcmsg t;
- } req = {
- .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
- .nlh.nlmsg_type = RTM_GETTFILTER,
- .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
- .t.tcm_family = AF_UNSPEC,
- .t.tcm_ifindex = ifindex,
- .t.tcm_parent = handle,
- };
- int seq = time(NULL);
-
- req.nlh.nlmsg_seq = seq;
- if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
- return -errno;
-
- return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg,
- dump_filter_nlmsg, cookie);
-}
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index 0ad41dfea8eb..b607fa9852b1 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -7,14 +7,11 @@
*/
#include <errno.h>
-#include "nlattr.h"
-#include "libbpf_internal.h"
-#include <linux/rtnetlink.h>
#include <string.h>
#include <stdio.h>
-
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+#include <linux/rtnetlink.h>
+#include "nlattr.h"
+#include "libbpf_internal.h"
static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = {
[LIBBPF_NLA_U8] = sizeof(uint8_t),
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
new file mode 100644
index 000000000000..5c6522c89af1
--- /dev/null
+++ b/tools/lib/bpf/ringbuf.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/*
+ * Ring buffer operations.
+ *
+ * Copyright (C) 2020 Facebook, Inc.
+ */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <linux/err.h>
+#include <linux/bpf.h>
+#include <asm/barrier.h>
+#include <sys/mman.h>
+#include <sys/epoll.h>
+
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "bpf.h"
+
+struct ring {
+ ring_buffer_sample_fn sample_cb;
+ void *ctx;
+ void *data;
+ unsigned long *consumer_pos;
+ unsigned long *producer_pos;
+ unsigned long mask;
+ int map_fd;
+};
+
+struct ring_buffer {
+ struct epoll_event *events;
+ struct ring *rings;
+ size_t page_size;
+ int epoll_fd;
+ int ring_cnt;
+};
+
+static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
+{
+ if (r->consumer_pos) {
+ munmap(r->consumer_pos, rb->page_size);
+ r->consumer_pos = NULL;
+ }
+ if (r->producer_pos) {
+ munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1));
+ r->producer_pos = NULL;
+ }
+}
+
+/* Add extra RINGBUF maps to this ring buffer manager */
+int ring_buffer__add(struct ring_buffer *rb, int map_fd,
+ ring_buffer_sample_fn sample_cb, void *ctx)
+{
+ struct bpf_map_info info;
+ __u32 len = sizeof(info);
+ struct epoll_event *e;
+ struct ring *r;
+ void *tmp;
+ int err;
+
+ memset(&info, 0, sizeof(info));
+
+ err = bpf_obj_get_info_by_fd(map_fd, &info, &len);
+ if (err) {
+ err = -errno;
+ pr_warn("ringbuf: failed to get map info for fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+
+ if (info.type != BPF_MAP_TYPE_RINGBUF) {
+ pr_warn("ringbuf: map fd=%d is not BPF_MAP_TYPE_RINGBUF\n",
+ map_fd);
+ return -EINVAL;
+ }
+
+ tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
+ if (!tmp)
+ return -ENOMEM;
+ rb->rings = tmp;
+
+ tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
+ if (!tmp)
+ return -ENOMEM;
+ rb->events = tmp;
+
+ r = &rb->rings[rb->ring_cnt];
+ memset(r, 0, sizeof(*r));
+
+ r->map_fd = map_fd;
+ r->sample_cb = sample_cb;
+ r->ctx = ctx;
+ r->mask = info.max_entries - 1;
+
+ /* Map writable consumer page */
+ tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ map_fd, 0);
+ if (tmp == MAP_FAILED) {
+ err = -errno;
+ pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+ r->consumer_pos = tmp;
+
+	/* Map read-only producer page and data pages. We map the data area
+	 * twice as big to allow simple reading of samples that wrap around
+	 * the end of the ring buffer. See the kernel implementation for
+	 * details.
+	 */
+ tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ,
+ MAP_SHARED, map_fd, rb->page_size);
+ if (tmp == MAP_FAILED) {
+ err = -errno;
+ ringbuf_unmap_ring(rb, r);
+ pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+ r->producer_pos = tmp;
+ r->data = tmp + rb->page_size;
+
+ e = &rb->events[rb->ring_cnt];
+ memset(e, 0, sizeof(*e));
+
+ e->events = EPOLLIN;
+ e->data.fd = rb->ring_cnt;
+ if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) {
+ err = -errno;
+ ringbuf_unmap_ring(rb, r);
+ pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+
+ rb->ring_cnt++;
+ return 0;
+}
+
+void ring_buffer__free(struct ring_buffer *rb)
+{
+ int i;
+
+ if (!rb)
+ return;
+
+ for (i = 0; i < rb->ring_cnt; ++i)
+ ringbuf_unmap_ring(rb, &rb->rings[i]);
+ if (rb->epoll_fd >= 0)
+ close(rb->epoll_fd);
+
+ free(rb->events);
+ free(rb->rings);
+ free(rb);
+}
+
+struct ring_buffer *
+ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
+ const struct ring_buffer_opts *opts)
+{
+ struct ring_buffer *rb;
+ int err;
+
+ if (!OPTS_VALID(opts, ring_buffer_opts))
+ return NULL;
+
+ rb = calloc(1, sizeof(*rb));
+ if (!rb)
+ return NULL;
+
+ rb->page_size = getpagesize();
+
+ rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (rb->epoll_fd < 0) {
+ err = -errno;
+ pr_warn("ringbuf: failed to create epoll instance: %d\n", err);
+ goto err_out;
+ }
+
+ err = ring_buffer__add(rb, map_fd, sample_cb, ctx);
+ if (err)
+ goto err_out;
+
+ return rb;
+
+err_out:
+ ring_buffer__free(rb);
+ return NULL;
+}
+
+static inline int roundup_len(__u32 len)
+{
+ /* clear out top 2 bits (discard and busy, if set) */
+ len <<= 2;
+ len >>= 2;
+ /* add length prefix */
+ len += BPF_RINGBUF_HDR_SZ;
+ /* round up to 8 byte alignment */
+ return (len + 7) / 8 * 8;
+}
+
+static int ringbuf_process_ring(struct ring *r)
+{
+ int *len_ptr, len, err, cnt = 0;
+ unsigned long cons_pos, prod_pos;
+ bool got_new_data;
+ void *sample;
+
+ cons_pos = smp_load_acquire(r->consumer_pos);
+ do {
+ got_new_data = false;
+ prod_pos = smp_load_acquire(r->producer_pos);
+ while (cons_pos < prod_pos) {
+ len_ptr = r->data + (cons_pos & r->mask);
+ len = smp_load_acquire(len_ptr);
+
+ /* sample not committed yet, bail out for now */
+ if (len & BPF_RINGBUF_BUSY_BIT)
+ goto done;
+
+ got_new_data = true;
+ cons_pos += roundup_len(len);
+
+ if ((len & BPF_RINGBUF_DISCARD_BIT) == 0) {
+ sample = (void *)len_ptr + BPF_RINGBUF_HDR_SZ;
+ err = r->sample_cb(r->ctx, sample, len);
+ if (err) {
+ /* update consumer pos and bail out */
+ smp_store_release(r->consumer_pos,
+ cons_pos);
+ return err;
+ }
+ cnt++;
+ }
+
+ smp_store_release(r->consumer_pos, cons_pos);
+ }
+ } while (got_new_data);
+done:
+ return cnt;
+}
+
+/* Consume available data from all registered ring buffers without event
+ * polling. Returns the number of records consumed across all ring buffers,
+ * or a negative number if any of the callbacks returns an error.
+ */
+int ring_buffer__consume(struct ring_buffer *rb)
+{
+ int i, err, res = 0;
+
+ for (i = 0; i < rb->ring_cnt; i++) {
+ struct ring *ring = &rb->rings[i];
+
+ err = ringbuf_process_ring(ring);
+ if (err < 0)
+ return err;
+ res += err;
+ }
+ return res;
+}
+
+/* Poll for available data and consume records, if any are available.
+ * Returns the number of records consumed, or a negative number if any of
+ * the registered callbacks returns an error.
+ */
+int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
+{
+ int i, cnt, err, res = 0;
+
+ cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms);
+ for (i = 0; i < cnt; i++) {
+ __u32 ring_id = rb->events[i].data.fd;
+ struct ring *ring = &rb->rings[ring_id];
+
+ err = ringbuf_process_ring(ring);
+ if (err < 0)
+ return err;
+		res += err;
+ }
+ return cnt < 0 ? -errno : res;
+}
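The new ring buffer manager is driven from user space roughly as follows. A minimal consumer sketch, not part of the patch; the map name "events" and the surrounding bpf_object handling are assumptions:

#include <errno.h>
#include <bpf/libbpf.h>

/* Callback invoked once per committed (non-discarded) sample. */
static int handle_sample(void *ctx, void *data, size_t size)
{
	/* a non-zero return stops consumption and is propagated to the caller */
	return 0;
}

static int consume(struct bpf_object *obj)
{
	struct ring_buffer *rb;
	int map_fd, err;

	map_fd = bpf_object__find_map_fd_by_name(obj, "events");
	if (map_fd < 0)
		return map_fd;

	rb = ring_buffer__new(map_fd, handle_sample, NULL, NULL);
	if (!rb)
		return -ENOMEM;

	do {
		/* returns the number of records consumed, or a negative error */
		err = ring_buffer__poll(rb, 100 /* timeout, ms */);
	} while (err >= 0);

	ring_buffer__free(rb);
	return err;
}

ring_buffer__consume() can be used instead of ring_buffer__poll() when the caller already knows data is pending or wants to busy-poll without epoll.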
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index f7f4efb70a4c..e3c98c007825 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -20,6 +20,8 @@
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_xdp.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <sys/ioctl.h>
@@ -32,9 +34,6 @@
#include "libbpf_internal.h"
#include "xsk.h"
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
#ifndef SOL_XDP
#define SOL_XDP 283
#endif
@@ -48,26 +47,35 @@
#endif
struct xsk_umem {
- struct xsk_ring_prod *fill;
- struct xsk_ring_cons *comp;
+ struct xsk_ring_prod *fill_save;
+ struct xsk_ring_cons *comp_save;
char *umem_area;
struct xsk_umem_config config;
int fd;
int refcount;
+ struct list_head ctx_list;
+};
+
+struct xsk_ctx {
+ struct xsk_ring_prod *fill;
+ struct xsk_ring_cons *comp;
+ __u32 queue_id;
+ struct xsk_umem *umem;
+ int refcount;
+ int ifindex;
+ struct list_head list;
+ int prog_fd;
+ int xsks_map_fd;
+ char ifname[IFNAMSIZ];
};
struct xsk_socket {
struct xsk_ring_cons *rx;
struct xsk_ring_prod *tx;
__u64 outstanding_tx;
- struct xsk_umem *umem;
+ struct xsk_ctx *ctx;
struct xsk_socket_config config;
int fd;
- int ifindex;
- int prog_fd;
- int xsks_map_fd;
- __u32 queue_id;
- char ifname[IFNAMSIZ];
};
struct xsk_nl_info {
@@ -203,15 +211,73 @@ static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
return -EINVAL;
}
+static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp)
+{
+ struct xdp_mmap_offsets off;
+ void *map;
+ int err;
+
+ err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
+ &umem->config.fill_size,
+ sizeof(umem->config.fill_size));
+ if (err)
+ return -errno;
+
+ err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
+ &umem->config.comp_size,
+ sizeof(umem->config.comp_size));
+ if (err)
+ return -errno;
+
+ err = xsk_get_mmap_offsets(fd, &off);
+ if (err)
+ return -errno;
+
+ map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+ XDP_UMEM_PGOFF_FILL_RING);
+ if (map == MAP_FAILED)
+ return -errno;
+
+ fill->mask = umem->config.fill_size - 1;
+ fill->size = umem->config.fill_size;
+ fill->producer = map + off.fr.producer;
+ fill->consumer = map + off.fr.consumer;
+ fill->flags = map + off.fr.flags;
+ fill->ring = map + off.fr.desc;
+ fill->cached_cons = umem->config.fill_size;
+
+ map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+ XDP_UMEM_PGOFF_COMPLETION_RING);
+ if (map == MAP_FAILED) {
+ err = -errno;
+ goto out_mmap;
+ }
+
+ comp->mask = umem->config.comp_size - 1;
+ comp->size = umem->config.comp_size;
+ comp->producer = map + off.cr.producer;
+ comp->consumer = map + off.cr.consumer;
+ comp->flags = map + off.cr.flags;
+ comp->ring = map + off.cr.desc;
+
+ return 0;
+
+out_mmap:
+ munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
+ return err;
+}
+
int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
__u64 size, struct xsk_ring_prod *fill,
struct xsk_ring_cons *comp,
const struct xsk_umem_config *usr_config)
{
- struct xdp_mmap_offsets off;
struct xdp_umem_reg mr;
struct xsk_umem *umem;
- void *map;
int err;
if (!umem_area || !umem_ptr || !fill || !comp)
@@ -230,6 +296,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
}
umem->umem_area = umem_area;
+ INIT_LIST_HEAD(&umem->ctx_list);
xsk_set_umem_config(&umem->config, usr_config);
memset(&mr, 0, sizeof(mr));
@@ -244,71 +311,16 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
err = -errno;
goto out_socket;
}
- err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING,
- &umem->config.fill_size,
- sizeof(umem->config.fill_size));
- if (err) {
- err = -errno;
- goto out_socket;
- }
- err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
- &umem->config.comp_size,
- sizeof(umem->config.comp_size));
- if (err) {
- err = -errno;
- goto out_socket;
- }
- err = xsk_get_mmap_offsets(umem->fd, &off);
- if (err) {
- err = -errno;
- goto out_socket;
- }
-
- map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
- XDP_UMEM_PGOFF_FILL_RING);
- if (map == MAP_FAILED) {
- err = -errno;
+ err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
+ if (err)
goto out_socket;
- }
-
- umem->fill = fill;
- fill->mask = umem->config.fill_size - 1;
- fill->size = umem->config.fill_size;
- fill->producer = map + off.fr.producer;
- fill->consumer = map + off.fr.consumer;
- fill->flags = map + off.fr.flags;
- fill->ring = map + off.fr.desc;
- fill->cached_prod = *fill->producer;
- /* cached_cons is "size" bigger than the real consumer pointer
- * See xsk_prod_nb_free
- */
- fill->cached_cons = *fill->consumer + umem->config.fill_size;
-
- map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
- XDP_UMEM_PGOFF_COMPLETION_RING);
- if (map == MAP_FAILED) {
- err = -errno;
- goto out_mmap;
- }
-
- umem->comp = comp;
- comp->mask = umem->config.comp_size - 1;
- comp->size = umem->config.comp_size;
- comp->producer = map + off.cr.producer;
- comp->consumer = map + off.cr.consumer;
- comp->flags = map + off.cr.flags;
- comp->ring = map + off.cr.desc;
- comp->cached_prod = *comp->producer;
- comp->cached_cons = *comp->consumer;
+ umem->fill_save = fill;
+ umem->comp_save = comp;
*umem_ptr = umem;
return 0;
-out_mmap:
- munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
out_socket:
close(umem->fd);
out_umem_alloc:
@@ -342,6 +354,7 @@ DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
static int xsk_load_xdp_prog(struct xsk_socket *xsk)
{
static const int log_buf_size = 16 * 1024;
+ struct xsk_ctx *ctx = xsk->ctx;
char log_buf[log_buf_size];
int err, prog_fd;
@@ -369,7 +382,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
/* *(u32 *)(r10 - 4) = r2 */
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
/* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
/* r3 = XDP_PASS */
BPF_MOV64_IMM(BPF_REG_3, 2),
/* call bpf_redirect_map */
@@ -381,7 +394,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
/* r2 += -4 */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
/* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
/* call bpf_map_lookup_elem */
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
/* r1 = r0 */
@@ -393,7 +406,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
/* r2 = *(u32 *)(r10 - 4) */
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
/* r1 = xskmap[] */
- BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
/* r3 = 0 */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* call bpf_redirect_map */
@@ -411,19 +424,21 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
return prog_fd;
}
- err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags);
+ err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd,
+ xsk->config.xdp_flags);
if (err) {
close(prog_fd);
return err;
}
- xsk->prog_fd = prog_fd;
+ ctx->prog_fd = prog_fd;
return 0;
}
static int xsk_get_max_queues(struct xsk_socket *xsk)
{
struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
+ struct xsk_ctx *ctx = xsk->ctx;
struct ifreq ifr = {};
int fd, err, ret;
@@ -432,7 +447,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
return -errno;
ifr.ifr_data = (void *)&channels;
- memcpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1);
+ memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1);
ifr.ifr_name[IFNAMSIZ - 1] = '\0';
err = ioctl(fd, SIOCETHTOOL, &ifr);
if (err && errno != EOPNOTSUPP) {
@@ -460,6 +475,7 @@ out:
static int xsk_create_bpf_maps(struct xsk_socket *xsk)
{
+ struct xsk_ctx *ctx = xsk->ctx;
int max_queues;
int fd;
@@ -472,15 +488,17 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
if (fd < 0)
return fd;
- xsk->xsks_map_fd = fd;
+ ctx->xsks_map_fd = fd;
return 0;
}
static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
{
- bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
- close(xsk->xsks_map_fd);
+ struct xsk_ctx *ctx = xsk->ctx;
+
+ bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id);
+ close(ctx->xsks_map_fd);
}
static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
@@ -488,10 +506,11 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
__u32 map_len = sizeof(struct bpf_map_info);
struct bpf_prog_info prog_info = {};
+ struct xsk_ctx *ctx = xsk->ctx;
struct bpf_map_info map_info;
int fd, err;
- err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
+ err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
if (err)
return err;
@@ -505,11 +524,11 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
prog_info.nr_map_ids = num_maps;
prog_info.map_ids = (__u64)(unsigned long)map_ids;
- err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
+ err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
if (err)
goto out_map_ids;
- xsk->xsks_map_fd = -1;
+ ctx->xsks_map_fd = -1;
for (i = 0; i < prog_info.nr_map_ids; i++) {
fd = bpf_map_get_fd_by_id(map_ids[i]);
@@ -523,7 +542,7 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
}
if (!strcmp(map_info.name, "xsks_map")) {
- xsk->xsks_map_fd = fd;
+ ctx->xsks_map_fd = fd;
continue;
}
@@ -531,7 +550,7 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
}
err = 0;
- if (xsk->xsks_map_fd == -1)
+ if (ctx->xsks_map_fd == -1)
err = -ENOENT;
out_map_ids:
@@ -541,16 +560,19 @@ out_map_ids:
static int xsk_set_bpf_maps(struct xsk_socket *xsk)
{
- return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
+ struct xsk_ctx *ctx = xsk->ctx;
+
+ return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id,
&xsk->fd, 0);
}
static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
{
+ struct xsk_ctx *ctx = xsk->ctx;
__u32 prog_id = 0;
int err;
- err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id,
+ err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id,
xsk->config.xdp_flags);
if (err)
return err;
@@ -566,12 +588,12 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
return err;
}
} else {
- xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
- if (xsk->prog_fd < 0)
+ ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (ctx->prog_fd < 0)
return -errno;
err = xsk_lookup_bpf_maps(xsk);
if (err) {
- close(xsk->prog_fd);
+ close(ctx->prog_fd);
return err;
}
}
@@ -580,23 +602,108 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
err = xsk_set_bpf_maps(xsk);
if (err) {
xsk_delete_bpf_maps(xsk);
- close(xsk->prog_fd);
+ close(ctx->prog_fd);
return err;
}
return 0;
}
-int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
- __u32 queue_id, struct xsk_umem *umem,
- struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
- const struct xsk_socket_config *usr_config)
+static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
+ __u32 queue_id)
+{
+ struct xsk_ctx *ctx;
+
+ if (list_empty(&umem->ctx_list))
+ return NULL;
+
+ list_for_each_entry(ctx, &umem->ctx_list, list) {
+ if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
+ ctx->refcount++;
+ return ctx;
+ }
+ }
+
+ return NULL;
+}
+
+static void xsk_put_ctx(struct xsk_ctx *ctx)
+{
+ struct xsk_umem *umem = ctx->umem;
+ struct xdp_mmap_offsets off;
+ int err;
+
+ if (--ctx->refcount == 0) {
+ err = xsk_get_mmap_offsets(umem->fd, &off);
+ if (!err) {
+ munmap(ctx->fill->ring - off.fr.desc,
+ off.fr.desc + umem->config.fill_size *
+ sizeof(__u64));
+ munmap(ctx->comp->ring - off.cr.desc,
+ off.cr.desc + umem->config.comp_size *
+ sizeof(__u64));
+ }
+
+ list_del(&ctx->list);
+ free(ctx);
+ }
+}
+
+static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
+ struct xsk_umem *umem, int ifindex,
+ const char *ifname, __u32 queue_id,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp)
+{
+ struct xsk_ctx *ctx;
+ int err;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx)
+ return NULL;
+
+ if (!umem->fill_save) {
+ err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
+ if (err) {
+ free(ctx);
+ return NULL;
+ }
+ } else if (umem->fill_save != fill || umem->comp_save != comp) {
+ /* Copy over rings to new structs. */
+ memcpy(fill, umem->fill_save, sizeof(*fill));
+ memcpy(comp, umem->comp_save, sizeof(*comp));
+ }
+
+ ctx->ifindex = ifindex;
+ ctx->refcount = 1;
+ ctx->umem = umem;
+ ctx->queue_id = queue_id;
+ memcpy(ctx->ifname, ifname, IFNAMSIZ - 1);
+ ctx->ifname[IFNAMSIZ - 1] = '\0';
+
+ umem->fill_save = NULL;
+ umem->comp_save = NULL;
+ ctx->fill = fill;
+ ctx->comp = comp;
+ list_add(&ctx->list, &umem->ctx_list);
+ return ctx;
+}
+
+int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+ const char *ifname,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_socket_config *usr_config)
{
void *rx_map = NULL, *tx_map = NULL;
struct sockaddr_xdp sxdp = {};
struct xdp_mmap_offsets off;
struct xsk_socket *xsk;
- int err;
+ struct xsk_ctx *ctx;
+ int err, ifindex;
if (!umem || !xsk_ptr || !(rx || tx))
return -EFAULT;
@@ -609,10 +716,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
if (err)
goto out_xsk_alloc;
- if (umem->refcount &&
- !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
- pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n");
- err = -EBUSY;
+ xsk->outstanding_tx = 0;
+ ifindex = if_nametoindex(ifname);
+ if (!ifindex) {
+ err = -errno;
goto out_xsk_alloc;
}
@@ -626,16 +733,21 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
xsk->fd = umem->fd;
}
- xsk->outstanding_tx = 0;
- xsk->queue_id = queue_id;
- xsk->umem = umem;
- xsk->ifindex = if_nametoindex(ifname);
- if (!xsk->ifindex) {
- err = -errno;
- goto out_socket;
+ ctx = xsk_get_ctx(umem, ifindex, queue_id);
+ if (!ctx) {
+ if (!fill || !comp) {
+ err = -EFAULT;
+ goto out_socket;
+ }
+
+ ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id,
+ fill, comp);
+ if (!ctx) {
+ err = -ENOMEM;
+ goto out_socket;
+ }
}
- memcpy(xsk->ifname, ifname, IFNAMSIZ - 1);
- xsk->ifname[IFNAMSIZ - 1] = '\0';
+ xsk->ctx = ctx;
if (rx) {
err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
@@ -643,7 +755,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
sizeof(xsk->config.rx_size));
if (err) {
err = -errno;
- goto out_socket;
+ goto out_put_ctx;
}
}
if (tx) {
@@ -652,14 +764,14 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
sizeof(xsk->config.tx_size));
if (err) {
err = -errno;
- goto out_socket;
+ goto out_put_ctx;
}
}
err = xsk_get_mmap_offsets(xsk->fd, &off);
if (err) {
err = -errno;
- goto out_socket;
+ goto out_put_ctx;
}
if (rx) {
@@ -669,7 +781,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
xsk->fd, XDP_PGOFF_RX_RING);
if (rx_map == MAP_FAILED) {
err = -errno;
- goto out_socket;
+ goto out_put_ctx;
}
rx->mask = xsk->config.rx_size - 1;
@@ -708,10 +820,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
xsk->tx = tx;
sxdp.sxdp_family = PF_XDP;
- sxdp.sxdp_ifindex = xsk->ifindex;
- sxdp.sxdp_queue_id = xsk->queue_id;
+ sxdp.sxdp_ifindex = ctx->ifindex;
+ sxdp.sxdp_queue_id = ctx->queue_id;
if (umem->refcount > 1) {
- sxdp.sxdp_flags = XDP_SHARED_UMEM;
+ sxdp.sxdp_flags |= XDP_SHARED_UMEM;
sxdp.sxdp_shared_umem_fd = umem->fd;
} else {
sxdp.sxdp_flags = xsk->config.bind_flags;
@@ -723,7 +835,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
goto out_mmap_tx;
}
- xsk->prog_fd = -1;
+ ctx->prog_fd = -1;
if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
err = xsk_setup_xdp_prog(xsk);
@@ -742,6 +854,8 @@ out_mmap_rx:
if (rx)
munmap(rx_map, off.rx.desc +
xsk->config.rx_size * sizeof(struct xdp_desc));
+out_put_ctx:
+ xsk_put_ctx(ctx);
out_socket:
if (--umem->refcount)
close(xsk->fd);
@@ -750,25 +864,24 @@ out_xsk_alloc:
return err;
}
-int xsk_umem__delete(struct xsk_umem *umem)
+int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
+ const struct xsk_socket_config *usr_config)
{
- struct xdp_mmap_offsets off;
- int err;
+ return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem,
+ rx, tx, umem->fill_save,
+ umem->comp_save, usr_config);
+}
+int xsk_umem__delete(struct xsk_umem *umem)
+{
if (!umem)
return 0;
if (umem->refcount)
return -EBUSY;
- err = xsk_get_mmap_offsets(umem->fd, &off);
- if (!err) {
- munmap(umem->fill->ring - off.fr.desc,
- off.fr.desc + umem->config.fill_size * sizeof(__u64));
- munmap(umem->comp->ring - off.cr.desc,
- off.cr.desc + umem->config.comp_size * sizeof(__u64));
- }
-
close(umem->fd);
free(umem);
@@ -778,15 +891,16 @@ int xsk_umem__delete(struct xsk_umem *umem)
void xsk_socket__delete(struct xsk_socket *xsk)
{
size_t desc_sz = sizeof(struct xdp_desc);
+ struct xsk_ctx *ctx = xsk->ctx;
struct xdp_mmap_offsets off;
int err;
if (!xsk)
return;
- if (xsk->prog_fd != -1) {
+ if (ctx->prog_fd != -1) {
xsk_delete_bpf_maps(xsk);
- close(xsk->prog_fd);
+ close(ctx->prog_fd);
}
err = xsk_get_mmap_offsets(xsk->fd, &off);
@@ -799,14 +913,15 @@ void xsk_socket__delete(struct xsk_socket *xsk)
munmap(xsk->tx->ring - off.tx.desc,
off.tx.desc + xsk->config.tx_size * desc_sz);
}
-
}
- xsk->umem->refcount--;
+ xsk_put_ctx(ctx);
+
+ ctx->umem->refcount--;
/* Do not close an fd that also has an associated umem connected
* to it.
*/
- if (xsk->fd != xsk->umem->fd)
+ if (xsk->fd != ctx->umem->fd)
close(xsk->fd);
free(xsk);
}
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 584f6820a639..1069c46364ff 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -234,6 +234,15 @@ LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
struct xsk_ring_cons *rx,
struct xsk_ring_prod *tx,
const struct xsk_socket_config *config);
+LIBBPF_API int
+xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+ const char *ifname,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_socket_config *config);
/* Returns 0 for success and -EBUSY if the umem is still in use. */
LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);
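xsk_socket__create_shared() lets several sockets bind to one umem as long as each ifindex/queue pair supplies its own fill and completion rings. A sketch under those assumptions (the interface name "eth0", queue IDs 0 and 1, and the buffer allocation are placeholders):

#include <bpf/xsk.h>

static struct xsk_ring_prod fill0, fill1, tx0, tx1;
static struct xsk_ring_cons comp0, comp1, rx0, rx1;

static int setup_two_queues(void *bufs, __u64 size,
			    struct xsk_umem **umem,
			    struct xsk_socket **xsk0,
			    struct xsk_socket **xsk1)
{
	int err;

	/* Register the umem once; fill0/comp0 stay associated with it. */
	err = xsk_umem__create(umem, bufs, size, &fill0, &comp0, NULL);
	if (err)
		return err;

	/* First socket picks up the fill/comp rings saved in the umem. */
	err = xsk_socket__create(xsk0, "eth0", 0, *umem, &rx0, &tx0, NULL);
	if (err)
		return err;

	/* Second socket on queue 1 shares the umem but brings its own
	 * fill/completion rings, as required by the new API.
	 */
	return xsk_socket__create_shared(xsk1, "eth0", 1, *umem, &rx1, &tx1,
					 &fill1, &comp1, NULL);
}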
diff --git a/tools/lib/perf/Documentation/libperf-counting.txt b/tools/lib/perf/Documentation/libperf-counting.txt
index cae9757f49c1..8b75efcd67ce 100644
--- a/tools/lib/perf/Documentation/libperf-counting.txt
+++ b/tools/lib/perf/Documentation/libperf-counting.txt
@@ -7,13 +7,13 @@ libperf-counting - counting interface
DESCRIPTION
-----------
-The counting interface provides API to meassure and get count for specific perf events.
+The counting interface provides an API to measure and get counts for specific perf events.
The following test tries to explain count on `counting.c` example.
It is by no means complete guide to counting, but shows libperf basic API for counting.
-The `counting.c` comes with libbperf package and can be compiled and run like:
+The `counting.c` comes with libperf package and can be compiled and run like:
[source,bash]
--
@@ -26,7 +26,8 @@ count 176242, enabled 176242, run 176242
It requires root access, because of the `PERF_COUNT_SW_CPU_CLOCK` event,
which is available only for root.
-The `counting.c` example monitors two events on the current process and displays their count, in a nutshel it:
+The `counting.c` example monitors two events on the current process and displays
+their counts; in a nutshell it:
* creates events
* adds them to the event list
@@ -152,7 +153,7 @@ Configure event list with the thread map and open events:
--
Both events are created as disabled (note the `disabled = 1` assignment above),
-so we need to enable the whole list explicitely (both events).
+so we need to enable the whole list explicitly (both events).
From this moment events are counting and we can do our workload.
@@ -167,7 +168,8 @@ When we are done we disable the events list.
79 perf_evlist__disable(evlist);
--
-Now we need to get the counts from events, following code iterates throught the events list and read counts:
+Now we need to get the counts from the events. The following code iterates
+through the events list and reads the counts:
[source,c]
--
@@ -178,7 +180,7 @@ Now we need to get the counts from events, following code iterates throught the
85 }
--
-And finaly cleanup.
+And finally cleanup.
We close the whole events list (both events) and remove it together with the threads map:
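Condensed into one function, the counting flow described here reads roughly as below. This is a sketch rather than the shipped counting.c: it assumes libperf is installed with its <perf/...> headers and uses the software task-clock event so it can run unprivileged.

#include <linux/perf_event.h>
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <perf/threadmap.h>
#include <stdio.h>

static int count_task_clock(void)
{
	struct perf_event_attr attr = {
		.type     = PERF_TYPE_SOFTWARE,
		.config   = PERF_COUNT_SW_TASK_CLOCK,
		.disabled = 1,
	};
	struct perf_thread_map *threads;
	struct perf_evlist *evlist;
	struct perf_evsel *evsel;
	struct perf_counts_values counts;
	int err;

	threads = perf_thread_map__new_dummy();
	perf_thread_map__set_pid(threads, 0, 0);	/* 0 == current process */

	evlist = perf_evlist__new();
	evsel = perf_evsel__new(&attr);
	perf_evlist__add(evlist, evsel);
	perf_evlist__set_maps(evlist, NULL, threads);

	err = perf_evlist__open(evlist);
	if (err)
		goto out;

	perf_evlist__enable(evlist);
	/* ... the measured workload runs here ... */
	perf_evlist__disable(evlist);

	if (!perf_evsel__read(evsel, 0, 0, &counts))
		printf("count %llu, enabled %llu, run %llu\n",
		       (unsigned long long)counts.val,
		       (unsigned long long)counts.ena,
		       (unsigned long long)counts.run);

	perf_evlist__close(evlist);
out:
	perf_evlist__delete(evlist);
	perf_thread_map__put(threads);
	return err;
}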
diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt
index d71a7b4fcf5f..d6ca24f6ef78 100644
--- a/tools/lib/perf/Documentation/libperf-sampling.txt
+++ b/tools/lib/perf/Documentation/libperf-sampling.txt
@@ -8,13 +8,13 @@ libperf-sampling - sampling interface
DESCRIPTION
-----------
-The sampling interface provides API to meassure and get count for specific perf events.
+The sampling interface provides an API to measure and get counts for specific perf events.
The following test tries to explain count on `sampling.c` example.
It is by no means complete guide to sampling, but shows libperf basic API for sampling.
-The `sampling.c` comes with libbperf package and can be compiled and run like:
+The `sampling.c` comes with libperf package and can be compiled and run like:
[source,bash]
--
@@ -33,7 +33,8 @@ cpu 0, pid 4465, tid 4470, ip 7f84fe0ebebf, period 176
It requires root access, because it uses hardware cycles event.
-The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a nutshel it:
+The `sampling.c` example profiles/samples all CPUs with hardware cycles; in a
+nutshell it:
- creates events
- adds them to the event list
@@ -90,7 +91,7 @@ Once the setup is complete we start by defining cycles event using the `struct p
36 };
--
-Next step is to prepare cpus map.
+Next step is to prepare CPUs map.
In this case we will monitor all the available CPUs:
@@ -152,7 +153,7 @@ Once the events list is open, we can create memory maps AKA perf ring buffers:
--
The event is created as disabled (note the `disabled = 1` assignment above),
-so we need to enable the events list explicitely.
+so we need to enable the events list explicitly.
From this moment the cycles event is sampling.
@@ -212,7 +213,7 @@ Each sample needs to get parsed:
106 cpu, pid, tid, ip, period);
--
-And finaly cleanup.
+And finally cleanup.
We close the whole events list (both events) and remove it together with the threads map:
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index 5a6bb512789d..0c74c30ed23a 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -29,7 +29,7 @@ SYNOPSIS
void libperf_init(libperf_print_fn_t fn);
--
-*API to handle cpu maps:*
+*API to handle CPU maps:*
[source,c]
--
@@ -217,7 +217,7 @@ Following objects are key to the libperf interface:
[horizontal]
-struct perf_cpu_map:: Provides a cpu list abstraction.
+struct perf_cpu_map:: Provides a CPU list abstraction.
struct perf_thread_map:: Provides a thread list abstraction.
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index f93f4e703e4c..ca0215047c32 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -247,7 +247,7 @@ out:
int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
{
- if (idx < cpus->nr)
+ if (cpus && idx < cpus->nr)
return cpus->map[idx];
return -1;
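With the added NULL check, perf_cpu_map__cpu() degrades to the -1 "any CPU" value instead of dereferencing a missing map. A small sketch of the resulting behaviour (illustrative only):

#include <perf/cpumap.h>
#include <stdio.h>

int main(void)
{
	/* NULL cpu_list means "all online CPUs" */
	struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
	int idx;

	for (idx = 0; idx < perf_cpu_map__nr(cpus); idx++)
		printf("cpu[%d] = %d\n", idx, perf_cpu_map__cpu(cpus, idx));

	/* With the NULL guard above, this is a harmless -1 rather than a crash */
	printf("no map: %d\n", perf_cpu_map__cpu(NULL, 0));

	perf_cpu_map__put(cpus);
	return 0;
}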
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 5b9f2ca50591..cfcdbd7be066 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -11,10 +11,8 @@
#include <internal/mmap.h>
#include <internal/cpumap.h>
#include <internal/threadmap.h>
-#include <internal/xyarray.h>
#include <internal/lib.h>
#include <linux/zalloc.h>
-#include <sys/ioctl.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
@@ -47,6 +45,9 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
if (!evsel->own_cpus || evlist->has_user_cpus) {
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evlist->cpus);
+ } else if (!evsel->system_wide && perf_cpu_map__empty(evlist->cpus)) {
+ perf_cpu_map__put(evsel->cpus);
+ evsel->cpus = perf_cpu_map__get(evlist->cpus);
} else if (evsel->cpus != evsel->own_cpus) {
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
@@ -125,8 +126,10 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
void perf_evlist__exit(struct perf_evlist *evlist)
{
perf_cpu_map__put(evlist->cpus);
+ perf_cpu_map__put(evlist->all_cpus);
perf_thread_map__put(evlist->threads);
evlist->cpus = NULL;
+ evlist->all_cpus = NULL;
evlist->threads = NULL;
fdarray__exit(&evlist->pollfd);
}
@@ -305,9 +308,9 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
}
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
- void *ptr, short revent)
+ void *ptr, short revent, enum fdarray_flags flags)
{
- int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
+ int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP, flags);
if (pos >= 0) {
evlist->pollfd.priv[pos].ptr = ptr;
@@ -488,7 +491,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
revent = !overwrite ? POLLIN : 0;
if (!evsel->system_wide &&
- perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) {
+ perf_evlist__add_pollfd(evlist, fd, map, revent, fdarray_flag__default) < 0) {
perf_mmap__put(map);
return -1;
}
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
index 74dc8c3f0b66..2d0fa02b036f 100644
--- a/tools/lib/perf/include/internal/evlist.h
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -45,7 +45,7 @@ struct perf_evlist_mmap_ops {
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
- void *ptr, short revent);
+ void *ptr, short revent, enum fdarray_flags flags);
int perf_evlist__mmap_ops(struct perf_evlist *evlist,
struct perf_evlist_mmap_ops *ops,
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 69b44d2cc0f5..988c539bedb6 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -111,6 +111,14 @@ struct perf_record_cgroup {
char path[PATH_MAX];
};
+struct perf_record_text_poke_event {
+ struct perf_event_header header;
+ __u64 addr;
+ __u16 old_len;
+ __u16 new_len;
+ __u8 bytes[];
+};
+
struct perf_record_sample {
struct perf_event_header header;
__u64 array[];
@@ -193,10 +201,20 @@ struct perf_record_header_tracing_data {
__u32 size;
};
+#define PERF_RECORD_MISC_BUILD_ID_SIZE (1 << 15)
+
struct perf_record_header_build_id {
struct perf_event_header header;
pid_t pid;
- __u8 build_id[24];
+ union {
+ __u8 build_id[24];
+ struct {
+ __u8 data[20];
+ __u8 size;
+ __u8 reserved1__;
+ __u16 reserved2__;
+ };
+ };
char filename[];
};
@@ -316,6 +334,10 @@ struct perf_record_time_conv {
__u64 time_shift;
__u64 time_mult;
__u64 time_zero;
+ __u64 time_cycles;
+ __u64 time_mask;
+ bool cap_user_time_zero;
+ bool cap_user_time_short;
};
struct perf_record_header_feature {
@@ -367,6 +389,7 @@ union perf_event {
struct perf_record_sample sample;
struct perf_record_bpf_event bpf;
struct perf_record_ksymbol ksymbol;
+ struct perf_record_text_poke_event text_poke;
struct perf_record_header_attr attr;
struct perf_record_event_update event_update;
struct perf_record_header_event_type event_type;
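The widened perf_record_header_build_id layout is meant to be decoded based on the new misc bit: when PERF_RECORD_MISC_BUILD_ID_SIZE is set, the producer recorded the exact build-id length in the new size field; otherwise readers fall back to the legacy fixed-size field. A hedged sketch of that decoding (the helper name is made up, and the 20-byte legacy length reflects the traditional SHA-1 build-id):

#include <stddef.h>
#include <perf/event.h>

/* Return how many bytes of ev->build_id are meaningful. */
static size_t build_id_len(const struct perf_record_header_build_id *ev)
{
	if (ev->header.misc & PERF_RECORD_MISC_BUILD_ID_SIZE)
		return ev->size;	/* producer recorded the exact length */
	return 20;			/* legacy records: fixed-size SHA-1 */
}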
diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c
index 06ac7bd2144b..727396de6be5 100644
--- a/tools/lib/rbtree.c
+++ b/tools/lib/rbtree.c
@@ -13,7 +13,7 @@
#include <linux/export.h>
/*
- * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree
+ * red-black trees properties: https://en.wikipedia.org/wiki/Rbtree
*
* 1) A node is either red or black
* 2) The root is black
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index 2859f107abc8..bf02d62a3b2b 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -65,12 +65,14 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
ci = cj = ei = 0;
while (ci < cmds->cnt && ei < excludes->cnt) {
cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
- if (cmp < 0)
+ if (cmp < 0) {
cmds->names[cj++] = cmds->names[ci++];
- else if (cmp == 0)
- ci++, ei++;
- else if (cmp > 0)
+ } else if (cmp == 0) {
+ ci++;
ei++;
+ } else if (cmp > 0) {
+ ei++;
+ }
}
while (ci < cmds->cnt)
diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
index dbb9efbf718a..39ebf6192016 100644
--- a/tools/lib/subcmd/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -237,6 +237,9 @@ static int get_value(struct parse_opt_ctx_t *p,
return err;
case OPTION_CALLBACK:
+ if (opt->set)
+ *(bool *)opt->set = true;
+
if (unset)
return (*opt->callback)(opt, NULL, 1) ? (-1) : 0;
if (opt->flags & PARSE_OPT_NOARG)
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index af9def589863..d2414144eb8c 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -151,6 +151,8 @@ struct option {
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }
#define OPT_CALLBACK(s, l, v, a, h, f) \
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f) }
+#define OPT_CALLBACK_SET(s, l, v, os, a, h, f) \
+ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .set = check_vtype(os, bool *)}
#define OPT_CALLBACK_NOOPT(s, l, v, a, h, f) \
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \
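OPT_CALLBACK_SET() behaves like OPT_CALLBACK() but additionally flips a caller-supplied bool whenever the option appears on the command line (see the opt->set handling added to get_value() above). A hypothetical option-table entry; the variable names and callback below are made up:

#include <stdbool.h>
#include <subcmd/parse-options.h>

static bool clockid_set;
static const char *clockid;

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	clockid = str;
	return 0;
}

static const struct option options[] = {
	/* &clockid_set is set to true by get_value() whenever the option
	 * shows up on the command line, even if the callback leaves the
	 * value at its default.
	 */
	OPT_CALLBACK_SET('k', "clockid", &clockid, &clockid_set, "clockid",
			 "clockid to use", parse_clockid),
	OPT_END()
};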
diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c
index 1a7a9f877095..e335ac2b9e19 100644
--- a/tools/lib/symbol/kallsyms.c
+++ b/tools/lib/symbol/kallsyms.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include "symbol/kallsyms.h"
+#include "api/io.h"
#include <stdio.h>
-#include <stdlib.h>
+#include <sys/stat.h>
+#include <fcntl.h>
u8 kallsyms2elf_type(char type)
{
@@ -15,74 +17,62 @@ bool kallsyms__is_function(char symbol_type)
return symbol_type == 'T' || symbol_type == 'W';
}
-/*
- * While we find nice hex chars, build a long_val.
- * Return number of chars processed.
- */
-int hex2u64(const char *ptr, u64 *long_val)
+static void read_to_eol(struct io *io)
{
- char *p;
+ int ch;
- *long_val = strtoull(ptr, &p, 16);
-
- return p - ptr;
+ for (;;) {
+ ch = io__get_char(io);
+ if (ch < 0 || ch == '\n')
+ return;
+ }
}
int kallsyms__parse(const char *filename, void *arg,
int (*process_symbol)(void *arg, const char *name,
char type, u64 start))
{
- char *line = NULL;
- size_t n;
- int err = -1;
- FILE *file = fopen(filename, "r");
-
- if (file == NULL)
- goto out_failure;
-
- err = 0;
+ struct io io;
+ char bf[BUFSIZ];
+ int err;
- while (!feof(file)) {
- u64 start;
- int line_len, len;
- char symbol_type;
- char *symbol_name;
+ io.fd = open(filename, O_RDONLY, 0);
- line_len = getline(&line, &n, file);
- if (line_len < 0 || !line)
- break;
+ if (io.fd < 0)
+ return -1;
- line[--line_len] = '\0'; /* \n */
+ io__init(&io, io.fd, bf, sizeof(bf));
- len = hex2u64(line, &start);
+ err = 0;
+ while (!io.eof) {
+ __u64 start;
+ int ch;
+ size_t i;
+ char symbol_type;
+ char symbol_name[KSYM_NAME_LEN + 1];
- /* Skip the line if we failed to parse the address. */
- if (!len)
+ if (io__get_hex(&io, &start) != ' ') {
+ read_to_eol(&io);
continue;
-
- len++;
- if (len + 2 >= line_len)
+ }
+ symbol_type = io__get_char(&io);
+ if (io__get_char(&io) != ' ') {
+ read_to_eol(&io);
continue;
-
- symbol_type = line[len];
- len += 2;
- symbol_name = line + len;
- len = line_len - len;
-
- if (len >= KSYM_NAME_LEN) {
- err = -1;
- break;
}
+		for (i = 0; i < sizeof(symbol_name) - 1; i++) {
+ ch = io__get_char(&io);
+ if (ch < 0 || ch == '\n')
+ break;
+ symbol_name[i] = ch;
+ }
+ symbol_name[i] = '\0';
err = process_symbol(arg, symbol_name, symbol_type, start);
if (err)
break;
}
- free(line);
- fclose(file);
+ close(io.fd);
return err;
-
-out_failure:
- return -1;
}
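The rewritten kallsyms__parse() still invokes the caller's process_symbol() hook once per well-formed line, now via the buffered io helpers instead of getline(). A caller sketch (not from the patch):

#include <stdio.h>
#include "symbol/kallsyms.h"

/* Called once per parsed line; returning non-zero stops the walk. */
static int print_function(void *arg, const char *name, char type, u64 start)
{
	if (kallsyms__is_function(type))
		printf("%016llx %c %s\n", (unsigned long long)start, type, name);
	return 0;
}

int dump_kernel_functions(void)
{
	return kallsyms__parse("/proc/kallsyms", NULL, print_function);
}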
diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h
index bd988f7b18d4..72ab9870454b 100644
--- a/tools/lib/symbol/kallsyms.h
+++ b/tools/lib/symbol/kallsyms.h
@@ -18,8 +18,6 @@ static inline u8 kallsyms2elf_binding(char type)
return isupper(type) ? STB_GLOBAL : STB_LOCAL;
}
-int hex2u64(const char *ptr, u64 *long_val);
-
u8 kallsyms2elf_type(char type);
bool kallsyms__is_function(char symbol_type);
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
index 596032ade31f..4d6394397d92 100644
--- a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
+++ b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
@@ -3,7 +3,7 @@ libtraceevent(3)
NAME
----
-tep_load_plugins, tep_unload_plugins - Load / unload traceevent plugins.
+tep_load_plugins, tep_unload_plugins, tep_load_plugins_hook - Load / unload traceevent plugins.
SYNOPSIS
--------
@@ -13,6 +13,12 @@ SYNOPSIS
struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_);
void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_);
+void *tep_load_plugins_hook*(struct tep_handle pass:[*]_tep_, const char pass:[*]_suffix_,
+ void (pass:[*]_load_plugin_)(struct tep_handle pass:[*]tep,
+ const char pass:[*]path,
+ const char pass:[*]name,
+ void pass:[*]data),
+ void pass:[*]_data_);
--
DESCRIPTION
@@ -22,11 +28,13 @@ directories. The _tep_ argument is trace event parser context.
The plugin directories are :
[verse]
--
+ - Directories specified in _tep_->plugins_dir with priority TEP_PLUGIN_FIRST
- System's plugin directory, defined at the library compile time. It
depends on the library installation prefix and usually is
_(install_preffix)/lib/traceevent/plugins_
- Directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_
- User's plugin directory, located at _~/.local/lib/traceevent/plugins_
+ - Directories specified in _tep_->plugins_dir with priority TEP_PLUGIN_LAST
--
Loading of plugins can be controlled by the _tep_flags_, using the
_tep_set_flag()_ API:
@@ -44,6 +52,12 @@ _tep_load_plugins()_. The _tep_ argument is trace event parser context. The
_plugin_list_ is the list of loaded plugins, returned by
the _tep_load_plugins()_ function.
+The _tep_load_plugins_hook_ function walks through all plugin directories
+and calls the user specified _load_plugin()_ hook for each plugin file. Only
+files with the given _suffix_ are considered to be plugins. The _data_ is a
+user specified context, passed to _load_plugin()_. The directories and the
+walk order are the same as in the _tep_load_plugins()_ API.
+
RETURN VALUE
------------
The _tep_load_plugins()_ function returns a list of successfully loaded plugins,
@@ -63,6 +77,15 @@ if (plugins == NULL) {
}
...
tep_unload_plugins(plugins, tep);
+...
+void print_plugin(struct tep_handle *tep, const char *path,
+ const char *name, void *data)
+{
+	printf("Found libtraceevent plugin %s/%s\n", path, name);
+}
+...
+tep_load_plugins_hook(tep, ".so", print_plugin, NULL);
+...
--
FILES
diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c
index 4faf52a65791..f8361e45d446 100644
--- a/tools/lib/traceevent/event-parse-api.c
+++ b/tools/lib/traceevent/event-parse-api.c
@@ -92,7 +92,7 @@ bool tep_test_flag(struct tep_handle *tep, enum tep_flag flag)
return false;
}
-unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data)
+__hidden unsigned short data2host2(struct tep_handle *tep, unsigned short data)
{
unsigned short swap;
@@ -105,7 +105,7 @@ unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data)
return swap;
}
-unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data)
+__hidden unsigned int data2host4(struct tep_handle *tep, unsigned int data)
{
unsigned int swap;
@@ -120,8 +120,8 @@ unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data)
return swap;
}
-unsigned long long
-tep_data2host8(struct tep_handle *tep, unsigned long long data)
+__hidden unsigned long long
+data2host8(struct tep_handle *tep, unsigned long long data)
{
unsigned long long swap;
diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h
index cee469803a34..fd4bbcfbb849 100644
--- a/tools/lib/traceevent/event-parse-local.h
+++ b/tools/lib/traceevent/event-parse-local.h
@@ -13,6 +13,9 @@ struct func_map;
struct func_list;
struct event_handler;
struct func_resolver;
+struct tep_plugins_dir;
+
+#define __hidden __attribute__((visibility ("hidden")))
struct tep_handle {
int ref_count;
@@ -47,7 +50,6 @@ struct tep_handle {
struct printk_list *printklist;
unsigned int printk_count;
-
struct tep_event **events;
int nr_events;
struct tep_event **sort_events;
@@ -81,13 +83,41 @@ struct tep_handle {
/* cache */
struct tep_event *last_event;
+
+ struct tep_plugins_dir *plugins_dir;
};
-void tep_free_event(struct tep_event *event);
-void tep_free_format_field(struct tep_format_field *field);
+enum tep_print_parse_type {
+ PRINT_FMT_STRING,
+ PRINT_FMT_ARG_DIGIT,
+ PRINT_FMT_ARG_POINTER,
+ PRINT_FMT_ARG_STRING,
+};
+
+struct tep_print_parse {
+ struct tep_print_parse *next;
+
+ char *format;
+ int ls;
+ enum tep_print_parse_type type;
+ struct tep_print_arg *arg;
+ struct tep_print_arg *len_as_arg;
+};
-unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data);
-unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data);
-unsigned long long tep_data2host8(struct tep_handle *tep, unsigned long long data);
+void free_tep_event(struct tep_event *event);
+void free_tep_format_field(struct tep_format_field *field);
+void free_tep_plugin_paths(struct tep_handle *tep);
+
+unsigned short data2host2(struct tep_handle *tep, unsigned short data);
+unsigned int data2host4(struct tep_handle *tep, unsigned int data);
+unsigned long long data2host8(struct tep_handle *tep, unsigned long long data);
+
+/* access to the internal parser */
+int peek_char(void);
+void init_input_buf(const char *buf, unsigned long long size);
+unsigned long long get_input_buf_ptr(void);
+const char *get_input_buf(void);
+enum tep_event_type read_token(char **tok);
+void free_token(char *tok);
#endif /* _PARSE_EVENTS_INT_H */
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index e1bd2a93c6db..fe58843d047c 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -54,19 +54,26 @@ static int show_warning = 1;
warning(fmt, ##__VA_ARGS__); \
} while (0)
-static void init_input_buf(const char *buf, unsigned long long size)
+/**
+ * init_input_buf - init buffer for parsing
+ * @buf: buffer to parse
+ * @size: the size of the buffer
+ *
+ * Initializes the internal buffer that read_token() will parse.
+ */
+__hidden void init_input_buf(const char *buf, unsigned long long size)
{
input_buf = buf;
input_buf_siz = size;
input_buf_ptr = 0;
}
-const char *tep_get_input_buf(void)
+__hidden const char *get_input_buf(void)
{
return input_buf;
}
-unsigned long long tep_get_input_buf_ptr(void)
+__hidden unsigned long long get_input_buf_ptr(void)
{
return input_buf_ptr;
}
@@ -100,26 +107,13 @@ process_defined_func(struct trace_seq *s, void *data, int size,
static void free_func_handle(struct tep_function_handler *func);
-/**
- * tep_buffer_init - init buffer for parsing
- * @buf: buffer to parse
- * @size: the size of the buffer
- *
- * For use with tep_read_token(), this initializes the internal
- * buffer that tep_read_token() will parse.
- */
-void tep_buffer_init(const char *buf, unsigned long long size)
-{
- init_input_buf(buf, size);
-}
-
void breakpoint(void)
{
static int x;
x++;
}
-struct tep_print_arg *alloc_arg(void)
+static struct tep_print_arg *alloc_arg(void)
{
return calloc(1, sizeof(struct tep_print_arg));
}
@@ -962,22 +956,17 @@ static int __read_char(void)
return input_buf[input_buf_ptr++];
}
-static int __peek_char(void)
-{
- if (input_buf_ptr >= input_buf_siz)
- return -1;
-
- return input_buf[input_buf_ptr];
-}
-
/**
- * tep_peek_char - peek at the next character that will be read
+ * peek_char - peek at the next character that will be read
*
* Returns the next character read, or -1 if end of buffer.
*/
-int tep_peek_char(void)
+__hidden int peek_char(void)
{
- return __peek_char();
+ if (input_buf_ptr >= input_buf_siz)
+ return -1;
+
+ return input_buf[input_buf_ptr];
}
static int extend_token(char **tok, char *buf, int size)
@@ -1033,7 +1022,7 @@ static enum tep_event_type __read_token(char **tok)
case TEP_EVENT_OP:
switch (ch) {
case '-':
- next_ch = __peek_char();
+ next_ch = peek_char();
if (next_ch == '>') {
buf[i++] = __read_char();
break;
@@ -1045,7 +1034,7 @@ static enum tep_event_type __read_token(char **tok)
case '>':
case '<':
last_ch = ch;
- ch = __peek_char();
+ ch = peek_char();
if (ch != last_ch)
goto test_equal;
buf[i++] = __read_char();
@@ -1068,7 +1057,7 @@ static enum tep_event_type __read_token(char **tok)
return type;
test_equal:
- ch = __peek_char();
+ ch = peek_char();
if (ch == '=')
buf[i++] = __read_char();
goto out;
@@ -1122,7 +1111,7 @@ static enum tep_event_type __read_token(char **tok)
break;
}
- while (get_type(__peek_char()) == type) {
+ while (get_type(peek_char()) == type) {
if (i == (BUFSIZ - 1)) {
buf[i] = 0;
tok_size += BUFSIZ;
@@ -1191,13 +1180,26 @@ static enum tep_event_type force_token(const char *str, char **tok)
return type;
}
-static void free_token(char *tok)
+/**
+ * free_token - free a token returned by read_token
+ * @tok: the token to free
+ */
+__hidden void free_token(char *tok)
{
if (tok)
free(tok);
}
-static enum tep_event_type read_token(char **tok)
+/**
+ * read_token - access to utilities to use the tep parser
+ * @tok: The token to return
+ *
+ * This will parse tokens from the string given by
+ * tep_init_data().
+ *
+ * Returns the token type.
+ */
+__hidden enum tep_event_type read_token(char **tok)
{
enum tep_event_type type;
@@ -1214,29 +1216,6 @@ static enum tep_event_type read_token(char **tok)
return TEP_EVENT_NONE;
}
-/**
- * tep_read_token - access to utilities to use the tep parser
- * @tok: The token to return
- *
- * This will parse tokens from the string given by
- * tep_init_data().
- *
- * Returns the token type.
- */
-enum tep_event_type tep_read_token(char **tok)
-{
- return read_token(tok);
-}
-
-/**
- * tep_free_token - free a token returned by tep_read_token
- * @token: the token to free
- */
-void tep_free_token(char *token)
-{
- free_token(token);
-}
-
/* no newline */
static enum tep_event_type read_token_item(char **tok)
{
@@ -1425,13 +1404,28 @@ static unsigned int type_size(const char *name)
return 0;
}
+static int append(char **buf, const char *delim, const char *str)
+{
+ char *new_buf;
+
+ new_buf = realloc(*buf, strlen(*buf) + strlen(delim) + strlen(str) + 1);
+ if (!new_buf)
+ return -1;
+ strcat(new_buf, delim);
+ strcat(new_buf, str);
+ *buf = new_buf;
+ return 0;
+}
+
static int event_read_fields(struct tep_event *event, struct tep_format_field **fields)
{
struct tep_format_field *field = NULL;
enum tep_event_type type;
char *token;
char *last_token;
+ char *delim = " ";
int count = 0;
+ int ret;
do {
unsigned int size_dynamic = 0;
@@ -1490,24 +1484,51 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
field->flags |= TEP_FIELD_IS_POINTER;
if (field->type) {
- char *new_type;
- new_type = realloc(field->type,
- strlen(field->type) +
- strlen(last_token) + 2);
- if (!new_type) {
- free(last_token);
- goto fail;
- }
- field->type = new_type;
- strcat(field->type, " ");
- strcat(field->type, last_token);
+ ret = append(&field->type, delim, last_token);
free(last_token);
+ if (ret < 0)
+ goto fail;
} else
field->type = last_token;
last_token = token;
+ delim = " ";
continue;
}
+ /* Handle __attribute__((user)) */
+ if ((type == TEP_EVENT_DELIM) &&
+ strcmp("__attribute__", last_token) == 0 &&
+ token[0] == '(') {
+ int depth = 1;
+ int ret;
+
+ ret = append(&field->type, " ", last_token);
+ ret |= append(&field->type, "", "(");
+ if (ret < 0)
+ goto fail;
+
+ delim = " ";
+ while ((type = read_token(&token)) != TEP_EVENT_NONE) {
+ if (type == TEP_EVENT_DELIM) {
+ if (token[0] == '(')
+ depth++;
+ else if (token[0] == ')')
+ depth--;
+ if (!depth)
+ break;
+ ret = append(&field->type, "", token);
+ delim = "";
+ } else {
+ ret = append(&field->type, delim, token);
+ delim = " ";
+ }
+ if (ret < 0)
+ goto fail;
+ free(last_token);
+ last_token = token;
+ }
+ continue;
+ }
break;
}
@@ -1523,8 +1544,6 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
if (strcmp(token, "[") == 0) {
enum tep_event_type last_type = type;
char *brackets = token;
- char *new_brackets;
- int len;
field->flags |= TEP_FIELD_IS_ARRAY;
@@ -1536,29 +1555,27 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
field->arraylen = 0;
while (strcmp(token, "]") != 0) {
+ const char *delim;
+
if (last_type == TEP_EVENT_ITEM &&
type == TEP_EVENT_ITEM)
- len = 2;
+ delim = " ";
else
- len = 1;
+ delim = "";
+
last_type = type;
- new_brackets = realloc(brackets,
- strlen(brackets) +
- strlen(token) + len);
- if (!new_brackets) {
+ ret = append(&brackets, delim, token);
+ if (ret < 0) {
free(brackets);
goto fail;
}
- brackets = new_brackets;
- if (len == 2)
- strcat(brackets, " ");
- strcat(brackets, token);
/* We only care about the last token */
field->arraylen = strtoul(token, NULL, 0);
free_token(token);
type = read_token(&token);
if (type == TEP_EVENT_NONE) {
+ free(brackets);
do_warning_event(event, "failed to find token");
goto fail;
}
@@ -1566,13 +1583,11 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
free_token(token);
- new_brackets = realloc(brackets, strlen(brackets) + 2);
- if (!new_brackets) {
+ ret = append(&brackets, "", "]");
+ if (ret < 0) {
free(brackets);
goto fail;
}
- brackets = new_brackets;
- strcat(brackets, "]");
/* add brackets to type */
@@ -1582,34 +1597,23 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
* the format: type [] item;
*/
if (type == TEP_EVENT_ITEM) {
- char *new_type;
- new_type = realloc(field->type,
- strlen(field->type) +
- strlen(field->name) +
- strlen(brackets) + 2);
- if (!new_type) {
+ ret = append(&field->type, " ", field->name);
+ if (ret < 0) {
free(brackets);
goto fail;
}
- field->type = new_type;
- strcat(field->type, " ");
- strcat(field->type, field->name);
+ ret = append(&field->type, "", brackets);
+
size_dynamic = type_size(field->name);
free_token(field->name);
- strcat(field->type, brackets);
field->name = field->alias = token;
type = read_token(&token);
} else {
- char *new_type;
- new_type = realloc(field->type,
- strlen(field->type) +
- strlen(brackets) + 1);
- if (!new_type) {
+ ret = append(&field->type, "", brackets);
+ if (ret < 0) {
free(brackets);
goto fail;
}
- field->type = new_type;
- strcat(field->type, brackets);
}
free(brackets);
}
@@ -2046,19 +2050,16 @@ process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok)
/* could just be a type pointer */
if ((strcmp(arg->op.op, "*") == 0) &&
type == TEP_EVENT_DELIM && (strcmp(token, ")") == 0)) {
- char *new_atom;
+ int ret;
if (left->type != TEP_PRINT_ATOM) {
do_warning_event(event, "bad pointer type");
goto out_free;
}
- new_atom = realloc(left->atom.atom,
- strlen(left->atom.atom) + 3);
- if (!new_atom)
+ ret = append(&left->atom.atom, " ", "*");
+ if (ret < 0)
goto out_warn_free;
- left->atom.atom = new_atom;
- strcat(left->atom.atom, " *");
free(arg->op.op);
*arg = *left;
free(left);
@@ -2839,6 +2840,7 @@ process_dynamic_array_len(struct tep_event *event, struct tep_print_arg *arg,
if (read_expected(TEP_EVENT_DELIM, ")") < 0)
goto out_err;
+ free_token(token);
type = read_token(&token);
*tok = token;
@@ -3063,6 +3065,37 @@ err:
}
static enum tep_event_type
+process_builtin_expect(struct tep_event *event, struct tep_print_arg *arg, char **tok)
+{
+ enum tep_event_type type;
+ char *token = NULL;
+
+ /* Handle __builtin_expect( cond, #) */
+ type = process_arg(event, arg, &token);
+
+ if (type != TEP_EVENT_DELIM || token[0] != ',')
+ goto out_free;
+
+ free_token(token);
+
+ /* We don't care what the second parameter of __builtin_expect() is */
+ if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
+ goto out_free;
+
+ if (read_expected(TEP_EVENT_DELIM, ")") < 0)
+ goto out_free;
+
+ free_token(token);
+ type = read_token_item(tok);
+ return type;
+
+out_free:
+ free_token(token);
+ *tok = NULL;
+ return TEP_EVENT_ERROR;
+}
+
+static enum tep_event_type
process_function(struct tep_event *event, struct tep_print_arg *arg,
char *token, char **tok)
{
@@ -3106,6 +3139,10 @@ process_function(struct tep_event *event, struct tep_print_arg *arg,
free_token(token);
return process_dynamic_array_len(event, arg, tok);
}
+ if (strcmp(token, "__builtin_expect") == 0) {
+ free_token(token);
+ return process_builtin_expect(event, arg, tok);
+ }
func = find_func_handler(event->tep, token);
if (func) {
@@ -3151,18 +3188,15 @@ process_arg_token(struct tep_event *event, struct tep_print_arg *arg,
}
/* atoms can be more than one token long */
while (type == TEP_EVENT_ITEM) {
- char *new_atom;
- new_atom = realloc(atom,
- strlen(atom) + strlen(token) + 2);
- if (!new_atom) {
+ int ret;
+
+ ret = append(&atom, " ", token);
+ if (ret < 0) {
free(atom);
*tok = NULL;
free_token(token);
return TEP_EVENT_ERROR;
}
- atom = new_atom;
- strcat(atom, " ");
- strcat(atom, token);
free_token(token);
type = read_token_item(&token);
}
@@ -3404,12 +3438,12 @@ unsigned long long tep_read_number(struct tep_handle *tep,
case 1:
return *(unsigned char *)ptr;
case 2:
- return tep_data2host2(tep, *(unsigned short *)ptr);
+ return data2host2(tep, *(unsigned short *)ptr);
case 4:
- return tep_data2host4(tep, *(unsigned int *)ptr);
+ return data2host4(tep, *(unsigned int *)ptr);
case 8:
memcpy(&val, (ptr), sizeof(unsigned long long));
- return tep_data2host8(tep, val);
+ return data2host8(tep, val);
default:
/* BUG! */
return 0;
@@ -4135,7 +4169,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
f = tep_find_any_field(event, arg->string.string);
arg->string.offset = f->offset;
}
- str_offset = tep_data2host4(tep, *(unsigned int *)(data + arg->string.offset));
+ str_offset = data2host4(tep, *(unsigned int *)(data + arg->string.offset));
str_offset &= 0xffff;
print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset);
break;
@@ -4153,7 +4187,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
f = tep_find_any_field(event, arg->bitmask.bitmask);
arg->bitmask.offset = f->offset;
}
- bitmask_offset = tep_data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset));
+ bitmask_offset = data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset));
bitmask_size = bitmask_offset >> 16;
bitmask_offset &= 0xffff;
print_bitmask_to_seq(tep, s, format, len_arg,
@@ -4510,43 +4544,93 @@ get_bprint_format(void *data, int size __maybe_unused,
return format;
}
-static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size,
- struct tep_event *event, struct tep_print_arg *arg)
+static int print_mac_arg(struct trace_seq *s, const char *format,
+ void *data, int size, struct tep_event *event,
+ struct tep_print_arg *arg)
{
- unsigned char *buf;
const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x";
+ bool reverse = false;
+ unsigned char *buf;
+ int ret = 0;
if (arg->type == TEP_PRINT_FUNC) {
process_defined_func(s, data, size, event, arg);
- return;
+ return 0;
}
if (arg->type != TEP_PRINT_FIELD) {
trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d",
arg->type);
- return;
+ return 0;
}
- if (mac == 'm')
+ if (format[0] == 'm') {
fmt = "%.2x%.2x%.2x%.2x%.2x%.2x";
+ } else if (format[0] == 'M' && format[1] == 'F') {
+ fmt = "%.2x-%.2x-%.2x-%.2x-%.2x-%.2x";
+ ret++;
+ }
+ if (format[1] == 'R') {
+ reverse = true;
+ ret++;
+ }
+
if (!arg->field.field) {
arg->field.field =
tep_find_any_field(event, arg->field.name);
if (!arg->field.field) {
do_warning_event(event, "%s: field %s not found",
__func__, arg->field.name);
- return;
+ return ret;
}
}
if (arg->field.field->size != 6) {
trace_seq_printf(s, "INVALIDMAC");
- return;
+ return ret;
}
+
buf = data + arg->field.field->offset;
- trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
+ if (reverse)
+ trace_seq_printf(s, fmt, buf[5], buf[4], buf[3], buf[2], buf[1], buf[0]);
+ else
+ trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
+
+ return ret;
}
-static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf)
+static int parse_ip4_print_args(struct tep_handle *tep,
+ const char *ptr, bool *reverse)
+{
+ int ret = 0;
+
+ *reverse = false;
+
+ /* hnbl */
+ switch (*ptr) {
+ case 'h':
+ if (tep->file_bigendian)
+ *reverse = false;
+ else
+ *reverse = true;
+ ret++;
+ break;
+ case 'l':
+ *reverse = true;
+ ret++;
+ break;
+ case 'n':
+ case 'b':
+ ret++;
+ /* fall through */
+ default:
+ *reverse = false;
+ break;
+ }
+
+ return ret;
+}
+
+static void print_ip4_addr(struct trace_seq *s, char i, bool reverse, unsigned char *buf)
{
const char *fmt;
@@ -4555,7 +4639,11 @@ static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf)
else
fmt = "%d.%d.%d.%d";
- trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]);
+ if (reverse)
+ trace_seq_printf(s, fmt, buf[3], buf[2], buf[1], buf[0]);
+ else
+ trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]);
+
}
static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
@@ -4638,7 +4726,7 @@ static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr)
if (useIPv4) {
if (needcolon)
trace_seq_printf(s, ":");
- print_ip4_addr(s, 'I', &in6.s6_addr[12]);
+ print_ip4_addr(s, 'I', false, &in6.s6_addr[12]);
}
return;
@@ -4667,16 +4755,20 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
void *data, int size, struct tep_event *event,
struct tep_print_arg *arg)
{
+ bool reverse = false;
unsigned char *buf;
+ int ret;
+
+ ret = parse_ip4_print_args(event->tep, ptr, &reverse);
if (arg->type == TEP_PRINT_FUNC) {
process_defined_func(s, data, size, event, arg);
- return 0;
+ return ret;
}
if (arg->type != TEP_PRINT_FIELD) {
trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
- return 0;
+ return ret;
}
if (!arg->field.field) {
@@ -4685,7 +4777,7 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
if (!arg->field.field) {
do_warning("%s: field %s not found",
__func__, arg->field.name);
- return 0;
+ return ret;
}
}
@@ -4693,11 +4785,12 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
if (arg->field.field->size != 4) {
trace_seq_printf(s, "INVALIDIPv4");
- return 0;
+ return ret;
}
- print_ip4_addr(s, i, buf);
- return 0;
+ print_ip4_addr(s, i, reverse, buf);
+ return ret;
+
}
static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
@@ -4757,7 +4850,9 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
char have_c = 0, have_p = 0;
unsigned char *buf;
struct sockaddr_storage *sa;
+ bool reverse = false;
int rc = 0;
+ int ret;
/* pISpc */
if (i == 'I') {
@@ -4772,6 +4867,9 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
rc++;
}
}
+ ret = parse_ip4_print_args(event->tep, ptr, &reverse);
+ ptr += ret;
+ rc += ret;
if (arg->type == TEP_PRINT_FUNC) {
process_defined_func(s, data, size, event, arg);
@@ -4803,7 +4901,7 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
return rc;
}
- print_ip4_addr(s, i, (unsigned char *) &sa4->sin_addr);
+ print_ip4_addr(s, i, reverse, (unsigned char *) &sa4->sin_addr);
if (have_p)
trace_seq_printf(s, ":%d", ntohs(sa4->sin_port));
@@ -4837,25 +4935,20 @@ static int print_ip_arg(struct trace_seq *s, const char *ptr,
struct tep_print_arg *arg)
{
char i = *ptr; /* 'i' or 'I' */
- char ver;
- int rc = 0;
-
- ptr++;
- rc++;
+ int rc = 1;
- ver = *ptr;
+ /* IP version */
ptr++;
- rc++;
- switch (ver) {
+ switch (*ptr) {
case '4':
- rc += print_ipv4_arg(s, ptr, i, data, size, event, arg);
+ rc += print_ipv4_arg(s, ptr + 1, i, data, size, event, arg);
break;
case '6':
- rc += print_ipv6_arg(s, ptr, i, data, size, event, arg);
+ rc += print_ipv6_arg(s, ptr + 1, i, data, size, event, arg);
break;
case 'S':
- rc += print_ipsa_arg(s, ptr, i, data, size, event, arg);
+ rc += print_ipsa_arg(s, ptr + 1, i, data, size, event, arg);
break;
default:
return 0;
@@ -4864,6 +4957,133 @@ static int print_ip_arg(struct trace_seq *s, const char *ptr,
return rc;
}
+static const int guid_index[16] = {3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15};
+static const int uuid_index[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+
+static int print_uuid_arg(struct trace_seq *s, const char *ptr,
+ void *data, int size, struct tep_event *event,
+ struct tep_print_arg *arg)
+{
+ const int *index = uuid_index;
+ char *format = "%02x";
+ int ret = 0;
+ char *buf;
+ int i;
+
+ switch (*(ptr + 1)) {
+ case 'L':
+ format = "%02X";
+ /* fall through */
+ case 'l':
+ index = guid_index;
+ ret++;
+ break;
+ case 'B':
+ format = "%02X";
+ /* fall through */
+ case 'b':
+ ret++;
+ break;
+ }
+
+ if (arg->type == TEP_PRINT_FUNC) {
+ process_defined_func(s, data, size, event, arg);
+ return ret;
+ }
+
+ if (arg->type != TEP_PRINT_FIELD) {
+ trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+ return ret;
+ }
+
+ if (!arg->field.field) {
+ arg->field.field =
+ tep_find_any_field(event, arg->field.name);
+ if (!arg->field.field) {
+ do_warning("%s: field %s not found",
+ __func__, arg->field.name);
+ return ret;
+ }
+ }
+
+ if (arg->field.field->size != 16) {
+ trace_seq_printf(s, "INVALIDUUID");
+ return ret;
+ }
+
+ buf = data + arg->field.field->offset;
+
+ for (i = 0; i < 16; i++) {
+ trace_seq_printf(s, format, buf[index[i]] & 0xff);
+ switch (i) {
+ case 3:
+ case 5:
+ case 7:
+ case 9:
+ trace_seq_printf(s, "-");
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static int print_raw_buff_arg(struct trace_seq *s, const char *ptr,
+ void *data, int size, struct tep_event *event,
+ struct tep_print_arg *arg, int print_len)
+{
+ int plen = print_len;
+ char *delim = " ";
+ int ret = 0;
+ char *buf;
+ int i;
+ unsigned long offset;
+ int arr_len;
+
+ switch (*(ptr + 1)) {
+ case 'C':
+ delim = ":";
+ ret++;
+ break;
+ case 'D':
+ delim = "-";
+ ret++;
+ break;
+ case 'N':
+ delim = "";
+ ret++;
+ break;
+ }
+
+ if (arg->type == TEP_PRINT_FUNC) {
+ process_defined_func(s, data, size, event, arg);
+ return ret;
+ }
+
+ if (arg->type != TEP_PRINT_DYNAMIC_ARRAY) {
+ trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+ return ret;
+ }
+
+ offset = tep_read_number(event->tep,
+ data + arg->dynarray.field->offset,
+ arg->dynarray.field->size);
+ arr_len = (unsigned long long)(offset >> 16);
+ buf = data + (offset & 0xffff);
+
+ if (arr_len < plen)
+ plen = arr_len;
+
+ if (plen < 1)
+ return ret;
+
+ trace_seq_printf(s, "%02x", buf[0] & 0xff);
+ for (i = 1; i < plen; i++)
+ trace_seq_printf(s, "%s%02x", delim, buf[i] & 0xff);
+
+ return ret;
+}
+
static int is_printable_array(char *p, unsigned int len)
{
unsigned int i;
@@ -4952,264 +5172,567 @@ void tep_print_fields(struct trace_seq *s, void *data,
}
}
-static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event)
+static int print_function(struct trace_seq *s, const char *format,
+ void *data, int size, struct tep_event *event,
+ struct tep_print_arg *arg)
{
- struct tep_handle *tep = event->tep;
- struct tep_print_fmt *print_fmt = &event->print_fmt;
- struct tep_print_arg *arg = print_fmt->args;
- struct tep_print_arg *args = NULL;
- const char *ptr = print_fmt->format;
- unsigned long long val;
struct func_map *func;
- const char *saveptr;
- struct trace_seq p;
- char *bprint_fmt = NULL;
- char format[32];
- int show_func;
- int len_as_arg;
- int len_arg = 0;
- int len;
- int ls;
+ unsigned long long val;
- if (event->flags & TEP_EVENT_FL_FAILED) {
- trace_seq_printf(s, "[FAILED TO PARSE]");
- tep_print_fields(s, data, size, event);
- return;
+ val = eval_num_arg(data, size, event, arg);
+ func = find_func(event->tep, val);
+ if (func) {
+ trace_seq_puts(s, func->func);
+ if (*format == 'F' || *format == 'S')
+ trace_seq_printf(s, "+0x%llx", val - func->addr);
+ } else {
+ if (event->tep->long_size == 4)
+ trace_seq_printf(s, "0x%lx", (long)val);
+ else
+ trace_seq_printf(s, "0x%llx", (long long)val);
}
- if (event->flags & TEP_EVENT_FL_ISBPRINT) {
- bprint_fmt = get_bprint_format(data, size, event);
- args = make_bprint_args(bprint_fmt, data, size, event);
- arg = args;
- ptr = bprint_fmt;
+ return 0;
+}
+
+static int print_arg_pointer(struct trace_seq *s, const char *format, int plen,
+ void *data, int size,
+ struct tep_event *event, struct tep_print_arg *arg)
+{
+ unsigned long long val;
+ int ret = 1;
+
+ if (arg->type == TEP_PRINT_BSTRING) {
+ trace_seq_puts(s, arg->string.string);
+ return 0;
+ }
+ while (*format) {
+ if (*format == 'p') {
+ format++;
+ break;
+ }
+ format++;
}
- for (; *ptr; ptr++) {
- ls = 0;
- if (*ptr == '\\') {
- ptr++;
- switch (*ptr) {
+ switch (*format) {
+ case 'F':
+ case 'f':
+ case 'S':
+ case 's':
+ ret += print_function(s, format, data, size, event, arg);
+ break;
+ case 'M':
+ case 'm':
+ ret += print_mac_arg(s, format, data, size, event, arg);
+ break;
+ case 'I':
+ case 'i':
+ ret += print_ip_arg(s, format, data, size, event, arg);
+ break;
+ case 'U':
+ ret += print_uuid_arg(s, format, data, size, event, arg);
+ break;
+ case 'h':
+ ret += print_raw_buff_arg(s, format, data, size, event, arg, plen);
+ break;
+ default:
+ ret = 0;
+ val = eval_num_arg(data, size, event, arg);
+ trace_seq_printf(s, "%p", (void *)(intptr_t)val);
+ break;
+ }
+
+ return ret;
+
+}
+
+static int print_arg_number(struct trace_seq *s, const char *format, int plen,
+ void *data, int size, int ls,
+ struct tep_event *event, struct tep_print_arg *arg)
+{
+ unsigned long long val;
+
+ val = eval_num_arg(data, size, event, arg);
+
+ switch (ls) {
+ case -2:
+ if (plen >= 0)
+ trace_seq_printf(s, format, plen, (char)val);
+ else
+ trace_seq_printf(s, format, (char)val);
+ break;
+ case -1:
+ if (plen >= 0)
+ trace_seq_printf(s, format, plen, (short)val);
+ else
+ trace_seq_printf(s, format, (short)val);
+ break;
+ case 0:
+ if (plen >= 0)
+ trace_seq_printf(s, format, plen, (int)val);
+ else
+ trace_seq_printf(s, format, (int)val);
+ break;
+ case 1:
+ if (plen >= 0)
+ trace_seq_printf(s, format, plen, (long)val);
+ else
+ trace_seq_printf(s, format, (long)val);
+ break;
+ case 2:
+ if (plen >= 0)
+ trace_seq_printf(s, format, plen, (long long)val);
+ else
+ trace_seq_printf(s, format, (long long)val);
+ break;
+ default:
+ do_warning_event(event, "bad count (%d)", ls);
+ event->flags |= TEP_EVENT_FL_FAILED;
+ }
+ return 0;
+}
+
+
+static void print_arg_string(struct trace_seq *s, const char *format, int plen,
+ void *data, int size,
+ struct tep_event *event, struct tep_print_arg *arg)
+{
+ struct trace_seq p;
+
+ /* Use helper trace_seq */
+ trace_seq_init(&p);
+ print_str_arg(&p, data, size, event,
+ format, plen, arg);
+ trace_seq_terminate(&p);
+ trace_seq_puts(s, p.buffer);
+ trace_seq_destroy(&p);
+}
+
+static int parse_arg_format_pointer(const char *format)
+{
+ int ret = 0;
+ int index;
+ int loop;
+
+ switch (*format) {
+ case 'F':
+ case 'S':
+ case 'f':
+ case 's':
+ ret++;
+ break;
+ case 'M':
+ case 'm':
+ /* [mM]R , [mM]F */
+ switch (format[1]) {
+ case 'R':
+ case 'F':
+ ret++;
+ break;
+ }
+ ret++;
+ break;
+ case 'I':
+ case 'i':
+ index = 2;
+ loop = 1;
+ switch (format[1]) {
+ case 'S':
+ /*[S][pfs]*/
+ while (loop) {
+ switch (format[index]) {
+ case 'p':
+ case 'f':
+ case 's':
+ ret++;
+ index++;
+ break;
+ default:
+ loop = 0;
+ break;
+ }
+ }
+ /* fall through */
+ case '4':
+ /* [4S][hnbl] */
+ switch (format[index]) {
+ case 'h':
case 'n':
- trace_seq_putc(s, '\n');
- break;
- case 't':
- trace_seq_putc(s, '\t');
- break;
- case 'r':
- trace_seq_putc(s, '\r');
- break;
- case '\\':
- trace_seq_putc(s, '\\');
+ case 'l':
+ case 'b':
+ ret++;
+ index++;
break;
- default:
- trace_seq_putc(s, *ptr);
+ }
+ if (format[1] == '4') {
+ ret++;
break;
}
+ /* fall through */
+ case '6':
+ /* [6S]c */
+ if (format[index] == 'c')
+ ret++;
+ ret++;
+ break;
+ }
+ ret++;
+ break;
+ case 'U':
+ switch (format[1]) {
+ case 'L':
+ case 'l':
+ case 'B':
+ case 'b':
+ ret++;
+ break;
+ }
+ ret++;
+ break;
+ case 'h':
+ switch (format[1]) {
+ case 'C':
+ case 'D':
+ case 'N':
+ ret++;
+ break;
+ }
+ ret++;
+ break;
+ default:
+ break;
+ }
- } else if (*ptr == '%') {
- saveptr = ptr;
- show_func = 0;
- len_as_arg = 0;
- cont_process:
- ptr++;
- switch (*ptr) {
- case '%':
- trace_seq_putc(s, '%');
- break;
- case '#':
- /* FIXME: need to handle properly */
- goto cont_process;
- case 'h':
- ls--;
- goto cont_process;
- case 'l':
- ls++;
- goto cont_process;
- case 'L':
- ls = 2;
- goto cont_process;
- case '*':
- /* The argument is the length. */
- if (!arg) {
- do_warning_event(event, "no argument match");
- event->flags |= TEP_EVENT_FL_FAILED;
- goto out_failed;
- }
- len_arg = eval_num_arg(data, size, event, arg);
- len_as_arg = 1;
- arg = arg->next;
- goto cont_process;
- case '.':
- case 'z':
- case 'Z':
- case '0' ... '9':
- case '-':
- goto cont_process;
- case 'p':
- if (tep->long_size == 4)
- ls = 1;
- else
- ls = 2;
+ return ret;
+}
- if (isalnum(ptr[1]))
- ptr++;
+static void free_parse_args(struct tep_print_parse *arg)
+{
+ struct tep_print_parse *del;
- if (arg->type == TEP_PRINT_BSTRING) {
- trace_seq_puts(s, arg->string.string);
- arg = arg->next;
- break;
- }
+ while (arg) {
+ del = arg;
+ arg = del->next;
+ free(del->format);
+ free(del);
+ }
+}
- if (*ptr == 'F' || *ptr == 'f' ||
- *ptr == 'S' || *ptr == 's') {
- show_func = *ptr;
- } else if (*ptr == 'M' || *ptr == 'm') {
- print_mac_arg(s, *ptr, data, size, event, arg);
- arg = arg->next;
- break;
- } else if (*ptr == 'I' || *ptr == 'i') {
- int n;
+static int parse_arg_add(struct tep_print_parse **parse, char *format,
+ enum tep_print_parse_type type,
+ struct tep_print_arg *arg,
+ struct tep_print_arg *len_as_arg,
+ int ls)
+{
+ struct tep_print_parse *parg = NULL;
- n = print_ip_arg(s, ptr, data, size, event, arg);
- if (n > 0) {
- ptr += n - 1;
- arg = arg->next;
- break;
- }
- }
+ parg = calloc(1, sizeof(*parg));
+ if (!parg)
+ goto error;
+ parg->format = strdup(format);
+ if (!parg->format)
+ goto error;
+ parg->type = type;
+ parg->arg = arg;
+ parg->len_as_arg = len_as_arg;
+ parg->ls = ls;
+ *parse = parg;
+ return 0;
+error:
+ if (parg) {
+ free(parg->format);
+ free(parg);
+ }
+ return -1;
+}
- /* fall through */
- case 'd':
- case 'u':
- case 'i':
- case 'x':
- case 'X':
- case 'o':
- if (!arg) {
- do_warning_event(event, "no argument match");
- event->flags |= TEP_EVENT_FL_FAILED;
- goto out_failed;
- }
+static int parse_arg_format(struct tep_print_parse **parse,
+ struct tep_event *event,
+ const char *format, struct tep_print_arg **arg)
+{
+ struct tep_print_arg *len_arg = NULL;
+ char print_format[32];
+ const char *start = format;
+ int ret = 0;
+ int ls = 0;
+ int res;
+ int len;
- len = ((unsigned long)ptr + 1) -
- (unsigned long)saveptr;
+ format++;
+ ret++;
+ for (; *format; format++) {
+ switch (*format) {
+ case '#':
+ /* FIXME: need to handle properly */
+ break;
+ case 'h':
+ ls--;
+ break;
+ case 'l':
+ ls++;
+ break;
+ case 'L':
+ ls = 2;
+ break;
+ case '.':
+ case 'z':
+ case 'Z':
+ case '0' ... '9':
+ case '-':
+ break;
+ case '*':
+ /* The argument is the length. */
+ if (!*arg) {
+ do_warning_event(event, "no argument match");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ goto out_failed;
+ }
+ if (len_arg) {
+ do_warning_event(event, "argument already matched");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ goto out_failed;
+ }
+ len_arg = *arg;
+ *arg = (*arg)->next;
+ break;
+ case 'p':
+ if (!*arg) {
+ do_warning_event(event, "no argument match");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ goto out_failed;
+ }
+ res = parse_arg_format_pointer(format + 1);
+ if (res > 0) {
+ format += res;
+ ret += res;
+ }
+ len = ((unsigned long)format + 1) -
+ (unsigned long)start;
+ /* should never happen */
+ if (len > 31) {
+ do_warning_event(event, "bad format!");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ len = 31;
+ }
+ memcpy(print_format, start, len);
+ print_format[len] = 0;
- /* should never happen */
- if (len > 31) {
- do_warning_event(event, "bad format!");
- event->flags |= TEP_EVENT_FL_FAILED;
- len = 31;
- }
+ parse_arg_add(parse, print_format,
+ PRINT_FMT_ARG_POINTER, *arg, len_arg, ls);
+ *arg = (*arg)->next;
+ ret++;
+ return ret;
+ case 'd':
+ case 'u':
+ case 'i':
+ case 'x':
+ case 'X':
+ case 'o':
+ if (!*arg) {
+ do_warning_event(event, "no argument match");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ goto out_failed;
+ }
- memcpy(format, saveptr, len);
- format[len] = 0;
+ len = ((unsigned long)format + 1) -
+ (unsigned long)start;
- val = eval_num_arg(data, size, event, arg);
- arg = arg->next;
+ /* should never happen */
+ if (len > 30) {
+ do_warning_event(event, "bad format!");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ len = 31;
+ }
+ memcpy(print_format, start, len);
+ print_format[len] = 0;
- if (show_func) {
- func = find_func(tep, val);
- if (func) {
- trace_seq_puts(s, func->func);
- if (show_func == 'F')
- trace_seq_printf(s,
- "+0x%llx",
- val - func->addr);
- break;
- }
- }
- if (tep->long_size == 8 && ls == 1 &&
- sizeof(long) != 8) {
- char *p;
-
- /* make %l into %ll */
- if (ls == 1 && (p = strchr(format, 'l')))
- memmove(p+1, p, strlen(p)+1);
- else if (strcmp(format, "%p") == 0)
- strcpy(format, "0x%llx");
- ls = 2;
- }
- switch (ls) {
- case -2:
- if (len_as_arg)
- trace_seq_printf(s, format, len_arg, (char)val);
- else
- trace_seq_printf(s, format, (char)val);
- break;
- case -1:
- if (len_as_arg)
- trace_seq_printf(s, format, len_arg, (short)val);
- else
- trace_seq_printf(s, format, (short)val);
- break;
- case 0:
- if (len_as_arg)
- trace_seq_printf(s, format, len_arg, (int)val);
- else
- trace_seq_printf(s, format, (int)val);
- break;
- case 1:
- if (len_as_arg)
- trace_seq_printf(s, format, len_arg, (long)val);
- else
- trace_seq_printf(s, format, (long)val);
- break;
- case 2:
- if (len_as_arg)
- trace_seq_printf(s, format, len_arg,
- (long long)val);
- else
- trace_seq_printf(s, format, (long long)val);
- break;
- default:
- do_warning_event(event, "bad count (%d)", ls);
- event->flags |= TEP_EVENT_FL_FAILED;
- }
- break;
- case 's':
- if (!arg) {
- do_warning_event(event, "no matching argument");
- event->flags |= TEP_EVENT_FL_FAILED;
- goto out_failed;
- }
+ if (event->tep->long_size == 8 && ls == 1 &&
+ sizeof(long) != 8) {
+ char *p;
- len = ((unsigned long)ptr + 1) -
- (unsigned long)saveptr;
+ /* make %l into %ll */
+ if (ls == 1 && (p = strchr(print_format, 'l')))
+ memmove(p+1, p, strlen(p)+1);
+ ls = 2;
+ }
+ if (ls < -2 || ls > 2) {
+ do_warning_event(event, "bad count (%d)", ls);
+ event->flags |= TEP_EVENT_FL_FAILED;
+ }
+ parse_arg_add(parse, print_format,
+ PRINT_FMT_ARG_DIGIT, *arg, len_arg, ls);
+ *arg = (*arg)->next;
+ ret++;
+ return ret;
+ case 's':
+ if (!*arg) {
+ do_warning_event(event, "no matching argument");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ goto out_failed;
+ }
- /* should never happen */
- if (len > 31) {
- do_warning_event(event, "bad format!");
- event->flags |= TEP_EVENT_FL_FAILED;
- len = 31;
- }
+ len = ((unsigned long)format + 1) -
+ (unsigned long)start;
+
+ /* should never happen */
+ if (len > 31) {
+ do_warning_event(event, "bad format!");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ len = 31;
+ }
- memcpy(format, saveptr, len);
- format[len] = 0;
- if (!len_as_arg)
- len_arg = -1;
- /* Use helper trace_seq */
- trace_seq_init(&p);
- print_str_arg(&p, data, size, event,
- format, len_arg, arg);
- trace_seq_terminate(&p);
- trace_seq_puts(s, p.buffer);
- trace_seq_destroy(&p);
- arg = arg->next;
+ memcpy(print_format, start, len);
+ print_format[len] = 0;
+
+ parse_arg_add(parse, print_format,
+ PRINT_FMT_ARG_STRING, *arg, len_arg, 0);
+ *arg = (*arg)->next;
+ ret++;
+ return ret;
+ default:
+ snprintf(print_format, 32, ">%c<", *format);
+ parse_arg_add(parse, print_format,
+ PRINT_FMT_STRING, NULL, NULL, 0);
+ ret++;
+ return ret;
+ }
+ ret++;
+ }
+
+out_failed:
+ return ret;
+
+}
+
+static int parse_arg_string(struct tep_print_parse **parse, const char *format)
+{
+ struct trace_seq s;
+ int ret = 0;
+
+ trace_seq_init(&s);
+ for (; *format; format++) {
+ if (*format == '\\') {
+ format++;
+ ret++;
+ switch (*format) {
+ case 'n':
+ trace_seq_putc(&s, '\n');
+ break;
+ case 't':
+ trace_seq_putc(&s, '\t');
+ break;
+ case 'r':
+ trace_seq_putc(&s, '\r');
+ break;
+ case '\\':
+ trace_seq_putc(&s, '\\');
break;
default:
- trace_seq_printf(s, ">%c<", *ptr);
-
+ trace_seq_putc(&s, *format);
+ break;
}
+ } else if (*format == '%') {
+ if (*(format + 1) == '%') {
+ trace_seq_putc(&s, '%');
+ format++;
+ ret++;
+ } else
+ break;
} else
- trace_seq_putc(s, *ptr);
+ trace_seq_putc(&s, *format);
+
+ ret++;
}
+ trace_seq_terminate(&s);
+ parse_arg_add(parse, s.buffer, PRINT_FMT_STRING, NULL, NULL, 0);
+ trace_seq_destroy(&s);
+
+ return ret;
+}
+
+static struct tep_print_parse *
+parse_args(struct tep_event *event, const char *format, struct tep_print_arg *arg)
+{
+ struct tep_print_parse *parse_ret = NULL;
+ struct tep_print_parse **parse = NULL;
+ int ret;
+ int len;
+
+ len = strlen(format);
+ while (*format) {
+ if (!parse_ret)
+ parse = &parse_ret;
+ if (*format == '%' && *(format + 1) != '%')
+ ret = parse_arg_format(parse, event, format, &arg);
+ else
+ ret = parse_arg_string(parse, format);
+ if (*parse)
+ parse = &((*parse)->next);
+
+ len -= ret;
+ if (len > 0)
+ format += ret;
+ else
+ break;
+ }
+ return parse_ret;
+}
+
+static void print_event_cache(struct tep_print_parse *parse, struct trace_seq *s,
+ void *data, int size, struct tep_event *event)
+{
+ int len_arg;
+
+ while (parse) {
+ if (parse->len_as_arg)
+ len_arg = eval_num_arg(data, size, event, parse->len_as_arg);
+ switch (parse->type) {
+ case PRINT_FMT_ARG_DIGIT:
+ print_arg_number(s, parse->format,
+ parse->len_as_arg ? len_arg : -1, data,
+ size, parse->ls, event, parse->arg);
+ break;
+ case PRINT_FMT_ARG_POINTER:
+ print_arg_pointer(s, parse->format,
+ parse->len_as_arg ? len_arg : 1,
+ data, size, event, parse->arg);
+ break;
+ case PRINT_FMT_ARG_STRING:
+ print_arg_string(s, parse->format,
+ parse->len_as_arg ? len_arg : -1,
+ data, size, event, parse->arg);
+ break;
+ case PRINT_FMT_STRING:
+ default:
+ trace_seq_printf(s, "%s", parse->format);
+ break;
+ }
+ parse = parse->next;
+ }
+}
+
+static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event)
+{
+ struct tep_print_parse *parse = event->print_fmt.print_cache;
+ struct tep_print_arg *args = NULL;
+ char *bprint_fmt = NULL;
if (event->flags & TEP_EVENT_FL_FAILED) {
-out_failed:
trace_seq_printf(s, "[FAILED TO PARSE]");
+ tep_print_fields(s, data, size, event);
+ return;
+ }
+
+ if (event->flags & TEP_EVENT_FL_ISBPRINT) {
+ bprint_fmt = get_bprint_format(data, size, event);
+ args = make_bprint_args(bprint_fmt, data, size, event);
+ parse = parse_args(event, bprint_fmt, args);
}
- if (args) {
+ print_event_cache(parse, s, data, size, event);
+
+ if (event->flags & TEP_EVENT_FL_ISBPRINT) {
+ free_parse_args(parse);
free_args(args);
free(bprint_fmt);
}
@@ -6206,7 +6729,7 @@ static int find_event_handle(struct tep_handle *tep, struct tep_event *event)
}
/**
- * __tep_parse_format - parse the event format
+ * parse_format - parse the event format
* @buf: the buffer storing the event format string
* @size: the size of @buf
* @sys: the system the event belongs to
@@ -6218,9 +6741,9 @@ static int find_event_handle(struct tep_handle *tep, struct tep_event *event)
*
* /sys/kernel/debug/tracing/events/.../.../format
*/
-enum tep_errno __tep_parse_format(struct tep_event **eventp,
- struct tep_handle *tep, const char *buf,
- unsigned long size, const char *sys)
+static enum tep_errno parse_format(struct tep_event **eventp,
+ struct tep_handle *tep, const char *buf,
+ unsigned long size, const char *sys)
{
struct tep_event *event;
int ret;
@@ -6308,9 +6831,13 @@ enum tep_errno __tep_parse_format(struct tep_event **eventp,
*list = arg;
list = &arg->next;
}
- return 0;
}
+ if (!(event->flags & TEP_EVENT_FL_ISBPRINT))
+ event->print_fmt.print_cache = parse_args(event,
+ event->print_fmt.format,
+ event->print_fmt.args);
+
return 0;
event_parse_failed:
@@ -6331,7 +6858,7 @@ __parse_event(struct tep_handle *tep,
const char *buf, unsigned long size,
const char *sys)
{
- int ret = __tep_parse_format(eventp, tep, buf, size, sys);
+ int ret = parse_format(eventp, tep, buf, size, sys);
struct tep_event *event = *eventp;
if (event == NULL)
@@ -6349,7 +6876,7 @@ __parse_event(struct tep_handle *tep,
return 0;
event_add_failed:
- tep_free_event(event);
+ free_tep_event(event);
return ret;
}
@@ -6942,7 +7469,7 @@ int tep_get_ref(struct tep_handle *tep)
return 0;
}
-void tep_free_format_field(struct tep_format_field *field)
+__hidden void free_tep_format_field(struct tep_format_field *field)
{
free(field->type);
if (field->alias != field->name)
@@ -6957,7 +7484,7 @@ static void free_format_fields(struct tep_format_field *field)
while (field) {
next = field->next;
- tep_free_format_field(field);
+ free_tep_format_field(field);
field = next;
}
}
@@ -6968,7 +7495,7 @@ static void free_formats(struct tep_format *format)
free_format_fields(format->fields);
}
-void tep_free_event(struct tep_event *event)
+__hidden void free_tep_event(struct tep_event *event)
{
free(event->name);
free(event->system);
@@ -6977,7 +7504,7 @@ void tep_free_event(struct tep_event *event)
free(event->print_fmt.format);
free_args(event->print_fmt.args);
-
+ free_parse_args(event->print_fmt.print_cache);
free(event);
}
@@ -7054,7 +7581,7 @@ void tep_free(struct tep_handle *tep)
}
for (i = 0; i < tep->nr_events; i++)
- tep_free_event(tep->events[i]);
+ free_tep_event(tep->events[i]);
while (tep->handlers) {
handle = tep->handlers;
@@ -7065,6 +7592,7 @@ void tep_free(struct tep_handle *tep)
free(tep->events);
free(tep->sort_events);
free(tep->func_resolver);
+ free_tep_plugin_paths(tep);
free(tep);
}
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index b77837f75a0d..a67ad9a5b835 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -1,21 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
/*
* Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
*
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#ifndef _PARSE_EVENTS_H
#define _PARSE_EVENTS_H
@@ -272,9 +258,12 @@ struct tep_print_arg {
};
};
+struct tep_print_parse;
+
struct tep_print_fmt {
char *format;
struct tep_print_arg *args;
+ struct tep_print_parse *print_cache;
};
struct tep_event {
@@ -379,7 +368,7 @@ enum tep_errno {
* errno since SUS requires the errno has distinct positive values.
* See 'Issue 6' in the link below.
*
- * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+ * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
*/
__TEP_ERRNO__START = -100000,
@@ -393,14 +382,29 @@ struct tep_plugin_list;
#define INVALID_PLUGIN_LIST_OPTION ((char **)((unsigned long)-1))
+enum tep_plugin_load_priority {
+ TEP_PLUGIN_FIRST,
+ TEP_PLUGIN_LAST,
+};
+
+int tep_add_plugin_path(struct tep_handle *tep, char *path,
+ enum tep_plugin_load_priority prio);
struct tep_plugin_list *tep_load_plugins(struct tep_handle *tep);
void tep_unload_plugins(struct tep_plugin_list *plugin_list,
struct tep_handle *tep);
+void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix,
+ void (*load_plugin)(struct tep_handle *tep,
+ const char *path,
+ const char *name,
+ void *data),
+ void *data);
char **tep_plugin_list_options(void);
void tep_plugin_free_options_list(char **list);
int tep_plugin_add_options(const char *name,
struct tep_plugin_option *options);
+int tep_plugin_add_option(const char *name, const char *val);
void tep_plugin_remove_options(struct tep_plugin_option *options);
+void tep_plugin_print_options(struct trace_seq *s);
void tep_print_plugins(struct trace_seq *s,
const char *prefix, const char *suffix,
const struct tep_plugin_list *list);
@@ -574,14 +578,6 @@ void tep_ref(struct tep_handle *tep);
void tep_unref(struct tep_handle *tep);
int tep_get_ref(struct tep_handle *tep);
-/* access to the internal parser */
-void tep_buffer_init(const char *buf, unsigned long long size);
-enum tep_event_type tep_read_token(char **tok);
-void tep_free_token(char *token);
-int tep_peek_char(void);
-const char *tep_get_input_buf(void);
-unsigned long long tep_get_input_buf_ptr(void);
-
/* for debugging */
void tep_print_funcs(struct tep_handle *tep);
void tep_print_printk(struct tep_handle *tep);
diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c
index e1f7ddd5a6cf..e7f93d5fe4fd 100644
--- a/tools/lib/traceevent/event-plugin.c
+++ b/tools/lib/traceevent/event-plugin.c
@@ -13,6 +13,7 @@
#include <sys/stat.h>
#include <unistd.h>
#include <dirent.h>
+#include <errno.h>
#include "event-parse.h"
#include "event-parse-local.h"
#include "event-utils.h"
@@ -38,6 +39,12 @@ struct tep_plugin_list {
void *handle;
};
+struct tep_plugins_dir {
+ struct tep_plugins_dir *next;
+ char *path;
+ enum tep_plugin_load_priority prio;
+};
+
static void lower_case(char *str)
{
if (!str)
@@ -247,6 +254,170 @@ void tep_plugin_remove_options(struct tep_plugin_option *options)
}
}
+static int parse_option_name(char **option, char **plugin)
+{
+ char *p;
+
+ *plugin = NULL;
+
+ if ((p = strstr(*option, ":"))) {
+ *plugin = *option;
+ *p = '\0';
+ *option = strdup(p + 1);
+ if (!*option)
+ return -1;
+ }
+ return 0;
+}
+
+static struct tep_plugin_option *
+find_registered_option(const char *plugin, const char *option)
+{
+ struct registered_plugin_options *reg;
+ struct tep_plugin_option *op;
+ const char *op_plugin;
+
+ for (reg = registered_options; reg; reg = reg->next) {
+ for (op = reg->options; op->name; op++) {
+ if (op->plugin_alias)
+ op_plugin = op->plugin_alias;
+ else
+ op_plugin = op->file;
+
+ if (plugin && strcmp(plugin, op_plugin) != 0)
+ continue;
+ if (strcmp(option, op->name) != 0)
+ continue;
+
+ return op;
+ }
+ }
+
+ return NULL;
+}
+
+static int process_option(const char *plugin, const char *option, const char *val)
+{
+ struct tep_plugin_option *op;
+
+ op = find_registered_option(plugin, option);
+ if (!op)
+ return 0;
+
+ return update_option_value(op, val);
+}
+
+/**
+ * tep_plugin_add_option - add an option/val pair to set plugin options
+ * @name: The name of the option (format: <plugin>:<option> or just <option>)
+ * @val: (optional) the value for the option
+ *
+ * Modify a plugin option. If @val is given then the value of the option
+ * is set (note, some options just take a boolean, so @val must be either
+ * "1" or "0" or "true" or "false").
+ */
+int tep_plugin_add_option(const char *name, const char *val)
+{
+ struct trace_plugin_options *op;
+ char *option_str;
+ char *plugin;
+
+ option_str = strdup(name);
+ if (!option_str)
+ return -ENOMEM;
+
+ if (parse_option_name(&option_str, &plugin) < 0)
+ return -ENOMEM;
+
+ /* If the option exists, update the val */
+ for (op = trace_plugin_options; op; op = op->next) {
+ /* Both must be NULL or not NULL */
+ if ((!plugin || !op->plugin) && plugin != op->plugin)
+ continue;
+ if (plugin && strcmp(plugin, op->plugin) != 0)
+ continue;
+ if (strcmp(op->option, option_str) != 0)
+ continue;
+
+ /* update option */
+ free(op->value);
+ if (val) {
+ op->value = strdup(val);
+ if (!op->value)
+ goto out_free;
+ } else
+ op->value = NULL;
+
+ /* plugin and option_str don't get freed at the end */
+ free(plugin);
+ free(option_str);
+
+ plugin = op->plugin;
+ option_str = op->option;
+ break;
+ }
+
+ /* If not found, create */
+ if (!op) {
+ op = malloc(sizeof(*op));
+ if (!op)
+ goto out_free;
+ memset(op, 0, sizeof(*op));
+ op->plugin = plugin;
+ op->option = option_str;
+ if (val) {
+ op->value = strdup(val);
+ if (!op->value) {
+ free(op);
+ goto out_free;
+ }
+ }
+ op->next = trace_plugin_options;
+ trace_plugin_options = op;
+ }
+
+ return process_option(plugin, option_str, val);
+
+out_free:
+ free(plugin);
+ free(option_str);
+ return -ENOMEM;
+}
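A hedged usage sketch (not part of the patch): setting the "offset" option that this series adds to the ftrace/function plugin, using the <plugin>:<option> form described above:

	if (tep_plugin_add_option("ftrace:offset", "1") < 0)
		fprintf(stderr, "failed to set plugin option\n");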
+
+static void print_op_data(struct trace_seq *s, const char *name,
+ const char *op)
+{
+ if (op)
+ trace_seq_printf(s, "%8s:\t%s\n", name, op);
+}
+
+/**
+ * tep_plugin_print_options - print out the registered plugin options
+ * @s: The trace_seq descriptor to write the plugin options into
+ *
+ * Writes a list of options into trace_seq @s.
+ */
+void tep_plugin_print_options(struct trace_seq *s)
+{
+ struct registered_plugin_options *reg;
+ struct tep_plugin_option *op;
+
+ for (reg = registered_options; reg; reg = reg->next) {
+ if (reg != registered_options)
+ trace_seq_printf(s, "============\n");
+ for (op = reg->options; op->name; op++) {
+ if (op != reg->options)
+ trace_seq_printf(s, "------------\n");
+ print_op_data(s, "file", op->file);
+ print_op_data(s, "plugin", op->plugin_alias);
+ print_op_data(s, "option", op->name);
+ print_op_data(s, "desc", op->description);
+ print_op_data(s, "value", op->value);
+ trace_seq_printf(s, "%8s:\t%d\n", "set", op->set);
+ }
+ }
+}
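A minimal sketch of dumping the registered options (illustrative only), reusing the trace_seq helpers already used in this file:

	struct trace_seq seq;

	trace_seq_init(&seq);
	tep_plugin_print_options(&seq);
	trace_seq_terminate(&seq);
	printf("%s", seq.buffer);
	trace_seq_destroy(&seq);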
+
/**
* tep_print_plugins - print out the list of plugins loaded
 * @s: the trace_seq descriptor to write to
@@ -273,6 +444,7 @@ load_plugin(struct tep_handle *tep, const char *path,
const char *file, void *data)
{
struct tep_plugin_list **plugin_list = data;
+ struct tep_plugin_option *options;
tep_plugin_load_func func;
struct tep_plugin_list *list;
const char *alias;
@@ -297,6 +469,16 @@ load_plugin(struct tep_handle *tep, const char *path,
if (!alias)
alias = file;
+ options = dlsym(handle, TEP_PLUGIN_OPTIONS_NAME);
+ if (options) {
+ while (options->name) {
+ ret = update_option(alias, options);
+ if (ret < 0)
+ goto out_free;
+ options++;
+ }
+ }
+
func = dlsym(handle, TEP_PLUGIN_LOADER_NAME);
if (!func) {
warning("could not find func '%s' in plugin '%s'\n%s\n",
@@ -365,28 +547,53 @@ load_plugins_dir(struct tep_handle *tep, const char *suffix,
closedir(dir);
}
-static void
-load_plugins(struct tep_handle *tep, const char *suffix,
- void (*load_plugin)(struct tep_handle *tep,
- const char *path,
- const char *name,
- void *data),
- void *data)
+/**
+ * tep_load_plugins_hook - call a user-specified callback to load a plugin
+ * @tep: handle to the trace event context
+ * @suffix: filter only plugin files with given suffix
+ * @load_plugin: user-specified callback, called for each plugin file
+ * @data: custom context, passed to @load_plugin
+ *
+ * Searches for traceevent plugin files and calls @load_plugin for each one found.
+ * The plugin search order is:
+ * - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_FIRST
+ * - Directory, specified at compile time with PLUGIN_TRACEEVENT_DIR
+ * - Directory, specified by environment variable TRACEEVENT_PLUGIN_DIR
+ * - In user's home: ~/.local/lib/traceevent/plugins/
+ * - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_LAST
+ *
+ */
+void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix,
+ void (*load_plugin)(struct tep_handle *tep,
+ const char *path,
+ const char *name,
+ void *data),
+ void *data)
{
+ struct tep_plugins_dir *dir = NULL;
char *home;
char *path;
char *envdir;
int ret;
- if (tep->flags & TEP_DISABLE_PLUGINS)
+ if (tep && tep->flags & TEP_DISABLE_PLUGINS)
return;
+ if (tep)
+ dir = tep->plugins_dir;
+ while (dir) {
+ if (dir->prio == TEP_PLUGIN_FIRST)
+ load_plugins_dir(tep, suffix, dir->path,
+ load_plugin, data);
+ dir = dir->next;
+ }
+
/*
* If a system plugin directory was defined,
* check that first.
*/
#ifdef PLUGIN_DIR
- if (!(tep->flags & TEP_DISABLE_SYS_PLUGINS))
+ if (!tep || !(tep->flags & TEP_DISABLE_SYS_PLUGINS))
load_plugins_dir(tep, suffix, PLUGIN_DIR,
load_plugin, data);
#endif
@@ -415,6 +622,15 @@ load_plugins(struct tep_handle *tep, const char *suffix,
load_plugins_dir(tep, suffix, path, load_plugin, data);
+ if (tep)
+ dir = tep->plugins_dir;
+ while (dir) {
+ if (dir->prio == TEP_PLUGIN_LAST)
+ load_plugins_dir(tep, suffix, dir->path,
+ load_plugin, data);
+ dir = dir->next;
+ }
+
free(path);
}
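A hedged example of the hook (not part of the patch): a callback that merely lists candidate plugin files instead of dlopen()ing them. As the guard above shows, @tep may even be NULL here:

	static void list_plugin(struct tep_handle *tep, const char *path,
				const char *name, void *data)
	{
		printf("%s/%s\n", path, name);	/* report only, do not load */
	}

	/* in the caller: */
	tep_load_plugins_hook(tep, ".so", list_plugin, NULL);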
@@ -423,10 +639,59 @@ tep_load_plugins(struct tep_handle *tep)
{
struct tep_plugin_list *list = NULL;
- load_plugins(tep, ".so", load_plugin, &list);
+ tep_load_plugins_hook(tep, ".so", load_plugin, &list);
return list;
}
+/**
+ * tep_add_plugin_path - Add a new plugin directory.
+ * @tep: Trace event handler.
+ * @path: Path to a directory. All plugin files in that
+ * directory will be loaded.
+ * @prio: Load priority of the plugins in that directory.
+ *
+ * Returns -1 in case of an error, 0 otherwise.
+ */
+int tep_add_plugin_path(struct tep_handle *tep, char *path,
+ enum tep_plugin_load_priority prio)
+{
+ struct tep_plugins_dir *dir;
+
+ if (!tep || !path)
+ return -1;
+
+ dir = calloc(1, sizeof(*dir));
+ if (!dir)
+ return -1;
+
+ dir->path = strdup(path);
+ if (!dir->path) {
+ free(dir);
+ return -1;
+ }
+ dir->prio = prio;
+ dir->next = tep->plugins_dir;
+ tep->plugins_dir = dir;
+
+ return 0;
+}
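Usage sketch (illustrative only), assuming a valid tep handle obtained elsewhere: register a project-local directory to be searched before the built-in locations, then load plugins as usual:

	struct tep_plugin_list *plugins;

	if (tep_add_plugin_path(tep, "./plugins", TEP_PLUGIN_FIRST) < 0)
		fprintf(stderr, "could not register plugin path\n");
	plugins = tep_load_plugins(tep);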
+
+__hidden void free_tep_plugin_paths(struct tep_handle *tep)
+{
+ struct tep_plugins_dir *dir;
+
+ if (!tep)
+ return;
+
+ dir = tep->plugins_dir;
+ while (dir) {
+ tep->plugins_dir = tep->plugins_dir->next;
+ free(dir->path);
+ free(dir);
+ dir = tep->plugins_dir;
+ }
+}
+
void
tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *tep)
{
diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c
index b887e7437d67..f1640d651c8a 100644
--- a/tools/lib/traceevent/kbuffer-parse.c
+++ b/tools/lib/traceevent/kbuffer-parse.c
@@ -361,6 +361,7 @@ translate_data(struct kbuffer *kbuf, void *data, void **rptr,
break;
case KBUFFER_TYPE_TIME_EXTEND:
+ case KBUFFER_TYPE_TIME_STAMP:
extend = read_4(kbuf, data);
data += 4;
extend <<= TS_SHIFT;
@@ -369,10 +370,6 @@ translate_data(struct kbuffer *kbuf, void *data, void **rptr,
*length = 0;
break;
- case KBUFFER_TYPE_TIME_STAMP:
- data += 12;
- *length = 0;
- break;
case 0:
*length = read_4(kbuf, data) - 4;
*length = (*length + 3) & ~3;
@@ -397,7 +394,11 @@ static unsigned int update_pointers(struct kbuffer *kbuf)
type_len = translate_data(kbuf, ptr, &ptr, &delta, &length);
- kbuf->timestamp += delta;
+ if (type_len == KBUFFER_TYPE_TIME_STAMP)
+ kbuf->timestamp = delta;
+ else
+ kbuf->timestamp += delta;
+
kbuf->index = calc_index(kbuf, ptr);
kbuf->next = kbuf->index + length;
@@ -438,7 +439,7 @@ void *kbuffer_translate_data(int swap, void *data, unsigned int *size)
case KBUFFER_TYPE_TIME_EXTEND:
case KBUFFER_TYPE_TIME_STAMP:
return NULL;
- };
+ }
*size = length;
@@ -454,7 +455,9 @@ static int __next_event(struct kbuffer *kbuf)
if (kbuf->next >= kbuf->size)
return -1;
type = update_pointers(kbuf);
- } while (type == KBUFFER_TYPE_TIME_EXTEND || type == KBUFFER_TYPE_PADDING);
+ } while (type == KBUFFER_TYPE_TIME_EXTEND ||
+ type == KBUFFER_TYPE_TIME_STAMP ||
+ type == KBUFFER_TYPE_PADDING);
return 0;
}
@@ -547,6 +550,34 @@ int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer)
}
/**
+ * kbuffer_subbuf_timestamp - read the timestamp from a sub buffer
+ * @kbuf: The kbuffer to load
+ * @subbuf: The subbuffer to read from.
+ *
+ * Return the timestamp from a subbuffer.
+ */
+unsigned long long kbuffer_subbuf_timestamp(struct kbuffer *kbuf, void *subbuf)
+{
+ return kbuf->read_8(subbuf);
+}
+
+/**
+ * kbuffer_ptr_delta - read the delta field from a record
+ * @kbuf: The kbuffer to load
+ * @ptr: The record in the buffer.
+ *
+ * Return the timestamp delta from a record
+ */
+unsigned int kbuffer_ptr_delta(struct kbuffer *kbuf, void *ptr)
+{
+ unsigned int type_len_ts;
+
+ type_len_ts = read_4(kbuf, ptr);
+ return ts4host(kbuf, type_len_ts);
+}
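A minimal sketch (not part of the patch) of reading the header timestamp of a raw sub-buffer, assuming kbuf is an already-initialized struct kbuffer and subbuf points to a raw ring-buffer page obtained by the caller:

	if (kbuffer_load_subbuffer(kbuf, subbuf) == 0) {
		/* absolute timestamp stored in the sub-buffer header */
		unsigned long long ts = kbuffer_subbuf_timestamp(kbuf, subbuf);

		printf("sub-buffer starts at %llu\n", ts);
	}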
+
+
+/**
* kbuffer_read_event - read the next event in the kbuffer subbuffer
* @kbuf: The kbuffer to read from
* @ts: The address to store the timestamp of the event (may be NULL to ignore)
diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h
index ed4d697fc137..a2b522093cfd 100644
--- a/tools/lib/traceevent/kbuffer.h
+++ b/tools/lib/traceevent/kbuffer.h
@@ -1,22 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
/*
* Copyright (C) 2012 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
*
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#ifndef _KBUFFER_H
#define _KBUFFER_H
@@ -49,6 +34,8 @@ int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer);
void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts);
void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts);
unsigned long long kbuffer_timestamp(struct kbuffer *kbuf);
+unsigned long long kbuffer_subbuf_timestamp(struct kbuffer *kbuf, void *subbuf);
+unsigned int kbuffer_ptr_delta(struct kbuffer *kbuf, void *ptr);
void *kbuffer_translate_data(int swap, void *data, unsigned int *size);
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index 20eed719542e..368826bb5a57 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -38,8 +38,8 @@ static void show_error(char *error_buf, const char *fmt, ...)
int len;
int i;
- input = tep_get_input_buf();
- index = tep_get_input_buf_ptr();
+ input = get_input_buf();
+ index = get_input_buf_ptr();
len = input ? strlen(input) : 0;
if (len) {
@@ -57,25 +57,20 @@ static void show_error(char *error_buf, const char *fmt, ...)
va_end(ap);
}
-static void free_token(char *token)
-{
- tep_free_token(token);
-}
-
-static enum tep_event_type read_token(char **tok)
+static enum tep_event_type filter_read_token(char **tok)
{
enum tep_event_type type;
char *token = NULL;
do {
free_token(token);
- type = tep_read_token(&token);
+ type = read_token(&token);
} while (type == TEP_EVENT_NEWLINE || type == TEP_EVENT_SPACE);
/* If token is = or ! check to see if the next char is ~ */
if (token &&
(strcmp(token, "=") == 0 || strcmp(token, "!") == 0) &&
- tep_peek_char() == '~') {
+ peek_char() == '~') {
/* append it */
*tok = malloc(3);
if (*tok == NULL) {
@@ -85,7 +80,7 @@ static enum tep_event_type read_token(char **tok)
sprintf(*tok, "%c%c", *token, '~');
free_token(token);
/* Now remove the '~' from the buffer */
- tep_read_token(&token);
+ read_token(&token);
free_token(token);
} else
*tok = token;
@@ -959,7 +954,7 @@ process_filter(struct tep_event *event, struct tep_filter_arg **parg,
do {
free(token);
- type = read_token(&token);
+ type = filter_read_token(&token);
switch (type) {
case TEP_EVENT_SQUOTE:
case TEP_EVENT_DQUOTE:
@@ -1185,7 +1180,7 @@ process_event(struct tep_event *event, const char *filter_str,
{
int ret;
- tep_buffer_init(filter_str, strlen(filter_str));
+ init_input_buf(filter_str, strlen(filter_str));
ret = process_filter(event, parg, error_str, 0);
if (ret < 0)
@@ -1243,7 +1238,7 @@ filter_event(struct tep_event_filter *filter, struct tep_event *event,
static void filter_init_error_buf(struct tep_event_filter *filter)
{
/* clear buffer to reset show error */
- tep_buffer_init("", 0);
+ init_input_buf("", 0);
filter->error_buffer[0] = '\0';
}
@@ -1958,7 +1953,8 @@ static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *a
default:
break;
}
- asprintf(&str, val ? "TRUE" : "FALSE");
+ if (asprintf(&str, val ? "TRUE" : "FALSE") < 0)
+ str = NULL;
break;
}
}
@@ -1976,7 +1972,8 @@ static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *a
break;
}
- asprintf(&str, "(%s) %s (%s)", left, op, right);
+ if (asprintf(&str, "(%s) %s (%s)", left, op, right) < 0)
+ str = NULL;
break;
case TEP_FILTER_OP_NOT:
@@ -1992,10 +1989,12 @@ static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *a
right_val = 0;
if (right_val >= 0) {
/* just return the opposite */
- asprintf(&str, right_val ? "FALSE" : "TRUE");
+ if (asprintf(&str, right_val ? "FALSE" : "TRUE") < 0)
+ str = NULL;
break;
}
- asprintf(&str, "%s(%s)", op, right);
+ if (asprintf(&str, "%s(%s)", op, right) < 0)
+ str = NULL;
break;
default:
@@ -2011,7 +2010,8 @@ static char *val_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
{
char *str = NULL;
- asprintf(&str, "%lld", arg->value.val);
+ if (asprintf(&str, "%lld", arg->value.val) < 0)
+ str = NULL;
return str;
}
@@ -2069,7 +2069,8 @@ static char *exp_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
break;
}
- asprintf(&str, "%s %s %s", lstr, op, rstr);
+ if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
+ str = NULL;
out:
free(lstr);
free(rstr);
@@ -2113,7 +2114,8 @@ static char *num_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
if (!op)
op = "<=";
- asprintf(&str, "%s %s %s", lstr, op, rstr);
+ if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
+ str = NULL;
break;
default:
@@ -2148,8 +2150,9 @@ static char *str_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
if (!op)
op = "!~";
- asprintf(&str, "%s %s \"%s\"",
- arg->str.field->name, op, arg->str.val);
+ if (asprintf(&str, "%s %s \"%s\"",
+ arg->str.field->name, op, arg->str.val) < 0)
+ str = NULL;
break;
default:
@@ -2165,7 +2168,8 @@ static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
switch (arg->type) {
case TEP_FILTER_ARG_BOOLEAN:
- asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE");
+ if (asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE") < 0)
+ str = NULL;
return str;
case TEP_FILTER_ARG_OP:
diff --git a/tools/lib/traceevent/plugins/Build b/tools/lib/traceevent/plugins/Build
index 210d26910613..dd4da823c38f 100644
--- a/tools/lib/traceevent/plugins/Build
+++ b/tools/lib/traceevent/plugins/Build
@@ -5,6 +5,8 @@ plugin_kvm-y += plugin_kvm.o
plugin_mac80211-y += plugin_mac80211.o
plugin_sched_switch-y += plugin_sched_switch.o
plugin_function-y += plugin_function.o
+plugin_futex-y += plugin_futex.o
plugin_xen-y += plugin_xen.o
plugin_scsi-y += plugin_scsi.o
plugin_cfg80211-y += plugin_cfg80211.o
+plugin_tlb-y += plugin_tlb.o
\ No newline at end of file
diff --git a/tools/lib/traceevent/plugins/Makefile b/tools/lib/traceevent/plugins/Makefile
index 349bb81482ab..47e802553250 100644
--- a/tools/lib/traceevent/plugins/Makefile
+++ b/tools/lib/traceevent/plugins/Makefile
@@ -134,9 +134,11 @@ PLUGINS += plugin_kvm.so
PLUGINS += plugin_mac80211.so
PLUGINS += plugin_sched_switch.so
PLUGINS += plugin_function.so
+PLUGINS += plugin_futex.so
PLUGINS += plugin_xen.so
PLUGINS += plugin_scsi.so
PLUGINS += plugin_cfg80211.so
+PLUGINS += plugin_tlb.so
PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS))
PLUGINS_IN := $(PLUGINS:.so=-in.o)
@@ -197,7 +199,7 @@ define do_generate_dynamic_list_file
xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\
if [ "$$symbol_type" = "U W" ];then \
(echo '{'; \
- $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\
+ $(NM) -u -D $1 | awk 'NF>1 {sub("@.*", "", $$2); print "\t"$$2";"}' | sort -u;\
echo '};'; \
) > $2; \
else \
diff --git a/tools/lib/traceevent/plugins/plugin_function.c b/tools/lib/traceevent/plugins/plugin_function.c
index 7770fcb78e0f..807b16e1bf0f 100644
--- a/tools/lib/traceevent/plugins/plugin_function.c
+++ b/tools/lib/traceevent/plugins/plugin_function.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
@@ -50,12 +35,20 @@ struct tep_plugin_option plugin_options[] =
.set = 1,
},
{
+ .name = "offset",
+ .plugin_alias = "ftrace",
+ .description =
+ "Show function names as well as their offsets",
+ .set = 0,
+ },
+ {
.name = NULL,
}
};
static struct tep_plugin_option *ftrace_parent = &plugin_options[0];
static struct tep_plugin_option *ftrace_indent = &plugin_options[1];
+static struct tep_plugin_option *ftrace_offset = &plugin_options[2];
static void add_child(struct func_stack *stack, const char *child, int pos)
{
@@ -123,6 +116,18 @@ static int add_and_get_index(const char *parent, const char *child, int cpu)
return 0;
}
+static void show_function(struct trace_seq *s, struct tep_handle *tep,
+ const char *func, unsigned long long function)
+{
+ unsigned long long offset;
+
+ trace_seq_printf(s, "%s", func);
+ if (ftrace_offset->set) {
+ offset = tep_find_function_address(tep, function);
+ trace_seq_printf(s, "+0x%x ", (int)(function - offset));
+ }
+}
+
static int function_handler(struct trace_seq *s, struct tep_record *record,
struct tep_event *event, void *context)
{
@@ -149,14 +154,14 @@ static int function_handler(struct trace_seq *s, struct tep_record *record,
trace_seq_printf(s, "%*s", index*3, "");
if (func)
- trace_seq_printf(s, "%s", func);
+ show_function(s, tep, func, function);
else
trace_seq_printf(s, "0x%llx", function);
if (ftrace_parent->set) {
trace_seq_printf(s, " <-- ");
if (parent)
- trace_seq_printf(s, "%s", parent);
+ show_function(s, tep, parent, pfunction);
else
trace_seq_printf(s, "0x%llx", pfunction);
}
@@ -164,11 +169,93 @@ static int function_handler(struct trace_seq *s, struct tep_record *record,
return 0;
}
+static int
+trace_stack_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ struct tep_format_field *field;
+ unsigned long long addr;
+ const char *func;
+ int long_size;
+ void *data = record->data;
+
+ field = tep_find_any_field(event, "caller");
+ if (!field) {
+ trace_seq_printf(s, "<CANT FIND FIELD %s>", "caller");
+ return 0;
+ }
+
+ trace_seq_puts(s, "<stack trace >\n");
+
+ long_size = tep_get_long_size(event->tep);
+
+ for (data += field->offset; data < record->data + record->size;
+ data += long_size) {
+ addr = tep_read_number(event->tep, data, long_size);
+
+ if ((long_size == 8 && addr == (unsigned long long)-1) ||
+ ((int)addr == -1))
+ break;
+
+ func = tep_find_function(event->tep, addr);
+ if (func)
+ trace_seq_printf(s, "=> %s (%llx)\n", func, addr);
+ else
+ trace_seq_printf(s, "=> %llx\n", addr);
+ }
+
+ return 0;
+}
+
+static int
+trace_raw_data_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ struct tep_format_field *field;
+ unsigned long long id;
+ int long_size;
+ void *data = record->data;
+
+ if (tep_get_field_val(s, event, "id", record, &id, 1))
+ return trace_seq_putc(s, '!');
+
+ trace_seq_printf(s, "# %llx", id);
+
+ field = tep_find_any_field(event, "buf");
+ if (!field) {
+ trace_seq_printf(s, "<CANT FIND FIELD %s>", "buf");
+ return 0;
+ }
+
+ long_size = tep_get_long_size(event->tep);
+
+ for (data += field->offset; data < record->data + record->size;
+ data += long_size) {
+ int size = sizeof(long);
+ int left = (record->data + record->size) - data;
+ int i;
+
+ if (size > left)
+ size = left;
+
+ for (i = 0; i < size; i++)
+ trace_seq_printf(s, " %02x", *(unsigned char *)(data + i));
+ }
+
+ return 0;
+}
+
int TEP_PLUGIN_LOADER(struct tep_handle *tep)
{
tep_register_event_handler(tep, -1, "ftrace", "function",
function_handler, NULL);
+ tep_register_event_handler(tep, -1, "ftrace", "kernel_stack",
+ trace_stack_handler, NULL);
+
+ tep_register_event_handler(tep, -1, "ftrace", "raw_data",
+ trace_raw_data_handler, NULL);
+
tep_plugin_add_options("ftrace", plugin_options);
return 0;
diff --git a/tools/lib/traceevent/plugins/plugin_futex.c b/tools/lib/traceevent/plugins/plugin_futex.c
new file mode 100644
index 000000000000..eb7c9f8a850a
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_futex.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2017 National Instruments Corp.
+ *
+ * Author: Julia Cartwright <julia@ni.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/futex.h>
+
+#include "event-parse.h"
+
+#define ARRAY_SIZE(_a) (sizeof(_a) / sizeof((_a)[0]))
+
+struct futex_args {
+ unsigned long long uaddr;
+ unsigned long long op;
+ unsigned long long val;
+ unsigned long long utime; /* or val2 */
+ unsigned long long uaddr2;
+ unsigned long long val3;
+};
+
+struct futex_op {
+ const char *name;
+ const char *fmt_val;
+ const char *fmt_utime;
+ const char *fmt_uaddr2;
+ const char *fmt_val3;
+};
+
+static const struct futex_op futex_op_tbl[] = {
+ { "FUTEX_WAIT", " val=0x%08llx", " utime=0x%08llx", NULL, NULL },
+ { "FUTEX_WAKE", " val=%llu", NULL, NULL, NULL },
+ { "FUTEX_FD", " val=%llu", NULL, NULL, NULL },
+ { "FUTEX_REQUEUE", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", NULL },
+ { "FUTEX_CMP_REQUEUE", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
+ { "FUTEX_WAKE_OP", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
+ { "FUTEX_LOCK_PI", NULL, " utime=0x%08llx", NULL, NULL },
+ { "FUTEX_UNLOCK_PI", NULL, NULL, NULL, NULL },
+ { "FUTEX_TRYLOCK_PI", NULL, NULL, NULL, NULL },
+ { "FUTEX_WAIT_BITSET", " val=0x%08llx", " utime=0x%08llx", NULL, " val3=0x%08llx" },
+ { "FUTEX_WAKE_BITSET", " val=%llu", NULL, NULL, " val3=0x%08llx" },
+ { "FUTEX_WAIT_REQUEUE_PI", " val=0x%08llx", " utime=0x%08llx", " uaddr2=0x%08llx", " val3=0x%08llx" },
+ { "FUTEX_CMP_REQUEUE_PI", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
+};
+
+
+static void futex_print(struct trace_seq *s, const struct futex_args *args,
+ const struct futex_op *fop)
+{
+ trace_seq_printf(s, " uaddr=0x%08llx", args->uaddr);
+
+ if (fop->fmt_val)
+ trace_seq_printf(s, fop->fmt_val, args->val);
+
+ if (fop->fmt_utime)
+ trace_seq_printf(s, fop->fmt_utime, args->utime);
+
+ if (fop->fmt_uaddr2)
+ trace_seq_printf(s, fop->fmt_uaddr2, args->uaddr2);
+
+ if (fop->fmt_val3)
+ trace_seq_printf(s, fop->fmt_val3, args->val3);
+}
+
+static int futex_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ const struct futex_op *fop;
+ struct futex_args args;
+ unsigned long long cmd;
+
+ if (tep_get_field_val(s, event, "uaddr", record, &args.uaddr, 1))
+ return 1;
+
+ if (tep_get_field_val(s, event, "op", record, &args.op, 1))
+ return 1;
+
+ if (tep_get_field_val(s, event, "val", record, &args.val, 1))
+ return 1;
+
+ if (tep_get_field_val(s, event, "utime", record, &args.utime, 1))
+ return 1;
+
+ if (tep_get_field_val(s, event, "uaddr2", record, &args.uaddr2, 1))
+ return 1;
+
+ if (tep_get_field_val(s, event, "val3", record, &args.val3, 1))
+ return 1;
+
+ cmd = args.op & FUTEX_CMD_MASK;
+ if (cmd >= ARRAY_SIZE(futex_op_tbl))
+ return 1;
+
+ fop = &futex_op_tbl[cmd];
+
+ trace_seq_printf(s, "op=%s", fop->name);
+
+ if (args.op & FUTEX_PRIVATE_FLAG)
+ trace_seq_puts(s, "|FUTEX_PRIVATE_FLAG");
+
+ if (args.op & FUTEX_CLOCK_REALTIME)
+ trace_seq_puts(s, "|FUTEX_CLOCK_REALTIME");
+
+ futex_print(s, &args, fop);
+ return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+ tep_register_event_handler(tep, -1, "syscalls", "sys_enter_futex",
+ futex_handler, NULL);
+ return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+ tep_unregister_event_handler(tep, -1, "syscalls", "sys_enter_futex",
+ futex_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugins/plugin_hrtimer.c b/tools/lib/traceevent/plugins/plugin_hrtimer.c
index bb434e0ed03a..d98466788f14 100644
--- a/tools/lib/traceevent/plugins/plugin_hrtimer.c
+++ b/tools/lib/traceevent/plugins/plugin_hrtimer.c
@@ -1,22 +1,7 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_jbd2.c b/tools/lib/traceevent/plugins/plugin_jbd2.c
index 04fc125f38cb..69111a68d3cf 100644
--- a/tools/lib/traceevent/plugins/plugin_jbd2.c
+++ b/tools/lib/traceevent/plugins/plugin_jbd2.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_kmem.c b/tools/lib/traceevent/plugins/plugin_kmem.c
index edaec5d962c3..4b4f7f9616e3 100644
--- a/tools/lib/traceevent/plugins/plugin_kmem.c
+++ b/tools/lib/traceevent/plugins/plugin_kmem.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c
index c8e623065a7e..51ceeb9147eb 100644
--- a/tools/lib/traceevent/plugins/plugin_kvm.c
+++ b/tools/lib/traceevent/plugins/plugin_kvm.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
@@ -155,7 +140,23 @@ static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
_ER(EXIT_WRITE_DR5, 0x035) \
_ER(EXIT_WRITE_DR6, 0x036) \
_ER(EXIT_WRITE_DR7, 0x037) \
- _ER(EXIT_EXCP_BASE, 0x040) \
+ _ER(EXIT_EXCP_DE, 0x040) \
+ _ER(EXIT_EXCP_DB, 0x041) \
+ _ER(EXIT_EXCP_BP, 0x043) \
+ _ER(EXIT_EXCP_OF, 0x044) \
+ _ER(EXIT_EXCP_BR, 0x045) \
+ _ER(EXIT_EXCP_UD, 0x046) \
+ _ER(EXIT_EXCP_NM, 0x047) \
+ _ER(EXIT_EXCP_DF, 0x048) \
+ _ER(EXIT_EXCP_TS, 0x04a) \
+ _ER(EXIT_EXCP_NP, 0x04b) \
+ _ER(EXIT_EXCP_SS, 0x04c) \
+ _ER(EXIT_EXCP_GP, 0x04d) \
+ _ER(EXIT_EXCP_PF, 0x04e) \
+ _ER(EXIT_EXCP_MF, 0x050) \
+ _ER(EXIT_EXCP_AC, 0x051) \
+ _ER(EXIT_EXCP_MC, 0x052) \
+ _ER(EXIT_EXCP_XF, 0x053) \
_ER(EXIT_INTR, 0x060) \
_ER(EXIT_NMI, 0x061) \
_ER(EXIT_SMI, 0x062) \
@@ -201,7 +202,10 @@ static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
_ER(EXIT_MONITOR, 0x08a) \
_ER(EXIT_MWAIT, 0x08b) \
_ER(EXIT_MWAIT_COND, 0x08c) \
- _ER(EXIT_NPF, 0x400) \
+ _ER(EXIT_XSETBV, 0x08d) \
+ _ER(EXIT_NPF, 0x400) \
+ _ER(EXIT_AVIC_INCOMPLETE_IPI, 0x401) \
+ _ER(EXIT_AVIC_UNACCELERATED_ACCESS, 0x402) \
_ER(EXIT_ERR, -1)
#define _ER(reason, val) { #reason, val },
@@ -241,7 +245,7 @@ static const char *find_exit_reason(unsigned isa, int val)
}
if (!strings)
return "UNKNOWN-ISA";
- for (i = 0; strings[i].val >= 0; i++)
+ for (i = 0; strings[i].str; i++)
if (strings[i].val == val)
break;
diff --git a/tools/lib/traceevent/plugins/plugin_mac80211.c b/tools/lib/traceevent/plugins/plugin_mac80211.c
index 884303c26b5c..f48071e3cfb8 100644
--- a/tools/lib/traceevent/plugins/plugin_mac80211.c
+++ b/tools/lib/traceevent/plugins/plugin_mac80211.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_sched_switch.c b/tools/lib/traceevent/plugins/plugin_sched_switch.c
index 957389a0ff7a..e12fa103820a 100644
--- a/tools/lib/traceevent/plugins/plugin_sched_switch.c
+++ b/tools/lib/traceevent/plugins/plugin_sched_switch.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_tlb.c b/tools/lib/traceevent/plugins/plugin_tlb.c
new file mode 100644
index 000000000000..43657fb60504
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_tlb.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2015 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+
+enum tlb_flush_reason {
+ TLB_FLUSH_ON_TASK_SWITCH,
+ TLB_REMOTE_SHOOTDOWN,
+ TLB_LOCAL_SHOOTDOWN,
+ TLB_LOCAL_MM_SHOOTDOWN,
+ NR_TLB_FLUSH_REASONS,
+};
+
+static int tlb_flush_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ unsigned long long val;
+
+ trace_seq_printf(s, "pages=");
+
+ tep_print_num_field(s, "%ld", event, "pages", record, 1);
+
+ if (tep_get_field_val(s, event, "reason", record, &val, 1) < 0)
+ return -1;
+
+ trace_seq_puts(s, " reason=");
+
+ switch (val) {
+ case TLB_FLUSH_ON_TASK_SWITCH:
+ trace_seq_puts(s, "flush on task switch");
+ break;
+ case TLB_REMOTE_SHOOTDOWN:
+ trace_seq_puts(s, "remote shootdown");
+ break;
+ case TLB_LOCAL_SHOOTDOWN:
+ trace_seq_puts(s, "local shootdown");
+ break;
+ case TLB_LOCAL_MM_SHOOTDOWN:
+ trace_seq_puts(s, "local mm shootdown");
+ break;
+ }
+
+ trace_seq_printf(s, " (%lld)", val);
+
+ return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+ tep_register_event_handler(tep, -1, "tlb", "tlb_flush",
+ tlb_flush_handler, NULL);
+
+ return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+ tep_unregister_event_handler(tep, -1,
+ "tlb", "tlb_flush",
+ tlb_flush_handler, NULL);
+}
diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt
index 33ba98d72b16..99d00870b160 100644
--- a/tools/memory-model/Documentation/cheatsheet.txt
+++ b/tools/memory-model/Documentation/cheatsheet.txt
@@ -3,9 +3,9 @@
C Self R W RMW Self R W DR DW RMW SV
-- ---- - - --- ---- - - -- -- --- --
-Store, e.g., WRITE_ONCE() Y Y
-Load, e.g., READ_ONCE() Y Y Y Y
-Unsuccessful RMW operation Y Y Y Y
+Relaxed store Y Y
+Relaxed load Y Y Y Y
+Relaxed RMW operation Y Y Y Y
rcu_dereference() Y Y Y Y
Successful *_acquire() R Y Y Y Y Y Y
Successful *_release() C Y Y Y W Y
@@ -17,14 +17,19 @@ smp_mb__before_atomic() CP Y Y Y a a a a Y
smp_mb__after_atomic() CP a a Y Y Y Y Y Y
-Key: C: Ordering is cumulative
- P: Ordering propagates
- R: Read, for example, READ_ONCE(), or read portion of RMW
- W: Write, for example, WRITE_ONCE(), or write portion of RMW
- Y: Provides ordering
- a: Provides ordering given intervening RMW atomic operation
- DR: Dependent read (address dependency)
- DW: Dependent write (address, data, or control dependency)
- RMW: Atomic read-modify-write operation
- SELF: Orders self, as opposed to accesses before and/or after
- SV: Orders later accesses to the same variable
+Key: Relaxed: A relaxed operation is either READ_ONCE(), WRITE_ONCE(),
+ a *_relaxed() RMW operation, an unsuccessful RMW
+ operation, a non-value-returning RMW operation such
+ as atomic_inc(), or one of the atomic*_read() and
+ atomic*_set() family of operations.
+ C: Ordering is cumulative
+ P: Ordering propagates
+ R: Read, for example, READ_ONCE(), or read portion of RMW
+ W: Write, for example, WRITE_ONCE(), or write portion of RMW
+ Y: Provides ordering
+ a: Provides ordering given intervening RMW atomic operation
+ DR: Dependent read (address dependency)
+ DW: Dependent write (address, data, or control dependency)
+ RMW: Atomic read-modify-write operation
+ SELF: Orders self, as opposed to accesses before and/or after
+ SV: Orders later accesses to the same variable
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index e91a2eb19592..f9d610d5a1a4 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -1122,12 +1122,10 @@ maintain at least the appearance of FIFO order.
In practice, this difficulty is solved by inserting a special fence
between P1's two loads when the kernel is compiled for the Alpha
architecture. In fact, as of version 4.15, the kernel automatically
-adds this fence (called smp_read_barrier_depends() and defined as
-nothing at all on non-Alpha builds) after every READ_ONCE() and atomic
-load. The effect of the fence is to cause the CPU not to execute any
-po-later instructions until after the local cache has finished
-processing all the stores it has already received. Thus, if the code
-was changed to:
+adds this fence after every READ_ONCE() and atomic load on Alpha. The
+effect of the fence is to cause the CPU not to execute any po-later
+instructions until after the local cache has finished processing all
+the stores it has already received. Thus, if the code was changed to:
P1()
{
@@ -1146,14 +1144,14 @@ READ_ONCE() or another synchronization primitive rather than accessed
directly.
The LKMM requires that smp_rmb(), acquire fences, and strong fences
-share this property with smp_read_barrier_depends(): They do not allow
-the CPU to execute any po-later instructions (or po-later loads in the
-case of smp_rmb()) until all outstanding stores have been processed by
-the local cache. In the case of a strong fence, the CPU first has to
-wait for all of its po-earlier stores to propagate to every other CPU
-in the system; then it has to wait for the local cache to process all
-the stores received as of that time -- not just the stores received
-when the strong fence began.
+share this property: They do not allow the CPU to execute any po-later
+instructions (or po-later loads in the case of smp_rmb()) until all
+outstanding stores have been processed by the local cache. In the
+case of a strong fence, the CPU first has to wait for all of its
+po-earlier stores to propagate to every other CPU in the system; then
+it has to wait for the local cache to process all the stores received
+as of that time -- not just the stores received when the strong fence
+began.
And of course, none of this matters for any architecture other than
Alpha.
@@ -1987,28 +1985,36 @@ outcome undefined.
In technical terms, the compiler is allowed to assume that when the
program executes, there will not be any data races. A "data race"
-occurs when two conflicting memory accesses execute concurrently;
-two memory accesses "conflict" if:
+occurs when there are two memory accesses such that:
- they access the same location,
+1. they access the same location,
- they occur on different CPUs (or in different threads on the
- same CPU),
+2. at least one of them is a store,
- at least one of them is a plain access,
+3. at least one of them is plain,
- and at least one of them is a store.
+4. they occur on different CPUs (or in different threads on the
+ same CPU), and
-The LKMM tries to determine whether a program contains two conflicting
-accesses which may execute concurrently; if it does then the LKMM says
-there is a potential data race and makes no predictions about the
-program's outcome.
+5. they execute concurrently.
-Determining whether two accesses conflict is easy; you can see that
-all the concepts involved in the definition above are already part of
-the memory model. The hard part is telling whether they may execute
-concurrently. The LKMM takes a conservative attitude, assuming that
-accesses may be concurrent unless it can prove they cannot.
+In the literature, two accesses are said to "conflict" if they satisfy
+1 and 2 above. We'll go a little farther and say that two accesses
+are "race candidates" if they satisfy 1 - 4. Thus, whether or not two
+race candidates actually do race in a given execution depends on
+whether they are concurrent.
+
+The LKMM tries to determine whether a program contains race candidates
+which may execute concurrently; if it does then the LKMM says there is
+a potential data race and makes no predictions about the program's
+outcome.
+
+Determining whether two accesses are race candidates is easy; you can
+see that all the concepts involved in the definition above are already
+part of the memory model. The hard part is telling whether they may
+execute concurrently. The LKMM takes a conservative attitude,
+assuming that accesses may be concurrent unless it can prove they
+are not.
If two memory accesses aren't concurrent then one must execute before
the other. Therefore the LKMM decides two accesses aren't concurrent
@@ -2171,8 +2177,8 @@ again, now using plain accesses for buf:
}
This program does not contain a data race. Although the U and V
-accesses conflict, the LKMM can prove they are not concurrent as
-follows:
+accesses are race candidates, the LKMM can prove they are not
+concurrent as follows:
The smp_wmb() fence in P0 is both a compiler barrier and a
cumul-fence. It guarantees that no matter what hash of
@@ -2326,12 +2332,11 @@ could now perform the load of x before the load of ptr (there might be
a control dependency but no address dependency at the machine level).
Finally, it turns out there is a situation in which a plain write does
-not need to be w-post-bounded: when it is separated from the
-conflicting access by a fence. At first glance this may seem
-impossible. After all, to be conflicting the second access has to be
-on a different CPU from the first, and fences don't link events on
-different CPUs. Well, normal fences don't -- but rcu-fence can!
-Here's an example:
+not need to be w-post-bounded: when it is separated from the other
+race-candidate access by a fence. At first glance this may seem
+impossible. After all, to be race candidates the two accesses must
+be on different CPUs, and fences don't link events on different CPUs.
+Well, normal fences don't -- but rcu-fence can! Here's an example:
int x, y;
@@ -2367,7 +2372,7 @@ concurrent and there is no race, even though P1's plain store to y
isn't w-post-bounded by any marked accesses.
Putting all this material together yields the following picture. For
-two conflicting stores W and W', where W ->co W', the LKMM says the
+race-candidate stores W and W', where W ->co W', the LKMM says the
stores don't race if W can be linked to W' by a
w-post-bounded ; vis ; w-pre-bounded
@@ -2380,8 +2385,8 @@ sequence, and if W' is plain then they also have to be linked by a
w-post-bounded ; vis ; r-pre-bounded
-sequence. For a conflicting load R and store W, the LKMM says the two
-accesses don't race if R can be linked to W by an
+sequence. For race-candidate load R and store W, the LKMM says the
+two accesses don't race if R can be linked to W by an
r-post-bounded ; xb* ; w-pre-bounded
@@ -2413,20 +2418,20 @@ is, the rules governing the memory subsystem's choice of a store to
satisfy a load request and its determination of where a store will
fall in the coherence order):
- If R and W conflict and it is possible to link R to W by one
- of the xb* sequences listed above, then W ->rfe R is not
- allowed (i.e., a load cannot read from a store that it
+ If R and W are race candidates and it is possible to link R to
+ W by one of the xb* sequences listed above, then W ->rfe R is
+ not allowed (i.e., a load cannot read from a store that it
executes before, even if one or both is plain).
- If W and R conflict and it is possible to link W to R by one
- of the vis sequences listed above, then R ->fre W is not
- allowed (i.e., if a store is visible to a load then the load
- must read from that store or one coherence-after it).
+ If W and R are race candidates and it is possible to link W to
+ R by one of the vis sequences listed above, then R ->fre W is
+ not allowed (i.e., if a store is visible to a load then the
+ load must read from that store or one coherence-after it).
- If W and W' conflict and it is possible to link W to W' by one
- of the vis sequences listed above, then W' ->co W is not
- allowed (i.e., if one store is visible to a second then the
- second must come after the first in the coherence order).
+ If W and W' are race candidates and it is possible to link W
+ to W' by one of the vis sequences listed above, then W' ->co W
+ is not allowed (i.e., if one store is visible to a second then
+ the second must come after the first in the coherence order).
This is the extent to which the LKMM deals with plain accesses.
Perhaps it could say more (for example, plain accesses might
diff --git a/tools/memory-model/Documentation/litmus-tests.txt b/tools/memory-model/Documentation/litmus-tests.txt
new file mode 100644
index 000000000000..2f840dcd15cf
--- /dev/null
+++ b/tools/memory-model/Documentation/litmus-tests.txt
@@ -0,0 +1,1074 @@
+Linux-Kernel Memory Model Litmus Tests
+======================================
+
+This file describes the LKMM litmus-test format by example, describes
+some tricks and traps, and finally outlines LKMM's limitations. Earlier
+versions of this material appeared in a number of LWN articles, including:
+
+https://lwn.net/Articles/720550/
+ A formal kernel memory-ordering model (part 2)
+https://lwn.net/Articles/608550/
+ Axiomatic validation of memory barriers and atomic instructions
+https://lwn.net/Articles/470681/
+ Validating Memory Barriers and Atomic Instructions
+
+This document presents information in decreasing order of applicability,
+so that, where possible, the information that has proven more commonly
+useful is shown near the beginning.
+
+For information on installing LKMM, including the underlying "herd7"
+tool, please see tools/memory-model/README.
+
+
+Copy-Pasta
+==========
+
+As with other software, it is often better (if less macho) to adapt an
+existing litmus test than it is to create one from scratch. A number
+of litmus tests may be found in the kernel source tree:
+
+ tools/memory-model/litmus-tests/
+ Documentation/litmus-tests/
+
+Several thousand more example litmus tests are available on github
+and kernel.org:
+
+ https://github.com/paulmckrcu/litmus
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus
+
+The -l and -L arguments to "git grep" can be quite helpful in identifying
+existing litmus tests that are similar to the one you need. But even if
+you start with an existing litmus test, it is still helpful to have a
+good understanding of the litmus-test format.
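+
+For instance, the following illustrative invocation (adjust the
+pattern and path to taste)
+
+	git grep -l smp_load_acquire -- tools/memory-model/litmus-tests
+
+lists the litmus tests in that directory that mention smp_load_acquire().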
+
+
+Examples and Format
+===================
+
+This section describes the overall format of litmus tests, starting
+with a small example of the message-passing pattern and moving on to
+more complex examples that illustrate explicit initialization and LKMM's
+minimalistic set of flow-control statements.
+
+
+Message-Passing Example
+-----------------------
+
+This section gives an overview of the format of a litmus test using an
+example based on the common message-passing use case. This use case
+appears often in the Linux kernel. For example, a flag (modeled by "y"
+below) indicates that a buffer (modeled by "x" below) is now completely
+filled in and ready for use. It would be very bad if the consumer saw the
+flag set, but, due to memory misordering, saw old values in the buffer.
+
+This example asks whether smp_store_release() and smp_load_acquire()
+suffice to avoid this bad outcome:
+
+ 1 C MP+pooncerelease+poacquireonce
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 WRITE_ONCE(*x, 1);
+ 8 smp_store_release(y, 1);
+ 9 }
+10
+11 P1(int *x, int *y)
+12 {
+13 int r0;
+14 int r1;
+15
+16 r0 = smp_load_acquire(y);
+17 r1 = READ_ONCE(*x);
+18 }
+19
+20 exists (1:r0=1 /\ 1:r1=0)
+
+Line 1 starts with "C", which identifies this file as being in the
+LKMM C-language format (which, as we will see, is a small fragment
+of the full C language). The remainder of line 1 is the name of
+the test, which by convention is the filename with the ".litmus"
+suffix stripped. In this case, the actual test may be found in
+tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
+in the Linux-kernel source tree.
+
+Mechanically generated litmus tests will often have an optional
+double-quoted comment string on the second line. Such strings are ignored
+when running the test. Yes, you can add your own comments to litmus
+tests, but this is a bit involved due to the use of multiple parsers.
+For now, you can use C-language comments in the C code, and these comments
+may be in either the "/* */" or the "//" style. A later section will
+cover the full litmus-test commenting story.
+
+Line 3 is the initialization section. Because the default initialization
+to zero suffices for this test, the "{}" syntax is used, which means the
+initialization section is empty. Litmus tests requiring non-default
+initialization must have non-empty initialization sections, as in the
+example that will be presented later in this document.
+
+Lines 5-9 show the first process and lines 11-18 the second process. Each
+process corresponds to a Linux-kernel task (or kthread, workqueue, thread,
+and so on; LKMM discussions often use these terms interchangeably).
+The name of the first process is "P0" and that of the second "P1".
+You can name your processes anything you like as long as the names consist
+of a single "P" followed by a number, and as long as the numbers are
+consecutive starting with zero. This can actually be quite helpful:
+for example, a .litmus file matching "^P1(" but not matching "^P2("
+must contain a two-process litmus test.
+
+The arguments for each function are pointers to the global variables
+used by that function. Unlike normal C-language function parameters, the
+names are significant. The fact that both P0() and P1() have a formal
+parameter named "x" means that these two processes are working with the
+same global variable, also named "x". So the "int *x, int *y" on P0()
+and P1() mean that both processes are working with two shared global
+variables, "x" and "y". Global variables are always passed to processes
+by reference, hence "P0(int *x, int *y)", but *never* "P0(int x, int y)".
+
+P0() has no local variables, but P1() has two of them named "r0" and "r1".
+These names may be freely chosen, but for historical reasons stemming from
+other litmus-test formats, it is conventional to use names consisting of
+"r" followed by a number as shown here. A common bug in litmus tests
+is forgetting to add a global variable to a process's parameter list.
+This will sometimes result in an error message, but can also cause the
+intended global to instead be silently treated as an undeclared local
+variable.
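+
+For instance (a hypothetical, deliberately buggy fragment rather than
+one of the shipped tests), writing P1() from the example above as
+
+	P1(int *x)
+	{
+		int r0;
+		int r1;
+
+		r0 = smp_load_acquire(y);
+		r1 = READ_ONCE(*x);
+	}
+
+omits "y" from the parameter list, so "y" may either provoke an error
+message or be silently treated as an undeclared local variable rather
+than the shared flag.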
+
+Each process's code is similar to Linux-kernel C, as can be seen on lines
+7-8 and 13-17. This code may use many of the Linux kernel's atomic
+operations, some of its exclusive-lock functions, and some of its RCU
+and SRCU functions. An approximate list of the currently supported
+functions may be found in the linux-kernel.def file.
+
+The P0() process does "WRITE_ONCE(*x, 1)" on line 7. Because "x" is a
+pointer in P0()'s parameter list, this does an unordered store to global
+variable "x". Line 8 does "smp_store_release(y, 1)", and because "y"
+is also in P0()'s parameter list, this does a release store to global
+variable "y".
+
+The P1() process declares two local variables on lines 13 and 14.
+Line 16 does "r0 = smp_load_acquire(y)" which does an acquire load
+from global variable "y" into local variable "r0". Line 17 does a
+"r1 = READ_ONCE(*x)", which does an unordered load from "*x" into local
+variable "r1". Both "x" and "y" are in P1()'s parameter list, so both
+reference the same global variables that are used by P0().
+
+Line 20 is the "exists" assertion expression to evaluate the final state.
+This final state is evaluated after the dust has settled: both processes
+have completed and all of their memory references and memory barriers
+have propagated to all parts of the system. The references to the local
+variables "r0" and "r1" in line 24 must be prefixed with "1:" to specify
+which process they are local to.
+
+Note that the assertion expression is written in the litmus-test
+language rather than in C. For example, single "=" is an equality
+operator rather than an assignment. The "/\" character combination means
+"and". Similarly, "\/" stands for "or". Both of these are ASCII-art
+representations of the corresponding mathematical symbols. Finally,
+"~" stands for "logical not", which is "!" in C, and not to be confused
+with the C-language "~" operator which instead stands for "bitwise not".
+Parentheses may be used to override precedence.
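+
+For instance (a purely illustrative clause, not part of the test
+above), the assertion
+
+	exists (~(1:r0=1) \/ 1:r1=0)
+
+is satisfied when "1:r0" differs from 1, when "1:r1" is equal to 0,
+or both.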
+
+The "exists" assertion on line 20 is satisfied if the consumer sees the
+flag ("y") set but the buffer ("x") as not yet filled in, that is, if P1()
+loaded a value from "x" that was equal to 1 but loaded a value from "y"
+that was still equal to zero.
+
+This example can be checked by running the following command, which
+absolutely must be run from the tools/memory-model directory and from
+this directory only:
+
+herd7 -conf linux-kernel.cfg litmus-tests/MP+pooncerelease+poacquireonce.litmus
+
+The output is the result of something similar to a full state-space
+search, and is as follows:
+
+ 1 Test MP+pooncerelease+poacquireonce Allowed
+ 2 States 3
+ 3 1:r0=0; 1:r1=0;
+ 4 1:r0=0; 1:r1=1;
+ 5 1:r0=1; 1:r1=1;
+ 6 No
+ 7 Witnesses
+ 8 Positive: 0 Negative: 3
+ 9 Condition exists (1:r0=1 /\ 1:r1=0)
+10 Observation MP+pooncerelease+poacquireonce Never 0 3
+11 Time MP+pooncerelease+poacquireonce 0.00
+12 Hash=579aaa14d8c35a39429b02e698241d09
+
+The most pertinent line is line 10, which contains "Never 0 3", which
+indicates that the bad result flagged by the "exists" clause never
+happens. This line might instead say "Sometimes" to indicate that the
+bad result happened in some but not all executions, or it might say
+"Always" to indicate that the bad result happened in all executions.
+(The herd7 tool doesn't judge, so it is only an LKMM convention that the
+"exists" clause indicates a bad result. To see this, invert the "exists"
+clause's condition and run the test.) The numbers ("0 3") at the end
+of this line indicate the number of end states satisfying the "exists"
+clause (0) and the number not satisfying that clause (3).
+
+Another important part of this output is shown in lines 2-5, repeated here:
+
+ 2 States 3
+ 3 1:r0=0; 1:r1=0;
+ 4 1:r0=0; 1:r1=1;
+ 5 1:r0=1; 1:r1=1;
+
+Line 2 gives the total number of end states, and each of lines 3-5 lists
+one of these states, with the first ("1:r0=0; 1:r1=0;") indicating that
+both of P1()'s loads returned the value "0". As expected, given the
+"Never" on line 10, the state flagged by the "exists" clause is not
+listed. This full list of states can be helpful when debugging a new
+litmus test.
+
+The rest of the output is not normally needed, either due to irrelevance
+or due to being redundant with the lines discussed above. However, the
+following paragraph lists them for the benefit of readers possessed of
+an insatiable curiosity. Other readers should feel free to skip ahead.
+
+Line 1 echoes the test name, along with the "Test" and "Allowed". Line 6's
+"No" says that the "exists" clause was not satisfied by any execution,
+and as such it has the same meaning as line 10's "Never". Line 7 is a
+lead-in to line 8's "Positive: 0 Negative: 3", which lists the number
+of end states satisfying and not satisfying the "exists" clause, just
+like the two numbers at the end of line 10. Line 9 repeats the "exists"
+clause so that you don't have to look it up in the litmus-test file.
+The number at the end of line 11 (which begins with "Time") gives the
+time in seconds required to analyze the litmus test. Small tests such
+as this one complete in a few milliseconds, so "0.00" is quite common.
+Line 12 gives a hash of the contents of the litmus-test file, and is used
+by tooling that manages litmus tests and their output. This tooling is
+used by people modifying LKMM itself, and among other things lets such
+people know which of the several thousand relevant litmus tests were
+affected by a given change to LKMM.
+
+
+Initialization
+--------------
+
+The previous example relied on the default zero initialization for
+"x" and "y", but a similar litmus test could instead initialize them
+to some other value:
+
+ 1 C MP+pooncerelease+poacquireonce
+ 2
+ 3 {
+ 4 x=42;
+ 5 y=42;
+ 6 }
+ 7
+ 8 P0(int *x, int *y)
+ 9 {
+10 WRITE_ONCE(*x, 1);
+11 smp_store_release(y, 1);
+12 }
+13
+14 P1(int *x, int *y)
+15 {
+16 int r0;
+17 int r1;
+18
+19 r0 = smp_load_acquire(y);
+20 r1 = READ_ONCE(*x);
+21 }
+22
+23 exists (1:r0=1 /\ 1:r1=42)
+
+Lines 3-6 now initialize both "x" and "y" to the value 42. This also
+means that the "exists" clause on line 23 must change "1:r1=0" to
+"1:r1=42".
+
+Running the test gives the same overall result as before, but with the
+value 42 appearing in place of the value zero:
+
+ 1 Test MP+pooncerelease+poacquireonce Allowed
+ 2 States 3
+ 3 1:r0=1; 1:r1=1;
+ 4 1:r0=42; 1:r1=1;
+ 5 1:r0=42; 1:r1=42;
+ 6 No
+ 7 Witnesses
+ 8 Positive: 0 Negative: 3
+ 9 Condition exists (1:r0=1 /\ 1:r1=42)
+10 Observation MP+pooncerelease+poacquireonce Never 0 3
+11 Time MP+pooncerelease+poacquireonce 0.02
+12 Hash=ab9a9b7940a75a792266be279a980156
+
+It is tempting to avoid the open-coded repetitions of the value "42"
+by defining another global variable "initval=42" and replacing all
+occurrences of "42" with "initval". This will not, repeat *not*,
+initialize "x" and "y" to 42, but instead to the address of "initval"
+(try it!). See the section below on linked lists to learn more about
+why this approach to initialization can be useful.
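+
+For instance (an illustrative fragment, not a shipped test), the
+initialization section
+
+	{
+	initval=42;
+	x=initval; (* x now holds the address of initval, not 42 *)
+	y=initval;
+	}
+
+leaves "x" and "y" pointing to "initval" rather than containing the
+value 42, which is exactly the linked-list behavior described below.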
+
+
+Control Structures
+------------------
+
+LKMM supports the C-language "if" statement, which allows modeling of
+conditional branches. In LKMM, conditional branches can affect ordering,
+but only if you are *very* careful (compilers are surprisingly able
+to optimize away conditional branches). The following example shows
+the "load buffering" (LB) use case that is used in the Linux kernel to
+synchronize between ring-buffer producers and consumers. In the example
+below, P0() is one side checking to see if an operation may proceed and
+P1() is the other side completing its update.
+
+ 1 C LB+fencembonceonce+ctrlonceonce
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 int r0;
+ 8
+ 9 r0 = READ_ONCE(*x);
+10 if (r0)
+11 WRITE_ONCE(*y, 1);
+12 }
+13
+14 P1(int *x, int *y)
+15 {
+16 int r0;
+17
+18 r0 = READ_ONCE(*y);
+19 smp_mb();
+20 WRITE_ONCE(*x, 1);
+21 }
+22
+23 exists (0:r0=1 /\ 1:r0=1)
+
+P1()'s "if" statement on line 10 works as expected, so that line 11 is
+executed only if line 9 loads a non-zero value from "x". Because P1()'s
+write of "1" to "x" happens only after P1()'s read from "y", one would
+hope that the "exists" clause cannot be satisfied. LKMM agrees:
+
+ 1 Test LB+fencembonceonce+ctrlonceonce Allowed
+ 2 States 2
+ 3 0:r0=0; 1:r0=0;
+ 4 0:r0=1; 1:r0=0;
+ 5 No
+ 6 Witnesses
+ 7 Positive: 0 Negative: 2
+ 8 Condition exists (0:r0=1 /\ 1:r0=1)
+ 9 Observation LB+fencembonceonce+ctrlonceonce Never 0 2
+10 Time LB+fencembonceonce+ctrlonceonce 0.00
+11 Hash=e5260556f6de495fd39b556d1b831c3b
+
+Note that there is no "while" statement, because full state-space
+search has some difficulty with iteration. However, there are tricks
+that may be used to handle some special cases, which are discussed
+below. In addition, loop-unrolling tricks may be applied, albeit
+sparingly.
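+
+For instance (a hypothetical fragment rather than a complete litmus
+test), a two-pass unrolling of a spin loop on "x" might be written as:
+
+	r0 = READ_ONCE(*x);
+	if (r0 == 0)
+		r0 = READ_ONCE(*x);
+
+which keeps the state space bounded at the cost of modeling at most
+two polls of "x".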
+
+
+Tricks and Traps
+================
+
+This section covers extracting debug output from herd7, emulating
+spin loops, handling trivial linked lists, adding comments to litmus tests,
+emulating call_rcu(), and finally tricks to improve herd7 performance
+in order to better handle large litmus tests.
+
+
+Debug Output
+------------
+
+By default, the herd7 state output includes all variables mentioned
+in the "exists" clause. But sometimes debugging efforts are greatly
+aided by the values of other variables. Consider this litmus test
+(tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus but
+slightly modified), which probes an obscure corner of hardware memory
+ordering:
+
+ 1 C SB+rfionceonce-poonceonces
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 int r1;
+ 8 int r2;
+ 9
+10 WRITE_ONCE(*x, 1);
+11 r1 = READ_ONCE(*x);
+12 r2 = READ_ONCE(*y);
+13 }
+14
+15 P1(int *x, int *y)
+16 {
+17 int r3;
+18 int r4;
+19
+20 WRITE_ONCE(*y, 1);
+21 r3 = READ_ONCE(*y);
+22 r4 = READ_ONCE(*x);
+23 }
+24
+25 exists (0:r2=0 /\ 1:r4=0)
+
+The herd7 output is as follows:
+
+ 1 Test SB+rfionceonce-poonceonces Allowed
+ 2 States 4
+ 3 0:r2=0; 1:r4=0;
+ 4 0:r2=0; 1:r4=1;
+ 5 0:r2=1; 1:r4=0;
+ 6 0:r2=1; 1:r4=1;
+ 7 Ok
+ 8 Witnesses
+ 9 Positive: 1 Negative: 3
+10 Condition exists (0:r2=0 /\ 1:r4=0)
+11 Observation SB+rfionceonce-poonceonces Sometimes 1 3
+12 Time SB+rfionceonce-poonceonces 0.01
+13 Hash=c7f30fe0faebb7d565405d55b7318ada
+
+(This output indicates that CPUs are permitted to "snoop their own
+store buffers", which all of Linux's CPU families other than s390 will
+happily do. Such snooping results in disagreement among CPUs on the
+order of stores from different CPUs, which is rarely an issue.)
+
+But the herd7 output shows only the two variables mentioned in the
+"exists" clause. Someone modifying this test might wish to know the
+values of "x", "y", "0:r1", and "0:r3" as well. The "locations"
+statement on line 25 shows how to cause herd7 to display additional
+variables:
+
+ 1 C SB+rfionceonce-poonceonces
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 int r1;
+ 8 int r2;
+ 9
+10 WRITE_ONCE(*x, 1);
+11 r1 = READ_ONCE(*x);
+12 r2 = READ_ONCE(*y);
+13 }
+14
+15 P1(int *x, int *y)
+16 {
+17 int r3;
+18 int r4;
+19
+20 WRITE_ONCE(*y, 1);
+21 r3 = READ_ONCE(*y);
+22 r4 = READ_ONCE(*x);
+23 }
+24
+25 locations [0:r1; 1:r3; x; y]
+26 exists (0:r2=0 /\ 1:r4=0)
+
+The herd7 output then displays the values of all the variables:
+
+ 1 Test SB+rfionceonce-poonceonces Allowed
+ 2 States 4
+ 3 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=0; x=1; y=1;
+ 4 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=1; x=1; y=1;
+ 5 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=0; x=1; y=1;
+ 6 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=1; x=1; y=1;
+ 7 Ok
+ 8 Witnesses
+ 9 Positive: 1 Negative: 3
+10 Condition exists (0:r2=0 /\ 1:r4=0)
+11 Observation SB+rfionceonce-poonceonces Sometimes 1 3
+12 Time SB+rfionceonce-poonceonces 0.01
+13 Hash=40de8418c4b395388f6501cafd1ed38d
+
+What if you would like to know the value of a particular global variable
+at some particular point in a given process's execution? One approach
+is to use a READ_ONCE() to load that global variable into a new local
+variable, then add that local variable to the "locations" clause.
+But be careful: In some litmus tests, adding a READ_ONCE() will change
+the outcome! For one example, please see the C-READ_ONCE.litmus and
+C-READ_ONCE-omitted.litmus tests located here:
+
+ https://github.com/paulmckrcu/litmus/blob/master/manual/kernel/
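+
+For instance (a hypothetical modification of the test above), one could
+declare an extra local variable "r9" in P0(), insert
+
+	r9 = READ_ONCE(*y);
+
+at the point of interest, and add "0:r9" to the "locations" clause in
+order to observe the value that "y" held at that point in P0()'s
+execution, subject to the caveat above that the added load may change
+the test's outcome.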
+
+
+Spin Loops
+----------
+
+The analysis carried out by herd7 explores the full state space, which
+is at best of exponential time complexity. Adding processes and
+increasing the amount of code in a given process can greatly increase
+execution time.
+Potentially infinite loops, such as those used to wait for locks to
+become available, are clearly problematic.
+
+Fortunately, it is possible to avoid state-space explosion by specially
+modeling such loops. For example, the following litmus test emulates
+locking using xchg_acquire(), but instead of enclosing xchg_acquire()
+in a spin loop, it excludes executions that fail to acquire the
+lock using a herd7 "filter" clause. Note that for exclusive locking, you
+are better off using the spin_lock() and spin_unlock() that LKMM directly
+models, if for no other reason than that these are much faster. However, the
+techniques illustrated in this section can be used for other purposes,
+such as emulating reader-writer locking, which LKMM does not yet model.
+
+ 1 C C-SB+l-o-o-u+l-o-o-u-X
+ 2
+ 3 {
+ 4 }
+ 5
+ 6 P0(int *sl, int *x0, int *x1)
+ 7 {
+ 8 int r2;
+ 9 int r1;
+10
+11 r2 = xchg_acquire(sl, 1);
+12 WRITE_ONCE(*x0, 1);
+13 r1 = READ_ONCE(*x1);
+14 smp_store_release(sl, 0);
+15 }
+16
+17 P1(int *sl, int *x0, int *x1)
+18 {
+19 int r2;
+20 int r1;
+21
+22 r2 = xchg_acquire(sl, 1);
+23 WRITE_ONCE(*x1, 1);
+24 r1 = READ_ONCE(*x0);
+25 smp_store_release(sl, 0);
+26 }
+27
+28 filter (0:r2=0 /\ 1:r2=0)
+29 exists (0:r1=0 /\ 1:r1=0)
+
+This litmus test may be found here:
+
+https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd/C-SB+l-o-o-u+l-o-o-u-X.litmus
+
+This test uses two global variables, "x0" and "x1", and also emulates a
+single global spinlock named "sl". This spinlock is held by whichever
+process changes the value of "sl" from "0" to "1", and is released when
+that process sets "sl" back to "0". P0()'s lock acquisition is emulated
+on line 11 using xchg_acquire(), which unconditionally stores the value
+"1" to "sl" and stores either "0" or "1" to "r2", depending on whether
+the lock acquisition was successful or unsuccessful (due to "sl" already
+having the value "1"), respectively. P1() operates in a similar manner.
+
+Rather unconventionally, execution appears to proceed to the critical
+section on lines 12 and 13 in either case. Line 14 then uses an
+smp_store_release() to store zero to "sl", thus emulating lock release.
+
+The case where xchg_acquire() fails to acquire the lock is handled by
+the "filter" clause on line 28, which tells herd7 to keep only those
+executions in which both "0:r2" and "1:r2" are zero, that is to pay
+attention only to those executions in which both locks are actually
+acquired. Thus, the bogus executions that would execute the critical
+sections are discarded and any effects that they might have had are
+ignored. Note well that the "filter" clause keeps those executions
+for which its expression is satisfied, that is, for which the expression
+evaluates to true. In other words, the "filter" clause says what to
+keep, not what to discard.
+
+The result of running this test is as follows:
+
+ 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed
+ 2 States 2
+ 3 0:r1=0; 1:r1=1;
+ 4 0:r1=1; 1:r1=0;
+ 5 No
+ 6 Witnesses
+ 7 Positive: 0 Negative: 2
+ 8 Condition exists (0:r1=0 /\ 1:r1=0)
+ 9 Observation C-SB+l-o-o-u+l-o-o-u-X Never 0 2
+10 Time C-SB+l-o-o-u+l-o-o-u-X 0.03
+
+The "Never" on line 9 indicates that this use of xchg_acquire() and
+smp_store_release() really does correctly emulate locking.
+
+Why doesn't the litmus test take the simpler approach of using a spin loop
+to handle failed spinlock acquisitions, like the kernel does? The key
+insight behind this litmus test is that spin loops have no effect on the
+possible "exists"-clause outcomes of program execution in the absence
+of deadlock. In other words, given a high-quality lock-acquisition
+primitive in a deadlock-free program running on high-quality hardware,
+each lock acquisition will eventually succeed. Because herd7 already
+explores the full state space, the length of time required to actually
+acquire the lock does not matter. After all, herd7 already models all
+possible durations of the xchg_acquire() statements.
+
+Why not just add the "filter" clause to the "exists" clause, thus
+avoiding the "filter" clause entirely? This does work, but is slower.
+The reason that the "filter" clause is faster is that (in the common case)
+herd7 knows to abandon an execution as soon as the "filter" expression
+fails to be satisfied. In contrast, the "exists" clause is evaluated
+only at the end of time, thus requiring herd7 to waste time on bogus
+executions in which both critical sections proceed concurrently. In
+addition, some LKMM users like the separation of concerns provided by
+using both the "filter" and "exists" clauses.
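+
+For instance (an illustrative rewrite of the test above), folding the
+"filter" condition into the "exists" clause would give:
+
+	exists (0:r2=0 /\ 1:r2=0 /\ 0:r1=0 /\ 1:r1=0)
+
+which reaches the same verdict, but forces herd7 to run every
+execution to completion before evaluating the condition.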
+
+Readers lacking a pathological interest in odd corner cases should feel
+free to skip the remainder of this section.
+
+But what if the litmus test were to temporarily set "0:r2" to a non-zero
+value? Wouldn't that cause herd7 to abandon the execution prematurely
+due to an early mismatch of the "filter" clause?
+
+Why not just try it? Line 4 of the following modified litmus test
+introduces a new global variable "x2" that is initialized to "1". Line 23
+of P1() reads that variable into "1:r2" to force an early mismatch with
+the "filter" clause. Line 24 does a known-true "if" condition to avoid
+and static analysis that herd7 might do. Finally the "exists" clause
+on line 32 is updated to a condition that is alway satisfied at the end
+of the test.
+
+ 1 C C-SB+l-o-o-u+l-o-o-u-X
+ 2
+ 3 {
+ 4 x2=1;
+ 5 }
+ 6
+ 7 P0(int *sl, int *x0, int *x1)
+ 8 {
+ 9 int r2;
+10 int r1;
+11
+12 r2 = xchg_acquire(sl, 1);
+13 WRITE_ONCE(*x0, 1);
+14 r1 = READ_ONCE(*x1);
+15 smp_store_release(sl, 0);
+16 }
+17
+18 P1(int *sl, int *x0, int *x1, int *x2)
+19 {
+20 int r2;
+21 int r1;
+22
+23 r2 = READ_ONCE(*x2);
+24 if (r2)
+25 r2 = xchg_acquire(sl, 1);
+26 WRITE_ONCE(*x1, 1);
+27 r1 = READ_ONCE(*x0);
+28 smp_store_release(sl, 0);
+29 }
+30
+31 filter (0:r2=0 /\ 1:r2=0)
+32 exists (x1=1)
+
+If the "filter" clause were to check each variable at each point in the
+execution, running this litmus test would display no executions because
+all executions would be filtered out at line 23. However, the output
+is instead as follows:
+
+ 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed
+ 2 States 1
+ 3 x1=1;
+ 4 Ok
+ 5 Witnesses
+ 6 Positive: 2 Negative: 0
+ 7 Condition exists (x1=1)
+ 8 Observation C-SB+l-o-o-u+l-o-o-u-X Always 2 0
+ 9 Time C-SB+l-o-o-u+l-o-o-u-X 0.04
+10 Hash=080bc508da7f291e122c6de76c0088e3
+
+Line 3 shows that there is one execution that did not get filtered out,
+so the "filter" clause is evaluated only on the last assignment to
+the variables that it checks. In this case, the "filter" clause is a
+conjunction, so it might be evaluated twice, once at the final (and only)
+assignment to "0:r2" and once at the final assignment to "1:r2".
+
+
+Linked Lists
+------------
+
+LKMM can handle linked lists, but only linked lists in which each node
+contains nothing except a pointer to the next node in the list. This is
+of course quite restrictive, but there is nevertheless quite a bit that
+can be done within these confines, as can be seen in the litmus test
+at tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus:
+
+ 1 C MP+onceassign+derefonce
+ 2
+ 3 {
+ 4 y=z;
+ 5 z=0;
+ 6 }
+ 7
+ 8 P0(int *x, int **y)
+ 9 {
+10 WRITE_ONCE(*x, 1);
+11 rcu_assign_pointer(*y, x);
+12 }
+13
+14 P1(int *x, int **y)
+15 {
+16 int *r0;
+17 int r1;
+18
+19 rcu_read_lock();
+20 r0 = rcu_dereference(*y);
+21 r1 = READ_ONCE(*r0);
+22 rcu_read_unlock();
+23 }
+24
+25 exists (1:r0=x /\ 1:r1=0)
+
+Line 4's "y=z" may seem odd, given that "z" has not yet been initialized.
+But "y=z" does not set the value of "y" to that of "z", but instead
+sets the value of "y" to the *address* of "z". Lines 4 and 5 therefore
+create a simple linked list, with "y" pointing to "z" and "z" having a
+NULL pointer. A much longer linked list could be created if desired,
+and circular singly linked lists can also be created and manipulated.
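+
+For instance (an illustrative initialization section, not one of the
+shipped tests), the following:
+
+	{
+	w=x;
+	x=y;
+	y=z;
+	z=0;
+	}
+
+creates a longer chain in which "w" points to "x", "x" points to "y",
+"y" points to "z", and "z" holds a NULL pointer terminating the list.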
+
+The "exists" clause works the same way, with the "1:r0=x" comparing P1()'s
+"r0" not to the value of "x", but again to its address. This term of the
+"exists" clause therefore tests whether line 20's load from "y" saw the
+value stored by line 11, which is in fact what is required in this case.
+
+P0()'s line 10 initializes "x" to the value 1, and then line 11 links to "x"
+from "y", replacing "z".
+
+P1()'s line 20 loads a pointer from "y", and line 21 dereferences that
+pointer. The RCU read-side critical section spanning lines 19-22 is just
+for show in this example. Note that the address used for line 21's load
+depends on (in this case, "is exactly the same as") the value loaded by
+line 20. This is an example of what is called an "address dependency".
+This particular address dependency extends from the load on line 20 to the
+load on line 21. Address dependencies provide a weak form of ordering.
+
+Running this test results in the following:
+
+ 1 Test MP+onceassign+derefonce Allowed
+ 2 States 2
+ 3 1:r0=x; 1:r1=1;
+ 4 1:r0=z; 1:r1=0;
+ 5 No
+ 6 Witnesses
+ 7 Positive: 0 Negative: 2
+ 8 Condition exists (1:r0=x /\ 1:r1=0)
+ 9 Observation MP+onceassign+derefonce Never 0 2
+10 Time MP+onceassign+derefonce 0.00
+11 Hash=49ef7a741563570102448a256a0c8568
+
+The only possible outcomes feature P1() loading a pointer to "z"
+(which contains zero) on the one hand and P1() loading a pointer to "x"
+(which contains the value one) on the other. This should be reassuring
+because it says that RCU readers cannot see the old preinitialization
+values when accessing a newly inserted list node. This undesirable
+scenario is flagged by the "exists" clause, and would occur if P1()
+loaded a pointer to "x", but obtained the pre-initialization value of
+zero after dereferencing that pointer.
+
+
+Comments
+--------
+
+Different portions of a litmus test are processed by different parsers,
+which has the charming effect of requiring different comment syntax in
+different portions of the litmus test. The C-syntax portions use
+C-language comments (either "/* */" or "//"), while the other portions
+use Ocaml comments "(* *)".
+
+The following litmus test illustrates the comment style corresponding
+to each syntactic unit of the test:
+
+ 1 C MP+onceassign+derefonce (* A *)
+ 2
+ 3 (* B *)
+ 4
+ 5 {
+ 6 y=z; (* C *)
+ 7 z=0;
+ 8 } // D
+ 9
+10 // E
+11
+12 P0(int *x, int **y) // F
+13 {
+14 WRITE_ONCE(*x, 1); // G
+15 rcu_assign_pointer(*y, x);
+16 }
+17
+18 // H
+19
+20 P1(int *x, int **y)
+21 {
+22 int *r0;
+23 int r1;
+24
+25 rcu_read_lock();
+26 r0 = rcu_dereference(*y);
+27 r1 = READ_ONCE(*r0);
+28 rcu_read_unlock();
+29 }
+30
+31 // I
+32
+33 exists (* J *) (1:r0=x /\ (* K *) 1:r1=0) (* L *)
+
+In short, use C-language comments in the C code and OCaml comments in
+the rest of the litmus test.
+
+On the other hand, if you prefer C-style comments everywhere, the
+C preprocessor is your friend.
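+
+For example, you might maintain your litmus test with C-style comments
+throughout and strip them before handing the result to herd7. Here is
+a sketch of one such workflow, in which the ".litmus.in" suffix is
+purely a hypothetical naming convention:
+
+	$ cpp -P mytest.litmus.in > mytest.litmus
+	$ herd7 -conf linux-kernel.cfg mytest.litmus
+
+The "-P" option suppresses the linemarkers that cpp would otherwise
+emit into its output.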
+
+
+Asynchronous RCU Grace Periods
+------------------------------
+
+The following litmus test is derived from the example shown in
+Documentation/litmus-tests/rcu/RCU+sync+free.litmus, but converted to
+emulate call_rcu():
+
+ 1 C RCU+sync+free
+ 2
+ 3 {
+ 4 int x = 1;
+ 5 int *y = &x;
+ 6 int z = 1;
+ 7 }
+ 8
+ 9 P0(int *x, int *z, int **y)
+10 {
+11 int *r0;
+12 int r1;
+13
+14 rcu_read_lock();
+15 r0 = rcu_dereference(*y);
+16 r1 = READ_ONCE(*r0);
+17 rcu_read_unlock();
+18 }
+19
+20 P1(int *z, int **y, int *c)
+21 {
+22 rcu_assign_pointer(*y, z);
+23 smp_store_release(*c, 1); // Emulate call_rcu().
+24 }
+25
+26 P2(int *x, int *z, int **y, int *c)
+27 {
+28 int r0;
+29
+30 r0 = smp_load_acquire(*c); // Note call_rcu() request.
+31 synchronize_rcu(); // Wait one grace period.
+32 WRITE_ONCE(*x, 0); // Emulate the RCU callback.
+33 }
+34
+35 filter (2:r0=1) (* Reject too-early starts. *)
+36 exists (0:r0=x /\ 0:r1=0)
+
+Lines 4-6 initialize a linked list headed by "y" that initially contains
+"x". In addition, "z" is pre-initialized to prepare for P1(), which
+will replace "x" with "z" in this list.
+
+P0() on lines 9-18 enters an RCU read-side critical section, loads the
+list header "y" and dereferences it, leaving the node in "0:r0" and
+the node's value in "0:r1".
+
+P1() on lines 20-24 updates the list header to instead reference "z",
+then emulates call_rcu() by doing a release store into "c".
+
+P2() on lines 26-33 emulates the behind-the-scenes effect of doing a
+call_rcu(). Line 30 first does an acquire load from "c", then line 31
+waits for an RCU grace period to elapse, and finally line 32 emulates
+the RCU callback, which in turn emulates a call to kfree().
+
+Of course, it is possible for P2() to start too soon, so that the
+value of "2:r0" is zero rather than the required value of "1".
+The "filter" clause on line 35 handles this possibility, rejecting
+all executions in which "2:r0" is not equal to the value "1".
+
+
+Performance
+-----------
+
+LKMM's exploration of the full state-space can be extremely helpful,
+but it does not come for free. The price is exponential computational
+complexity in terms of the number of processes, the average number
+of statements in each process, and the total number of stores in the
+litmus test.
+
+So it is best to start small and then work up. Where possible, break
+your code down into small pieces each representing a core concurrency
+requirement.
+
+That said, herd7 is quite fast. On an unprepossessing x86 laptop, it
+was able to analyze the following 10-process RCU litmus test in about
+six seconds.
+
+https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R.litmus
+
+One way to make herd7 run faster is to use the "-speedcheck true" option.
+This option prevents herd7 from generating all possible end states,
+instead causing it to focus solely on whether or not the "exists"
+clause can be satisfied. With this option, herd7 evaluates the above
+litmus test in about 300 milliseconds, for more than an order of magnitude
+improvement in performance.
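+
+For example, with "my-test.litmus" standing in for whatever litmus test
+you are analyzing:
+
+	$ herd7 -conf linux-kernel.cfg -speedcheck true my-test.litmus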
+
+A larger 16-process litmus test that would normally consume 15 minutes
+completes in about 40 seconds with this option. To be fair,
+you do get an extra 65,535 states when you leave off the "-speedcheck
+true" option.
+
+https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R.litmus
+
+Nevertheless, litmus-test analysis really is of exponential complexity,
+whether with or without "-speedcheck true". Increasing by just three
+processes to a 19-process litmus test requires 2 hours and 40 minutes
+without, and about 8 minutes with "-speedcheck true". Each of these
+results represents roughly an order of magnitude slowdown compared to the
+16-process litmus test. Again, to be fair, the multi-hour run explores
+no fewer than 524,287 additional states compared to the shorter one.
+
+https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R.litmus
+
+If you don't like command-line arguments, you can obtain a similar speedup
+by adding a "filter" clause with exactly the same expression as your
+"exists" clause.
+
+However, please note that seeing the full set of states can be extremely
+helpful when developing and debugging litmus tests.
+
+
+LIMITATIONS
+===========
+
+Limitations of the Linux-kernel memory model (LKMM) include:
+
+1. Compiler optimizations are not accurately modeled. Of course,
+ the use of READ_ONCE() and WRITE_ONCE() limits the compiler's
+ ability to optimize, but under some circumstances it is possible
+ for the compiler to undermine the memory model. For more
+ information, see Documentation/explanation.txt (in particular,
+ the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING"
+ sections).
+
+ Note that this limitation in turn limits LKMM's ability to
+ accurately model address, control, and data dependencies.
+ For example, if the compiler can deduce the value of some variable
+ carrying a dependency, then the compiler can break that dependency
+ by substituting a constant of that value.
+
+2. Multiple access sizes for a single variable are not supported,
+ and neither are misaligned or partially overlapping accesses.
+
+3. Exceptions and interrupts are not modeled. In some cases,
+ this limitation can be overcome by modeling the interrupt or
+ exception with an additional process.
+
+4. I/O such as MMIO or DMA is not supported.
+
+5. Self-modifying code (such as that found in the kernel's
+ alternatives mechanism, function tracer, Berkeley Packet Filter
+ JIT compiler, and module loader) is not supported.
+
+6. Complete modeling of all variants of atomic read-modify-write
+ operations, locking primitives, and RCU is not provided.
+ For example, call_rcu() and rcu_barrier() are not supported.
+ However, a substantial amount of support is provided for these
+ operations, as shown in the linux-kernel.def file.
+
+ Here are specific limitations:
+
+ a. When rcu_assign_pointer() is passed NULL, the Linux
+ kernel provides no ordering, but LKMM models this
+ case as a store release.
+
+	b.	The "unless" RMW operations are not currently modeled:
+		atomic_long_add_unless(), atomic_inc_unless_negative(),
+		and atomic_dec_unless_positive(). These can be emulated
+		in litmus tests, for example, by using atomic_cmpxchg(),
+		as shown in the sketch following this list.
+
+		One exception to this limitation is atomic_add_unless(),
+		which is modeled directly by herd7 (and so has no
+		corresponding definition in linux-kernel.def). Because
+		herd7 models it directly, atomic_add_unless() can be
+		used in litmus tests.
+
+ c. The call_rcu() function is not modeled. As was shown above,
+ it can be emulated in litmus tests by adding another
+ process that invokes synchronize_rcu() and the body of the
+ callback function, with (for example) a release-acquire
+ from the site of the emulated call_rcu() to the beginning
+ of the additional process.
+
+	d.	The rcu_barrier() function is not modeled. It can be
+		emulated in litmus tests that emulate call_rcu() via
+		(for example) a release-acquire from the end of each
+		additional call_rcu() process to the site of the
+		emulated rcu_barrier().
+
+ e. Although sleepable RCU (SRCU) is now modeled, there
+ are some subtle differences between its semantics and
+ those in the Linux kernel. For example, the kernel
+ might interpret the following sequence as two partially
+ overlapping SRCU read-side critical sections:
+
+ 1 r1 = srcu_read_lock(&my_srcu);
+ 2 do_something_1();
+ 3 r2 = srcu_read_lock(&my_srcu);
+ 4 do_something_2();
+ 5 srcu_read_unlock(&my_srcu, r1);
+ 6 do_something_3();
+ 7 srcu_read_unlock(&my_srcu, r2);
+
+ In contrast, LKMM will interpret this as a nested pair of
+ SRCU read-side critical sections, with the outer critical
+ section spanning lines 1-7 and the inner critical section
+ spanning lines 3-5.
+
+ This difference would be more of a concern had anyone
+ identified a reasonable use case for partially overlapping
+ SRCU read-side critical sections. For more information
+ on the trickiness of such overlapping, please see:
+ https://paulmck.livejournal.com/40593.html
+
+ f. Reader-writer locking is not modeled. It can be
+ emulated in litmus tests using atomic read-modify-write
+ operations.
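+
+As promised in item 6b above, here is a sketch showing how an "unless"
+operation might be emulated using atomic_cmpxchg(). Because herd7 does
+not support loops, this sketch emulates a single attempt at
+atomic_inc_unless_negative() rather than the kernel's retry loop:
+
+	P0(atomic_t *v)
+	{
+		int r0;
+		int r1;
+
+		r0 = atomic_read(v);
+		if (r0 >= 0)
+			r1 = atomic_cmpxchg(v, r0, r0 + 1);
+	}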
+
+The fragment of the C language supported by these litmus tests is quite
+limited and in some ways non-standard:
+
+1. There is no automatic C-preprocessor pass. You can of course
+ run it manually, if you choose.
+
+2. There is no way to create functions other than the Pn() functions
+ that model the concurrent processes.
+
+3. The Pn() functions' formal parameters must be pointers to the
+ global shared variables. Nothing can be passed by value into
+ these functions.
+
+4. The only functions that can be invoked are those built directly
+ into herd7 or that are defined in the linux-kernel.def file.
+
+5. The "switch", "do", "for", "while", and "goto" C statements are
+ not supported. The "switch" statement can be emulated by the
+ "if" statement. The "do", "for", and "while" statements can
+ often be emulated by manually unrolling the loop, or perhaps by
+ enlisting the aid of the C preprocessor to minimize the resulting
+ code duplication. Some uses of "goto" can be emulated by "if",
+ and some others by unrolling.
+
+6. Although you can use a wide variety of types in litmus-test
+ variable declarations, and especially in global-variable
+ declarations, the "herd7" tool understands only int and
+ pointer types. There is no support for floating-point types,
+ enumerations, characters, strings, arrays, or structures.
+
+7. Parsing of variable declarations is very loose, with almost no
+ type checking.
+
+8. Initializers differ from their C-language counterparts.
+ For example, when an initializer contains the name of a shared
+ variable, that name denotes a pointer to that variable, not
+	the current value of that variable. Thus, "int x = y"
+ is interpreted the way "int x = &y" would be in C.
+
+9. Dynamic memory allocation is not supported, although this can
+ be worked around in some cases by supplying multiple statically
+ allocated variables.
+
+Some of these limitations may be overcome in the future, but others are
+more likely to be addressed by incorporating the Linux-kernel memory model
+into other tools.
+
+Finally, please note that LKMM is subject to change as hardware, use cases,
+and compilers evolve.
diff --git a/tools/memory-model/Documentation/recipes.txt b/tools/memory-model/Documentation/recipes.txt
index 7fe8d7aa3029..03f58b11c252 100644
--- a/tools/memory-model/Documentation/recipes.txt
+++ b/tools/memory-model/Documentation/recipes.txt
@@ -1,7 +1,7 @@
This document provides "recipes", that is, litmus tests for commonly
occurring situations, as well as a few that illustrate subtly broken but
attractive nuisances. Many of these recipes include example code from
-v4.13 of the Linux kernel.
+v5.7 of the Linux kernel.
The first section covers simple special cases, the second section
takes off the training wheels to cover more involved examples,
@@ -126,7 +126,7 @@ However, it is not necessarily the case that accesses ordered by
locking will be seen as ordered by CPUs not holding that lock.
Consider this example:
- /* See Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus. */
+ /* See Z6.0+pooncelock+pooncelock+pombonce.litmus. */
void CPU0(void)
{
spin_lock(&mylock);
@@ -278,7 +278,7 @@ is present if the value loaded determines the address of a later access
first place (control dependency). Note that the term "data dependency"
is sometimes casually used to cover both address and data dependencies.
-In lib/prime_numbers.c, the expand_to_next_prime() function invokes
+In lib/math/prime_numbers.c, the expand_to_next_prime() function invokes
rcu_assign_pointer(), and the next_prime_number() function invokes
rcu_dereference(). This combination mediates access to a bit vector
that is expanded as additional primes are needed.
diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt
index b177f3e4a614..c5fdfd19df24 100644
--- a/tools/memory-model/Documentation/references.txt
+++ b/tools/memory-model/Documentation/references.txt
@@ -73,6 +73,18 @@ o Christopher Pulte, Shaked Flur, Will Deacon, Jon French,
Linux-kernel memory model
=========================
+o Jade Alglave, Will Deacon, Boqun Feng, David Howells, Daniel
+ Lustig, Luc Maranget, Paul E. McKenney, Andrea Parri, Nicholas
+ Piggin, Alan Stern, Akira Yokosawa, and Peter Zijlstra.
+ 2019. "Calibrating your fear of big bad optimizing compilers"
+ Linux Weekly News. https://lwn.net/Articles/799218/
+
+o Jade Alglave, Will Deacon, Boqun Feng, David Howells, Daniel
+ Lustig, Luc Maranget, Paul E. McKenney, Andrea Parri, Nicholas
+ Piggin, Alan Stern, Akira Yokosawa, and Peter Zijlstra.
+ 2019. "Who's afraid of a big bad optimizing compiler?"
+ Linux Weekly News. https://lwn.net/Articles/793253/
+
o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
Alan Stern. 2018. "Frightening small children and disconcerting
grown-ups: Concurrency in the Linux kernel". In Proceedings of
@@ -88,6 +100,11 @@ o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
Alan Stern. 2017. "A formal kernel memory-ordering model (part 2)"
Linux Weekly News. https://lwn.net/Articles/720550/
+o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
+ Alan Stern. 2017-2019. "A Formal Model of Linux-Kernel Memory
+ Ordering" (backup material for the LWN articles)
+ https://mirrors.edge.kernel.org/pub/linux/kernel/people/paulmck/LWNLinuxMM/
+
Memory-model tooling
====================
@@ -103,12 +120,12 @@ o Jade Alglave, Luc Maranget, and Michael Tautschnig. 2014. "Herding
o Jade Alglave, Patrick Cousot, and Luc Maranget. 2016. "Syntax and
semantics of the weak consistency model specification language
- cat". CoRR abs/1608.07531 (2016). http://arxiv.org/abs/1608.07531
+ cat". CoRR abs/1608.07531 (2016). https://arxiv.org/abs/1608.07531
Memory-model comparisons
========================
o Paul E. McKenney, Ulrich Weigand, Andrea Parri, and Boqun
- Feng. 2016. "Linux-Kernel Memory Model". (6 June 2016).
- http://open-std.org/JTC1/SC22/WG21/docs/papers/2016/p0124r2.html.
+ Feng. 2018. "Linux-Kernel Memory Model". (27 September 2018).
+ http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0124r6.html.
diff --git a/tools/memory-model/Documentation/simple.txt b/tools/memory-model/Documentation/simple.txt
new file mode 100644
index 000000000000..81e1a0ec5342
--- /dev/null
+++ b/tools/memory-model/Documentation/simple.txt
@@ -0,0 +1,271 @@
+This document provides options for those wishing to keep their
+memory-ordering lives simple, as is necessary for those whose domain
+is complex. After all, there are bugs other than memory-ordering bugs,
+and the time spent gaining memory-ordering knowledge is not available
+for gaining domain knowledge. Furthermore, the Linux-kernel memory model
+(LKMM) is quite complex, with subtle differences in code often having
+dramatic effects on correctness.
+
+The options near the beginning of this list are quite simple. The idea
+is not that kernel hackers don't already know about them, but rather
+that they might need the occasional reminder.
+
+Please note that this is a generic guide, and that specific subsystems
+will often have special requirements or idioms. For example, developers
+of MMIO-based device drivers will often need to use mb(), rmb(), and
+wmb(), and therefore might find smp_mb(), smp_rmb(), and smp_wmb()
+to be more natural than smp_load_acquire() and smp_store_release().
+On the other hand, those coming in from other environments will likely
+be more familiar with these last two.
+
+
+Single-threaded code
+====================
+
+In single-threaded code, there is no reordering, at least assuming
+that your toolchain and hardware are working correctly. In addition,
+it is generally a mistake to assume that your code will run only in a
+single-threaded context, as the kernel can enter the same code path on
+multiple CPUs at the same time. One important exception is a function
+that makes no external data references.
+
+In the general case, you will need to take explicit steps to ensure that
+your code really is executed within a single thread that does not access
+shared variables. A simple way to achieve this is to define a global lock
+that you acquire at the beginning of your code and release at the end,
+taking care to ensure that all references to your code's shared data are
+also carried out under that same lock. Because only one thread can hold
+this lock at a given time, your code will be executed single-threaded.
+This approach is called "code locking".
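+
+Here is a minimal sketch of code locking, in which the global mutex and
+the do_something() function that carries out all accesses to the code's
+shared data are both hypothetical:
+
+	static DEFINE_MUTEX(my_global_lock);
+
+	void my_code_locked_operation(void)
+	{
+		mutex_lock(&my_global_lock);
+		do_something();	/* All shared-data accesses go here. */
+		mutex_unlock(&my_global_lock);
+	}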
+
+Code locking can severely limit both performance and scalability, so it
+should be used with caution, and only on code paths that execute rarely.
+After all, a huge amount of effort was required to remove the Linux
+kernel's old "Big Kernel Lock", so let's please be very careful about
+adding new "little kernel locks".
+
+One of the advantages of locking is that, in happy contrast with the
+year 1981, almost all kernel developers are very familiar with locking.
+The Linux kernel's lockdep (CONFIG_PROVE_LOCKING=y) is very helpful with
+the formerly feared deadlock scenarios.
+
+Please use the standard locking primitives provided by the kernel rather
+than rolling your own. For one thing, the standard primitives interact
+properly with lockdep. For another thing, these primitives have been
+tuned to deal better with high contention. And for one final thing, it is
+surprisingly hard to correctly code production-quality lock acquisition
+and release functions. After all, even simple non-production-quality
+locking functions must carefully prevent both the CPU and the compiler
+from moving code in either direction across the locking function.
+
+Despite the scalability limitations of single-threaded code, RCU
+takes this approach for much of its grace-period processing and also
+for early-boot operation. The reason RCU is able to scale despite
+single-threaded grace-period processing is use of batching, where all
+updates that accumulated during one grace period are handled by the
+next one. In other words, slowing down grace-period processing makes
+it more efficient. Nor is RCU unique: Similar batching optimizations
+are used in many I/O operations.
+
+
+Packaged code
+=============
+
+Even if performance and scalability concerns prevent your code from
+being completely single-threaded, it is often possible to use library
+functions that handle the concurrency nearly or entirely on their own.
+This approach delegates any LKMM worries to the library maintainer.
+
+In the kernel, what is the "library"? Quite a bit. It includes the
+contents of the lib/ directory and much of the include/linux/ directory,
+along with a lot of other heavily used APIs. Particularly heavily used
+examples include the list macros (for example, include/linux/{,rcu}list.h),
+workqueues, smp_call_function(), and the various hash tables and search
+trees.
+
+
+Data locking
+============
+
+With code locking, we use single-threaded code execution to guarantee
+serialized access to the data that the code is accessing. However,
+we can also achieve this by instead associating the lock with specific
+instances of the data structures. This creates a "critical section"
+in the code execution that will execute as though it is single-threaded.
+By placing all the accesses and modifications to a shared data structure
+inside a critical section, we ensure that the execution context that
+holds the lock has exclusive access to the shared data.
+
+The poster boy for this approach is the hash table, where placing a lock
+in each hash bucket allows operations on different buckets to proceed
+concurrently. This works because the buckets do not overlap with each
+other, so that an operation on one bucket does not interfere with any
+other bucket.
+
+As the number of buckets increases, data locking scales naturally.
+In particular, if the amount of data increases with the number of CPUs,
+increasing the number of buckets as the number of CPUs increases results
+in a naturally scalable data structure.
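+
+Here is a minimal sketch of data locking with one lock per hash bucket,
+in which the structure layout and all names are hypothetical:
+
+	struct bucket {
+		spinlock_t lock;
+		struct hlist_head head;
+	};
+	static struct bucket buckets[NR_BUCKETS];
+
+	void bucket_add(struct bucket *b, struct hlist_node *node)
+	{
+		spin_lock(&b->lock);
+		hlist_add_head(node, &b->head);	/* Bucket-local update. */
+		spin_unlock(&b->lock);
+	}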
+
+
+Per-CPU processing
+==================
+
+Partitioning processing and data over CPUs allows each CPU to take
+a single-threaded approach while providing excellent performance and
+scalability. Of course, there is no free lunch: The dark side of this
+excellence is substantially increased memory footprint.
+
+In addition, it is sometimes necessary to update some global
+view of this processing and data, in which case something like locking
+must be used to protect this global view. This is the approach taken
+by the percpu_counter infrastructure. In many cases, there are already
+generic/library variants of commonly used per-CPU constructs available.
+Please use them rather than rolling your own.
+
+RCU uses DEFINE_PER_CPU*() declarations to create a number of per-CPU
+data sets. For example, each CPU does private quiescent-state processing
+within its instance of the per-CPU rcu_data structure, and then uses data
+locking to report quiescent states up the grace-period combining tree.
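+
+For example, a per-CPU event counter might be declared and updated as
+follows, with "my_events" being a hypothetical counter:
+
+	static DEFINE_PER_CPU(unsigned long, my_events);
+
+	void my_record_event(void)
+	{
+		this_cpu_inc(my_events);	/* Safe against preemption. */
+	}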
+
+
+Packaged primitives: Sequence locking
+=====================================
+
+Lockless programming is considered by many to be more difficult than
+lock-based programming, but there are a few lockless design patterns that
+have been built into an API. One such API is sequence locking.
+Although this API can be used in extremely complex ways, there are simple
+and effective ways of using it that avoid the need to pay attention to
+memory ordering.
+
+The basic keep-things-simple rule for sequence locking is "do not write
+in read-side code". Yes, you can do writes from within sequence-locking
+readers, but it won't be so simple. For example, such writes will be
+lockless and should be idempotent.
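+
+To illustrate, a write-free sequence-lock reader follows the usual
+retry pattern, sketched here assuming a seqlock_t named "my_seqlock"
+protecting the variables "shared_a" and "shared_b":
+
+	unsigned int seq;
+	int a, b;
+
+	do {
+		seq = read_seqbegin(&my_seqlock);
+		a = shared_a;	/* Reads only, no writes. */
+		b = shared_b;
+	} while (read_seqretry(&my_seqlock, seq));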
+
+For more sophisticated use cases, LKMM can guide you, including use
+cases involving combining sequence locking with other synchronization
+primitives. (LKMM does not yet know about sequence locking, so it is
+currently necessary to open-code it in your litmus tests.)
+
+Additional information may be found in include/linux/seqlock.h.
+
+Packaged primitives: RCU
+========================
+
+Another lockless design pattern that has been baked into an API
+is RCU. The Linux kernel makes sophisticated use of RCU, but the
+keep-things-simple rules for RCU are "do not write in read-side code",
+"do not update anything that is visible to and accessed by readers",
+and "protect updates with locking".
+
+These rules are illustrated by the functions foo_update_a() and
+foo_get_a() shown in Documentation/RCU/whatisRCU.rst. Additional
+RCU usage patterns may be found in Documentation/RCU and in the
+source code.
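+
+To make these rules concrete, here is a sketch of a reader and an
+updater in the style of those functions, with "gp" a hypothetical
+RCU-protected pointer and "gp_lock" the lock protecting updates:
+
+	/* Reader: no writes, no updates visible to other readers. */
+	rcu_read_lock();
+	p = rcu_dereference(gp);
+	if (p)
+		r = p->a;
+	rcu_read_unlock();
+
+	/* Updater: protected by gp_lock. */
+	spin_lock(&gp_lock);
+	old = rcu_dereference_protected(gp, lockdep_is_held(&gp_lock));
+	rcu_assign_pointer(gp, new);
+	spin_unlock(&gp_lock);
+	synchronize_rcu();	/* Wait for pre-existing readers to finish. */
+	kfree(old);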
+
+
+Packaged primitives: Atomic operations
+======================================
+
+Back in the day, the Linux kernel had three types of atomic operations:
+
+1. Initialization and read-out, such as atomic_set() and atomic_read().
+
+2. Operations that did not return a value and provided no ordering,
+ such as atomic_inc() and atomic_dec().
+
+3. Operations that returned a value and provided full ordering, such as
+ atomic_add_return() and atomic_dec_and_test(). Note that some
+ value-returning operations provide full ordering only conditionally.
+ For example, cmpxchg() provides ordering only upon success.
+
+More recent kernels have operations that return a value but do not
+provide full ordering. These are flagged with either a _relaxed()
+suffix (providing no ordering), or an _acquire() or _release() suffix
+(providing limited ordering).
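+
+For example, given an atomic_t named "v", the value returned and the
+ordering provided both vary with the operation chosen (a sketch):
+
+	atomic_inc(&v);				/* No value, no ordering. */
+	r1 = atomic_inc_return(&v);		/* Value, full ordering. */
+	r2 = atomic_inc_return_relaxed(&v);	/* Value, no ordering. */
+	r3 = atomic_fetch_add_acquire(1, &v);	/* Value, acquire ordering. */
+	r4 = atomic_fetch_add_release(1, &v);	/* Value, release ordering. */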
+
+Additional information may be found in these files:
+
+Documentation/atomic_t.txt
+Documentation/atomic_bitops.txt
+Documentation/core-api/atomic_ops.rst
+Documentation/core-api/refcount-vs-atomic.rst
+
+Reading code using these primitives is often also quite helpful.
+
+
+Lockless, fully ordered
+=======================
+
+When using locking, there often comes a time when it is necessary
+to access some variable or another without holding the data lock
+that serializes access to that variable.
+
+If you want to keep things simple, use the initialization and read-out
+operations from the previous section only when there are no racing
+accesses. Otherwise, use only fully ordered operations when accessing
+or modifying the variable. This approach guarantees that code prior
+to a given access to that variable will be seen by all CPUs as having
+happened before any code following any later access to that same variable.
+
+Please note that per-CPU functions are not atomic operations and
+hence they do not provide any ordering guarantees at all.
+
+If the lockless accesses are frequently executed reads that are used
+only for heuristics, or if they are frequently executed writes that
+are used only for statistics, please see the next section.
+
+
+Lockless statistics and heuristics
+==================================
+
+Unordered primitives such as atomic_read(), atomic_set(), READ_ONCE(), and
+WRITE_ONCE() can safely be used in some cases. These primitives provide
+no ordering, but they do prevent the compiler from carrying out a number
+of destructive optimizations (for which please see the next section).
+One example use for these primitives is statistics, such as per-CPU
+counters exemplified by the rt_cache_stat structure's routing-cache
+statistics counters. Another example use case is heuristics, such as
+the jiffies_till_first_fqs and jiffies_till_next_fqs kernel parameters
+controlling how often RCU scans for idle CPUs.
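+
+For example, a hypothetical statistics counter that is updated by many
+CPUs but read only to produce approximate reports might be handled as
+follows. Note that concurrent updates can be lost, which is tolerable
+for statistics:
+
+	/* Updater: no ordering, but no load or store tearing either. */
+	WRITE_ONCE(stats->count, READ_ONCE(stats->count) + 1);
+
+	/* Reader: an approximate snapshot suffices. */
+	total += READ_ONCE(stats->count);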
+
+But be careful. "Unordered" really does mean "unordered". It is all
+too easy to assume ordering, and this assumption must be avoided when
+using these primitives.
+
+
+Don't let the compiler trip you up
+==================================
+
+It can be quite tempting to use plain C-language accesses for lockless
+loads from and stores to shared variables. Although this is both
+possible and quite common in the Linux kernel, it does require a
+surprising amount of analysis, care, and knowledge about the compiler.
+Yes, some decades ago it was not unfair to consider a C compiler to be
+an assembler with added syntax and better portability, but the advent of
+sophisticated optimizing compilers means that those days are long gone.
+Today's optimizing compilers can profoundly rewrite your code during the
+translation process, and have long been ready, willing, and able to do so.
+
+Therefore, if you really need to use C-language assignments instead of
+READ_ONCE(), WRITE_ONCE(), and so on, you will need to have a very good
+understanding of both the C standard and your compiler. Here are some
+introductory references and some tooling to start you on this noble quest:
+
+Who's afraid of a big bad optimizing compiler?
+ https://lwn.net/Articles/793253/
+Calibrating your fear of big bad optimizing compilers
+ https://lwn.net/Articles/799218/
+Concurrency bugs should fear the big bad data-race detector (part 1)
+ https://lwn.net/Articles/816850/
+Concurrency bugs should fear the big bad data-race detector (part 2)
+ https://lwn.net/Articles/816854/
+
+
+More complex use cases
+======================
+
+If the alternatives above do not do what you need, please look at the
+recipes-pairs.txt file to peel off the next layer of the memory-ordering
+onion.
diff --git a/tools/memory-model/README b/tools/memory-model/README
index fc07b52f2028..c8144d4aafa0 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -28,8 +28,34 @@ downloaded separately:
See "herdtools7/INSTALL.md" for installation instructions.
Note that although these tools usually provide backwards compatibility,
-this is not absolutely guaranteed. Therefore, if a later version does
-not work, please try using the exact version called out above.
+this is not absolutely guaranteed.
+
+For example, a future version of herd7 might not work with the model
+in this release. A compatible model will likely be made available in
+a later release of Linux kernel.
+
+If you absolutely need to run the model in this particular release,
+please try using the exact version called out above.
+
+klitmus7 is independent of the model provided here. It has its own
+dependency on a target kernel release where converted code is built
+and executed. Any change in kernel APIs essential to klitmus7 will
+necessitate an upgrade of klitmus7.
+
+If you find any compatibility issues in klitmus7, please inform the
+memory model maintainers.
+
+klitmus7 Compatibility Table
+----------------------------
+
+   ============  ==========
+   target Linux  herdtools7
+   ------------  ----------
+        -- 4.18  7.48 --
+   4.15 -- 4.19  7.49 --
+   4.20 -- 5.5   7.54 --
+   5.6  --       7.56 --
+   ============  ==========
==================
@@ -37,10 +63,32 @@ BASIC USAGE: HERD7
==================
The memory model is used, in conjunction with "herd7", to exhaustively
-explore the state space of small litmus tests.
+explore the state space of small litmus tests. Documentation describing
+the format, features, capabilities, and limitations of these litmus
+tests is available in tools/memory-model/Documentation/litmus-tests.txt.
+
+Example litmus tests may be found in the Linux-kernel source tree:
+
+ tools/memory-model/litmus-tests/
+ Documentation/litmus-tests/
+
+Several thousand more example litmus tests are available here:
-For example, to run SB+fencembonceonces.litmus against the memory model:
+ https://github.com/paulmckrcu/litmus
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus
+Documentation describing litmus tests and how to use them may be found
+here:
+
+ tools/memory-model/Documentation/litmus-tests.txt
+
+The remainder of this section uses the SB+fencembonceonces.litmus test
+located in the tools/memory-model directory.
+
+To run SB+fencembonceonces.litmus against the memory model:
+
+ $ cd $LINUX_SOURCE_TREE/tools/memory-model
$ herd7 -conf linux-kernel.cfg litmus-tests/SB+fencembonceonces.litmus
Here is the corresponding output:
@@ -61,7 +109,11 @@ Here is the corresponding output:
The "Positive: 0 Negative: 3" and the "Never 0 3" each indicate that
this litmus test's "exists" clause can not be satisfied.
-See "herd7 -help" or "herdtools7/doc/" for more information.
+See "herd7 -help" or "herdtools7/doc/" for more information on running the
+tool itself, but please be aware that this documentation is intended for
+people who work on the memory model itself, that is, people making changes
+to the tools/memory-model/linux-kernel.* files. It is not intended for
+people focusing on writing, understanding, and running LKMM litmus tests.
=====================
@@ -98,7 +150,11 @@ that during two million trials, the state specified in this litmus
test's "exists" clause was not reached.
And, as with "herd7", please see "klitmus7 -help" or "herdtools7/doc/"
-for more information.
+for more information. And again, please be aware that this documentation
+is intended for people who work on the memory model itself, that is,
+people making changes to the tools/memory-model/linux-kernel.* files.
+It is not intended for people focusing on writing, understanding, and
+running LKMM litmus tests.
====================
@@ -111,12 +167,21 @@ Documentation/cheatsheet.txt
Documentation/explanation.txt
Describes the memory model in detail.
+Documentation/litmus-tests.txt
+ Describes the format, features, capabilities, and limitations
+ of the litmus tests that LKMM can evaluate.
+
Documentation/recipes.txt
Lists common memory-ordering patterns.
Documentation/references.txt
Provides background reading.
+Documentation/simple.txt
+ Starting point for someone new to Linux-kernel concurrency.
+ And also for those needing a reminder of the simpler approaches
+ to concurrency!
+
linux-kernel.bell
Categorizes the relevant instructions, including memory
references, memory barriers, atomic read-modify-write operations,
@@ -161,112 +226,3 @@ README
This file.
scripts Various scripts, see scripts/README.
-
-
-===========
-LIMITATIONS
-===========
-
-The Linux-kernel memory model (LKMM) has the following limitations:
-
-1. Compiler optimizations are not accurately modeled. Of course,
- the use of READ_ONCE() and WRITE_ONCE() limits the compiler's
- ability to optimize, but under some circumstances it is possible
- for the compiler to undermine the memory model. For more
- information, see Documentation/explanation.txt (in particular,
- the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING"
- sections).
-
- Note that this limitation in turn limits LKMM's ability to
- accurately model address, control, and data dependencies.
- For example, if the compiler can deduce the value of some variable
- carrying a dependency, then the compiler can break that dependency
- by substituting a constant of that value.
-
-2. Multiple access sizes for a single variable are not supported,
- and neither are misaligned or partially overlapping accesses.
-
-3. Exceptions and interrupts are not modeled. In some cases,
- this limitation can be overcome by modeling the interrupt or
- exception with an additional process.
-
-4. I/O such as MMIO or DMA is not supported.
-
-5. Self-modifying code (such as that found in the kernel's
- alternatives mechanism, function tracer, Berkeley Packet Filter
- JIT compiler, and module loader) is not supported.
-
-6. Complete modeling of all variants of atomic read-modify-write
- operations, locking primitives, and RCU is not provided.
- For example, call_rcu() and rcu_barrier() are not supported.
- However, a substantial amount of support is provided for these
- operations, as shown in the linux-kernel.def file.
-
- a. When rcu_assign_pointer() is passed NULL, the Linux
- kernel provides no ordering, but LKMM models this
- case as a store release.
-
- b. The "unless" RMW operations are not currently modeled:
- atomic_long_add_unless(), atomic_add_unless(),
- atomic_inc_unless_negative(), and
- atomic_dec_unless_positive(). These can be emulated
- in litmus tests, for example, by using atomic_cmpxchg().
-
- c. The call_rcu() function is not modeled. It can be
- emulated in litmus tests by adding another process that
- invokes synchronize_rcu() and the body of the callback
- function, with (for example) a release-acquire from
- the site of the emulated call_rcu() to the beginning
- of the additional process.
-
- d. The rcu_barrier() function is not modeled. It can be
- emulated in litmus tests emulating call_rcu() via
- (for example) a release-acquire from the end of each
- additional call_rcu() process to the site of the
- emulated rcu-barrier().
-
- e. Although sleepable RCU (SRCU) is now modeled, there
- are some subtle differences between its semantics and
- those in the Linux kernel. For example, the kernel
- might interpret the following sequence as two partially
- overlapping SRCU read-side critical sections:
-
- 1 r1 = srcu_read_lock(&my_srcu);
- 2 do_something_1();
- 3 r2 = srcu_read_lock(&my_srcu);
- 4 do_something_2();
- 5 srcu_read_unlock(&my_srcu, r1);
- 6 do_something_3();
- 7 srcu_read_unlock(&my_srcu, r2);
-
- In contrast, LKMM will interpret this as a nested pair of
- SRCU read-side critical sections, with the outer critical
- section spanning lines 1-7 and the inner critical section
- spanning lines 3-5.
-
- This difference would be more of a concern had anyone
- identified a reasonable use case for partially overlapping
- SRCU read-side critical sections. For more information,
- please see: https://paulmck.livejournal.com/40593.html
-
- f. Reader-writer locking is not modeled. It can be
- emulated in litmus tests using atomic read-modify-write
- operations.
-
-The "herd7" tool has some additional limitations of its own, apart from
-the memory model:
-
-1. Non-trivial data structures such as arrays or structures are
- not supported. However, pointers are supported, allowing trivial
- linked lists to be constructed.
-
-2. Dynamic memory allocation is not supported, although this can
- be worked around in some cases by supplying multiple statically
- allocated variables.
-
-Some of these limitations may be overcome in the future, but others are
-more likely to be addressed by incorporating the Linux-kernel memory model
-into other tools.
-
-Finally, please note that LKMM is subject to change as hardware, use cases,
-and compilers evolve.
diff --git a/tools/nfsd/inject_fault.sh b/tools/nfsd/inject_fault.sh
deleted file mode 100755
index 10ceee64a09a..000000000000
--- a/tools/nfsd/inject_fault.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
-#
-# Script for easier NFSD fault injection
-
-# Check that debugfs has been mounted
-DEBUGFS=`cat /proc/mounts | grep debugfs`
-if [ "$DEBUGFS" == "" ]; then
- echo "debugfs does not appear to be mounted!"
- echo "Please mount debugfs and try again"
- exit 1
-fi
-
-# Check that the fault injection directory exists
-DEBUGDIR=`echo $DEBUGFS | awk '{print $2}'`/nfsd
-if [ ! -d "$DEBUGDIR" ]; then
- echo "$DEBUGDIR does not exist"
- echo "Check that your .config selects CONFIG_NFSD_FAULT_INJECTION"
- exit 1
-fi
-
-function help()
-{
- echo "Usage $0 injection_type [count]"
- echo ""
- echo "Injection types are:"
- ls $DEBUGDIR
- exit 1
-}
-
-if [ $# == 0 ]; then
- help
-elif [ ! -f $DEBUGDIR/$1 ]; then
- help
-elif [ $# != 2 ]; then
- COUNT=0
-else
- COUNT=$2
-fi
-
-BEFORE=`mktemp`
-AFTER=`mktemp`
-dmesg > $BEFORE
-echo $COUNT > $DEBUGDIR/$1
-dmesg > $AFTER
-# Capture lines that only exist in the $AFTER file
-diff $BEFORE $AFTER | grep ">"
-rm -f $BEFORE $AFTER
diff --git a/tools/objtool/Build b/tools/objtool/Build
index 66f44f5cd2a6..b7222d5cc7bc 100644
--- a/tools/objtool/Build
+++ b/tools/objtool/Build
@@ -1,11 +1,16 @@
objtool-y += arch/$(SRCARCH)/
+
+objtool-y += weak.o
+
+objtool-$(SUBCMD_CHECK) += check.o
+objtool-$(SUBCMD_CHECK) += special.o
+objtool-$(SUBCMD_ORC) += check.o
+objtool-$(SUBCMD_ORC) += orc_gen.o
+objtool-$(SUBCMD_ORC) += orc_dump.o
+
objtool-y += builtin-check.o
objtool-y += builtin-orc.o
-objtool-y += check.o
-objtool-y += orc_gen.o
-objtool-y += orc_dump.o
objtool-y += elf.o
-objtool-y += special.o
objtool-y += objtool.o
objtool-y += libstring.o
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index de094670050b..0542e46c7552 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -289,6 +289,47 @@ they mean, and suggestions for how to fix them.
might be corrupt due to a gcc bug. For more details, see:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
+9. file.o: warning: objtool: funcA() call to funcB() with UACCESS enabled
+
+ This means that an unexpected call to a non-whitelisted function exists
+ outside of arch-specific guards.
+ X86: SMAP (stac/clac): __uaccess_begin()/__uaccess_end()
+ ARM: PAN: uaccess_enable()/uaccess_disable()
+
+ These functions should be called to denote a minimal critical section around
+ access to __user variables. See also: https://lwn.net/Articles/517475/
+
+   The intention of the warning is to prevent calls to funcB() from eventually
+   calling schedule(), potentially leaking the AC flags state and failing
+   to restore it correctly.
+
+ It also helps verify that there are no unexpected calls to funcB() which may
+ access user space pages with protections against doing so disabled.
+
+ To fix, either:
+ 1) remove explicit calls to funcB() from funcA().
+ 2) add the correct guards before and after calls to low level functions like
+ __get_user_size()/__put_user_size().
+   3) add funcB to the uaccess_safe_builtin whitelist in tools/objtool/check.c, if
+ funcB obviously does not call schedule(), and is marked notrace (since
+ function tracing inserts additional calls, which is not obvious from the
+ sources).
+
+10. file.o: warning: func()+0x5c: alternative modifies stack
+
+ This means that an alternative includes instructions that modify the
+   stack. The problem is that there is only one ORC unwind table, which means
+ that the ORC unwind entries must be valid for each of the alternatives.
+ The easiest way to enforce this is to ensure alternatives do not contain
+ any ORC entries, which in turn implies the above constraint.
+
+11. file.o: warning: unannotated intra-function call
+
+   This warning means that a direct call is made to a destination which
+   is not at the beginning of a function. If this is a legitimate call, you
+   can suppress this warning by placing the ANNOTATE_INTRA_FUNCTION_CALL
+ directive right before the call.
+
If the error doesn't seem to make sense, it could be a bug in objtool.
Feel free to ask the objtool maintainer for help.
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index f591c4d1b6fe..4ea9a833dde7 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -35,8 +35,9 @@ all: $(OBJTOOL)
INCLUDES := -I$(srctree)/tools/include \
-I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
- -I$(srctree)/tools/arch/$(SRCARCH)/include
-WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed
+ -I$(srctree)/tools/arch/$(SRCARCH)/include \
+ -I$(srctree)/tools/objtool/arch/$(SRCARCH)/include
+WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
LDFLAGS += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
@@ -45,14 +46,28 @@ elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(CC) $(CFLAGS) -x c -E -
CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
AWK = awk
+
+SUBCMD_CHECK := n
+SUBCMD_ORC := n
+
+ifeq ($(SRCARCH),x86)
+ SUBCMD_CHECK := y
+ SUBCMD_ORC := y
+endif
+
+ifeq ($(SUBCMD_ORC),y)
+ CFLAGS += -DINSN_USE_ORC
+endif
+
+export SUBCMD_CHECK SUBCMD_ORC
export srctree OUTPUT CFLAGS SRCARCH AWK
include $(srctree)/tools/build/Makefile.include
$(OBJTOOL_IN): fixdep FORCE
+ @$(CONFIG_SHELL) ./sync-check.sh
@$(MAKE) $(build)=objtool
$(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
- @$(CONFIG_SHELL) ./sync-check.sh
$(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
index ced3765c4f44..4a84c3081b8e 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -8,9 +8,13 @@
#include <stdbool.h>
#include <linux/list.h>
-#include "elf.h"
+#include "objtool.h"
#include "cfi.h"
+#ifdef INSN_USE_ORC
+#include <asm/orc_types.h>
+#endif
+
enum insn_type {
INSN_JUMP_CONDITIONAL,
INSN_JUMP_UNCONDITIONAL,
@@ -20,7 +24,6 @@ enum insn_type {
INSN_CALL_DYNAMIC,
INSN_RETURN,
INSN_CONTEXT_SWITCH,
- INSN_STACK,
INSN_BUG,
INSN_NOP,
INSN_STAC,
@@ -64,15 +67,27 @@ struct op_src {
struct stack_op {
struct op_dest dest;
struct op_src src;
+ struct list_head list;
};
-void arch_initial_func_cfi_state(struct cfi_state *state);
+struct instruction;
+
+void arch_initial_func_cfi_state(struct cfi_init_state *state);
-int arch_decode_instruction(struct elf *elf, struct section *sec,
+int arch_decode_instruction(const struct elf *elf, const struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, enum insn_type *type,
- unsigned long *immediate, struct stack_op *op);
+ unsigned long *immediate,
+ struct list_head *ops_list);
bool arch_callee_saved_reg(unsigned char reg);
+unsigned long arch_jump_destination(struct instruction *insn);
+
+unsigned long arch_dest_reloc_offset(int addend);
+
+const char *arch_nop_insn(int len);
+
+int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
+
#endif /* _ARCH_H */
diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build
index 7c5004008e97..9f7869b5c5e0 100644
--- a/tools/objtool/arch/x86/Build
+++ b/tools/objtool/arch/x86/Build
@@ -1,3 +1,4 @@
+objtool-y += special.o
objtool-y += decode.o
inat_tables_script = ../arch/x86/tools/gen-insn-attr-x86.awk
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index a62e032863a8..cde9c36e40ae 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -11,9 +11,11 @@
#include "../../../arch/x86/lib/inat.c"
#include "../../../arch/x86/lib/insn.c"
+#include "../../check.h"
#include "../../elf.h"
#include "../../arch.h"
#include "../../warn.h"
+#include <asm/orc_types.h>
static unsigned char op_to_cfi_reg[][2] = {
{CFI_AX, CFI_R8},
@@ -26,7 +28,7 @@ static unsigned char op_to_cfi_reg[][2] = {
{CFI_DI, CFI_R15},
};
-static int is_x86_64(struct elf *elf)
+static int is_x86_64(const struct elf *elf)
{
switch (elf->ehdr.e_machine) {
case EM_X86_64:
@@ -66,16 +68,34 @@ bool arch_callee_saved_reg(unsigned char reg)
}
}
-int arch_decode_instruction(struct elf *elf, struct section *sec,
+unsigned long arch_dest_reloc_offset(int addend)
+{
+ return addend + 4;
+}
+
+unsigned long arch_jump_destination(struct instruction *insn)
+{
+ return insn->offset + insn->len + insn->immediate;
+}
+
+#define ADD_OP(op) \
+ if (!(op = calloc(1, sizeof(*op)))) \
+ return -1; \
+ else for (list_add_tail(&op->list, ops_list); op; op = NULL)
+
+int arch_decode_instruction(const struct elf *elf, const struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, enum insn_type *type,
- unsigned long *immediate, struct stack_op *op)
+ unsigned long *immediate,
+ struct list_head *ops_list)
{
struct insn insn;
int x86_64, sign;
unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
modrm_reg = 0, sib = 0;
+ struct stack_op *op = NULL;
+ struct symbol *sym;
x86_64 = is_x86_64(elf);
if (x86_64 == -1)
@@ -85,7 +105,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
insn_get_length(&insn);
if (!insn_complete(&insn)) {
- WARN_FUNC("can't decode instruction", sec, offset);
+ WARN("can't decode instruction at %s:0x%lx", sec->name, offset);
return -1;
}
@@ -123,40 +143,44 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
/* add/sub reg, %rsp */
- *type = INSN_STACK;
- op->src.type = OP_SRC_ADD;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
}
break;
case 0x50 ... 0x57:
/* push reg */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
- op->dest.type = OP_DEST_PUSH;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+ op->dest.type = OP_DEST_PUSH;
+ }
break;
case 0x58 ... 0x5f:
/* pop reg */
- *type = INSN_STACK;
- op->src.type = OP_SRC_POP;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+ ADD_OP(op) {
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+ }
break;
case 0x68:
case 0x6a:
/* push immediate */
- *type = INSN_STACK;
- op->src.type = OP_SRC_CONST;
- op->dest.type = OP_DEST_PUSH;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ }
break;
case 0x70 ... 0x7f:
@@ -170,12 +194,13 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
if (modrm == 0xe4) {
/* and imm, %rsp */
- *type = INSN_STACK;
- op->src.type = OP_SRC_AND;
- op->src.reg = CFI_SP;
- op->src.offset = insn.immediate.value;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_AND;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.immediate.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
break;
}
@@ -187,34 +212,37 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
break;
/* add/sub imm, %rsp */
- *type = INSN_STACK;
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_SP;
- op->src.offset = insn.immediate.value * sign;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.immediate.value * sign;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
break;
case 0x89:
if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
/* mov %rsp, reg */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG;
- op->src.reg = CFI_SP;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = CFI_SP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ }
break;
}
if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
/* mov reg, %rsp */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
break;
}
@@ -224,22 +252,24 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
(modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) {
/* mov reg, disp(%rbp) */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
- op->dest.type = OP_DEST_REG_INDIRECT;
- op->dest.reg = CFI_BP;
- op->dest.offset = insn.displacement.value;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG_INDIRECT;
+ op->dest.reg = CFI_BP;
+ op->dest.offset = insn.displacement.value;
+ }
} else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
/* mov reg, disp(%rsp) */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
- op->dest.type = OP_DEST_REG_INDIRECT;
- op->dest.reg = CFI_SP;
- op->dest.offset = insn.displacement.value;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG_INDIRECT;
+ op->dest.reg = CFI_SP;
+ op->dest.offset = insn.displacement.value;
+ }
}
break;
@@ -248,23 +278,25 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) {
/* mov disp(%rbp), reg */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG_INDIRECT;
- op->src.reg = CFI_BP;
- op->src.offset = insn.displacement.value;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG_INDIRECT;
+ op->src.reg = CFI_BP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ }
} else if (rex_w && !rex_b && sib == 0x24 &&
modrm_mod != 3 && modrm_rm == 4) {
/* mov disp(%rsp), reg */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG_INDIRECT;
- op->src.reg = CFI_SP;
- op->src.offset = insn.displacement.value;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG_INDIRECT;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ }
}
break;
@@ -272,28 +304,30 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
case 0x8d:
if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
- *type = INSN_STACK;
- if (!insn.displacement.value) {
- /* lea (%rsp), reg */
- op->src.type = OP_SRC_REG;
- } else {
- /* lea disp(%rsp), reg */
- op->src.type = OP_SRC_ADD;
- op->src.offset = insn.displacement.value;
+ ADD_OP(op) {
+ if (!insn.displacement.value) {
+ /* lea (%rsp), reg */
+ op->src.type = OP_SRC_REG;
+ } else {
+ /* lea disp(%rsp), reg */
+ op->src.type = OP_SRC_ADD;
+ op->src.offset = insn.displacement.value;
+ }
+ op->src.reg = CFI_SP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
}
- op->src.reg = CFI_SP;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
} else if (rex == 0x48 && modrm == 0x65) {
/* lea disp(%rbp), %rsp */
- *type = INSN_STACK;
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_BP;
- op->src.offset = insn.displacement.value;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_BP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
} else if (rex == 0x49 && modrm == 0x62 &&
insn.displacement.value == -8) {
@@ -304,12 +338,13 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
* Restoring rsp back to its original value after a
* stack realignment.
*/
- *type = INSN_STACK;
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_R10;
- op->src.offset = -8;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_R10;
+ op->src.offset = -8;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
} else if (rex == 0x49 && modrm == 0x65 &&
insn.displacement.value == -16) {
@@ -320,21 +355,23 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
* Restoring rsp back to its original value after a
* stack realignment.
*/
- *type = INSN_STACK;
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_R13;
- op->src.offset = -16;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_R13;
+ op->src.offset = -16;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
}
break;
case 0x8f:
/* pop to mem */
- *type = INSN_STACK;
- op->src.type = OP_SRC_POP;
- op->dest.type = OP_DEST_MEM;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_MEM;
+ }
break;
case 0x90:
@@ -343,16 +380,18 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
case 0x9c:
/* pushf */
- *type = INSN_STACK;
- op->src.type = OP_SRC_CONST;
- op->dest.type = OP_DEST_PUSHF;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSHF;
+ }
break;
case 0x9d:
/* popf */
- *type = INSN_STACK;
- op->src.type = OP_SRC_POPF;
- op->dest.type = OP_DEST_MEM;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_POPF;
+ op->dest.type = OP_DEST_MEM;
+ }
break;
case 0x0f:
@@ -387,16 +426,18 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
} else if (op2 == 0xa0 || op2 == 0xa8) {
/* push fs/gs */
- *type = INSN_STACK;
- op->src.type = OP_SRC_CONST;
- op->dest.type = OP_DEST_PUSH;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ }
} else if (op2 == 0xa1 || op2 == 0xa9) {
/* pop fs/gs */
- *type = INSN_STACK;
- op->src.type = OP_SRC_POP;
- op->dest.type = OP_DEST_MEM;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_MEM;
+ }
}
break;
@@ -409,8 +450,8 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
* mov bp, sp
* pop bp
*/
- *type = INSN_STACK;
- op->dest.type = OP_DEST_LEAVE;
+ ADD_OP(op)
+ op->dest.type = OP_DEST_LEAVE;
break;
@@ -429,14 +470,41 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
*type = INSN_RETURN;
break;
+ case 0xcf: /* iret */
+ /*
+ * Handle sync_core(), which has an IRET to self.
+ * All other IRET are in STT_NONE entry code.
+ */
+ sym = find_symbol_containing(sec, offset);
+ if (sym && sym->type == STT_FUNC) {
+ ADD_OP(op) {
+ /* add $40, %rsp */
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_SP;
+ op->src.offset = 5*8;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
+ break;
+ }
+
+ /* fallthrough */
+
case 0xca: /* retf */
case 0xcb: /* retf */
- case 0xcf: /* iret */
*type = INSN_CONTEXT_SWITCH;
break;
case 0xe8:
*type = INSN_CALL;
+ /*
+ * For the impact on the stack, a CALL behaves like
+ * a PUSH of an immediate value (the return address).
+ */
+ ADD_OP(op) {
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ }
break;
case 0xfc:
@@ -464,9 +532,10 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
else if (modrm_reg == 6) {
/* push from mem */
- *type = INSN_STACK;
- op->src.type = OP_SRC_CONST;
- op->dest.type = OP_DEST_PUSH;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ }
}
break;
@@ -480,7 +549,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
return 0;
}
-void arch_initial_func_cfi_state(struct cfi_state *state)
+void arch_initial_func_cfi_state(struct cfi_init_state *state)
{
int i;
@@ -497,3 +566,57 @@ void arch_initial_func_cfi_state(struct cfi_state *state)
state->regs[16].base = CFI_CFA;
state->regs[16].offset = -8;
}
+
+const char *arch_nop_insn(int len)
+{
+ static const char nops[5][5] = {
+ /* 1 */ { 0x90 },
+ /* 2 */ { 0x66, 0x90 },
+ /* 3 */ { 0x0f, 0x1f, 0x00 },
+ /* 4 */ { 0x0f, 0x1f, 0x40, 0x00 },
+ /* 5 */ { 0x0f, 0x1f, 0x44, 0x00, 0x00 },
+ };
+
+ if (len < 1 || len > 5) {
+ WARN("invalid NOP size: %d\n", len);
+ return NULL;
+ }
+
+ return nops[len-1];
+}
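
These are the standard 1- to 5-byte x86 NOP encodings (0x90, 0x66 0x90, and the 0x0f 0x1f long-NOP forms). Later in this patch, add_call_destinations() pairs arch_nop_insn() with elf_write_insn() to erase KCOV calls from noinstr text. A hypothetical helper showing that call pattern (nop_out_insn() is not part of the patch):

	/* Hypothetical: overwrite one decoded instruction with a NOP. */
	static int nop_out_insn(struct objtool_file *file, struct instruction *insn)
	{
		const char *nop = arch_nop_insn(insn->len);

		if (!nop)		/* only 1..5 byte NOPs are available */
			return -1;

		elf_write_insn(file->elf, insn->sec, insn->offset, insn->len, nop);
		insn->type = INSN_NOP;
		return 0;
	}
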
+
+int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
+{
+ struct cfi_reg *cfa = &insn->cfi.cfa;
+
+ switch (sp_reg) {
+ case ORC_REG_UNDEFINED:
+ cfa->base = CFI_UNDEFINED;
+ break;
+ case ORC_REG_SP:
+ cfa->base = CFI_SP;
+ break;
+ case ORC_REG_BP:
+ cfa->base = CFI_BP;
+ break;
+ case ORC_REG_SP_INDIRECT:
+ cfa->base = CFI_SP_INDIRECT;
+ break;
+ case ORC_REG_R10:
+ cfa->base = CFI_R10;
+ break;
+ case ORC_REG_R13:
+ cfa->base = CFI_R13;
+ break;
+ case ORC_REG_DI:
+ cfa->base = CFI_DI;
+ break;
+ case ORC_REG_DX:
+ cfa->base = CFI_DX;
+ break;
+ default:
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/tools/objtool/arch/x86/include/arch_elf.h b/tools/objtool/arch/x86/include/arch_elf.h
new file mode 100644
index 000000000000..69cc4264b28a
--- /dev/null
+++ b/tools/objtool/arch/x86/include/arch_elf.h
@@ -0,0 +1,6 @@
+#ifndef _OBJTOOL_ARCH_ELF
+#define _OBJTOOL_ARCH_ELF
+
+#define R_NONE R_X86_64_NONE
+
+#endif /* _OBJTOOL_ARCH_ELF */
diff --git a/tools/objtool/arch/x86/include/arch_special.h b/tools/objtool/arch/x86/include/arch_special.h
new file mode 100644
index 000000000000..d818b2bffa02
--- /dev/null
+++ b/tools/objtool/arch/x86/include/arch_special.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _X86_ARCH_SPECIAL_H
+#define _X86_ARCH_SPECIAL_H
+
+#define EX_ENTRY_SIZE 12
+#define EX_ORIG_OFFSET 0
+#define EX_NEW_OFFSET 4
+
+#define JUMP_ENTRY_SIZE 16
+#define JUMP_ORIG_OFFSET 0
+#define JUMP_NEW_OFFSET 4
+
+#define ALT_ENTRY_SIZE 13
+#define ALT_ORIG_OFFSET 0
+#define ALT_NEW_OFFSET 4
+#define ALT_FEATURE_OFFSET 8
+#define ALT_ORIG_LEN_OFFSET 10
+#define ALT_NEW_LEN_OFFSET 11
+
+#endif /* _X86_ARCH_SPECIAL_H */
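
These constants hard-code the layout of the kernel's exception-table, jump-label, and alternatives entries so the generic special-section code can stay arch-neutral. For reference, the 13-byte alternative entry these offsets describe corresponds to a structure of roughly this shape (an assumption based on x86's struct alt_instr at the time of this patch; the authoritative definition lives in arch/x86/include/asm/alternative.h):

	struct alt_instr {
		s32 instr_offset;	/* ALT_ORIG_OFFSET     == 0  */
		s32 repl_offset;	/* ALT_NEW_OFFSET      == 4  */
		u16 cpuid;		/* ALT_FEATURE_OFFSET  == 8  */
		u8  instrlen;		/* ALT_ORIG_LEN_OFFSET == 10 */
		u8  replacementlen;	/* ALT_NEW_LEN_OFFSET  == 11 */
		u8  padlen;		/* brings ALT_ENTRY_SIZE to 13 */
	} __packed;
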
diff --git a/tools/objtool/arch/x86/include/cfi_regs.h b/tools/objtool/arch/x86/include/cfi_regs.h
new file mode 100644
index 000000000000..79bc517efba8
--- /dev/null
+++ b/tools/objtool/arch/x86/include/cfi_regs.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _OBJTOOL_CFI_REGS_H
+#define _OBJTOOL_CFI_REGS_H
+
+#define CFI_AX 0
+#define CFI_DX 1
+#define CFI_CX 2
+#define CFI_BX 3
+#define CFI_SI 4
+#define CFI_DI 5
+#define CFI_BP 6
+#define CFI_SP 7
+#define CFI_R8 8
+#define CFI_R9 9
+#define CFI_R10 10
+#define CFI_R11 11
+#define CFI_R12 12
+#define CFI_R13 13
+#define CFI_R14 14
+#define CFI_R15 15
+#define CFI_RA 16
+#define CFI_NUM_REGS 17
+
+#endif /* _OBJTOOL_CFI_REGS_H */
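
This numbering is not arbitrary: it matches the x86-64 System V DWARF register ordering (rax=0, rdx=1, rcx=2, rbx=3, rsi=4, rdi=5, rbp=6, rsp=7, r8-r15=8-15, return address=16), which lets CFI register indices map directly onto the unwind metadata objtool emits.
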
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
new file mode 100644
index 000000000000..fd4af88c0ea5
--- /dev/null
+++ b/tools/objtool/arch/x86/special.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <string.h>
+
+#include "../../special.h"
+#include "../../builtin.h"
+
+#define X86_FEATURE_POPCNT (4 * 32 + 23)
+#define X86_FEATURE_SMAP (9 * 32 + 20)
+
+void arch_handle_alternative(unsigned short feature, struct special_alt *alt)
+{
+ switch (feature) {
+ case X86_FEATURE_SMAP:
+ /*
+	 * If UACCESS validation is enabled, force that alternative;
+ * otherwise force it the other way.
+ *
+ * What we want to avoid is having both the original and the
+ * alternative code flow at the same time, in that case we can
+ * find paths that see the STAC but take the NOP instead of
+ * CLAC and the other way around.
+ */
+ if (uaccess)
+ alt->skip_orig = true;
+ else
+ alt->skip_alt = true;
+ break;
+ case X86_FEATURE_POPCNT:
+ /*
+ * It has been requested that we don't validate the !POPCNT
+		 * feature path, which is a "very very small percentage of
+ * machines".
+ */
+ alt->skip_orig = true;
+ break;
+ default:
+ break;
+ }
+}
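
For a concrete picture of the sites this targets: SMAP protection is emitted as an alternative that is empty on non-SMAP hardware, roughly (a sketch; the real definitions in arch/x86/include/asm/smap.h use byte encodings and stringify helpers):

	#define ASM_STAC ALTERNATIVE("", "stac", X86_FEATURE_SMAP)
	#define ASM_CLAC ALTERNATIVE("", "clac", X86_FEATURE_SMAP)

With --uaccess, objtool validates only the STAC/CLAC flavor of every such site; without it, only the NOP flavor. Either way it never mixes the two, which is exactly the STAC-without-CLAC confusion the comment above describes.
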
+
+bool arch_support_alt_relocation(struct special_alt *special_alt,
+ struct instruction *insn,
+ struct reloc *reloc)
+{
+ /*
+ * The x86 alternatives code adjusts the offsets only when it
+ * encounters a branch instruction at the very beginning of the
+ * replacement group.
+ */
+ return insn->offset == special_alt->new_off &&
+ (insn->type == INSN_CALL || is_static_jump(insn));
+}
+
+/*
+ * There are 3 basic jump table patterns:
+ *
+ * 1. jmpq *[rodata addr](,%reg,8)
+ *
+ * This is the most common case by far. It jumps to an address in a simple
+ * jump table which is stored in .rodata.
+ *
+ * 2. jmpq *[rodata addr](%rip)
+ *
+ * This is caused by a rare GCC quirk, currently only seen in three driver
+ * functions in the kernel, only with certain obscure non-distro configs.
+ *
+ * As part of an optimization, GCC makes a copy of an existing switch jump
+ * table, modifies it, and then hard-codes the jump (albeit with an indirect
+ * jump) to use a single entry in the table. The rest of the jump table and
+ * some of its jump targets remain as dead code.
+ *
+ * In such a case we can just crudely ignore all unreachable instruction
+ * warnings for the entire object file. Ideally we would just ignore them
+ * for the function, but that would require redesigning the code quite a
+ * bit. And honestly that's just not worth doing: unreachable instruction
+ * warnings are of questionable value anyway, and this is such a rare issue.
+ *
+ * 3. mov [rodata addr],%reg1
+ * ... some instructions ...
+ * jmpq *(%reg1,%reg2,8)
+ *
+ * This is a fairly uncommon pattern which is new for GCC 6. As of this
+ * writing, there are 11 occurrences of it in the allmodconfig kernel.
+ *
+ * As of GCC 7 there are quite a few more of these and the 'in between' code
+ * is significant. Especially with KASAN enabled, some of the code between the
+ * mov
+ * and jmpq uses .rodata itself, which can confuse things.
+ *
+ * TODO: Once we have DWARF CFI and smarter instruction decoding logic,
+ * ensure the same register is used in the mov and jump instructions.
+ *
+ * NOTE: RETPOLINE made it harder still to decode dynamic jumps.
+ */
+struct reloc *arch_find_switch_table(struct objtool_file *file,
+ struct instruction *insn)
+{
+ struct reloc *text_reloc, *rodata_reloc;
+ struct section *table_sec;
+ unsigned long table_offset;
+
+ /* look for a relocation which references .rodata */
+ text_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+ insn->offset, insn->len);
+ if (!text_reloc || text_reloc->sym->type != STT_SECTION ||
+ !text_reloc->sym->sec->rodata)
+ return NULL;
+
+ table_offset = text_reloc->addend;
+ table_sec = text_reloc->sym->sec;
+
+ if (text_reloc->type == R_X86_64_PC32)
+ table_offset += 4;
+
+ /*
+ * Make sure the .rodata address isn't associated with a
+ * symbol. GCC jump tables are anonymous data.
+ *
+ * Also support C jump tables which are in the same format as
+ * switch jump tables. For objtool to recognize them, they
+ * need to be placed in the C_JUMP_TABLE_SECTION section. They
+ * have symbols associated with them.
+ */
+ if (find_symbol_containing(table_sec, table_offset) &&
+ strcmp(table_sec->name, C_JUMP_TABLE_SECTION))
+ return NULL;
+
+ /*
+	 * Each table entry has a reloc associated with it. The reloc
+ * should reference text in the same function as the original
+ * instruction.
+ */
+ rodata_reloc = find_reloc_by_dest(file->elf, table_sec, table_offset);
+ if (!rodata_reloc)
+ return NULL;
+
+ /*
+ * Use of RIP-relative switch jumps is quite rare, and
+ * indicates a rare GCC quirk/bug which can leave dead
+ * code behind.
+ */
+ if (text_reloc->type == R_X86_64_PC32)
+ file->ignore_unreachables = true;
+
+ return rodata_reloc;
+}
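
Pattern 1 is what a dense C switch usually becomes once GCC decides a table is cheaper than a compare chain, e.g. (a sketch; whether a table is actually emitted depends on optimization level and case density):

	/* Likely compiles to: jmpq *<.rodata table>(,%reg,8) */
	int classify(int x)
	{
		switch (x) {
		case 0: return 7;
		case 1: return 42;
		case 2: return -3;
		case 3: return 19;
		case 4: return 28;
		default: return 0;
		}
	}
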
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 10fbe75ab43d..c6d199bfd0ae 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -14,10 +14,11 @@
*/
#include <subcmd/parse-options.h>
+#include <string.h>
#include "builtin.h"
-#include "check.h"
+#include "objtool.h"
-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats;
+bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
static const char * const check_usage[] = {
"objtool check [<options>] file.o",
@@ -32,12 +33,16 @@ const struct option check_options[] = {
OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
OPT_BOOLEAN('s', "stats", &stats, "print statistics"),
+ OPT_BOOLEAN('d', "duplicate", &validate_dup, "duplicate validation for vmlinux.o"),
+ OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
OPT_END(),
};
int cmd_check(int argc, const char **argv)
{
- const char *objname;
+ const char *objname, *s;
+ struct objtool_file *file;
+ int ret;
argc = parse_options(argc, argv, check_options, check_usage, 0);
@@ -46,5 +51,20 @@ int cmd_check(int argc, const char **argv)
objname = argv[0];
- return check(objname, false);
+ s = strstr(objname, "vmlinux.o");
+ if (s && !s[9])
+ vmlinux = true;
+
+ file = objtool_open_read(objname);
+ if (!file)
+ return 1;
+
+ ret = check(file);
+ if (ret)
+ return ret;
+
+ if (file->elf->changed)
+ return elf_write(file->elf);
+
+ return 0;
}
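
Note the new auto-detection: if the first occurrence of "vmlinux.o" in the object name sits at the very end of the string, cmd_check() sets the vmlinux flag itself, so plain `objtool check vmlinux.o` behaves as if -l/--vmlinux had been passed. The explicit option remains useful for oddly named objects.
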
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 5f7cc6157edd..7b31121fa60b 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -14,8 +14,7 @@
#include <string.h>
#include "builtin.h"
-#include "check.h"
-
+#include "objtool.h"
static const char *orc_usage[] = {
"objtool orc generate [<options>] file.o",
@@ -32,13 +31,38 @@ int cmd_orc(int argc, const char **argv)
usage_with_options(orc_usage, check_options);
if (!strncmp(argv[0], "gen", 3)) {
+ struct objtool_file *file;
+ int ret;
+
argc = parse_options(argc, argv, check_options, orc_usage, 0);
if (argc != 1)
usage_with_options(orc_usage, check_options);
objname = argv[0];
- return check(objname, true);
+ file = objtool_open_read(objname);
+ if (!file)
+ return 1;
+
+ ret = check(file);
+ if (ret)
+ return ret;
+
+ if (list_empty(&file->insn_list))
+ return 0;
+
+ ret = create_orc(file);
+ if (ret)
+ return ret;
+
+ ret = create_orc_sections(file);
+ if (ret)
+ return ret;
+
+ if (!file->elf->changed)
+ return 0;
+
+ return elf_write(file->elf);
}
if (!strcmp(argv[0], "dump")) {
diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
index 0b907902ee79..85c979caa367 100644
--- a/tools/objtool/builtin.h
+++ b/tools/objtool/builtin.h
@@ -8,7 +8,7 @@
#include <subcmd/parse-options.h>
extern const struct option check_options[];
-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats;
+extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
extern int cmd_check(int argc, const char **argv);
extern int cmd_orc(int argc, const char **argv);
diff --git a/tools/objtool/cfi.h b/tools/objtool/cfi.h
index 4427bf8ed686..c7c59c6a44ee 100644
--- a/tools/objtool/cfi.h
+++ b/tools/objtool/cfi.h
@@ -6,38 +6,33 @@
#ifndef _OBJTOOL_CFI_H
#define _OBJTOOL_CFI_H
+#include "cfi_regs.h"
+
#define CFI_UNDEFINED -1
#define CFI_CFA -2
#define CFI_SP_INDIRECT -3
#define CFI_BP_INDIRECT -4
-#define CFI_AX 0
-#define CFI_DX 1
-#define CFI_CX 2
-#define CFI_BX 3
-#define CFI_SI 4
-#define CFI_DI 5
-#define CFI_BP 6
-#define CFI_SP 7
-#define CFI_R8 8
-#define CFI_R9 9
-#define CFI_R10 10
-#define CFI_R11 11
-#define CFI_R12 12
-#define CFI_R13 13
-#define CFI_R14 14
-#define CFI_R15 15
-#define CFI_RA 16
-#define CFI_NUM_REGS 17
-
struct cfi_reg {
int base;
int offset;
};
-struct cfi_state {
+struct cfi_init_state {
+ struct cfi_reg regs[CFI_NUM_REGS];
struct cfi_reg cfa;
+};
+
+struct cfi_state {
struct cfi_reg regs[CFI_NUM_REGS];
+ struct cfi_reg vals[CFI_NUM_REGS];
+ struct cfi_reg cfa;
+ int stack_size;
+ int drap_reg, drap_offset;
+ unsigned char type;
+ bool bp_scratch;
+ bool drap;
+ bool end;
};
#endif /* _OBJTOOL_CFI_H */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 3c6da70e6084..c6ab44543c92 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -7,36 +7,37 @@
#include <stdlib.h>
#include "builtin.h"
+#include "cfi.h"
+#include "arch.h"
#include "check.h"
-#include "elf.h"
#include "special.h"
-#include "arch.h"
#include "warn.h"
+#include "arch_elf.h"
+#include <linux/objtool.h>
#include <linux/hashtable.h>
#include <linux/kernel.h>
+#include <linux/static_call_types.h>
#define FAKE_JUMP_OFFSET -1
-#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
-
struct alternative {
struct list_head list;
struct instruction *insn;
bool skip_orig;
};
-const char *objname;
-struct cfi_state initial_func_cfi;
+struct cfi_init_state initial_func_cfi;
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset)
{
struct instruction *insn;
- hash_for_each_possible(file->insn_hash, insn, hash, offset)
+ hash_for_each_possible(file->insn_hash, insn, hash, sec_offset_hash(sec, offset)) {
if (insn->sec == sec && insn->offset == offset)
return insn;
+ }
return NULL;
}
@@ -108,12 +109,6 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
for (insn = next_insn_same_sec(file, insn); insn; \
insn = next_insn_same_sec(file, insn))
-static bool is_static_jump(struct instruction *insn)
-{
- return insn->type == INSN_JUMP_CONDITIONAL ||
- insn->type == INSN_JUMP_UNCONDITIONAL;
-}
-
static bool is_sibling_call(struct instruction *insn)
{
/* An indirect jump is either a sibling call or a jump to a table. */
@@ -226,18 +221,31 @@ static bool dead_end_function(struct objtool_file *file, struct symbol *func)
return __dead_end_function(file, func, 0);
}
-static void clear_insn_state(struct insn_state *state)
+static void init_cfi_state(struct cfi_state *cfi)
{
int i;
- memset(state, 0, sizeof(*state));
- state->cfa.base = CFI_UNDEFINED;
for (i = 0; i < CFI_NUM_REGS; i++) {
- state->regs[i].base = CFI_UNDEFINED;
- state->vals[i].base = CFI_UNDEFINED;
+ cfi->regs[i].base = CFI_UNDEFINED;
+ cfi->vals[i].base = CFI_UNDEFINED;
}
- state->drap_reg = CFI_UNDEFINED;
- state->drap_offset = -1;
+ cfi->cfa.base = CFI_UNDEFINED;
+ cfi->drap_reg = CFI_UNDEFINED;
+ cfi->drap_offset = -1;
+}
+
+static void init_insn_state(struct insn_state *state, struct section *sec)
+{
+ memset(state, 0, sizeof(*state));
+ init_cfi_state(&state->cfi);
+
+ /*
+	 * We need the full vmlinux for noinstr validation; otherwise we
+	 * cannot correctly determine insn->call_dest->sec (external symbols
+	 * do not have a section).
+ */
+ if (vmlinux && sec)
+ state->noinstr = sec->noinstr;
}
/*
@@ -263,6 +271,10 @@ static int decode_instructions(struct objtool_file *file)
strncmp(sec->name, ".discard.", 9))
sec->text = true;
+ if (!strcmp(sec->name, ".noinstr.text") ||
+ !strcmp(sec->name, ".entry.text"))
+ sec->noinstr = true;
+
for (offset = 0; offset < sec->len; offset += insn->len) {
insn = malloc(sizeof(*insn));
if (!insn) {
@@ -271,7 +283,8 @@ static int decode_instructions(struct objtool_file *file)
}
memset(insn, 0, sizeof(*insn));
INIT_LIST_HEAD(&insn->alts);
- clear_insn_state(&insn->state);
+ INIT_LIST_HEAD(&insn->stack_ops);
+ init_cfi_state(&insn->cfi);
insn->sec = sec;
insn->offset = offset;
@@ -280,11 +293,11 @@ static int decode_instructions(struct objtool_file *file)
sec->len - offset,
&insn->len, &insn->type,
&insn->immediate,
- &insn->stack_op);
+ &insn->stack_ops);
if (ret)
goto err;
- hash_add(file->insn_hash, &insn->hash, insn->offset);
+ hash_add(file->insn_hash, &insn->hash, sec_offset_hash(sec, insn->offset));
list_add_tail(&insn->list, &file->insn_list);
nr_insns++;
}
@@ -314,15 +327,27 @@ err:
return ret;
}
+static struct instruction *find_last_insn(struct objtool_file *file,
+ struct section *sec)
+{
+ struct instruction *insn = NULL;
+ unsigned int offset;
+ unsigned int end = (sec->len > 10) ? sec->len - 10 : 0;
+
+ for (offset = sec->len - 1; offset >= end && !insn; offset--)
+ insn = find_insn(file, sec, offset);
+
+ return insn;
+}
+
/*
* Mark "ud2" instructions and manually annotated dead ends.
*/
static int add_dead_ends(struct objtool_file *file)
{
struct section *sec;
- struct rela *rela;
+ struct reloc *reloc;
struct instruction *insn;
- bool found;
/*
* By default, "ud2" is a dead end unless otherwise annotated, because
@@ -339,31 +364,24 @@ static int add_dead_ends(struct objtool_file *file)
if (!sec)
goto reachable;
- list_for_each_entry(rela, &sec->rela_list, list) {
- if (rela->sym->type != STT_SECTION) {
+ list_for_each_entry(reloc, &sec->reloc_list, list) {
+ if (reloc->sym->type != STT_SECTION) {
WARN("unexpected relocation symbol type in %s", sec->name);
return -1;
}
- insn = find_insn(file, rela->sym->sec, rela->addend);
+ insn = find_insn(file, reloc->sym->sec, reloc->addend);
if (insn)
insn = list_prev_entry(insn, list);
- else if (rela->addend == rela->sym->sec->len) {
- found = false;
- list_for_each_entry_reverse(insn, &file->insn_list, list) {
- if (insn->sec == rela->sym->sec) {
- found = true;
- break;
- }
- }
-
- if (!found) {
+ else if (reloc->addend == reloc->sym->sec->len) {
+ insn = find_last_insn(file, reloc->sym->sec);
+ if (!insn) {
WARN("can't find unreachable insn at %s+0x%x",
- rela->sym->sec->name, rela->addend);
+ reloc->sym->sec->name, reloc->addend);
return -1;
}
} else {
WARN("can't find unreachable insn at %s+0x%x",
- rela->sym->sec->name, rela->addend);
+ reloc->sym->sec->name, reloc->addend);
return -1;
}
@@ -381,31 +399,24 @@ reachable:
if (!sec)
return 0;
- list_for_each_entry(rela, &sec->rela_list, list) {
- if (rela->sym->type != STT_SECTION) {
+ list_for_each_entry(reloc, &sec->reloc_list, list) {
+ if (reloc->sym->type != STT_SECTION) {
WARN("unexpected relocation symbol type in %s", sec->name);
return -1;
}
- insn = find_insn(file, rela->sym->sec, rela->addend);
+ insn = find_insn(file, reloc->sym->sec, reloc->addend);
if (insn)
insn = list_prev_entry(insn, list);
- else if (rela->addend == rela->sym->sec->len) {
- found = false;
- list_for_each_entry_reverse(insn, &file->insn_list, list) {
- if (insn->sec == rela->sym->sec) {
- found = true;
- break;
- }
- }
-
- if (!found) {
+ else if (reloc->addend == reloc->sym->sec->len) {
+ insn = find_last_insn(file, reloc->sym->sec);
+ if (!insn) {
WARN("can't find reachable insn at %s+0x%x",
- rela->sym->sec->name, rela->addend);
+ reloc->sym->sec->name, reloc->addend);
return -1;
}
} else {
WARN("can't find reachable insn at %s+0x%x",
- rela->sym->sec->name, rela->addend);
+ reloc->sym->sec->name, reloc->addend);
return -1;
}
@@ -415,6 +426,103 @@ reachable:
return 0;
}
+static int create_static_call_sections(struct objtool_file *file)
+{
+ struct section *sec, *reloc_sec;
+ struct reloc *reloc;
+ struct static_call_site *site;
+ struct instruction *insn;
+ struct symbol *key_sym;
+ char *key_name, *tmp;
+ int idx;
+
+ sec = find_section_by_name(file->elf, ".static_call_sites");
+ if (sec) {
+ INIT_LIST_HEAD(&file->static_call_list);
+ WARN("file already has .static_call_sites section, skipping");
+ return 0;
+ }
+
+ if (list_empty(&file->static_call_list))
+ return 0;
+
+ idx = 0;
+ list_for_each_entry(insn, &file->static_call_list, static_call_node)
+ idx++;
+
+ sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE,
+ sizeof(struct static_call_site), idx);
+ if (!sec)
+ return -1;
+
+ reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+ if (!reloc_sec)
+ return -1;
+
+ idx = 0;
+ list_for_each_entry(insn, &file->static_call_list, static_call_node) {
+
+ site = (struct static_call_site *)sec->data->d_buf + idx;
+ memset(site, 0, sizeof(struct static_call_site));
+
+ /* populate reloc for 'addr' */
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
+ perror("malloc");
+ return -1;
+ }
+ memset(reloc, 0, sizeof(*reloc));
+ reloc->sym = insn->sec->sym;
+ reloc->addend = insn->offset;
+ reloc->type = R_X86_64_PC32;
+ reloc->offset = idx * sizeof(struct static_call_site);
+ reloc->sec = reloc_sec;
+ elf_add_reloc(file->elf, reloc);
+
+ /* find key symbol */
+ key_name = strdup(insn->call_dest->name);
+ if (!key_name) {
+ perror("strdup");
+ return -1;
+ }
+ if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR,
+ STATIC_CALL_TRAMP_PREFIX_LEN)) {
+ WARN("static_call: trampoline name malformed: %s", key_name);
+ return -1;
+ }
+ tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN;
+ memcpy(tmp, STATIC_CALL_KEY_PREFIX_STR, STATIC_CALL_KEY_PREFIX_LEN);
+
+ key_sym = find_symbol_by_name(file->elf, tmp);
+ if (!key_sym) {
+ WARN("static_call: can't find static_call_key symbol: %s", tmp);
+ return -1;
+ }
+ free(key_name);
+
+ /* populate reloc for 'key' */
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
+ perror("malloc");
+ return -1;
+ }
+ memset(reloc, 0, sizeof(*reloc));
+ reloc->sym = key_sym;
+ reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0;
+ reloc->type = R_X86_64_PC32;
+ reloc->offset = idx * sizeof(struct static_call_site) + 4;
+ reloc->sec = reloc_sec;
+ elf_add_reloc(file->elf, reloc);
+
+ idx++;
+ }
+
+ if (elf_rebuild_reloc_section(file->elf, reloc_sec))
+ return -1;
+
+ return 0;
+}
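
Each emitted entry pairs a call-site address with its static_call_key, both as 4-byte PC-relative references, which is why the 'key' relocation lands at offset 4 within the 8-byte entry (layout per include/linux/static_call_types.h; shown here as a reference sketch):

	struct static_call_site {
		s32 addr;	/* call site, R_X86_64_PC32 reloc at +0 */
		s32 key;	/* static_call_key, reloc at +4; the
				 * STATIC_CALL_SITE_TAIL bit in the addend
				 * marks a tail call */
	};

The in-place memcpy() above works because the key and trampoline prefixes (__SCK__ and __SCT__ respectively) are the same length, so rewriting the prefix of the strdup()ed trampoline name yields the key symbol name directly.
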
+
/*
* Warnings shouldn't be reported for ignored functions.
*/
@@ -423,26 +531,26 @@ static void add_ignores(struct objtool_file *file)
struct instruction *insn;
struct section *sec;
struct symbol *func;
- struct rela *rela;
+ struct reloc *reloc;
sec = find_section_by_name(file->elf, ".rela.discard.func_stack_frame_non_standard");
if (!sec)
return;
- list_for_each_entry(rela, &sec->rela_list, list) {
- switch (rela->sym->type) {
+ list_for_each_entry(reloc, &sec->reloc_list, list) {
+ switch (reloc->sym->type) {
case STT_FUNC:
- func = rela->sym;
+ func = reloc->sym;
break;
case STT_SECTION:
- func = find_func_by_offset(rela->sym->sec, rela->addend);
+ func = find_func_by_offset(reloc->sym->sec, reloc->addend);
if (!func)
continue;
break;
default:
- WARN("unexpected relocation symbol type in %s: %d", sec->name, rela->sym->type);
+ WARN("unexpected relocation symbol type in %s: %d", sec->name, reloc->sym->type);
continue;
}
@@ -475,6 +583,8 @@ static const char *uaccess_safe_builtin[] = {
"__asan_store4_noabort",
"__asan_store8_noabort",
"__asan_store16_noabort",
+ "__kasan_check_read",
+ "__kasan_check_write",
/* KASAN in-line */
"__asan_report_load_n_noabort",
"__asan_report_load1_noabort",
@@ -488,8 +598,86 @@ static const char *uaccess_safe_builtin[] = {
"__asan_report_store4_noabort",
"__asan_report_store8_noabort",
"__asan_report_store16_noabort",
+ /* KCSAN */
+ "__kcsan_check_access",
+ "kcsan_found_watchpoint",
+ "kcsan_setup_watchpoint",
+ "kcsan_check_scoped_accesses",
+ "kcsan_disable_current",
+ "kcsan_enable_current_nowarn",
+ /* KCSAN/TSAN */
+ "__tsan_func_entry",
+ "__tsan_func_exit",
+ "__tsan_read_range",
+ "__tsan_write_range",
+ "__tsan_read1",
+ "__tsan_read2",
+ "__tsan_read4",
+ "__tsan_read8",
+ "__tsan_read16",
+ "__tsan_write1",
+ "__tsan_write2",
+ "__tsan_write4",
+ "__tsan_write8",
+ "__tsan_write16",
+ "__tsan_read_write1",
+ "__tsan_read_write2",
+ "__tsan_read_write4",
+ "__tsan_read_write8",
+ "__tsan_read_write16",
+ "__tsan_atomic8_load",
+ "__tsan_atomic16_load",
+ "__tsan_atomic32_load",
+ "__tsan_atomic64_load",
+ "__tsan_atomic8_store",
+ "__tsan_atomic16_store",
+ "__tsan_atomic32_store",
+ "__tsan_atomic64_store",
+ "__tsan_atomic8_exchange",
+ "__tsan_atomic16_exchange",
+ "__tsan_atomic32_exchange",
+ "__tsan_atomic64_exchange",
+ "__tsan_atomic8_fetch_add",
+ "__tsan_atomic16_fetch_add",
+ "__tsan_atomic32_fetch_add",
+ "__tsan_atomic64_fetch_add",
+ "__tsan_atomic8_fetch_sub",
+ "__tsan_atomic16_fetch_sub",
+ "__tsan_atomic32_fetch_sub",
+ "__tsan_atomic64_fetch_sub",
+ "__tsan_atomic8_fetch_and",
+ "__tsan_atomic16_fetch_and",
+ "__tsan_atomic32_fetch_and",
+ "__tsan_atomic64_fetch_and",
+ "__tsan_atomic8_fetch_or",
+ "__tsan_atomic16_fetch_or",
+ "__tsan_atomic32_fetch_or",
+ "__tsan_atomic64_fetch_or",
+ "__tsan_atomic8_fetch_xor",
+ "__tsan_atomic16_fetch_xor",
+ "__tsan_atomic32_fetch_xor",
+ "__tsan_atomic64_fetch_xor",
+ "__tsan_atomic8_fetch_nand",
+ "__tsan_atomic16_fetch_nand",
+ "__tsan_atomic32_fetch_nand",
+ "__tsan_atomic64_fetch_nand",
+ "__tsan_atomic8_compare_exchange_strong",
+ "__tsan_atomic16_compare_exchange_strong",
+ "__tsan_atomic32_compare_exchange_strong",
+ "__tsan_atomic64_compare_exchange_strong",
+ "__tsan_atomic8_compare_exchange_weak",
+ "__tsan_atomic16_compare_exchange_weak",
+ "__tsan_atomic32_compare_exchange_weak",
+ "__tsan_atomic64_compare_exchange_weak",
+ "__tsan_atomic8_compare_exchange_val",
+ "__tsan_atomic16_compare_exchange_val",
+ "__tsan_atomic32_compare_exchange_val",
+ "__tsan_atomic64_compare_exchange_val",
+ "__tsan_atomic_thread_fence",
+ "__tsan_atomic_signal_fence",
/* KCOV */
"write_comp_data",
+ "check_kcov_mode",
"__sanitizer_cov_trace_pc",
"__sanitizer_cov_trace_const_cmp1",
"__sanitizer_cov_trace_const_cmp2",
@@ -507,8 +695,9 @@ static const char *uaccess_safe_builtin[] = {
"__ubsan_handle_shift_out_of_bounds",
/* misc */
"csum_partial_copy_generic",
- "__memcpy_mcsafe",
- "mcsafe_handle_tail",
+ "copy_mc_fragile",
+ "copy_mc_fragile_handle_tail",
+ "copy_mc_enhanced_fast_string",
"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
NULL
};
@@ -539,20 +728,20 @@ static void add_uaccess_safe(struct objtool_file *file)
static int add_ignore_alternatives(struct objtool_file *file)
{
struct section *sec;
- struct rela *rela;
+ struct reloc *reloc;
struct instruction *insn;
sec = find_section_by_name(file->elf, ".rela.discard.ignore_alts");
if (!sec)
return 0;
- list_for_each_entry(rela, &sec->rela_list, list) {
- if (rela->sym->type != STT_SECTION) {
+ list_for_each_entry(reloc, &sec->reloc_list, list) {
+ if (reloc->sym->type != STT_SECTION) {
WARN("unexpected relocation symbol type in %s", sec->name);
return -1;
}
- insn = find_insn(file, rela->sym->sec, rela->addend);
+ insn = find_insn(file, reloc->sym->sec, reloc->addend);
if (!insn) {
WARN("bad .discard.ignore_alts entry");
return -1;
@@ -570,7 +759,7 @@ static int add_ignore_alternatives(struct objtool_file *file)
static int add_jump_destinations(struct objtool_file *file)
{
struct instruction *insn;
- struct rela *rela;
+ struct reloc *reloc;
struct section *dest_sec;
unsigned long dest_off;
@@ -578,21 +767,22 @@ static int add_jump_destinations(struct objtool_file *file)
if (!is_static_jump(insn))
continue;
- if (insn->ignore || insn->offset == FAKE_JUMP_OFFSET)
+ if (insn->offset == FAKE_JUMP_OFFSET)
continue;
- rela = find_rela_by_dest_range(file->elf, insn->sec,
+ reloc = find_reloc_by_dest_range(file->elf, insn->sec,
insn->offset, insn->len);
- if (!rela) {
+ if (!reloc) {
dest_sec = insn->sec;
- dest_off = insn->offset + insn->len + insn->immediate;
- } else if (rela->sym->type == STT_SECTION) {
- dest_sec = rela->sym->sec;
- dest_off = rela->addend + 4;
- } else if (rela->sym->sec->idx) {
- dest_sec = rela->sym->sec;
- dest_off = rela->sym->sym.st_value + rela->addend + 4;
- } else if (strstr(rela->sym->name, "_indirect_thunk_")) {
+ dest_off = arch_jump_destination(insn);
+ } else if (reloc->sym->type == STT_SECTION) {
+ dest_sec = reloc->sym->sec;
+ dest_off = arch_dest_reloc_offset(reloc->addend);
+ } else if (reloc->sym->sec->idx) {
+ dest_sec = reloc->sym->sec;
+ dest_off = reloc->sym->sym.st_value +
+ arch_dest_reloc_offset(reloc->addend);
+ } else if (strstr(reloc->sym->name, "_indirect_thunk_")) {
/*
* Retpoline jumps are really dynamic jumps in
* disguise, so convert them accordingly.
@@ -606,7 +796,11 @@ static int add_jump_destinations(struct objtool_file *file)
continue;
} else {
/* external sibling call */
- insn->call_dest = rela->sym;
+ insn->call_dest = reloc->sym;
+ if (insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+ &file->static_call_list);
+ }
continue;
}
@@ -658,6 +852,10 @@ static int add_jump_destinations(struct objtool_file *file)
/* internal sibling call */
insn->call_dest = insn->jump_dest->func;
+ if (insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+ &file->static_call_list);
+ }
}
}
}
@@ -665,6 +863,27 @@ static int add_jump_destinations(struct objtool_file *file)
return 0;
}
+static void remove_insn_ops(struct instruction *insn)
+{
+ struct stack_op *op, *tmp;
+
+ list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) {
+ list_del(&op->list);
+ free(op);
+ }
+}
+
+static struct symbol *find_call_destination(struct section *sec, unsigned long offset)
+{
+ struct symbol *call_dest;
+
+ call_dest = find_func_by_offset(sec, offset);
+ if (!call_dest)
+ call_dest = find_symbol_by_offset(sec, offset);
+
+ return call_dest;
+}
+
/*
* Find the destination instructions for all calls.
*/
@@ -672,28 +891,23 @@ static int add_call_destinations(struct objtool_file *file)
{
struct instruction *insn;
unsigned long dest_off;
- struct rela *rela;
+ struct reloc *reloc;
for_each_insn(file, insn) {
if (insn->type != INSN_CALL)
continue;
- rela = find_rela_by_dest_range(file->elf, insn->sec,
+ reloc = find_reloc_by_dest_range(file->elf, insn->sec,
insn->offset, insn->len);
- if (!rela) {
- dest_off = insn->offset + insn->len + insn->immediate;
- insn->call_dest = find_func_by_offset(insn->sec, dest_off);
- if (!insn->call_dest)
- insn->call_dest = find_symbol_by_offset(insn->sec, dest_off);
+ if (!reloc) {
+ dest_off = arch_jump_destination(insn);
+ insn->call_dest = find_call_destination(insn->sec, dest_off);
if (insn->ignore)
continue;
if (!insn->call_dest) {
- WARN_FUNC("unsupported intra-function call",
- insn->sec, insn->offset);
- if (retpoline)
- WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
+ WARN_FUNC("unannotated intra-function call", insn->sec, insn->offset);
return -1;
}
@@ -703,18 +917,46 @@ static int add_call_destinations(struct objtool_file *file)
return -1;
}
- } else if (rela->sym->type == STT_SECTION) {
- insn->call_dest = find_func_by_offset(rela->sym->sec,
- rela->addend+4);
+ } else if (reloc->sym->type == STT_SECTION) {
+ dest_off = arch_dest_reloc_offset(reloc->addend);
+ insn->call_dest = find_call_destination(reloc->sym->sec,
+ dest_off);
if (!insn->call_dest) {
- WARN_FUNC("can't find call dest symbol at %s+0x%x",
+ WARN_FUNC("can't find call dest symbol at %s+0x%lx",
insn->sec, insn->offset,
- rela->sym->sec->name,
- rela->addend + 4);
+ reloc->sym->sec->name,
+ dest_off);
return -1;
}
} else
- insn->call_dest = rela->sym;
+ insn->call_dest = reloc->sym;
+
+ /*
+ * Many compilers cannot disable KCOV with a function attribute
+		 * so they need a little help: NOP out any KCOV calls from noinstr
+ * text.
+ */
+ if (insn->sec->noinstr &&
+ !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) {
+ if (reloc) {
+ reloc->type = R_NONE;
+ elf_write_reloc(file->elf, reloc);
+ }
+
+ elf_write_insn(file->elf, insn->sec,
+ insn->offset, insn->len,
+ arch_nop_insn(insn->len));
+ insn->type = INSN_NOP;
+ }
+
+ /*
+		 * Whatever stack impact regular CALLs have should be undone
+ * by the RETURN of the called function.
+ *
+ * Annotated intra-function calls retain the stack_ops but
+ * are converted to JUMP, see read_intra_function_calls().
+ */
+ remove_insn_ops(insn);
}
return 0;
@@ -742,7 +984,9 @@ static int handle_group_alt(struct objtool_file *file,
struct instruction *orig_insn,
struct instruction **new_insn)
{
+ static unsigned int alt_group_next_index = 1;
struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
+ unsigned int alt_group = alt_group_next_index++;
unsigned long dest_off;
last_orig_insn = NULL;
@@ -751,7 +995,7 @@ static int handle_group_alt(struct objtool_file *file,
if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
break;
- insn->alt_group = true;
+ insn->alt_group = alt_group;
last_orig_insn = insn;
}
@@ -763,7 +1007,8 @@ static int handle_group_alt(struct objtool_file *file,
}
memset(fake_jump, 0, sizeof(*fake_jump));
INIT_LIST_HEAD(&fake_jump->alts);
- clear_insn_state(&fake_jump->state);
+ INIT_LIST_HEAD(&fake_jump->stack_ops);
+ init_cfi_state(&fake_jump->cfi);
fake_jump->sec = special_alt->new_sec;
fake_jump->offset = FAKE_JUMP_OFFSET;
@@ -784,8 +1029,11 @@ static int handle_group_alt(struct objtool_file *file,
}
last_new_insn = NULL;
+ alt_group = alt_group_next_index++;
insn = *new_insn;
sec_for_each_insn_from(file, insn) {
+ struct reloc *alt_reloc;
+
if (insn->offset >= special_alt->new_off + special_alt->new_len)
break;
@@ -793,6 +1041,7 @@ static int handle_group_alt(struct objtool_file *file,
insn->ignore = orig_insn->ignore_alts;
insn->func = orig_insn->func;
+ insn->alt_group = alt_group;
/*
* Since alternative replacement code is copy/pasted by the
@@ -801,14 +1050,11 @@ static int handle_group_alt(struct objtool_file *file,
* .altinstr_replacement section, unless the arch's
* alternatives code can adjust the relative offsets
* accordingly.
- *
- * The x86 alternatives code adjusts the offsets only when it
- * encounters a branch instruction at the very beginning of the
- * replacement group.
*/
- if ((insn->offset != special_alt->new_off ||
- (insn->type != INSN_CALL && !is_static_jump(insn))) &&
- find_rela_by_dest_range(file->elf, insn->sec, insn->offset, insn->len)) {
+ alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+ insn->offset, insn->len);
+ if (alt_reloc &&
+ !arch_support_alt_relocation(special_alt, insn, alt_reloc)) {
WARN_FUNC("unsupported relocation in alternatives section",
insn->sec, insn->offset);
@@ -821,7 +1067,7 @@ static int handle_group_alt(struct objtool_file *file,
if (!insn->immediate)
continue;
- dest_off = insn->offset + insn->len + insn->immediate;
+ dest_off = arch_jump_destination(insn);
if (dest_off == special_alt->new_off + special_alt->new_len) {
if (!fake_jump) {
WARN("%s: alternative jump to end of section",
@@ -916,6 +1162,12 @@ static int add_special_section_alts(struct objtool_file *file)
}
if (special_alt->group) {
+ if (!special_alt->orig_len) {
+ WARN_FUNC("empty alternative entry",
+ orig_insn->sec, orig_insn->offset);
+ continue;
+ }
+
ret = handle_group_alt(file, special_alt, orig_insn,
&new_insn);
if (ret)
@@ -948,34 +1200,34 @@ out:
}
static int add_jump_table(struct objtool_file *file, struct instruction *insn,
- struct rela *table)
+ struct reloc *table)
{
- struct rela *rela = table;
+ struct reloc *reloc = table;
struct instruction *dest_insn;
struct alternative *alt;
struct symbol *pfunc = insn->func->pfunc;
unsigned int prev_offset = 0;
/*
- * Each @rela is a switch table relocation which points to the target
+ * Each @reloc is a switch table relocation which points to the target
* instruction.
*/
- list_for_each_entry_from(rela, &table->sec->rela_list, list) {
+ list_for_each_entry_from(reloc, &table->sec->reloc_list, list) {
/* Check for the end of the table: */
- if (rela != table && rela->jump_table_start)
+ if (reloc != table && reloc->jump_table_start)
break;
/* Make sure the table entries are consecutive: */
- if (prev_offset && rela->offset != prev_offset + 8)
+ if (prev_offset && reloc->offset != prev_offset + 8)
break;
/* Detect function pointers from contiguous objects: */
- if (rela->sym->sec == pfunc->sec &&
- rela->addend == pfunc->offset)
+ if (reloc->sym->sec == pfunc->sec &&
+ reloc->addend == pfunc->offset)
break;
- dest_insn = find_insn(file, rela->sym->sec, rela->addend);
+ dest_insn = find_insn(file, reloc->sym->sec, reloc->addend);
if (!dest_insn)
break;
@@ -991,7 +1243,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
alt->insn = dest_insn;
list_add_tail(&alt->list, &insn->alts);
- prev_offset = rela->offset;
+ prev_offset = reloc->offset;
}
if (!prev_offset) {
@@ -1004,56 +1256,15 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
}
/*
- * find_jump_table() - Given a dynamic jump, find the switch jump table in
- * .rodata associated with it.
- *
- * There are 3 basic patterns:
- *
- * 1. jmpq *[rodata addr](,%reg,8)
- *
- * This is the most common case by far. It jumps to an address in a simple
- * jump table which is stored in .rodata.
- *
- * 2. jmpq *[rodata addr](%rip)
- *
- * This is caused by a rare GCC quirk, currently only seen in three driver
- * functions in the kernel, only with certain obscure non-distro configs.
- *
- * As part of an optimization, GCC makes a copy of an existing switch jump
- * table, modifies it, and then hard-codes the jump (albeit with an indirect
- * jump) to use a single entry in the table. The rest of the jump table and
- * some of its jump targets remain as dead code.
- *
- * In such a case we can just crudely ignore all unreachable instruction
- * warnings for the entire object file. Ideally we would just ignore them
- * for the function, but that would require redesigning the code quite a
- * bit. And honestly that's just not worth doing: unreachable instruction
- * warnings are of questionable value anyway, and this is such a rare issue.
- *
- * 3. mov [rodata addr],%reg1
- * ... some instructions ...
- * jmpq *(%reg1,%reg2,8)
- *
- * This is a fairly uncommon pattern which is new for GCC 6. As of this
- * writing, there are 11 occurrences of it in the allmodconfig kernel.
- *
- * As of GCC 7 there are quite a few more of these and the 'in between' code
- * is significant. Esp. with KASAN enabled some of the code between the mov
- * and jmpq uses .rodata itself, which can confuse things.
- *
- * TODO: Once we have DWARF CFI and smarter instruction decoding logic,
- * ensure the same register is used in the mov and jump instructions.
- *
- * NOTE: RETPOLINE made it harder still to decode dynamic jumps.
+ * find_jump_table() - Given a dynamic jump, find the switch jump table
+ * associated with it.
*/
-static struct rela *find_jump_table(struct objtool_file *file,
+static struct reloc *find_jump_table(struct objtool_file *file,
struct symbol *func,
struct instruction *insn)
{
- struct rela *text_rela, *table_rela;
+ struct reloc *table_reloc;
struct instruction *dest_insn, *orig_insn = insn;
- struct section *table_sec;
- unsigned long table_offset;
/*
* Backward search using the @first_jump_src links, these help avoid
@@ -1074,53 +1285,14 @@ static struct rela *find_jump_table(struct objtool_file *file,
insn->jump_dest->offset > orig_insn->offset))
break;
- /* look for a relocation which references .rodata */
- text_rela = find_rela_by_dest_range(file->elf, insn->sec,
- insn->offset, insn->len);
- if (!text_rela || text_rela->sym->type != STT_SECTION ||
- !text_rela->sym->sec->rodata)
- continue;
-
- table_offset = text_rela->addend;
- table_sec = text_rela->sym->sec;
-
- if (text_rela->type == R_X86_64_PC32)
- table_offset += 4;
-
- /*
- * Make sure the .rodata address isn't associated with a
- * symbol. GCC jump tables are anonymous data.
- *
- * Also support C jump tables which are in the same format as
- * switch jump tables. For objtool to recognize them, they
- * need to be placed in the C_JUMP_TABLE_SECTION section. They
- * have symbols associated with them.
- */
- if (find_symbol_containing(table_sec, table_offset) &&
- strcmp(table_sec->name, C_JUMP_TABLE_SECTION))
+ table_reloc = arch_find_switch_table(file, insn);
+ if (!table_reloc)
continue;
-
- /*
- * Each table entry has a rela associated with it. The rela
- * should reference text in the same function as the original
- * instruction.
- */
- table_rela = find_rela_by_dest(file->elf, table_sec, table_offset);
- if (!table_rela)
- continue;
- dest_insn = find_insn(file, table_rela->sym->sec, table_rela->addend);
+ dest_insn = find_insn(file, table_reloc->sym->sec, table_reloc->addend);
if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func)
continue;
- /*
- * Use of RIP-relative switch jumps is quite rare, and
- * indicates a rare GCC quirk/bug which can leave dead code
- * behind.
- */
- if (text_rela->type == R_X86_64_PC32)
- file->ignore_unreachables = true;
-
- return table_rela;
+ return table_reloc;
}
return NULL;
@@ -1134,7 +1306,7 @@ static void mark_func_jump_tables(struct objtool_file *file,
struct symbol *func)
{
struct instruction *insn, *last = NULL;
- struct rela *rela;
+ struct reloc *reloc;
func_for_each_insn(file, func, insn) {
if (!last)
@@ -1157,10 +1329,10 @@ static void mark_func_jump_tables(struct objtool_file *file,
if (insn->type != INSN_JUMP_DYNAMIC)
continue;
- rela = find_jump_table(file, func, insn);
- if (rela) {
- rela->jump_table_start = true;
- insn->jump_table = rela;
+ reloc = find_jump_table(file, func, insn);
+ if (reloc) {
+ reloc->jump_table_start = true;
+ insn->jump_table = reloc;
}
}
}
@@ -1214,8 +1386,8 @@ static int add_jump_table_alts(struct objtool_file *file)
static int read_unwind_hints(struct objtool_file *file)
{
- struct section *sec, *relasec;
- struct rela *rela;
+ struct section *sec, *relocsec;
+ struct reloc *reloc;
struct unwind_hint *hint;
struct instruction *insn;
struct cfi_reg *cfa;
@@ -1225,8 +1397,8 @@ static int read_unwind_hints(struct objtool_file *file)
if (!sec)
return 0;
- relasec = sec->rela;
- if (!relasec) {
+ relocsec = sec->reloc;
+ if (!relocsec) {
WARN("missing .rela.discard.unwind_hints section");
return -1;
}
@@ -1241,66 +1413,36 @@ static int read_unwind_hints(struct objtool_file *file)
for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) {
hint = (struct unwind_hint *)sec->data->d_buf + i;
- rela = find_rela_by_dest(file->elf, sec, i * sizeof(*hint));
- if (!rela) {
- WARN("can't find rela for unwind_hints[%d]", i);
+ reloc = find_reloc_by_dest(file->elf, sec, i * sizeof(*hint));
+ if (!reloc) {
+ WARN("can't find reloc for unwind_hints[%d]", i);
return -1;
}
- insn = find_insn(file, rela->sym->sec, rela->addend);
+ insn = find_insn(file, reloc->sym->sec, reloc->addend);
if (!insn) {
WARN("can't find insn for unwind_hints[%d]", i);
return -1;
}
- cfa = &insn->state.cfa;
-
- if (hint->type == UNWIND_HINT_TYPE_SAVE) {
- insn->save = true;
- continue;
+ cfa = &insn->cfi.cfa;
- } else if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
- insn->restore = true;
- insn->hint = true;
+ if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) {
+ insn->ret_offset = hint->sp_offset;
continue;
}
insn->hint = true;
- switch (hint->sp_reg) {
- case ORC_REG_UNDEFINED:
- cfa->base = CFI_UNDEFINED;
- break;
- case ORC_REG_SP:
- cfa->base = CFI_SP;
- break;
- case ORC_REG_BP:
- cfa->base = CFI_BP;
- break;
- case ORC_REG_SP_INDIRECT:
- cfa->base = CFI_SP_INDIRECT;
- break;
- case ORC_REG_R10:
- cfa->base = CFI_R10;
- break;
- case ORC_REG_R13:
- cfa->base = CFI_R13;
- break;
- case ORC_REG_DI:
- cfa->base = CFI_DI;
- break;
- case ORC_REG_DX:
- cfa->base = CFI_DX;
- break;
- default:
+ if (arch_decode_hint_reg(insn, hint->sp_reg)) {
WARN_FUNC("unsupported unwind_hint sp base reg %d",
insn->sec, insn->offset, hint->sp_reg);
return -1;
}
cfa->offset = hint->sp_offset;
- insn->state.type = hint->type;
- insn->state.end = hint->end;
+ insn->cfi.type = hint->type;
+ insn->cfi.end = hint->end;
}
return 0;
@@ -1310,19 +1452,19 @@ static int read_retpoline_hints(struct objtool_file *file)
{
struct section *sec;
struct instruction *insn;
- struct rela *rela;
+ struct reloc *reloc;
sec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe");
if (!sec)
return 0;
- list_for_each_entry(rela, &sec->rela_list, list) {
- if (rela->sym->type != STT_SECTION) {
+ list_for_each_entry(reloc, &sec->reloc_list, list) {
+ if (reloc->sym->type != STT_SECTION) {
WARN("unexpected relocation symbol type in %s", sec->name);
return -1;
}
- insn = find_insn(file, rela->sym->sec, rela->addend);
+ insn = find_insn(file, reloc->sym->sec, reloc->addend);
if (!insn) {
WARN("bad .discard.retpoline_safe entry");
return -1;
@@ -1341,6 +1483,121 @@ static int read_retpoline_hints(struct objtool_file *file)
return 0;
}
+static int read_instr_hints(struct objtool_file *file)
+{
+ struct section *sec;
+ struct instruction *insn;
+ struct reloc *reloc;
+
+ sec = find_section_by_name(file->elf, ".rela.discard.instr_end");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(reloc, &sec->reloc_list, list) {
+ if (reloc->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s", sec->name);
+ return -1;
+ }
+
+ insn = find_insn(file, reloc->sym->sec, reloc->addend);
+ if (!insn) {
+ WARN("bad .discard.instr_end entry");
+ return -1;
+ }
+
+ insn->instr--;
+ }
+
+ sec = find_section_by_name(file->elf, ".rela.discard.instr_begin");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(reloc, &sec->reloc_list, list) {
+ if (reloc->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s", sec->name);
+ return -1;
+ }
+
+ insn = find_insn(file, reloc->sym->sec, reloc->addend);
+ if (!insn) {
+ WARN("bad .discard.instr_begin entry");
+ return -1;
+ }
+
+ insn->instr++;
+ }
+
+ return 0;
+}
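
These two sections are populated by the instrumentation_begin()/instrumentation_end() annotations in the kernel proper; objtool turns them into a per-instruction counter (insn->instr) so noinstr validation can tell whether a given call sits inside an explicitly allowed instrumentation window. The kernel-side annotation looks roughly like this (a sketch; see include/linux/instrumentation.h for the authoritative form):

	#define instrumentation_begin() ({					\
		asm volatile("%c0: nop\n\t"					\
			     ".pushsection .discard.instr_begin\n\t"		\
			     ".long %c0b - .\n\t"				\
			     ".popsection\n\t" : : "i" (__COUNTER__));		\
	})
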
+
+static int read_intra_function_calls(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct section *sec;
+ struct reloc *reloc;
+
+ sec = find_section_by_name(file->elf, ".rela.discard.intra_function_calls");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(reloc, &sec->reloc_list, list) {
+ unsigned long dest_off;
+
+ if (reloc->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s",
+ sec->name);
+ return -1;
+ }
+
+ insn = find_insn(file, reloc->sym->sec, reloc->addend);
+ if (!insn) {
+ WARN("bad .discard.intra_function_call entry");
+ return -1;
+ }
+
+ if (insn->type != INSN_CALL) {
+ WARN_FUNC("intra_function_call not a direct call",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ /*
+ * Treat intra-function CALLs as JMPs, but with a stack_op.
+ * See add_call_destinations(), which strips stack_ops from
+ * normal CALLs.
+ */
+ insn->type = INSN_JUMP_UNCONDITIONAL;
+
+ dest_off = insn->offset + insn->len + insn->immediate;
+ insn->jump_dest = find_insn(file, insn->sec, dest_off);
+ if (!insn->jump_dest) {
+ WARN_FUNC("can't find call dest at %s+0x%lx",
+ insn->sec, insn->offset,
+ insn->sec->name, dest_off);
+ return -1;
+ }
+ }
+
+ return 0;
+}
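
The section read here is fed by an annotation placed next to the CALL in assembly code; roughly (a sketch of the macro added alongside this series in include/linux/objtool.h):

	.macro ANNOTATE_INTRA_FUNCTION_CALL
	999:
		.pushsection .discard.intra_function_calls
		.long 999b
		.popsection
	.endm

Converting the annotated CALL to INSN_JUMP_UNCONDITIONAL while keeping its push stack_op lets the checker follow the control flow like a jump yet still account for the return address the CALL leaves on the stack.
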
+
+static int read_static_call_tramps(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *func;
+
+ for_each_sec(file, sec) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->bind == STB_GLOBAL &&
+ !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
+ strlen(STATIC_CALL_TRAMP_PREFIX_STR)))
+ func->static_call_tramp = true;
+ }
+ }
+
+ return 0;
+}
+
static void mark_rodata(struct objtool_file *file)
{
struct section *sec;
@@ -1357,8 +1614,8 @@ static void mark_rodata(struct objtool_file *file)
* .rodata.str1.* sections are ignored; they don't contain jump tables.
*/
for_each_sec(file, sec) {
- if ((!strncmp(sec->name, ".rodata", 7) && !strstr(sec->name, ".str1.")) ||
- !strcmp(sec->name, C_JUMP_TABLE_SECTION)) {
+ if (!strncmp(sec->name, ".rodata", 7) &&
+ !strstr(sec->name, ".str1.")) {
sec->rodata = true;
found = true;
}
@@ -1388,6 +1645,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
+ ret = read_static_call_tramps(file);
+ if (ret)
+ return ret;
+
ret = add_jump_destinations(file);
if (ret)
return ret;
@@ -1396,6 +1657,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
+ ret = read_intra_function_calls(file);
+ if (ret)
+ return ret;
+
ret = add_call_destinations(file);
if (ret)
return ret;
@@ -1412,12 +1677,16 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
+ ret = read_instr_hints(file);
+ if (ret)
+ return ret;
+
return 0;
}
static bool is_fentry_call(struct instruction *insn)
{
- if (insn->type == INSN_CALL &&
+ if (insn->type == INSN_CALL && insn->call_dest &&
insn->call_dest->type == STT_NOTYPE &&
!strcmp(insn->call_dest->name, "__fentry__"))
return true;
@@ -1425,40 +1694,57 @@ static bool is_fentry_call(struct instruction *insn)
return false;
}
-static bool has_modified_stack_frame(struct insn_state *state)
+static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state)
{
+ u8 ret_offset = insn->ret_offset;
+ struct cfi_state *cfi = &state->cfi;
int i;
- if (state->cfa.base != initial_func_cfi.cfa.base ||
- state->cfa.offset != initial_func_cfi.cfa.offset ||
- state->stack_size != initial_func_cfi.cfa.offset ||
- state->drap)
+ if (cfi->cfa.base != initial_func_cfi.cfa.base || cfi->drap)
+ return true;
+
+ if (cfi->cfa.offset != initial_func_cfi.cfa.offset + ret_offset)
return true;
- for (i = 0; i < CFI_NUM_REGS; i++)
- if (state->regs[i].base != initial_func_cfi.regs[i].base ||
- state->regs[i].offset != initial_func_cfi.regs[i].offset)
+ if (cfi->stack_size != initial_func_cfi.cfa.offset + ret_offset)
+ return true;
+
+ /*
+	 * If there is a ret offset hint, then don't check registers
+ * because a callee-saved register might have been pushed on
+ * the stack.
+ */
+ if (ret_offset)
+ return false;
+
+ for (i = 0; i < CFI_NUM_REGS; i++) {
+ if (cfi->regs[i].base != initial_func_cfi.regs[i].base ||
+ cfi->regs[i].offset != initial_func_cfi.regs[i].offset)
return true;
+ }
return false;
}
static bool has_valid_stack_frame(struct insn_state *state)
{
- if (state->cfa.base == CFI_BP && state->regs[CFI_BP].base == CFI_CFA &&
- state->regs[CFI_BP].offset == -16)
+ struct cfi_state *cfi = &state->cfi;
+
+ if (cfi->cfa.base == CFI_BP && cfi->regs[CFI_BP].base == CFI_CFA &&
+ cfi->regs[CFI_BP].offset == -16)
return true;
- if (state->drap && state->regs[CFI_BP].base == CFI_BP)
+ if (cfi->drap && cfi->regs[CFI_BP].base == CFI_BP)
return true;
return false;
}
-static int update_insn_state_regs(struct instruction *insn, struct insn_state *state)
+static int update_cfi_state_regs(struct instruction *insn,
+ struct cfi_state *cfi,
+ struct stack_op *op)
{
- struct cfi_reg *cfa = &state->cfa;
- struct stack_op *op = &insn->stack_op;
+ struct cfi_reg *cfa = &cfi->cfa;
if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT)
return 0;
@@ -1479,20 +1765,19 @@ static int update_insn_state_regs(struct instruction *insn, struct insn_state *s
return 0;
}
-static void save_reg(struct insn_state *state, unsigned char reg, int base,
- int offset)
+static void save_reg(struct cfi_state *cfi, unsigned char reg, int base, int offset)
{
if (arch_callee_saved_reg(reg) &&
- state->regs[reg].base == CFI_UNDEFINED) {
- state->regs[reg].base = base;
- state->regs[reg].offset = offset;
+ cfi->regs[reg].base == CFI_UNDEFINED) {
+ cfi->regs[reg].base = base;
+ cfi->regs[reg].offset = offset;
}
}
-static void restore_reg(struct insn_state *state, unsigned char reg)
+static void restore_reg(struct cfi_state *cfi, unsigned char reg)
{
- state->regs[reg].base = CFI_UNDEFINED;
- state->regs[reg].offset = 0;
+ cfi->regs[reg].base = initial_func_cfi.regs[reg].base;
+ cfi->regs[reg].offset = initial_func_cfi.regs[reg].offset;
}
/*
@@ -1548,11 +1833,11 @@ static void restore_reg(struct insn_state *state, unsigned char reg)
* 41 5d pop %r13
* c3 retq
*/
-static int update_insn_state(struct instruction *insn, struct insn_state *state)
+static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
+ struct stack_op *op)
{
- struct stack_op *op = &insn->stack_op;
- struct cfi_reg *cfa = &state->cfa;
- struct cfi_reg *regs = state->regs;
+ struct cfi_reg *cfa = &cfi->cfa;
+ struct cfi_reg *regs = cfi->regs;
/* stack operations don't make sense with an undefined CFA */
if (cfa->base == CFI_UNDEFINED) {
@@ -1563,8 +1848,9 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
return 0;
}
- if (state->type == ORC_TYPE_REGS || state->type == ORC_TYPE_REGS_IRET)
- return update_insn_state_regs(insn, state);
+ if (cfi->type == UNWIND_HINT_TYPE_REGS ||
+ cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL)
+ return update_cfi_state_regs(insn, cfi, op);
switch (op->dest.type) {
@@ -1579,16 +1865,16 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
/* mov %rsp, %rbp */
cfa->base = op->dest.reg;
- state->bp_scratch = false;
+ cfi->bp_scratch = false;
}
else if (op->src.reg == CFI_SP &&
- op->dest.reg == CFI_BP && state->drap) {
+ op->dest.reg == CFI_BP && cfi->drap) {
/* drap: mov %rsp, %rbp */
regs[CFI_BP].base = CFI_BP;
- regs[CFI_BP].offset = -state->stack_size;
- state->bp_scratch = false;
+ regs[CFI_BP].offset = -cfi->stack_size;
+ cfi->bp_scratch = false;
}
else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
@@ -1603,8 +1889,8 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
* ...
* mov %rax, %rsp
*/
- state->vals[op->dest.reg].base = CFI_CFA;
- state->vals[op->dest.reg].offset = -state->stack_size;
+ cfi->vals[op->dest.reg].base = CFI_CFA;
+ cfi->vals[op->dest.reg].offset = -cfi->stack_size;
}
else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP &&
@@ -1615,14 +1901,14 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
*
* Restore the original stack pointer (Clang).
*/
- state->stack_size = -state->regs[CFI_BP].offset;
+ cfi->stack_size = -cfi->regs[CFI_BP].offset;
}
else if (op->dest.reg == cfa->base) {
/* mov %reg, %rsp */
if (cfa->base == CFI_SP &&
- state->vals[op->src.reg].base == CFI_CFA) {
+ cfi->vals[op->src.reg].base == CFI_CFA) {
/*
* This is needed for the rare case
@@ -1632,8 +1918,8 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
* ...
* mov %rcx, %rsp
*/
- cfa->offset = -state->vals[op->src.reg].offset;
- state->stack_size = cfa->offset;
+ cfa->offset = -cfi->vals[op->src.reg].offset;
+ cfi->stack_size = cfa->offset;
} else {
cfa->base = CFI_UNDEFINED;
@@ -1647,7 +1933,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) {
/* add imm, %rsp */
- state->stack_size -= op->src.offset;
+ cfi->stack_size -= op->src.offset;
if (cfa->base == CFI_SP)
cfa->offset -= op->src.offset;
break;
@@ -1656,14 +1942,14 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) {
/* lea disp(%rbp), %rsp */
- state->stack_size = -(op->src.offset + regs[CFI_BP].offset);
+ cfi->stack_size = -(op->src.offset + regs[CFI_BP].offset);
break;
}
if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
/* drap: lea disp(%rsp), %drap */
- state->drap_reg = op->dest.reg;
+ cfi->drap_reg = op->dest.reg;
/*
* lea disp(%rsp), %reg
@@ -1675,25 +1961,25 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
* ...
* mov %rcx, %rsp
*/
- state->vals[op->dest.reg].base = CFI_CFA;
- state->vals[op->dest.reg].offset = \
- -state->stack_size + op->src.offset;
+ cfi->vals[op->dest.reg].base = CFI_CFA;
+ cfi->vals[op->dest.reg].offset = \
+ -cfi->stack_size + op->src.offset;
break;
}
- if (state->drap && op->dest.reg == CFI_SP &&
- op->src.reg == state->drap_reg) {
+ if (cfi->drap && op->dest.reg == CFI_SP &&
+ op->src.reg == cfi->drap_reg) {
/* drap: lea disp(%drap), %rsp */
cfa->base = CFI_SP;
- cfa->offset = state->stack_size = -op->src.offset;
- state->drap_reg = CFI_UNDEFINED;
- state->drap = false;
+ cfa->offset = cfi->stack_size = -op->src.offset;
+ cfi->drap_reg = CFI_UNDEFINED;
+ cfi->drap = false;
break;
}
- if (op->dest.reg == state->cfa.base) {
+ if (op->dest.reg == cfi->cfa.base) {
WARN_FUNC("unsupported stack register modification",
insn->sec, insn->offset);
return -1;
@@ -1703,18 +1989,18 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
case OP_SRC_AND:
if (op->dest.reg != CFI_SP ||
- (state->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) ||
- (state->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) {
+ (cfi->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) ||
+ (cfi->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) {
WARN_FUNC("unsupported stack pointer realignment",
insn->sec, insn->offset);
return -1;
}
- if (state->drap_reg != CFI_UNDEFINED) {
+ if (cfi->drap_reg != CFI_UNDEFINED) {
/* drap: and imm, %rsp */
- cfa->base = state->drap_reg;
- cfa->offset = state->stack_size = 0;
- state->drap = true;
+ cfa->base = cfi->drap_reg;
+ cfa->offset = cfi->stack_size = 0;
+ cfi->drap = true;
}
/*
@@ -1726,57 +2012,55 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
case OP_SRC_POP:
case OP_SRC_POPF:
- if (!state->drap && op->dest.type == OP_DEST_REG &&
- op->dest.reg == cfa->base) {
+ if (!cfi->drap && op->dest.reg == cfa->base) {
/* pop %rbp */
cfa->base = CFI_SP;
}
- if (state->drap && cfa->base == CFI_BP_INDIRECT &&
- op->dest.type == OP_DEST_REG &&
- op->dest.reg == state->drap_reg &&
- state->drap_offset == -state->stack_size) {
+ if (cfi->drap && cfa->base == CFI_BP_INDIRECT &&
+ op->dest.reg == cfi->drap_reg &&
+ cfi->drap_offset == -cfi->stack_size) {
/* drap: pop %drap */
- cfa->base = state->drap_reg;
+ cfa->base = cfi->drap_reg;
cfa->offset = 0;
- state->drap_offset = -1;
+ cfi->drap_offset = -1;
- } else if (regs[op->dest.reg].offset == -state->stack_size) {
+ } else if (regs[op->dest.reg].offset == -cfi->stack_size) {
/* pop %reg */
- restore_reg(state, op->dest.reg);
+ restore_reg(cfi, op->dest.reg);
}
- state->stack_size -= 8;
+ cfi->stack_size -= 8;
if (cfa->base == CFI_SP)
cfa->offset -= 8;
break;
case OP_SRC_REG_INDIRECT:
- if (state->drap && op->src.reg == CFI_BP &&
- op->src.offset == state->drap_offset) {
+ if (cfi->drap && op->src.reg == CFI_BP &&
+ op->src.offset == cfi->drap_offset) {
/* drap: mov disp(%rbp), %drap */
- cfa->base = state->drap_reg;
+ cfa->base = cfi->drap_reg;
cfa->offset = 0;
- state->drap_offset = -1;
+ cfi->drap_offset = -1;
}
- if (state->drap && op->src.reg == CFI_BP &&
+ if (cfi->drap && op->src.reg == CFI_BP &&
op->src.offset == regs[op->dest.reg].offset) {
/* drap: mov disp(%rbp), %reg */
- restore_reg(state, op->dest.reg);
+ restore_reg(cfi, op->dest.reg);
} else if (op->src.reg == cfa->base &&
op->src.offset == regs[op->dest.reg].offset + cfa->offset) {
/* mov disp(%rbp), %reg */
/* mov disp(%rsp), %reg */
- restore_reg(state, op->dest.reg);
+ restore_reg(cfi, op->dest.reg);
}
break;
@@ -1791,78 +2075,76 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
case OP_DEST_PUSH:
case OP_DEST_PUSHF:
- state->stack_size += 8;
+ cfi->stack_size += 8;
if (cfa->base == CFI_SP)
cfa->offset += 8;
if (op->src.type != OP_SRC_REG)
break;
- if (state->drap) {
- if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+ if (cfi->drap) {
+ if (op->src.reg == cfa->base && op->src.reg == cfi->drap_reg) {
/* drap: push %drap */
cfa->base = CFI_BP_INDIRECT;
- cfa->offset = -state->stack_size;
+ cfa->offset = -cfi->stack_size;
/* save drap so we know when to restore it */
- state->drap_offset = -state->stack_size;
+ cfi->drap_offset = -cfi->stack_size;
- } else if (op->src.reg == CFI_BP && cfa->base == state->drap_reg) {
+ } else if (op->src.reg == CFI_BP && cfa->base == cfi->drap_reg) {
/* drap: push %rbp */
- state->stack_size = 0;
+ cfi->stack_size = 0;
- } else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+ } else {
/* drap: push %reg */
- save_reg(state, op->src.reg, CFI_BP, -state->stack_size);
+ save_reg(cfi, op->src.reg, CFI_BP, -cfi->stack_size);
}
} else {
/* push %reg */
- save_reg(state, op->src.reg, CFI_CFA, -state->stack_size);
+ save_reg(cfi, op->src.reg, CFI_CFA, -cfi->stack_size);
}
/* detect when asm code uses rbp as a scratch register */
if (!no_fp && insn->func && op->src.reg == CFI_BP &&
cfa->base != CFI_BP)
- state->bp_scratch = true;
+ cfi->bp_scratch = true;
break;
case OP_DEST_REG_INDIRECT:
- if (state->drap) {
- if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+ if (cfi->drap) {
+ if (op->src.reg == cfa->base && op->src.reg == cfi->drap_reg) {
/* drap: mov %drap, disp(%rbp) */
cfa->base = CFI_BP_INDIRECT;
cfa->offset = op->dest.offset;
/* save drap offset so we know when to restore it */
- state->drap_offset = op->dest.offset;
- }
-
- else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+ cfi->drap_offset = op->dest.offset;
+ } else {
/* drap: mov reg, disp(%rbp) */
- save_reg(state, op->src.reg, CFI_BP, op->dest.offset);
+ save_reg(cfi, op->src.reg, CFI_BP, op->dest.offset);
}
} else if (op->dest.reg == cfa->base) {
/* mov reg, disp(%rbp) */
/* mov reg, disp(%rsp) */
- save_reg(state, op->src.reg, CFI_CFA,
- op->dest.offset - state->cfa.offset);
+ save_reg(cfi, op->src.reg, CFI_CFA,
+ op->dest.offset - cfi->cfa.offset);
}
break;
case OP_DEST_LEAVE:
- if ((!state->drap && cfa->base != CFI_BP) ||
- (state->drap && cfa->base != state->drap_reg)) {
+ if ((!cfi->drap && cfa->base != CFI_BP) ||
+ (cfi->drap && cfa->base != cfi->drap_reg)) {
WARN_FUNC("leave instruction with modified stack frame",
insn->sec, insn->offset);
return -1;
@@ -1870,10 +2152,10 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
/* leave (mov %rbp, %rsp; pop %rbp) */
- state->stack_size = -state->regs[CFI_BP].offset - 8;
- restore_reg(state, CFI_BP);
+ cfi->stack_size = -cfi->regs[CFI_BP].offset - 8;
+ restore_reg(cfi, CFI_BP);
- if (!state->drap) {
+ if (!cfi->drap) {
cfa->base = CFI_SP;
cfa->offset -= 8;
}
@@ -1888,7 +2170,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
}
/* pop mem */
- state->stack_size -= 8;
+ cfi->stack_size -= 8;
if (cfa->base == CFI_SP)
cfa->offset -= 8;
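
Note: the push/pop cases above keep cfi->stack_size and cfa->offset moving by 8 in lockstep for as long as the CFA is still computed from %rsp. A minimal standalone model of that invariant (names and the driver are invented; this is not objtool code):

#include <assert.h>
#include <stdio.h>

struct toy_cfa { int base; int offset; };	/* base 0 stands in for CFI_SP */

static void toy_push(struct toy_cfa *cfa, int *stack_size)
{
	*stack_size += 8;
	if (cfa->base == 0)
		cfa->offset += 8;
}

static void toy_pop(struct toy_cfa *cfa, int *stack_size)
{
	*stack_size -= 8;
	if (cfa->base == 0)
		cfa->offset -= 8;
}

int main(void)
{
	struct toy_cfa cfa = { .base = 0, .offset = 8 };	/* state right after CALL */
	int stack_size = 8;

	toy_push(&cfa, &stack_size);	/* push %rbp */
	toy_push(&cfa, &stack_size);	/* push %rbx */
	toy_pop(&cfa, &stack_size);	/* pop  %rbx */
	toy_pop(&cfa, &stack_size);	/* pop  %rbp */

	/* While the CFA stays %rsp-based, the two values move in lockstep. */
	assert(cfa.offset == stack_size);
	printf("stack_size=%d cfa.offset=%d\n", stack_size, cfa.offset);
	return 0;
}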
@@ -1903,41 +2185,86 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
return 0;
}
-static bool insn_state_match(struct instruction *insn, struct insn_state *state)
+static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
+{
+ struct stack_op *op;
+
+ list_for_each_entry(op, &insn->stack_ops, list) {
+ struct cfi_state old_cfi = state->cfi;
+ int res;
+
+ res = update_cfi_state(insn, &state->cfi, op);
+ if (res)
+ return res;
+
+ if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) {
+ WARN_FUNC("alternative modifies stack", insn->sec, insn->offset);
+ return -1;
+ }
+
+ if (op->dest.type == OP_DEST_PUSHF) {
+ if (!state->uaccess_stack) {
+ state->uaccess_stack = 1;
+ } else if (state->uaccess_stack >> 31) {
+ WARN_FUNC("PUSHF stack exhausted",
+ insn->sec, insn->offset);
+ return 1;
+ }
+ state->uaccess_stack <<= 1;
+ state->uaccess_stack |= state->uaccess;
+ }
+
+ if (op->src.type == OP_SRC_POPF) {
+ if (state->uaccess_stack) {
+ state->uaccess = state->uaccess_stack & 1;
+ state->uaccess_stack >>= 1;
+ if (state->uaccess_stack == 1)
+ state->uaccess_stack = 0;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2)
{
- struct insn_state *state1 = &insn->state, *state2 = state;
+ struct cfi_state *cfi1 = &insn->cfi;
int i;
- if (memcmp(&state1->cfa, &state2->cfa, sizeof(state1->cfa))) {
+ if (memcmp(&cfi1->cfa, &cfi2->cfa, sizeof(cfi1->cfa))) {
+
WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d",
insn->sec, insn->offset,
- state1->cfa.base, state1->cfa.offset,
- state2->cfa.base, state2->cfa.offset);
+ cfi1->cfa.base, cfi1->cfa.offset,
+ cfi2->cfa.base, cfi2->cfa.offset);
- } else if (memcmp(&state1->regs, &state2->regs, sizeof(state1->regs))) {
+ } else if (memcmp(&cfi1->regs, &cfi2->regs, sizeof(cfi1->regs))) {
for (i = 0; i < CFI_NUM_REGS; i++) {
- if (!memcmp(&state1->regs[i], &state2->regs[i],
+ if (!memcmp(&cfi1->regs[i], &cfi2->regs[i],
sizeof(struct cfi_reg)))
continue;
WARN_FUNC("stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d",
insn->sec, insn->offset,
- i, state1->regs[i].base, state1->regs[i].offset,
- i, state2->regs[i].base, state2->regs[i].offset);
+ i, cfi1->regs[i].base, cfi1->regs[i].offset,
+ i, cfi2->regs[i].base, cfi2->regs[i].offset);
break;
}
- } else if (state1->type != state2->type) {
+ } else if (cfi1->type != cfi2->type) {
+
WARN_FUNC("stack state mismatch: type1=%d type2=%d",
- insn->sec, insn->offset, state1->type, state2->type);
+ insn->sec, insn->offset, cfi1->type, cfi2->type);
+
+ } else if (cfi1->drap != cfi2->drap ||
+ (cfi1->drap && cfi1->drap_reg != cfi2->drap_reg) ||
+ (cfi1->drap && cfi1->drap_offset != cfi2->drap_offset)) {
- } else if (state1->drap != state2->drap ||
- (state1->drap && state1->drap_reg != state2->drap_reg) ||
- (state1->drap && state1->drap_offset != state2->drap_offset)) {
WARN_FUNC("stack state mismatch: drap1=%d(%d,%d) drap2=%d(%d,%d)",
insn->sec, insn->offset,
- state1->drap, state1->drap_reg, state1->drap_offset,
- state2->drap, state2->drap_reg, state2->drap_offset);
+ cfi1->drap, cfi1->drap_reg, cfi1->drap_offset,
+ cfi2->drap, cfi2->drap_reg, cfi2->drap_offset);
} else
return true;
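
Note: handle_insn_ops() above packs a bounded stack of booleans into the 32-bit uaccess_stack word so nested PUSHF/POPF pairs restore the right uaccess value. A self-contained sketch of that encoding, mirroring the sentinel bit and the exhaustion check from the hunk (the driver is illustrative only):

#include <assert.h>

static unsigned int bits_push(unsigned int stack, int flag)
{
	if (!stack)
		stack = 1;			/* seed the sentinel bit */
	assert(!(stack >> 31));			/* otherwise: stack exhausted */
	return (stack << 1) | (flag & 1);	/* PUSHF: remember 'flag' */
}

static unsigned int bits_pop(unsigned int stack, int *flag)
{
	if (!stack)
		return 0;			/* nothing was pushed */
	*flag = stack & 1;			/* POPF: restore 'flag' */
	stack >>= 1;
	return stack == 1 ? 0 : stack;		/* drop sentinel when empty */
}

int main(void)
{
	unsigned int s = 0;
	int restored = 0;

	s = bits_push(s, 1);		/* pushf while uaccess is enabled */
	s = bits_push(s, 0);		/* nested pushf, uaccess disabled */
	s = bits_pop(s, &restored);	/* popf -> restores 0 */
	assert(restored == 0);
	s = bits_pop(s, &restored);	/* popf -> restores 1, stack empty */
	assert(restored == 1 && s == 0);
	return 0;
}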
@@ -1961,8 +2288,41 @@ static inline const char *call_dest_name(struct instruction *insn)
return "{dynamic}";
}
+static inline bool noinstr_call_dest(struct symbol *func)
+{
+ /*
+ * We can't deal with indirect function calls at present;
+ * assume they're instrumented.
+ */
+ if (!func)
+ return false;
+
+ /*
+	 * If the symbol is from a noinstr section, we're good.
+ */
+ if (func->sec->noinstr)
+ return true;
+
+ /*
+ * The __ubsan_handle_*() calls are like WARN(), they only happen when
+ * something 'BAD' happened. At the risk of taking the machine down,
+ * let them proceed to get the message out.
+ */
+ if (!strncmp(func->name, "__ubsan_handle_", 15))
+ return true;
+
+ return false;
+}
+
static int validate_call(struct instruction *insn, struct insn_state *state)
{
+ if (state->noinstr && state->instr <= 0 &&
+ !noinstr_call_dest(insn->call_dest)) {
+ WARN_FUNC("call to %s() leaves .noinstr.text section",
+ insn->sec, insn->offset, call_dest_name(insn));
+ return 1;
+ }
+
if (state->uaccess && !func_uaccess_safe(insn->call_dest)) {
WARN_FUNC("call to %s() with UACCESS enabled",
insn->sec, insn->offset, call_dest_name(insn));
@@ -1980,7 +2340,7 @@ static int validate_call(struct instruction *insn, struct insn_state *state)
static int validate_sibling_call(struct instruction *insn, struct insn_state *state)
{
- if (has_modified_stack_frame(state)) {
+ if (has_modified_stack_frame(insn, state)) {
WARN_FUNC("sibling call from callable instruction with modified stack frame",
insn->sec, insn->offset);
return 1;
@@ -1991,6 +2351,12 @@ static int validate_sibling_call(struct instruction *insn, struct insn_state *st
static int validate_return(struct symbol *func, struct instruction *insn, struct insn_state *state)
{
+ if (state->noinstr && state->instr > 0) {
+ WARN_FUNC("return with instrumentation enabled",
+ insn->sec, insn->offset);
+ return 1;
+ }
+
if (state->uaccess && !func_uaccess_safe(func)) {
WARN_FUNC("return with UACCESS enabled",
insn->sec, insn->offset);
@@ -2009,13 +2375,13 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct
return 1;
}
- if (func && has_modified_stack_frame(state)) {
+ if (func && has_modified_stack_frame(insn, state)) {
WARN_FUNC("return with modified stack frame",
insn->sec, insn->offset);
return 1;
}
- if (state->bp_scratch) {
+ if (state->cfi.bp_scratch) {
WARN_FUNC("BP used as a scratch register",
insn->sec, insn->offset);
return 1;
@@ -2025,29 +2391,46 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct
}
/*
 + * Alternatives should not contain any ORC entries; this in turn means they
 + * should not contain any CFI ops, which implies all instructions should have
 + * the same CFI state.
+ *
 + * It is possible to construct alternatives that have unreachable holes that go
 + * unreported (because they're NOPs); such holes would result in CFI_UNDEFINED
 + * states, which then result in ORC entries, which we just said we didn't want.
+ *
+ * Avoid them by copying the CFI entry of the first instruction into the whole
+ * alternative.
+ */
+static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
+{
+ struct instruction *first_insn = insn;
+ int alt_group = insn->alt_group;
+
+ sec_for_each_insn_continue(file, insn) {
+ if (insn->alt_group != alt_group)
+ break;
+ insn->cfi = first_insn->cfi;
+ }
+}
+
+/*
* Follow the branch starting at the given instruction, and recursively follow
* any other branches (jumps). Meanwhile, track the frame pointer state at
* each instruction and validate all the rules described in
* tools/objtool/Documentation/stack-validation.txt.
*/
static int validate_branch(struct objtool_file *file, struct symbol *func,
- struct instruction *first, struct insn_state state)
+ struct instruction *insn, struct insn_state state)
{
struct alternative *alt;
- struct instruction *insn, *next_insn;
+ struct instruction *next_insn;
struct section *sec;
u8 visited;
int ret;
- insn = first;
sec = insn->sec;
- if (insn->alt_group && list_empty(&insn->alts)) {
- WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
- sec, insn->offset);
- return 1;
- }
-
while (1) {
next_insn = next_insn_same_sec(file, insn);
@@ -2065,59 +2448,24 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
visited = 1 << state.uaccess;
if (insn->visited) {
- if (!insn->hint && !insn_state_match(insn, &state))
+ if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
return 1;
if (insn->visited & visited)
return 0;
}
- if (insn->hint) {
- if (insn->restore) {
- struct instruction *save_insn, *i;
-
- i = insn;
- save_insn = NULL;
- sym_for_each_insn_continue_reverse(file, func, i) {
- if (i->save) {
- save_insn = i;
- break;
- }
- }
+ if (state.noinstr)
+ state.instr += insn->instr;
- if (!save_insn) {
- WARN_FUNC("no corresponding CFI save for CFI restore",
- sec, insn->offset);
- return 1;
- }
-
- if (!save_insn->visited) {
- /*
- * Oops, no state to copy yet.
- * Hopefully we can reach this
- * instruction from another branch
- * after the save insn has been
- * visited.
- */
- if (insn == first)
- return 0;
-
- WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
- sec, insn->offset);
- return 1;
- }
-
- insn->state = save_insn->state;
- }
-
- state = insn->state;
-
- } else
- insn->state = state;
+ if (insn->hint)
+ state.cfi = insn->cfi;
+ else
+ insn->cfi = state.cfi;
insn->visited |= visited;
- if (!insn->ignore_alts) {
+ if (!insn->ignore_alts && !list_empty(&insn->alts)) {
bool skip_orig = false;
list_for_each_entry(alt, &insn->alts, list) {
@@ -2132,10 +2480,16 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
}
}
+ if (insn->alt_group)
+ fill_alternative_cfi(file, insn);
+
if (skip_orig)
return 0;
}
+ if (handle_insn_ops(insn, &state))
+ return 1;
+
switch (insn->type) {
case INSN_RETURN:
@@ -2157,6 +2511,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
if (dead_end_function(file, insn->call_dest))
return 0;
+ if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+ &file->static_call_list);
+ }
+
break;
case INSN_JUMP_CONDITIONAL:
@@ -2202,32 +2561,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
}
return 0;
- case INSN_STACK:
- if (update_insn_state(insn, &state))
- return 1;
-
- if (insn->stack_op.dest.type == OP_DEST_PUSHF) {
- if (!state.uaccess_stack) {
- state.uaccess_stack = 1;
- } else if (state.uaccess_stack >> 31) {
- WARN_FUNC("PUSHF stack exhausted", sec, insn->offset);
- return 1;
- }
- state.uaccess_stack <<= 1;
- state.uaccess_stack |= state.uaccess;
- }
-
- if (insn->stack_op.src.type == OP_SRC_POPF) {
- if (state.uaccess_stack) {
- state.uaccess = state.uaccess_stack & 1;
- state.uaccess_stack >>= 1;
- if (state.uaccess_stack == 1)
- state.uaccess_stack = 0;
- }
- }
-
- break;
-
case INSN_STAC:
if (state.uaccess) {
WARN_FUNC("recursive UACCESS enable", sec, insn->offset);
@@ -2273,7 +2606,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
return 0;
if (!next_insn) {
- if (state.cfa.base == CFI_UNDEFINED)
+ if (state.cfi.cfa.base == CFI_UNDEFINED)
return 0;
WARN("%s: unexpected end of section", sec->name);
return 1;
@@ -2285,24 +2618,34 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
return 0;
}
-static int validate_unwind_hints(struct objtool_file *file)
+static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
{
struct instruction *insn;
- int ret, warnings = 0;
struct insn_state state;
+ int ret, warnings = 0;
if (!file->hints)
return 0;
- clear_insn_state(&state);
+ init_insn_state(&state, sec);
- for_each_insn(file, insn) {
+ if (sec) {
+ insn = find_insn(file, sec, 0);
+ if (!insn)
+ return 0;
+ } else {
+ insn = list_first_entry(&file->insn_list, typeof(*insn), list);
+ }
+
+ while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) {
if (insn->hint && !insn->visited) {
ret = validate_branch(file, insn->func, insn, state);
if (ret && backtrace)
BT_FUNC("<=== (hint)", insn);
warnings += ret;
}
+
+ insn = list_next_entry(insn, list);
}
return warnings;
@@ -2353,9 +2696,10 @@ static bool is_ubsan_insn(struct instruction *insn)
"__ubsan_handle_builtin_unreachable"));
}
-static bool ignore_unreachable_insn(struct instruction *insn)
+static bool ignore_unreachable_insn(struct objtool_file *file, struct instruction *insn)
{
int i;
+ struct instruction *prev_insn;
if (insn->ignore || insn->type == INSN_NOP)
return true;
@@ -2372,6 +2716,9 @@ static bool ignore_unreachable_insn(struct instruction *insn)
!strcmp(insn->sec->name, ".altinstr_aux"))
return true;
+ if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET)
+ return true;
+
if (!insn->func)
return false;
@@ -2380,8 +2727,11 @@ static bool ignore_unreachable_insn(struct instruction *insn)
* __builtin_unreachable(). The BUG() macro has an unreachable() after
* the UD2, which causes GCC's undefined trap logic to emit another UD2
* (or occasionally a JMP to UD2).
+ *
+ * It may also insert a UD2 after calling a __noreturn function.
*/
- if (list_prev_entry(insn, list)->dead_end &&
+ prev_insn = list_prev_entry(insn, list);
+ if ((prev_insn->dead_end || dead_end_function(file, prev_insn->call_dest)) &&
(insn->type == INSN_BUG ||
(insn->type == INSN_JUMP_UNCONDITIONAL &&
insn->jump_dest && insn->jump_dest->type == INSN_BUG)))
@@ -2417,43 +2767,69 @@ static bool ignore_unreachable_insn(struct instruction *insn)
return false;
}
-static int validate_section(struct objtool_file *file, struct section *sec)
+static int validate_symbol(struct objtool_file *file, struct section *sec,
+ struct symbol *sym, struct insn_state *state)
{
- struct symbol *func;
struct instruction *insn;
- struct insn_state state;
- int ret, warnings = 0;
+ int ret;
+
+ if (!sym->len) {
+ WARN("%s() is missing an ELF size annotation", sym->name);
+ return 1;
+ }
+
+ if (sym->pfunc != sym || sym->alias != sym)
+ return 0;
+
+ insn = find_insn(file, sec, sym->offset);
+ if (!insn || insn->ignore || insn->visited)
+ return 0;
- clear_insn_state(&state);
+ state->uaccess = sym->uaccess_safe;
- state.cfa = initial_func_cfi.cfa;
- memcpy(&state.regs, &initial_func_cfi.regs,
- CFI_NUM_REGS * sizeof(struct cfi_reg));
- state.stack_size = initial_func_cfi.cfa.offset;
+ ret = validate_branch(file, insn->func, insn, *state);
+ if (ret && backtrace)
+ BT_FUNC("<=== (sym)", insn);
+ return ret;
+}
+
+static int validate_section(struct objtool_file *file, struct section *sec)
+{
+ struct insn_state state;
+ struct symbol *func;
+ int warnings = 0;
list_for_each_entry(func, &sec->symbol_list, list) {
if (func->type != STT_FUNC)
continue;
- if (!func->len) {
- WARN("%s() is missing an ELF size annotation",
- func->name);
- warnings++;
- }
+ init_insn_state(&state, sec);
+ state.cfi.cfa = initial_func_cfi.cfa;
+ memcpy(&state.cfi.regs, &initial_func_cfi.regs,
+ CFI_NUM_REGS * sizeof(struct cfi_reg));
+ state.cfi.stack_size = initial_func_cfi.cfa.offset;
- if (func->pfunc != func || func->alias != func)
- continue;
+ warnings += validate_symbol(file, sec, func, &state);
+ }
- insn = find_insn(file, sec, func->offset);
- if (!insn || insn->ignore || insn->visited)
- continue;
+ return warnings;
+}
+
+static int validate_vmlinux_functions(struct objtool_file *file)
+{
+ struct section *sec;
+ int warnings = 0;
- state.uaccess = func->uaccess_safe;
+ sec = find_section_by_name(file->elf, ".noinstr.text");
+ if (sec) {
+ warnings += validate_section(file, sec);
+ warnings += validate_unwind_hints(file, sec);
+ }
- ret = validate_branch(file, func, insn, state);
- if (ret && backtrace)
- BT_FUNC("<=== (func)", insn);
- warnings += ret;
+ sec = find_section_by_name(file->elf, ".entry.text");
+ if (sec) {
+ warnings += validate_section(file, sec);
+ warnings += validate_unwind_hints(file, sec);
}
return warnings;
@@ -2464,8 +2840,12 @@ static int validate_functions(struct objtool_file *file)
struct section *sec;
int warnings = 0;
- for_each_sec(file, sec)
+ for_each_sec(file, sec) {
+ if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+ continue;
+
warnings += validate_section(file, sec);
+ }
return warnings;
}
@@ -2478,7 +2858,7 @@ static int validate_reachable_instructions(struct objtool_file *file)
return 0;
for_each_insn(file, insn) {
- if (insn->visited || ignore_unreachable_insn(insn))
+ if (insn->visited || ignore_unreachable_insn(file, insn))
continue;
WARN_FUNC("unreachable instruction", insn->sec, insn->offset);
@@ -2488,71 +2868,57 @@ static int validate_reachable_instructions(struct objtool_file *file)
return 0;
}
-static struct objtool_file file;
-
-int check(const char *_objname, bool orc)
+int check(struct objtool_file *file)
{
int ret, warnings = 0;
- objname = _objname;
-
- file.elf = elf_read(objname, orc ? O_RDWR : O_RDONLY);
- if (!file.elf)
- return 1;
-
- INIT_LIST_HEAD(&file.insn_list);
- hash_init(file.insn_hash);
- file.c_file = find_section_by_name(file.elf, ".comment");
- file.ignore_unreachables = no_unreachable;
- file.hints = false;
-
arch_initial_func_cfi_state(&initial_func_cfi);
- ret = decode_sections(&file);
+ ret = decode_sections(file);
if (ret < 0)
goto out;
warnings += ret;
- if (list_empty(&file.insn_list))
+ if (list_empty(&file->insn_list))
+ goto out;
+
+ if (vmlinux && !validate_dup) {
+ ret = validate_vmlinux_functions(file);
+ if (ret < 0)
+ goto out;
+
+ warnings += ret;
goto out;
+ }
if (retpoline) {
- ret = validate_retpoline(&file);
+ ret = validate_retpoline(file);
if (ret < 0)
return ret;
warnings += ret;
}
- ret = validate_functions(&file);
+ ret = validate_functions(file);
if (ret < 0)
goto out;
warnings += ret;
- ret = validate_unwind_hints(&file);
+ ret = validate_unwind_hints(file, NULL);
if (ret < 0)
goto out;
warnings += ret;
if (!warnings) {
- ret = validate_reachable_instructions(&file);
+ ret = validate_reachable_instructions(file);
if (ret < 0)
goto out;
warnings += ret;
}
- if (orc) {
- ret = create_orc(&file);
- if (ret < 0)
- goto out;
-
- ret = create_orc_sections(&file);
- if (ret < 0)
- goto out;
-
- ret = elf_write(file.elf);
- if (ret < 0)
- goto out;
- }
+ ret = create_static_call_sections(file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
out:
if (ret < 0) {
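
Note: with ORC generation dropped from check() above, the open/validate/emit steps compose through the entry points this series exposes (objtool_open_read(), check(), create_orc(), create_orc_sections(), elf_write()). A hedged sketch of how a caller such as an ORC subcommand might chain them; the wrapper name is invented and error handling is reduced to the minimum:

#include "objtool.h"

int check_and_emit_orc(const char *_objname)
{
	struct objtool_file *file;
	int ret;

	file = objtool_open_read(_objname);
	if (!file)
		return 1;

	ret = check(file);		/* validation only, no ELF writes */
	if (ret)
		return ret;

	ret = create_orc(file);		/* compute ORC data per instruction */
	if (ret)
		return ret;

	ret = create_orc_sections(file);	/* emit .orc_unwind{,_ip} + relocs */
	if (ret)
		return ret;

	return elf_write(file->elf);	/* flush all changed sections */
}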
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index f0ce8ffe7135..5ec00a4b891b 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -7,54 +7,52 @@
#define _CHECK_H
#include <stdbool.h>
-#include "elf.h"
#include "cfi.h"
#include "arch.h"
-#include "orc.h"
-#include <linux/hashtable.h>
struct insn_state {
- struct cfi_reg cfa;
- struct cfi_reg regs[CFI_NUM_REGS];
- int stack_size;
- unsigned char type;
- bool bp_scratch;
- bool drap, end, uaccess, df;
+ struct cfi_state cfi;
unsigned int uaccess_stack;
- int drap_reg, drap_offset;
- struct cfi_reg vals[CFI_NUM_REGS];
+ bool uaccess;
+ bool df;
+ bool noinstr;
+ s8 instr;
};
struct instruction {
struct list_head list;
struct hlist_node hash;
+ struct list_head static_call_node;
struct section *sec;
unsigned long offset;
unsigned int len;
enum insn_type type;
unsigned long immediate;
- bool alt_group, dead_end, ignore, hint, save, restore, ignore_alts;
+ bool dead_end, ignore, ignore_alts;
+ bool hint;
bool retpoline_safe;
+ s8 instr;
u8 visited;
+ u8 ret_offset;
+ int alt_group;
struct symbol *call_dest;
struct instruction *jump_dest;
struct instruction *first_jump_src;
- struct rela *jump_table;
+ struct reloc *jump_table;
struct list_head alts;
struct symbol *func;
- struct stack_op stack_op;
- struct insn_state state;
+ struct list_head stack_ops;
+ struct cfi_state cfi;
+#ifdef INSN_USE_ORC
struct orc_entry orc;
+#endif
};
-struct objtool_file {
- struct elf *elf;
- struct list_head insn_list;
- DECLARE_HASHTABLE(insn_hash, 20);
- bool ignore_unreachables, c_file, hints, rodata;
-};
-
-int check(const char *objname, bool orc);
+static inline bool is_static_jump(struct instruction *insn)
+{
+ return insn->type == INSN_JUMP_CONDITIONAL ||
+ insn->type == INSN_JUMP_UNCONDITIONAL;
+}
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset);
@@ -68,5 +66,4 @@ struct instruction *find_insn(struct objtool_file *file,
insn->sec == sec; \
insn = list_next_entry(insn, list))
-
#endif /* _CHECK_H */
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index c4857fa3f1d1..4e1d7460574b 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -27,6 +27,22 @@ static inline u32 str_hash(const char *str)
return jhash(str, strlen(str), 0);
}
+static inline int elf_hash_bits(void)
+{
+ return vmlinux ? ELF_HASH_BITS : 16;
+}
+
+#define elf_hash_add(hashtable, node, key) \
+ hlist_add_head(node, &hashtable[hash_min(key, elf_hash_bits())])
+
+static void elf_hash_init(struct hlist_head *table)
+{
+ __hash_init(table, 1U << elf_hash_bits());
+}
+
+#define elf_hash_for_each_possible(name, obj, member, key) \
+ hlist_for_each_entry(obj, &name[hash_min(key, elf_hash_bits())], member)
+
static void rb_add(struct rb_root *tree, struct rb_node *node,
int (*cmp)(struct rb_node *, const struct rb_node *))
{
@@ -45,7 +61,7 @@ static void rb_add(struct rb_root *tree, struct rb_node *node,
rb_insert_color(node, tree);
}
-static struct rb_node *rb_find_first(struct rb_root *tree, const void *key,
+static struct rb_node *rb_find_first(const struct rb_root *tree, const void *key,
int (*cmp)(const void *key, const struct rb_node *))
{
struct rb_node *node = tree->rb_node;
@@ -111,11 +127,11 @@ static int symbol_by_offset(const void *key, const struct rb_node *node)
return 0;
}
-struct section *find_section_by_name(struct elf *elf, const char *name)
+struct section *find_section_by_name(const struct elf *elf, const char *name)
{
struct section *sec;
- hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name))
+ elf_hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name))
if (!strcmp(sec->name, name))
return sec;
@@ -127,7 +143,7 @@ static struct section *find_section_by_index(struct elf *elf,
{
struct section *sec;
- hash_for_each_possible(elf->section_hash, sec, hash, idx)
+ elf_hash_for_each_possible(elf->section_hash, sec, hash, idx)
if (sec->idx == idx)
return sec;
@@ -138,7 +154,7 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx)
{
struct symbol *sym;
- hash_for_each_possible(elf->symbol_hash, sym, hash, idx)
+ elf_hash_for_each_possible(elf->symbol_hash, sym, hash, idx)
if (sym->idx == idx)
return sym;
@@ -173,7 +189,7 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset)
return NULL;
}
-struct symbol *find_symbol_containing(struct section *sec, unsigned long offset)
+struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset)
{
struct rb_node *node;
@@ -201,37 +217,37 @@ struct symbol *find_func_containing(struct section *sec, unsigned long offset)
return NULL;
}
-struct symbol *find_symbol_by_name(struct elf *elf, const char *name)
+struct symbol *find_symbol_by_name(const struct elf *elf, const char *name)
{
struct symbol *sym;
- hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name))
+ elf_hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name))
if (!strcmp(sym->name, name))
return sym;
return NULL;
}
-struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec,
+struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec,
unsigned long offset, unsigned int len)
{
- struct rela *rela, *r = NULL;
+ struct reloc *reloc, *r = NULL;
unsigned long o;
- if (!sec->rela)
+ if (!sec->reloc)
return NULL;
- sec = sec->rela;
+ sec = sec->reloc;
for_offset_range(o, offset, offset + len) {
- hash_for_each_possible(elf->rela_hash, rela, hash,
+ elf_hash_for_each_possible(elf->reloc_hash, reloc, hash,
sec_offset_hash(sec, o)) {
- if (rela->sec != sec)
+ if (reloc->sec != sec)
continue;
- if (rela->offset >= offset && rela->offset < offset + len) {
- if (!r || rela->offset < r->offset)
- r = rela;
+ if (reloc->offset >= offset && reloc->offset < offset + len) {
+ if (!r || reloc->offset < r->offset)
+ r = reloc;
}
}
if (r)
@@ -241,9 +257,9 @@ struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec,
return NULL;
}
-struct rela *find_rela_by_dest(struct elf *elf, struct section *sec, unsigned long offset)
+struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset)
{
- return find_rela_by_dest_range(elf, sec, offset, 1);
+ return find_reloc_by_dest_range(elf, sec, offset, 1);
}
static int read_sections(struct elf *elf)
@@ -272,7 +288,7 @@ static int read_sections(struct elf *elf)
memset(sec, 0, sizeof(*sec));
INIT_LIST_HEAD(&sec->symbol_list);
- INIT_LIST_HEAD(&sec->rela_list);
+ INIT_LIST_HEAD(&sec->reloc_list);
s = elf_getscn(elf->elf, i);
if (!s) {
@@ -309,8 +325,8 @@ static int read_sections(struct elf *elf)
sec->len = sec->sh.sh_size;
list_add_tail(&sec->list, &elf->sections);
- hash_add(elf->section_hash, &sec->hash, sec->idx);
- hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+ elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
+ elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
}
if (stats)
@@ -327,12 +343,14 @@ static int read_sections(struct elf *elf)
static int read_symbols(struct elf *elf)
{
- struct section *symtab, *sec;
+ struct section *symtab, *symtab_shndx, *sec;
struct symbol *sym, *pfunc;
struct list_head *entry;
struct rb_node *pnode;
int symbols_nr, i;
char *coldstr;
+ Elf_Data *shndx_data = NULL;
+ Elf32_Word shndx;
symtab = find_section_by_name(elf, ".symtab");
if (!symtab) {
@@ -340,6 +358,10 @@ static int read_symbols(struct elf *elf)
return -1;
}
+ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+ if (symtab_shndx)
+ shndx_data = symtab_shndx->data;
+
symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
for (i = 0; i < symbols_nr; i++) {
@@ -353,8 +375,9 @@ static int read_symbols(struct elf *elf)
sym->idx = i;
- if (!gelf_getsym(symtab->data, i, &sym->sym)) {
- WARN_ELF("gelf_getsym");
+ if (!gelf_getsymshndx(symtab->data, shndx_data, i, &sym->sym,
+ &shndx)) {
+ WARN_ELF("gelf_getsymshndx");
goto err;
}
@@ -368,10 +391,13 @@ static int read_symbols(struct elf *elf)
sym->type = GELF_ST_TYPE(sym->sym.st_info);
sym->bind = GELF_ST_BIND(sym->sym.st_info);
- if (sym->sym.st_shndx > SHN_UNDEF &&
- sym->sym.st_shndx < SHN_LORESERVE) {
- sym->sec = find_section_by_index(elf,
- sym->sym.st_shndx);
+ if ((sym->sym.st_shndx > SHN_UNDEF &&
+ sym->sym.st_shndx < SHN_LORESERVE) ||
+ (shndx_data && sym->sym.st_shndx == SHN_XINDEX)) {
+ if (sym->sym.st_shndx != SHN_XINDEX)
+ shndx = sym->sym.st_shndx;
+
+ sym->sec = find_section_by_index(elf, shndx);
if (!sym->sec) {
WARN("couldn't find section for symbol %s",
sym->name);
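
Note: the hunk above switches to gelf_getsymshndx() so objects with more sections than fit below SHN_LORESERVE still resolve: st_shndx then holds SHN_XINDEX and the real index comes from the parallel .symtab_shndx data. A one-line helper expressing that rule (illustrative, not part of the patch):

#include <gelf.h>

/* Pick the section index, honouring the extended-index escape value. */
static unsigned int sym_shndx(const GElf_Sym *sym, Elf32_Word xshndx)
{
	return sym->st_shndx == SHN_XINDEX ? xshndx : sym->st_shndx;
}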
@@ -394,8 +420,8 @@ static int read_symbols(struct elf *elf)
else
entry = &sym->sec->symbol_list;
list_add(&sym->list, entry);
- hash_add(elf->symbol_hash, &sym->hash, sym->idx);
- hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+ elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
+ elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
}
if (stats)
@@ -408,7 +434,13 @@ static int read_symbols(struct elf *elf)
size_t pnamelen;
if (sym->type != STT_FUNC)
continue;
- sym->pfunc = sym->cfunc = sym;
+
+ if (sym->pfunc == NULL)
+ sym->pfunc = sym;
+
+ if (sym->cfunc == NULL)
+ sym->cfunc = sym;
+
coldstr = strstr(sym->name, ".cold");
if (!coldstr)
continue;
@@ -456,70 +488,107 @@ err:
return -1;
}
-static int read_relas(struct elf *elf)
+void elf_add_reloc(struct elf *elf, struct reloc *reloc)
+{
+ struct section *sec = reloc->sec;
+
+ list_add_tail(&reloc->list, &sec->reloc_list);
+ elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+}
+
+static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx)
+{
+ if (!gelf_getrel(sec->data, i, &reloc->rel)) {
+ WARN_ELF("gelf_getrel");
+ return -1;
+ }
+ reloc->type = GELF_R_TYPE(reloc->rel.r_info);
+ reloc->addend = 0;
+ reloc->offset = reloc->rel.r_offset;
+ *symndx = GELF_R_SYM(reloc->rel.r_info);
+ return 0;
+}
+
+static int read_rela_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx)
+{
+ if (!gelf_getrela(sec->data, i, &reloc->rela)) {
+ WARN_ELF("gelf_getrela");
+ return -1;
+ }
+ reloc->type = GELF_R_TYPE(reloc->rela.r_info);
+ reloc->addend = reloc->rela.r_addend;
+ reloc->offset = reloc->rela.r_offset;
+ *symndx = GELF_R_SYM(reloc->rela.r_info);
+ return 0;
+}
+
+static int read_relocs(struct elf *elf)
{
struct section *sec;
- struct rela *rela;
+ struct reloc *reloc;
int i;
unsigned int symndx;
- unsigned long nr_rela, max_rela = 0, tot_rela = 0;
+ unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
list_for_each_entry(sec, &elf->sections, list) {
- if (sec->sh.sh_type != SHT_RELA)
+ if ((sec->sh.sh_type != SHT_RELA) &&
+ (sec->sh.sh_type != SHT_REL))
continue;
- sec->base = find_section_by_name(elf, sec->name + 5);
+ sec->base = find_section_by_index(elf, sec->sh.sh_info);
if (!sec->base) {
- WARN("can't find base section for rela section %s",
+ WARN("can't find base section for reloc section %s",
sec->name);
return -1;
}
- sec->base->rela = sec;
+ sec->base->reloc = sec;
- nr_rela = 0;
+ nr_reloc = 0;
for (i = 0; i < sec->sh.sh_size / sec->sh.sh_entsize; i++) {
- rela = malloc(sizeof(*rela));
- if (!rela) {
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
perror("malloc");
return -1;
}
- memset(rela, 0, sizeof(*rela));
-
- if (!gelf_getrela(sec->data, i, &rela->rela)) {
- WARN_ELF("gelf_getrela");
- return -1;
+ memset(reloc, 0, sizeof(*reloc));
+ switch (sec->sh.sh_type) {
+ case SHT_REL:
+ if (read_rel_reloc(sec, i, reloc, &symndx))
+ return -1;
+ break;
+ case SHT_RELA:
+ if (read_rela_reloc(sec, i, reloc, &symndx))
+ return -1;
+ break;
+ default: return -1;
}
- rela->type = GELF_R_TYPE(rela->rela.r_info);
- rela->addend = rela->rela.r_addend;
- rela->offset = rela->rela.r_offset;
- symndx = GELF_R_SYM(rela->rela.r_info);
- rela->sym = find_symbol_by_index(elf, symndx);
- rela->sec = sec;
- if (!rela->sym) {
- WARN("can't find rela entry symbol %d for %s",
+ reloc->sec = sec;
+ reloc->idx = i;
+ reloc->sym = find_symbol_by_index(elf, symndx);
+ if (!reloc->sym) {
+ WARN("can't find reloc entry symbol %d for %s",
symndx, sec->name);
return -1;
}
- list_add_tail(&rela->list, &sec->rela_list);
- hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
- nr_rela++;
+ elf_add_reloc(elf, reloc);
+ nr_reloc++;
}
- max_rela = max(max_rela, nr_rela);
- tot_rela += nr_rela;
+ max_reloc = max(max_reloc, nr_reloc);
+ tot_reloc += nr_reloc;
}
if (stats) {
- printf("max_rela: %lu\n", max_rela);
- printf("tot_rela: %lu\n", tot_rela);
+ printf("max_reloc: %lu\n", max_reloc);
+ printf("tot_reloc: %lu\n", tot_reloc);
}
return 0;
}
-struct elf *elf_read(const char *name, int flags)
+struct elf *elf_open_read(const char *name, int flags)
{
struct elf *elf;
Elf_Cmd cmd;
@@ -531,15 +600,16 @@ struct elf *elf_read(const char *name, int flags)
perror("malloc");
return NULL;
}
- memset(elf, 0, sizeof(*elf));
+ memset(elf, 0, offsetof(struct elf, sections));
- hash_init(elf->symbol_hash);
- hash_init(elf->symbol_name_hash);
- hash_init(elf->section_hash);
- hash_init(elf->section_name_hash);
- hash_init(elf->rela_hash);
INIT_LIST_HEAD(&elf->sections);
+ elf_hash_init(elf->symbol_hash);
+ elf_hash_init(elf->symbol_name_hash);
+ elf_hash_init(elf->section_hash);
+ elf_hash_init(elf->section_name_hash);
+ elf_hash_init(elf->reloc_hash);
+
elf->fd = open(name, flags);
if (elf->fd == -1) {
fprintf(stderr, "objtool: Can't open '%s': %s\n",
@@ -571,7 +641,7 @@ struct elf *elf_read(const char *name, int flags)
if (read_symbols(elf))
goto err;
- if (read_relas(elf))
+ if (read_relocs(elf))
goto err;
return elf;
@@ -582,7 +652,7 @@ err:
}
struct section *elf_create_section(struct elf *elf, const char *name,
- size_t entsize, int nr)
+ unsigned int sh_flags, size_t entsize, int nr)
{
struct section *sec, *shstrtab;
size_t size = entsize * nr;
@@ -597,7 +667,7 @@ struct section *elf_create_section(struct elf *elf, const char *name,
memset(sec, 0, sizeof(*sec));
INIT_LIST_HEAD(&sec->symbol_list);
- INIT_LIST_HEAD(&sec->rela_list);
+ INIT_LIST_HEAD(&sec->reloc_list);
s = elf_newscn(elf->elf);
if (!s) {
@@ -642,7 +712,7 @@ struct section *elf_create_section(struct elf *elf, const char *name,
sec->sh.sh_entsize = entsize;
sec->sh.sh_type = SHT_PROGBITS;
sec->sh.sh_addralign = 1;
- sec->sh.sh_flags = SHF_ALLOC;
+ sec->sh.sh_flags = SHF_ALLOC | sh_flags;
/* Add section name to .shstrtab (or .strtab for Clang) */
@@ -676,31 +746,63 @@ struct section *elf_create_section(struct elf *elf, const char *name,
shstrtab->changed = true;
list_add_tail(&sec->list, &elf->sections);
- hash_add(elf->section_hash, &sec->hash, sec->idx);
- hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+ elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
+ elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+
+ elf->changed = true;
return sec;
}
-struct section *elf_create_rela_section(struct elf *elf, struct section *base)
+static struct section *elf_create_rel_reloc_section(struct elf *elf, struct section *base)
{
- char *relaname;
+ char *relocname;
struct section *sec;
- relaname = malloc(strlen(base->name) + strlen(".rela") + 1);
- if (!relaname) {
+ relocname = malloc(strlen(base->name) + strlen(".rel") + 1);
+ if (!relocname) {
perror("malloc");
return NULL;
}
- strcpy(relaname, ".rela");
- strcat(relaname, base->name);
+ strcpy(relocname, ".rel");
+ strcat(relocname, base->name);
- sec = elf_create_section(elf, relaname, sizeof(GElf_Rela), 0);
- free(relaname);
+ sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rel), 0);
+ free(relocname);
if (!sec)
return NULL;
- base->rela = sec;
+ base->reloc = sec;
+ sec->base = base;
+
+ sec->sh.sh_type = SHT_REL;
+ sec->sh.sh_addralign = 8;
+ sec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
+ sec->sh.sh_info = base->idx;
+ sec->sh.sh_flags = SHF_INFO_LINK;
+
+ return sec;
+}
+
+static struct section *elf_create_rela_reloc_section(struct elf *elf, struct section *base)
+{
+ char *relocname;
+ struct section *sec;
+
+ relocname = malloc(strlen(base->name) + strlen(".rela") + 1);
+ if (!relocname) {
+ perror("malloc");
+ return NULL;
+ }
+ strcpy(relocname, ".rela");
+ strcat(relocname, base->name);
+
+ sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0);
+ free(relocname);
+ if (!sec)
+ return NULL;
+
+ base->reloc = sec;
sec->base = base;
sec->sh.sh_type = SHT_RELA;
@@ -712,39 +814,142 @@ struct section *elf_create_rela_section(struct elf *elf, struct section *base)
return sec;
}
-int elf_rebuild_rela_section(struct section *sec)
+struct section *elf_create_reloc_section(struct elf *elf,
+ struct section *base,
+ int reltype)
{
- struct rela *rela;
- int nr, idx = 0, size;
- GElf_Rela *relas;
+ switch (reltype) {
+ case SHT_REL: return elf_create_rel_reloc_section(elf, base);
+ case SHT_RELA: return elf_create_rela_reloc_section(elf, base);
+ default: return NULL;
+ }
+}
- nr = 0;
- list_for_each_entry(rela, &sec->rela_list, list)
- nr++;
+static int elf_rebuild_rel_reloc_section(struct section *sec, int nr)
+{
+ struct reloc *reloc;
+ int idx = 0, size;
+ GElf_Rel *relocs;
+
+ /* Allocate a buffer for relocations */
+ size = nr * sizeof(*relocs);
+ relocs = malloc(size);
+ if (!relocs) {
+ perror("malloc");
+ return -1;
+ }
+
+ sec->data->d_buf = relocs;
+ sec->data->d_size = size;
+
+ sec->sh.sh_size = size;
+
+ idx = 0;
+ list_for_each_entry(reloc, &sec->reloc_list, list) {
+ relocs[idx].r_offset = reloc->offset;
+ relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+ idx++;
+ }
- size = nr * sizeof(*relas);
- relas = malloc(size);
- if (!relas) {
+ return 0;
+}
+
+static int elf_rebuild_rela_reloc_section(struct section *sec, int nr)
+{
+ struct reloc *reloc;
+ int idx = 0, size;
+ GElf_Rela *relocs;
+
+ /* Allocate a buffer for relocations with addends */
+ size = nr * sizeof(*relocs);
+ relocs = malloc(size);
+ if (!relocs) {
perror("malloc");
return -1;
}
- sec->data->d_buf = relas;
+ sec->data->d_buf = relocs;
sec->data->d_size = size;
sec->sh.sh_size = size;
idx = 0;
- list_for_each_entry(rela, &sec->rela_list, list) {
- relas[idx].r_offset = rela->offset;
- relas[idx].r_addend = rela->addend;
- relas[idx].r_info = GELF_R_INFO(rela->sym->idx, rela->type);
+ list_for_each_entry(reloc, &sec->reloc_list, list) {
+ relocs[idx].r_offset = reloc->offset;
+ relocs[idx].r_addend = reloc->addend;
+ relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
idx++;
}
return 0;
}
+int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
+{
+ struct reloc *reloc;
+ int nr;
+
+ sec->changed = true;
+ elf->changed = true;
+
+ nr = 0;
+ list_for_each_entry(reloc, &sec->reloc_list, list)
+ nr++;
+
+ switch (sec->sh.sh_type) {
+ case SHT_REL: return elf_rebuild_rel_reloc_section(sec, nr);
+ case SHT_RELA: return elf_rebuild_rela_reloc_section(sec, nr);
+ default: return -1;
+ }
+}
+
+int elf_write_insn(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int len,
+ const char *insn)
+{
+ Elf_Data *data = sec->data;
+
+ if (data->d_type != ELF_T_BYTE || data->d_off) {
+ WARN("write to unexpected data for section: %s", sec->name);
+ return -1;
+ }
+
+ memcpy(data->d_buf + offset, insn, len);
+ elf_flagdata(data, ELF_C_SET, ELF_F_DIRTY);
+
+ elf->changed = true;
+
+ return 0;
+}
+
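
Note: elf_write_insn() above gives later passes a way to patch instruction bytes in place and have elf_write() flush them. A hedged usage sketch; the five-byte NOP and the call site are assumptions for illustration, not something this patch does:

#include "elf.h"

/* Canonical 5-byte x86-64 NOP (0F 1F 44 00 00). */
static const char nop5[] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };

static int patch_to_nop(struct elf *elf, struct section *text,
			unsigned long offset)
{
	return elf_write_insn(elf, text, offset, sizeof(nop5), nop5);
}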
+int elf_write_reloc(struct elf *elf, struct reloc *reloc)
+{
+ struct section *sec = reloc->sec;
+
+ if (sec->sh.sh_type == SHT_REL) {
+ reloc->rel.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+ reloc->rel.r_offset = reloc->offset;
+
+ if (!gelf_update_rel(sec->data, reloc->idx, &reloc->rel)) {
+ WARN_ELF("gelf_update_rel");
+ return -1;
+ }
+ } else {
+ reloc->rela.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+ reloc->rela.r_addend = reloc->addend;
+ reloc->rela.r_offset = reloc->offset;
+
+ if (!gelf_update_rela(sec->data, reloc->idx, &reloc->rela)) {
+ WARN_ELF("gelf_update_rela");
+ return -1;
+ }
+ }
+
+ elf->changed = true;
+
+ return 0;
+}
+
int elf_write(struct elf *elf)
{
struct section *sec;
@@ -762,6 +967,8 @@ int elf_write(struct elf *elf)
WARN_ELF("gelf_update_shdr");
return -1;
}
+
+ sec->changed = false;
}
}
@@ -774,6 +981,8 @@ int elf_write(struct elf *elf)
return -1;
}
+ elf->changed = false;
+
return 0;
}
@@ -781,7 +990,7 @@ void elf_close(struct elf *elf)
{
struct section *sec, *tmpsec;
struct symbol *sym, *tmpsym;
- struct rela *rela, *tmprela;
+ struct reloc *reloc, *tmpreloc;
if (elf->elf)
elf_end(elf->elf);
@@ -795,10 +1004,10 @@ void elf_close(struct elf *elf)
hash_del(&sym->hash);
free(sym);
}
- list_for_each_entry_safe(rela, tmprela, &sec->rela_list, list) {
- list_del(&rela->list);
- hash_del(&rela->hash);
- free(rela);
+ list_for_each_entry_safe(reloc, tmpreloc, &sec->reloc_list, list) {
+ list_del(&reloc->list);
+ hash_del(&reloc->hash);
+ free(reloc);
}
list_del(&sec->list);
free(sec);
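
Note: the rewritten elf.c also grows a small write-side API (elf_create_reloc_section(), elf_add_reloc(), elf_rebuild_reloc_section()). A sketch of how a generator would use it; the helper and the relocation type are assumptions for illustration:

#include <stdio.h>
#include <stdlib.h>
#include "elf.h"

/*
 * Queue one RELA relocation against 'sym' at 'offset' in 'base', creating
 * the companion reloc section on first use, then serialise the list.
 */
static int add_one_rela(struct elf *elf, struct section *base,
			struct symbol *sym, unsigned long offset)
{
	struct section *rsec = base->reloc;
	struct reloc *reloc;

	if (!rsec) {
		rsec = elf_create_reloc_section(elf, base, SHT_RELA);
		if (!rsec)
			return -1;
	}

	reloc = calloc(1, sizeof(*reloc));
	if (!reloc) {
		perror("calloc");
		return -1;
	}

	reloc->sec    = rsec;
	reloc->sym    = sym;
	reloc->offset = offset;
	reloc->type   = R_X86_64_64;	/* assumed: 64-bit absolute reloc */

	elf_add_reloc(elf, reloc);

	return elf_rebuild_reloc_section(elf, rsec);
}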
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index 12e01ac190ec..807f8c670097 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -32,14 +32,14 @@ struct section {
GElf_Shdr sh;
struct rb_root symbol_tree;
struct list_head symbol_list;
- struct list_head rela_list;
- struct section *base, *rela;
+ struct list_head reloc_list;
+ struct section *base, *reloc;
struct symbol *sym;
Elf_Data *data;
char *name;
int idx;
unsigned int len;
- bool changed, text, rodata;
+ bool changed, text, rodata, noinstr;
};
struct symbol {
@@ -56,31 +56,39 @@ struct symbol {
unsigned int len;
struct symbol *pfunc, *cfunc, *alias;
bool uaccess_safe;
+ bool static_call_tramp;
};
-struct rela {
+struct reloc {
struct list_head list;
struct hlist_node hash;
- GElf_Rela rela;
+ union {
+ GElf_Rela rela;
+ GElf_Rel rel;
+ };
struct section *sec;
struct symbol *sym;
- unsigned int type;
unsigned long offset;
+ unsigned int type;
int addend;
+ int idx;
bool jump_table_start;
};
+#define ELF_HASH_BITS 20
+
struct elf {
Elf *elf;
GElf_Ehdr ehdr;
int fd;
+ bool changed;
char *name;
struct list_head sections;
- DECLARE_HASHTABLE(symbol_hash, 20);
- DECLARE_HASHTABLE(symbol_name_hash, 20);
- DECLARE_HASHTABLE(section_hash, 16);
- DECLARE_HASHTABLE(section_name_hash, 16);
- DECLARE_HASHTABLE(rela_hash, 20);
+ DECLARE_HASHTABLE(symbol_hash, ELF_HASH_BITS);
+ DECLARE_HASHTABLE(symbol_name_hash, ELF_HASH_BITS);
+ DECLARE_HASHTABLE(section_hash, ELF_HASH_BITS);
+ DECLARE_HASHTABLE(section_name_hash, ELF_HASH_BITS);
+ DECLARE_HASHTABLE(reloc_hash, ELF_HASH_BITS);
};
#define OFFSET_STRIDE_BITS 4
@@ -107,27 +115,32 @@ static inline u32 sec_offset_hash(struct section *sec, unsigned long offset)
return ol;
}
-static inline u32 rela_hash(struct rela *rela)
+static inline u32 reloc_hash(struct reloc *reloc)
{
- return sec_offset_hash(rela->sec, rela->offset);
+ return sec_offset_hash(reloc->sec, reloc->offset);
}
-struct elf *elf_read(const char *name, int flags);
-struct section *find_section_by_name(struct elf *elf, const char *name);
+struct elf *elf_open_read(const char *name, int flags);
+struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
+struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype);
+void elf_add_reloc(struct elf *elf, struct reloc *reloc);
+int elf_write_insn(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int len,
+ const char *insn);
+int elf_write_reloc(struct elf *elf, struct reloc *reloc);
+int elf_write(struct elf *elf);
+void elf_close(struct elf *elf);
+
+struct section *find_section_by_name(const struct elf *elf, const char *name);
struct symbol *find_func_by_offset(struct section *sec, unsigned long offset);
struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
-struct symbol *find_symbol_by_name(struct elf *elf, const char *name);
-struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
-struct rela *find_rela_by_dest(struct elf *elf, struct section *sec, unsigned long offset);
-struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec,
+struct symbol *find_symbol_by_name(const struct elf *elf, const char *name);
+struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset);
+struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset);
+struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec,
unsigned long offset, unsigned int len);
struct symbol *find_func_containing(struct section *sec, unsigned long offset);
-struct section *elf_create_section(struct elf *elf, const char *name, size_t
- entsize, int nr);
-struct section *elf_create_rela_section(struct elf *elf, struct section *base);
-int elf_rebuild_rela_section(struct section *sec);
-int elf_write(struct elf *elf);
-void elf_close(struct elf *elf);
+int elf_rebuild_reloc_section(struct elf *elf, struct section *sec);
#define for_each_sec(file, sec) \
list_for_each_entry(sec, &file->elf->sections, list)
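
Note: the union inside struct reloc exists because SHT_REL records carry no addend field (for REL, any addend is implied by the bytes being relocated), while SHT_RELA stores it explicitly. A throwaway check of the two record layouts, independent of objtool:

#include <gelf.h>
#include <stdio.h>

int main(void)
{
	printf("GElf_Rel : %zu bytes (r_offset, r_info)\n", sizeof(GElf_Rel));
	printf("GElf_Rela: %zu bytes (r_offset, r_info, r_addend)\n",
	       sizeof(GElf_Rela));
	return 0;
}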
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 0b3528f05053..9df0cd86d310 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -22,6 +22,8 @@
#include <linux/kernel.h>
#include "builtin.h"
+#include "objtool.h"
+#include "warn.h"
struct cmd_struct {
const char *name;
@@ -39,6 +41,34 @@ static struct cmd_struct objtool_cmds[] = {
bool help;
+const char *objname;
+static struct objtool_file file;
+
+struct objtool_file *objtool_open_read(const char *_objname)
+{
+ if (objname) {
+ if (strcmp(objname, _objname)) {
+ WARN("won't handle more than one file at a time");
+ return NULL;
+ }
+ return &file;
+ }
+ objname = _objname;
+
+ file.elf = elf_open_read(objname, O_RDWR);
+ if (!file.elf)
+ return NULL;
+
+ INIT_LIST_HEAD(&file.insn_list);
+ hash_init(file.insn_hash);
+ INIT_LIST_HEAD(&file.static_call_list);
+ file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
+ file.ignore_unreachables = no_unreachable;
+ file.hints = false;
+
+ return &file;
+}
+
static void cmd_usage(void)
{
unsigned int i, longest = 0;
@@ -58,7 +88,9 @@ static void cmd_usage(void)
printf("\n");
- exit(129);
+ if (!help)
+ exit(129);
+ exit(0);
}
static void handle_options(int *argc, const char ***argv)
diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h
new file mode 100644
index 000000000000..4125d4578b23
--- /dev/null
+++ b/tools/objtool/objtool.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 Matt Helsley <mhelsley@vmware.com>
+ */
+
+#ifndef _OBJTOOL_H
+#define _OBJTOOL_H
+
+#include <stdbool.h>
+#include <linux/list.h>
+#include <linux/hashtable.h>
+
+#include "elf.h"
+
+#define __weak __attribute__((weak))
+
+struct objtool_file {
+ struct elf *elf;
+ struct list_head insn_list;
+ DECLARE_HASHTABLE(insn_hash, 20);
+ struct list_head static_call_list;
+ bool ignore_unreachables, c_file, hints, rodata;
+};
+
+struct objtool_file *objtool_open_read(const char *_objname);
+
+int check(struct objtool_file *file);
+int orc_dump(const char *objname);
+int create_orc(struct objtool_file *file);
+int create_orc_sections(struct objtool_file *file);
+
+#endif /* _OBJTOOL_H */
diff --git a/tools/objtool/orc.h b/tools/objtool/orc.h
deleted file mode 100644
index ee2832221e62..000000000000
--- a/tools/objtool/orc.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
- */
-
-#ifndef _ORC_H
-#define _ORC_H
-
-#include <asm/orc_types.h>
-
-struct objtool_file;
-
-int create_orc(struct objtool_file *file);
-int create_orc_sections(struct objtool_file *file);
-
-int orc_dump(const char *objname);
-
-#endif /* _ORC_H */
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index ba4cbb1cdd63..5e6a95368d35 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -4,7 +4,9 @@
*/
#include <unistd.h>
-#include "orc.h"
+#include <linux/objtool.h>
+#include <asm/orc_types.h>
+#include "objtool.h"
#include "warn.h"
static const char *reg_name(unsigned int reg)
@@ -36,12 +38,12 @@ static const char *reg_name(unsigned int reg)
static const char *orc_type_name(unsigned int type)
{
switch (type) {
- case ORC_TYPE_CALL:
+ case UNWIND_HINT_TYPE_CALL:
return "call";
- case ORC_TYPE_REGS:
+ case UNWIND_HINT_TYPE_REGS:
return "regs";
- case ORC_TYPE_REGS_IRET:
- return "iret";
+ case UNWIND_HINT_TYPE_REGS_PARTIAL:
+ return "regs (partial)";
default:
return "?";
}
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 4c0dabd28000..235663b96adc 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -6,7 +6,9 @@
#include <stdlib.h>
#include <string.h>
-#include "orc.h"
+#include <linux/objtool.h>
+#include <asm/orc_types.h>
+
#include "check.h"
#include "warn.h"
@@ -16,10 +18,13 @@ int create_orc(struct objtool_file *file)
for_each_insn(file, insn) {
struct orc_entry *orc = &insn->orc;
- struct cfi_reg *cfa = &insn->state.cfa;
- struct cfi_reg *bp = &insn->state.regs[CFI_BP];
+ struct cfi_reg *cfa = &insn->cfi.cfa;
+ struct cfi_reg *bp = &insn->cfi.regs[CFI_BP];
+
+ if (!insn->sec->text)
+ continue;
- orc->end = insn->state.end;
+ orc->end = insn->cfi.end;
if (cfa->base == CFI_UNDEFINED) {
orc->sp_reg = ORC_REG_UNDEFINED;
@@ -75,63 +80,62 @@ int create_orc(struct objtool_file *file)
orc->sp_offset = cfa->offset;
orc->bp_offset = bp->offset;
- orc->type = insn->state.type;
+ orc->type = insn->cfi.type;
}
return 0;
}
-static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relasec,
+static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relocsec,
unsigned int idx, struct section *insn_sec,
unsigned long insn_off, struct orc_entry *o)
{
struct orc_entry *orc;
- struct rela *rela;
+ struct reloc *reloc;
/* populate ORC data */
orc = (struct orc_entry *)u_sec->data->d_buf + idx;
memcpy(orc, o, sizeof(*orc));
- /* populate rela for ip */
- rela = malloc(sizeof(*rela));
- if (!rela) {
+ /* populate reloc for ip */
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
perror("malloc");
return -1;
}
- memset(rela, 0, sizeof(*rela));
+ memset(reloc, 0, sizeof(*reloc));
if (insn_sec->sym) {
- rela->sym = insn_sec->sym;
- rela->addend = insn_off;
+ reloc->sym = insn_sec->sym;
+ reloc->addend = insn_off;
} else {
/*
* The Clang assembler doesn't produce section symbols, so we
* have to reference the function symbol instead:
*/
- rela->sym = find_symbol_containing(insn_sec, insn_off);
- if (!rela->sym) {
+ reloc->sym = find_symbol_containing(insn_sec, insn_off);
+ if (!reloc->sym) {
/*
* Hack alert. This happens when we need to reference
* the NOP pad insn immediately after the function.
*/
- rela->sym = find_symbol_containing(insn_sec,
+ reloc->sym = find_symbol_containing(insn_sec,
insn_off - 1);
}
- if (!rela->sym) {
+ if (!reloc->sym) {
WARN("missing symbol for insn at offset 0x%lx\n",
insn_off);
return -1;
}
- rela->addend = insn_off - rela->sym->offset;
+ reloc->addend = insn_off - reloc->sym->offset;
}
- rela->type = R_X86_64_PC32;
- rela->offset = idx * sizeof(int);
- rela->sec = ip_relasec;
+ reloc->type = R_X86_64_PC32;
+ reloc->offset = idx * sizeof(int);
+ reloc->sec = ip_relocsec;
- list_add_tail(&rela->list, &ip_relasec->rela_list);
- hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
+ elf_add_reloc(elf, reloc);
return 0;
}
@@ -139,13 +143,13 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti
int create_orc_sections(struct objtool_file *file)
{
struct instruction *insn, *prev_insn;
- struct section *sec, *u_sec, *ip_relasec;
+ struct section *sec, *u_sec, *ip_relocsec;
unsigned int idx;
struct orc_entry empty = {
.sp_reg = ORC_REG_UNDEFINED,
.bp_reg = ORC_REG_UNDEFINED,
- .type = ORC_TYPE_CALL,
+ .type = UNWIND_HINT_TYPE_CALL,
};
sec = find_section_by_name(file->elf, ".orc_unwind");
@@ -179,16 +183,16 @@ int create_orc_sections(struct objtool_file *file)
/* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
- sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
+ sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx);
if (!sec)
return -1;
- ip_relasec = elf_create_rela_section(file->elf, sec);
- if (!ip_relasec)
+ ip_relocsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+ if (!ip_relocsec)
return -1;
/* create .orc_unwind section */
- u_sec = elf_create_section(file->elf, ".orc_unwind",
+ u_sec = elf_create_section(file->elf, ".orc_unwind", 0,
sizeof(struct orc_entry), idx);
/* populate sections */
@@ -202,7 +206,7 @@ int create_orc_sections(struct objtool_file *file)
if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc,
sizeof(struct orc_entry))) {
- if (create_orc_entry(file->elf, u_sec, ip_relasec, idx,
+ if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx,
insn->sec, insn->offset,
&insn->orc))
return -1;
@@ -214,7 +218,7 @@ int create_orc_sections(struct objtool_file *file)
/* section terminator */
if (prev_insn) {
- if (create_orc_entry(file->elf, u_sec, ip_relasec, idx,
+ if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx,
prev_insn->sec,
prev_insn->offset + prev_insn->len,
&empty))
@@ -224,7 +228,7 @@ int create_orc_sections(struct objtool_file *file)
}
}
- if (elf_rebuild_rela_section(ip_relasec))
+ if (elf_rebuild_reloc_section(file->elf, ip_relocsec))
return -1;
return 0;
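
Note: create_orc_sections() above only emits a new table entry when the ORC data differs from the previous instruction's, so runs of instructions with identical unwind state collapse into one entry. A toy model of that run-length behaviour (types and numbers invented):

#include <stdio.h>
#include <string.h>

struct toy_orc { int sp_reg, sp_offset; };

int main(void)
{
	struct toy_orc insn_orc[] = {
		{ 1, 8 }, { 1, 8 }, { 1, 16 }, { 1, 16 }, { 1, 8 },
	};
	struct toy_orc *prev = NULL;
	int i, emitted = 0;

	for (i = 0; i < 5; i++) {
		if (!prev || memcmp(&insn_orc[i], prev, sizeof(*prev)))
			emitted++;		/* would call create_orc_entry() */
		prev = &insn_orc[i];
	}

	printf("5 instructions -> %d ORC entries\n", emitted);	/* prints 3 */
	return 0;
}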
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index e74e0189de22..1a2420febd08 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -14,24 +14,7 @@
#include "builtin.h"
#include "special.h"
#include "warn.h"
-
-#define EX_ENTRY_SIZE 12
-#define EX_ORIG_OFFSET 0
-#define EX_NEW_OFFSET 4
-
-#define JUMP_ENTRY_SIZE 16
-#define JUMP_ORIG_OFFSET 0
-#define JUMP_NEW_OFFSET 4
-
-#define ALT_ENTRY_SIZE 13
-#define ALT_ORIG_OFFSET 0
-#define ALT_NEW_OFFSET 4
-#define ALT_FEATURE_OFFSET 8
-#define ALT_ORIG_LEN_OFFSET 10
-#define ALT_NEW_LEN_OFFSET 11
-
-#define X86_FEATURE_POPCNT (4*32+23)
-#define X86_FEATURE_SMAP (9*32+20)
+#include "arch_special.h"
struct special_entry {
const char *sec;
@@ -68,11 +51,15 @@ struct special_entry entries[] = {
{},
};
+void __weak arch_handle_alternative(unsigned short feature, struct special_alt *alt)
+{
+}
+
static int get_alt_entry(struct elf *elf, struct special_entry *entry,
struct section *sec, int idx,
struct special_alt *alt)
{
- struct rela *orig_rela, *new_rela;
+ struct reloc *orig_reloc, *new_reloc;
unsigned long offset;
offset = idx * entry->size;
@@ -92,56 +79,33 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
feature = *(unsigned short *)(sec->data->d_buf + offset +
entry->feature);
-
- /*
- * It has been requested that we don't validate the !POPCNT
- * feature path which is a "very very small percentage of
- * machines".
- */
- if (feature == X86_FEATURE_POPCNT)
- alt->skip_orig = true;
-
- /*
- * If UACCESS validation is enabled; force that alternative;
- * otherwise force it the other way.
- *
- * What we want to avoid is having both the original and the
- * alternative code flow at the same time, in that case we can
- * find paths that see the STAC but take the NOP instead of
- * CLAC and the other way around.
- */
- if (feature == X86_FEATURE_SMAP) {
- if (uaccess)
- alt->skip_orig = true;
- else
- alt->skip_alt = true;
- }
+ arch_handle_alternative(feature, alt);
}
- orig_rela = find_rela_by_dest(elf, sec, offset + entry->orig);
- if (!orig_rela) {
- WARN_FUNC("can't find orig rela", sec, offset + entry->orig);
+ orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig);
+ if (!orig_reloc) {
+ WARN_FUNC("can't find orig reloc", sec, offset + entry->orig);
return -1;
}
- if (orig_rela->sym->type != STT_SECTION) {
- WARN_FUNC("don't know how to handle non-section rela symbol %s",
- sec, offset + entry->orig, orig_rela->sym->name);
+ if (orig_reloc->sym->type != STT_SECTION) {
+ WARN_FUNC("don't know how to handle non-section reloc symbol %s",
+ sec, offset + entry->orig, orig_reloc->sym->name);
return -1;
}
- alt->orig_sec = orig_rela->sym->sec;
- alt->orig_off = orig_rela->addend;
+ alt->orig_sec = orig_reloc->sym->sec;
+ alt->orig_off = orig_reloc->addend;
if (!entry->group || alt->new_len) {
- new_rela = find_rela_by_dest(elf, sec, offset + entry->new);
- if (!new_rela) {
- WARN_FUNC("can't find new rela",
+ new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new);
+ if (!new_reloc) {
+ WARN_FUNC("can't find new reloc",
sec, offset + entry->new);
return -1;
}
- alt->new_sec = new_rela->sym->sec;
- alt->new_off = (unsigned int)new_rela->addend;
+ alt->new_sec = new_reloc->sym->sec;
+ alt->new_off = (unsigned int)new_reloc->addend;
/* _ASM_EXTABLE_EX hack */
if (alt->new_off >= 0x7ffffff0)
diff --git a/tools/objtool/special.h b/tools/objtool/special.h
index 35061530e46e..abddf38ef334 100644
--- a/tools/objtool/special.h
+++ b/tools/objtool/special.h
@@ -7,8 +7,11 @@
#define _SPECIAL_H
#include <stdbool.h>
+#include "check.h"
#include "elf.h"
+#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
+
struct special_alt {
struct list_head list;
@@ -28,4 +31,11 @@ struct special_alt {
int special_get_alts(struct elf *elf, struct list_head *alts);
+void arch_handle_alternative(unsigned short feature, struct special_alt *alt);
+
+bool arch_support_alt_relocation(struct special_alt *special_alt,
+ struct instruction *insn,
+ struct reloc *reloc);
+struct reloc *arch_find_switch_table(struct objtool_file *file,
+ struct instruction *insn);
#endif /* _SPECIAL_H */
diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
index 2a1261bfbb62..606a4b5e929f 100755
--- a/tools/objtool/sync-check.sh
+++ b/tools/objtool/sync-check.sh
@@ -1,13 +1,27 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-FILES='
+if [ -z "$SRCARCH" ]; then
+ echo 'sync-check.sh: error: missing $SRCARCH environment variable' >&2
+ exit 1
+fi
+
+FILES="include/linux/objtool.h"
+
+if [ "$SRCARCH" = "x86" ]; then
+FILES="$FILES
arch/x86/include/asm/inat_types.h
arch/x86/include/asm/orc_types.h
arch/x86/include/asm/emulate_prefix.h
arch/x86/lib/x86-opcode-map.txt
arch/x86/tools/gen-insn-attr-x86.awk
-'
+include/linux/static_call_types.h
+arch/x86/include/asm/inat.h -I '^#include [\"<]\(asm/\)*inat_types.h[\">]'
+arch/x86/include/asm/insn.h -I '^#include [\"<]\(asm/\)*inat.h[\">]'
+arch/x86/lib/inat.c -I '^#include [\"<]\(../include/\)*asm/insn.h[\">]'
+arch/x86/lib/insn.c -I '^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]' -I '^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]'
+"
+fi
check_2 () {
file1=$1
@@ -40,11 +54,12 @@ fi
cd ../..
-for i in $FILES; do
- check $i
-done
+while read -r file_entry; do
+ if [ -z "$file_entry" ]; then
+ continue
+ fi
-check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"'
-check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"'
-check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"'
-check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"'
+ check $file_entry
+done <<EOF
+$FILES
+EOF
diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c
new file mode 100644
index 000000000000..7843e9a7a72f
--- /dev/null
+++ b/tools/objtool/weak.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Matt Helsley <mhelsley@vmware.com>
+ * Weak definitions necessary to compile objtool without
+ * some subcommands (e.g. check, orc).
+ */
+
+#include <stdbool.h>
+#include <errno.h>
+#include "objtool.h"
+
+#define UNSUPPORTED(name) \
+({ \
+ fprintf(stderr, "error: objtool: " name " not implemented\n"); \
+ return ENOSYS; \
+})
+
+int __weak check(struct objtool_file *file)
+{
+ UNSUPPORTED("check subcommand");
+}
+
+int __weak orc_dump(const char *_objname)
+{
+ UNSUPPORTED("orc");
+}
+
+int __weak create_orc(struct objtool_file *file)
+{
+ UNSUPPORTED("orc");
+}
+
+int __weak create_orc_sections(struct objtool_file *file)
+{
+ UNSUPPORTED("orc");
+}
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 31824d5269cc..6e54979c2124 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -48,7 +48,7 @@ man5dir=$(mandir)/man5
man7dir=$(mandir)/man7
ASCIIDOC=asciidoc
-ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf
+ASCIIDOC_EXTRA += --unsafe -f asciidoc.conf
ASCIIDOC_HTML = xhtml11
MANPAGE_XSL = manpage-normal.xsl
XMLTO_EXTRA =
@@ -59,7 +59,7 @@ HTML_REF = origin/html
ifdef USE_ASCIIDOCTOR
ASCIIDOC = asciidoctor
-ASCIIDOC_EXTRA = -a compat-mode
+ASCIIDOC_EXTRA += -a compat-mode
ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions
ASCIIDOC_EXTRA += -a mansource="perf" -a manmanual="perf Manual"
ASCIIDOC_HTML = xhtml5
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index 82ff7dad40c2..d3740c8f399b 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -1,5 +1,5 @@
i synthesize instructions events
- b synthesize branches events
+ b synthesize branches events (branch misses for Arm SPE)
c synthesize branches events (calls only)
r synthesize branches events (returns only)
x synthesize transactions events
@@ -9,9 +9,16 @@
of aux-output (refer to perf record)
e synthesize error events
d create a debug log
+ f synthesize first level cache events
+ m synthesize last level cache events
+ t synthesize TLB events
+ a synthesize remote access events
g synthesize a call chain (use with i or x)
+ G synthesize a call chain on existing event records
l synthesize last branch entries (use with i or x)
+ L synthesize last branch entries on existing event records
s skip initial number of events
+ q quicker (less detailed) decoding
The default is all events i.e. the same as --itrace=ibxwpe,
except for perf script where it is --itrace=ce
@@ -31,9 +38,26 @@
Also the number of last branch entries (default 64, max. 1024) for
instructions or transactions events can be specified.
+ Similar to options g and l, size may also be specified for options G and L.
+ On x86, note that G and L work poorly when data has been recorded with
+	large PEBS. Refer to the linkperf:perf-intel-pt[1] man page for details.
+
It is also possible to skip events generated (instructions, branches, transactions,
ptwrite, power) at the beginning. This is useful to ignore initialization code.
--itrace=i0nss1000000
skips the first million instructions.
+
+ The 'e' option may be followed by flags which affect what errors will or
+ will not be reported. Each flag must be preceded by either '+' or '-'.
+ The flags are:
+ o overflow
+ l trace data lost
+
+ If supported, the 'd' option may be followed by flags which affect what
+ debug messages will or will not be logged. Each flag must be preceded
+ by either '+' or '-'. The flags are:
+ a all perf events
+
+ If supported, the 'q' option may be repeated to increase the effect.
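
	For example, as a sketch (whether a particular flag is honoured depends
	on the tool and on the trace type, e.g. Intel PT):

	  # report errors except overflow/data-lost, with quicker decoding
	  perf script --itrace=e-o-lq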
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index 0921a3c67381..a0529c7fa5ef 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -49,6 +49,9 @@ SUBSYSTEM
'sched'::
Scheduler and IPC mechanisms.
+'syscall'::
+ System call performance (throughput).
+
'mem'::
Memory access performance.
@@ -61,6 +64,9 @@ SUBSYSTEM
'epoll'::
Eventpoll (epoll) stressing benchmarks.
+'internals'::
+ Benchmark internal perf functionality.
+
'all'::
All benchmark subsystems.
@@ -134,6 +140,14 @@ Example of *pipe*
59004 ops/sec
---------------------
+SUITES FOR 'syscall'
+~~~~~~~~~~~~~~~~~~~~
+*basic*::
+Suite for evaluating performance of core system call throughput (both usecs/op and ops/sec metrics).
+This uses a single thread simply doing getppid(2), which is a simple syscall where the result is not
+cached by glibc.
+
+
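A minimal invocation sketch, assuming this perf build includes the 'syscall'
benchmark subsystem:

  # run the basic getppid(2) throughput suite
  perf bench syscall basic
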
SUITES FOR 'mem'
~~~~~~~~~~~~~~~~
*memcpy*::
@@ -214,6 +228,11 @@ Suite for evaluating concurrent epoll_wait calls.
*ctl*::
Suite for evaluating multiple epoll_ctl calls.
+SUITES FOR 'internals'
+~~~~~~~~~~~~~~~~~~~~~~
+*synthesize*::
+Suite for evaluating perf's event synthesis performance.
+
SEE ALSO
--------
linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index e6150f21267d..c81d72e3eecf 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -40,7 +40,7 @@ RECORD OPTIONS
--------------
-e::
--event=::
- Select the PMU event. Use 'perf mem record -e list'
+ Select the PMU event. Use 'perf c2c record -e list'
to list available events.
-v::
@@ -111,6 +111,17 @@ REPORT OPTIONS
--display::
Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
+--stitch-lbr::
+ Show callgraph with stitched LBRs, which may have more complete
+ callgraph. The perf.data file must have been obtained using
+ perf c2c record --call-graph lbr.
+ Disabled by default. In common cases with call stack overflows,
+ it can recreate better call stacks than the default lbr call stack
+	output. But this approach is not foolproof. There can be cases
+	where it creates incorrect call stacks from incorrect matches.
+	Known limitations include exception handling such as
+	setjmp/longjmp, whose calls and returns will not match.
+
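	As a usage sketch ('./workload' is a placeholder for the command being
	analysed):

	  perf c2c record --call-graph lbr -- ./workload
	  perf c2c report --stitch-lbr
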
C2C RECORD
----------
The perf c2c record command setup options related to HITM cacheline analysis
@@ -163,34 +174,36 @@ For each cacheline in the 1) list we display following data:
Cacheline
- cacheline address (hex number)
- Total records
- - sum of all cachelines accesses
-
Rmt/Lcl Hitm
- cacheline percentage of all Remote/Local HITM accesses
- LLC Load Hitm - Total, Lcl, Rmt
+ LLC Load Hitm - Total, LclHitm, RmtHitm
- count of Total/Local/Remote load HITMs
- Store Reference - Total, L1Hit, L1Miss
- Total - all store accesses
- L1Hit - store accesses that hit L1
- L1Hit - store accesses that missed L1
+ Total records
+ - sum of all cachelines accesses
- Load Dram
- - count of local and remote DRAM accesses
+ Total loads
+ - sum of all load accesses
- LLC Ld Miss
- - count of all accesses that missed LLC
+ Total stores
+ - sum of all store accesses
- Total Loads
- - sum of all load accesses
+ Store Reference - L1Hit, L1Miss
+ L1Hit - store accesses that hit L1
+ L1Miss - store accesses that missed L1
Core Load Hit - FB, L1, L2
- count of load hits in FB (Fill Buffer), L1 and L2 cache
- LLC Load Hit - Llc, Rmt
- - count of LLC and Remote load hits
+ LLC Load Hit - LlcHit, LclHitm
+ - count of LLC load accesses, includes LLC hits and LLC HITMs
+
+ RMT Load Hit - RmtHit, RmtHitm
+ - count of remote load accesses, includes remote hits and remote HITMs
+
+ Load Dram - Lcl, Rmt
+ - count of local and remote DRAM accesses
For each offset in the 2) list we display following data:
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index f16d8a71d3f5..31069d8a5304 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -242,6 +242,11 @@ annotate.*::
These are in control of addresses, jump function, source code
in lines of assembly code from a specific program.
+ annotate.disassembler_style:
+ Use this to change the default disassembler style to some other value
+ supported by binutils, such as "intel", see the '-M' option help in the
+ 'objdump' man page.
+
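		As a sketch, the value can be set with 'perf config' (assuming the
		installed objdump accepts the corresponding -M option):

		  perf config annotate.disassembler_style=intel
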
annotate.hide_src_code::
If a program which is analyzed has source code,
this option lets 'annotate' print a list of assembly code with the source code.
@@ -614,8 +619,9 @@ trace.*::
ftrace.*::
ftrace.tracer::
- Can be used to select the default tracer. Possible values are
- 'function' and 'function_graph'.
+	Can be used to select the default tracer when neither the -G nor
+	the -F option is specified. Possible values are 'function' and
+ 'function_graph'.
llvm.*::
llvm.clang-path::
@@ -667,6 +673,11 @@ convert.*::
Limit the size of ordered_events queue, so we could control
allocation size of perf data files without proper finished
round events.
+stat.*::
+
+ stat.big-num::
+ (boolean) Change the default for "--big-num". To make
+ "--no-big-num" the default, set "stat.big-num=false".
intel-pt.*::
diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
index c87180764829..726b9bc9e1a7 100644
--- a/tools/perf/Documentation/perf-data.txt
+++ b/tools/perf/Documentation/perf-data.txt
@@ -27,6 +27,9 @@ OPTIONS for 'convert'
--to-ctf::
Triggers the CTF conversion, specify the path of CTF data directory.
+--tod::
+ Convert time to wall clock time.
+
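A conversion sketch ('./ctf-data' is a placeholder output directory; CTF
conversion requires a perf build with libbabeltrace support):

  perf data convert -i perf.data --to-ctf ./ctf-data --tod
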
-i::
Specify input perf data file path.
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index f50ca0fef0a4..be65bd55ab2a 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -182,6 +182,10 @@ OPTIONS
--tid=::
Only diff samples for given thread ID (comma separated list).
+--stream::
+ Enable hot streams comparison. Stream can be a callchain which is
+ aggregated by the branch records from samples.
+
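A comparison sketch, assuming both data files were recorded with branch
stacks ('./workload' is a placeholder):

  perf record -b -o perf.data.old -- ./workload    # baseline run
  perf record -b -o perf.data     -- ./workload    # new run
  perf diff --stream perf.data.old perf.data
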
COMPARISON
----------
The comparison is governed by the baseline file. The baseline perf.data
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index b80c84307dc9..1e91121bac0f 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -24,16 +24,29 @@ OPTIONS
-t::
--tracer=::
- Tracer to use: function_graph or function.
+	Tracer to use when neither the -G nor the -F option is
+	specified: function_graph or function.
-v::
--verbose=::
Verbosity level.
+-F::
+--funcs::
+	List available functions to trace. It accepts a pattern to
+	list only the functions of interest.
+
-p::
--pid=::
Trace on existing process id (comma separated list).
+--tid=::
+ Trace on existing thread id (comma separated list).
+
+-D::
+--delay::
+ Time (ms) to wait before starting tracing after program start.
+
-a::
--all-cpus::
Force system-wide collection. Scripts run without a <command>
@@ -48,39 +61,58 @@ OPTIONS
Ranges of CPUs are specified with -: 0-2.
Default is to trace on all online CPUs.
+-m::
+--buffer-size::
+ Set the size of per-cpu tracing buffer, <size> is expected to
+ be a number with appended unit character - B/K/M/G.
+
+--inherit::
+	Trace child processes spawned by the target.
+
-T::
--trace-funcs=::
- Only trace functions given by the argument. Multiple functions
- can be given by using this option more than once. The function
- argument also can be a glob pattern. It will be passed to
- 'set_ftrace_filter' in tracefs.
+ Select function tracer and set function filter on the given
+ function (or a glob pattern). Multiple functions can be given
+ by using this option more than once. The function argument also
+ can be a glob pattern. It will be passed to 'set_ftrace_filter'
+ in tracefs.
-N::
--notrace-funcs=::
- Do not trace functions given by the argument. Like -T option,
- this can be used more than once to specify multiple functions
- (or glob patterns). It will be passed to 'set_ftrace_notrace'
- in tracefs.
+ Select function tracer and do not trace functions given by the
+ argument. Like -T option, this can be used more than once to
+ specify multiple functions (or glob patterns). It will be
+ passed to 'set_ftrace_notrace' in tracefs.
+
+--func-opts::
+	List of options that can be set:
+ call-graph - Display kernel stack trace for function tracer.
+ irq-info - Display irq context info for function tracer.
-G::
--graph-funcs=::
- Set graph filter on the given function (or a glob pattern).
- This is useful for the function_graph tracer only and enables
- tracing for functions executed from the given function.
- This can be used more than once to specify multiple functions.
- It will be passed to 'set_graph_function' in tracefs.
+ Select function_graph tracer and set graph filter on the given
+ function (or a glob pattern). This is useful to trace for
+ functions executed from the given function. This can be used more
+ than once to specify multiple functions. It will be passed to
+ 'set_graph_function' in tracefs.
-g::
--nograph-funcs=::
- Set graph notrace filter on the given function (or a glob pattern).
- Like -G option, this is useful for the function_graph tracer only
- and disables tracing for function executed from the given function.
- This can be used more than once to specify multiple functions.
- It will be passed to 'set_graph_notrace' in tracefs.
+ Select function_graph tracer and set graph notrace filter on the
+ given function (or a glob pattern). Like -G option, this is useful
+ for the function_graph tracer only and disables tracing for function
+ executed from the given function. This can be used more than once to
+ specify multiple functions. It will be passed to 'set_graph_notrace'
+ in tracefs.
--D::
---graph-depth=::
- Set max depth for function graph tracer to follow
+--graph-opts::
+	List of options that can be set:
+ nosleep-time - Measure on-CPU time only for function_graph tracer.
+ noirqs - Ignore functions that happen inside interrupt.
+ verbose - Show process names, PIDs, timestamps, etc.
+ thresh=<n> - Setup trace duration threshold in microseconds.
+ depth=<n> - Set max depth for function graph tracer to follow.
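
	An invocation sketch combining these options (the filter pattern and the
	numeric values are placeholders, given as a comma-separated list):

	  perf ftrace -G 'vfs_*' --graph-opts depth=5,thresh=100 -- ls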
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index 70969ea73e01..a8eccff21281 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -24,8 +24,12 @@ information could make use of this facility.
OPTIONS
-------
-b::
---build-ids=::
+--build-ids::
Inject build-ids into the output stream
+
+--buildid-all::
+ Inject build-ids of all DSOs into the output stream
+
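	As a usage sketch ('perf.data.new' is a placeholder output name):

	  perf inject --buildid-all -i perf.data -o perf.data.new
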
-v::
--verbose::
Be more verbose.
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index 456fdcbf26ac..cd362dc2af07 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -69,22 +69,22 @@ And profiled with 'perf report' e.g.
To also trace kernel space presents a problem, namely kernel self-modifying
code. A fairly good kernel image is available in /proc/kcore but to get an
accurate image a copy of /proc/kcore needs to be made under the same conditions
-as the data capture. A script perf-with-kcore can do that, but beware that the
-script makes use of 'sudo' to copy /proc/kcore. If you have perf installed
-locally from the source tree you can do:
+as the data capture. 'perf record' can make a copy of /proc/kcore if the option
+--kcore is used, but access to /proc/kcore is restricted e.g.
- ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls
+ sudo perf record -o pt_ls --kcore -e intel_pt// -- ls
-which will create a directory named 'pt_ls' and put the perf.data file and
-copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use
-'perf report' becomes:
+which will create a directory named 'pt_ls' and put the perf.data file (named
+simply 'data') and copies of /proc/kcore, /proc/kallsyms and /proc/modules into
+it. The other tools understand the directory format, so to use 'perf report'
+becomes:
- ~/libexec/perf-core/perf-with-kcore report pt_ls
+ sudo perf report -i pt_ls
Because samples are synthesized after-the-fact, the sampling period can be
selected for reporting. e.g. sample every microsecond
- ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge
+ sudo perf report pt_ls --itrace=i1usge
See the sections below for more information about the --itrace option.
@@ -112,6 +112,32 @@ The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
in transaction, respectively.
+perf script also supports higher level ways to dump instruction traces:
+
+ perf script --insn-trace --xed
+
+Dump all instructions. This requires installing the xed tool (see XED below).
+Dumping all instructions in a long trace can be fairly slow. It is usually better
+to start with higher level decoding, like
+
+ perf script --call-trace
+
+or
+
+ perf script --call-ret-trace
+
+and then select a time range of interest. The time range can then be examined
+in detail with
+
+ perf script --time starttime,stoptime --insn-trace --xed
+
+While examining the trace it's also useful to filter on specific CPUs using
+the -C option
+
+ perf script --time starttime,stoptime --insn-trace --xed -C 1
+
+Dump all instructions in time range on CPU 1.
+
Another interesting field that is not printed by default is 'ipc' which can be
displayed as follows:
@@ -558,7 +584,7 @@ The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g
Intel PT modes of operation
~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Intel PT can be used in 2 modes:
+Intel PT can be used in 3 modes:
full-trace mode
sample mode
snapshot mode
@@ -571,7 +597,8 @@ Sample mode attaches a Intel PT sample to other events e.g.
perf record --aux-sample -e intel_pt//u -e branch-misses:u
-Snapshot mode captures the available data when a signal is sent e.g.
+Snapshot mode captures the available data when a signal is sent or "snapshot"
+control command is issued. e.g. using a signal
perf record -v -e intel_pt//u -S ./loopy 1000000000 &
[1] 11435
@@ -582,7 +609,23 @@ Note that the signal sent is SIGUSR2.
Note that "Recording AUX area tracing snapshot" is displayed because the -v
option is used.
-The 2 modes cannot be used together.
+The advantage of using the "snapshot" control command is that access is
+controlled via a FIFO e.g.
+
+ $ mkfifo perf.control
+ $ mkfifo perf.ack
+ $ cat perf.ack &
+ [1] 15235
+ $ sudo ~/bin/perf record --control fifo:perf.control,perf.ack -S -e intel_pt//u -- sleep 60 &
+ [2] 15243
+ $ ps -e | grep perf
+ 15244 pts/1 00:00:00 perf
+ $ kill -USR2 15244
+ bash: kill: (15244) - Operation not permitted
+ $ echo snapshot > perf.control
+ ack
+
+The 3 Intel PT modes of operation cannot be used together.
Buffer handling
@@ -687,7 +730,7 @@ The v4.2 kernel introduced support for a context switch metadata event,
PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes
are scheduled out and in, just not by whom, which is left for the
PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context,
-which in turn requires CAP_SYS_ADMIN.
+which in turn requires CAP_PERFMON or CAP_SYS_ADMIN.
Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context
switches") commit, that introduces these metadata events for further info.
@@ -821,8 +864,11 @@ The letters are:
e synthesize tracing error events
d create a debug log
g synthesize a call chain (use with i or x)
+ G synthesize a call chain on existing event records
l synthesize last branch entries (use with i or x)
+ L synthesize last branch entries on existing event records
s skip initial number of events
+ q quicker (less detailed) decoding
"Instructions" events look like they were recorded by "perf record -e
instructions".
@@ -869,11 +915,24 @@ Developer Manuals.
Error events show where the decoder lost the trace. Error events
are quite important. Users must know if what they are seeing is a complete
-picture or not.
+picture or not. The "e" option may be followed by flags which affect what errors
+will or will not be reported. Each flag must be preceded by either '+' or '-'.
+The flags supported by Intel PT are:
+ -o Suppress overflow errors
+ -l Suppress trace data lost errors
+For example, for errors but not overflow or data lost errors:
+
+ --itrace=e-o-l
The "d" option will cause the creation of a file "intel_pt.log" containing all
decoded packets and instructions. Note that this option slows down the decoder
-and that the resulting file may be very large.
+and that the resulting file may be very large. The "d" option may be followed
+by flags which affect what debug messages will or will not be logged. Each flag
+must be preceded by either '+' or '-'. The flags supported by Intel PT are:
+ -a Suppress logging of perf events
+ +a Log all perf events
+By default, logged perf events are filtered by any specified time ranges, but
+flag +a overrides that.
In addition, the period of the "instructions" event can be specified. e.g.
@@ -912,6 +971,39 @@ transactions events can be specified. e.g.
Note that last branch entries are cleared for each sample, so there is no overlap
from one sample to the next.
+The G and L options are designed in particular for sample mode, and work much
+like g and l but add call chain and branch stack to the other selected events
+instead of synthesized events. For example, to record branch-misses events for
+'ls' and then add a call chain derived from the Intel PT trace:
+
+ perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' -- ls
+ perf report --itrace=Ge
+
+Although in fact G is a default for perf report, so that is the same as just:
+
+ perf report
+
+One caveat with the G and L options is that they work poorly with "Large PEBS".
+Large PEBS means PEBS records will be accumulated by hardware and then written
+into the event buffer in one go. That reduces interrupts, but can give very
+late timestamps. Because the Intel PT trace is synchronized by timestamps,
+the PEBS events do not match the trace. Currently, Large PEBS is used only in
+certain circumstances:
+ - hardware supports it
+ - PEBS is used
+ - event period is specified, instead of frequency
+ - the sample type is limited to the following flags:
+ PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR |
+ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID |
+ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER |
+ PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR |
+ PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER |
+ PERF_SAMPLE_PERIOD (and sometimes) | PERF_SAMPLE_TIME
+Because Intel PT sample mode uses a different sample type from the list above,
+Large PEBS is not used with Intel PT sample mode. To avoid Large PEBS in other
+cases, avoid specifying the event period i.e. avoid the 'perf record' -c option,
+--count option, or 'period' config term.
+
To disable trace decoding entirely, use the option --no-itrace.
It is also possible to skip events generated (instructions, branches, transactions)
@@ -921,6 +1013,51 @@ at the beginning. This is useful to ignore initialization code.
skips the first million instructions.
+The q option changes the way the trace is decoded. The decoding is much faster
+but much less detailed. Specifically, with the q option, the decoder does not
+decode TNT packets, and does not walk object code, but gets the ip from FUP and
+TIP packets. The q option can be used with the b and i options but the period
+is not used. The q option decodes more quickly, but is useful only if the
+control flow of interest is represented or indicated by FUP, TIP, TIP.PGE, or
+TIP.PGD packets (refer below). However the q option could be used to find time
+ranges that could then be decoded fully using the --time option.
+
+What will *not* be decoded with the (single) q option:
+
+ - direct calls and jmps
+ - conditional branches
+ - non-branch instructions
+
+What *will* be decoded with the (single) q option:
+
+ - asynchronous branches such as interrupts
+ - indirect branches
+ - function return target address *if* the noretcomp config term (refer
+ config terms section) was used
+ - start of (control-flow) tracing
+ - end of (control-flow) tracing, if it is not out of context
+ - power events, ptwrite, transaction start and abort
+ - instruction pointer associated with PSB packets
+
+Note the q option does not specify what events will be synthesized e.g. the p
+option must be used also to show power events.
+
+Repeating the q option (double-q i.e. qq) results in even faster decoding and even
+less detail. The decoder decodes only extended PSB (PSB+) packets, getting the
+instruction pointer if there is a FUP packet within PSB+ (i.e. between PSB and
+PSBEND). Note PSB packets occur regularly in the trace based on the psb_period
+config term (refer config terms section). There will be a FUP packet if the
+PSB+ occurs while control flow is being traced.
+
+What will *not* be decoded with the qq option:
+
+ - everything except instruction pointer associated with PSB packets
+
+What *will* be decoded with the qq option:
+
+ - instruction pointer associated with PSB packets
+
+
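A two-pass workflow sketch (the timestamps are placeholders taken from the
output of the first pass):

	# quick, low-detail pass over the whole trace
	perf script --itrace=qqi
	# full decode of only the region of interest
	perf script --time 1234.5,1235.0 --insn-trace --xed
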
dump option
~~~~~~~~~~~
@@ -999,6 +1136,10 @@ To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g.
perf script --itrace=oe
+XED
+---
+
+include::build-xed.txt[]
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 6345db33c533..4c7db1da8fcc 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -58,6 +58,7 @@ counted. The following modifiers exist:
S - read sample value (PERF_SAMPLE_READ)
D - pin the event to the PMU
W - group is weak and will fallback to non-group if not schedulable,
+ e - group or event are exclusive and do not share the PMU
The 'p' modifier can be used for specifying how precise the instruction
address should be. The 'p' modifier can be specified multiple times:
@@ -115,6 +116,12 @@ raw encoding of 0x1A8 can be used:
perf stat -e r1a8 -a sleep 1
perf record -e r1a8 ...
+It's also possible to use pmu syntax:
+
+ perf record -e r1a8 -a sleep 1
+ perf record -e cpu/r1a8/ ...
+ perf record -e cpu/r0x1a8/ ...
+
You should refer to the processor specific documentation for getting these
details. Some of them are referenced in the SEE ALSO section below.
@@ -258,6 +265,9 @@ Normally all events in an event group sample, but with :S only
the first event (the leader) samples, and it only reads the values of the
other events in the group.
+However, in the case of AUX area events (e.g. Intel PT or CoreSight), the AUX
+area event must be the leader, so it is then the second event that samples, not the first.
+
OPTIONS
-------
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index b3f3b3f1c161..768888b9326a 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -33,6 +33,10 @@ OPTIONS
- a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
+ - a symbolic or raw PMU event followed by an optional colon
+ and a list of event modifiers, e.g., cpu-cycles:p. See the
+ linkperf:perf-list[1] man page for details on event modifiers.
+
- a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where
'param1', 'param2', etc are defined as formats for the PMU in
/sys/bus/event_source/devices/<pmu>/format/*.
@@ -407,8 +411,9 @@ if combined with -a or -C options.
-D::
--delay=::
-After starting the program, wait msecs before measuring. This is useful to
-filter out the startup phase of the program, which is often very different.
+After starting the program, wait msecs before measuring (-1: start with events
+disabled). This is useful to filter out the startup phase of the program, which
+is often very different.
-I::
--intr-regs::
@@ -458,7 +463,9 @@ This option sets the time out limit. The default value is 500 ms.
--switch-events::
Record context switch events i.e. events of type PERF_RECORD_SWITCH or
-PERF_RECORD_SWITCH_CPU_WIDE.
+PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT or CoreSight)
+switch events will be enabled automatically, which can be suppressed
+by the option --no-switch-events.
--clang-path=PATH::
Path to clang binary to use for compiling BPF scriptlets.
@@ -556,6 +563,19 @@ overhead. You can still switch them on with:
--switch-output --no-no-buildid --no-no-buildid-cache
+--switch-output-event::
+Events that will cause the switch of the perf.data file, auto-selecting
+--switch-output=signal. The results are similar, as internally the sideband
+thread will also send a SIGUSR2 to the main one.
+
+Uses the same syntax as --event; it will just not be recorded, serving only to
+switch the perf.data file as soon as the --switch-output event is processed by
+a separate sideband thread.
+
+This sideband thread is also used for other purposes, like processing the
+PERF_RECORD_BPF_EVENT records as they happen, asking the kernel for extra BPF
+information, etc.
+
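A usage sketch (the tracepoint and './workload' are placeholders chosen only
for illustration):

  perf record -e cycles:u \
       --switch-output-event syscalls:sys_enter_nanosleep \
       -- ./workload
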
--switch-max-files=N::
When rotating perf.data with --switch-output, only keep N files.
@@ -596,6 +616,62 @@ Make a copy of /proc/kcore and place it into a directory with the perf data file
Limit the sample data max size, <size> is expected to be a number with
appended unit character - B/K/M/G
+--num-thread-synthesize::
+ The number of threads to run when synthesizing events for existing processes.
+ By default, the number of threads equals 1.
+
+ifdef::HAVE_LIBPFM[]
+--pfm-events events::
+Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
+including support for event filters. For example '--pfm-events
+inst_retired:any_p:u:c=1:i'. More than one event can be passed to the
+option using the comma separator. Hardware events and generic hardware
+events cannot be mixed together. The latter must be used with the -e
+option. The -e option and this one can be mixed and matched. Events
+can be grouped using the {} notation.
+endif::HAVE_LIBPFM[]
+
+--control=fifo:ctl-fifo[,ack-fifo]::
+--control=fd:ctl-fd[,ack-fd]::
+ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows.
+Listen on ctl-fd descriptor for command to control measurement ('enable': enable events,
+'disable': disable events, 'snapshot': AUX area tracing snapshot). Measurements can be
+started with events disabled using --delay=-1 option. Optionally send control command
+completion ('ack\n') to ack-fd descriptor to synchronize with the controlling process.
+Example of bash shell script to enable and disable events during measurements:
+
+ #!/bin/bash
+
+ ctl_dir=/tmp/
+
+ ctl_fifo=${ctl_dir}perf_ctl.fifo
+ test -p ${ctl_fifo} && unlink ${ctl_fifo}
+ mkfifo ${ctl_fifo}
+ exec {ctl_fd}<>${ctl_fifo}
+
+ ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo
+ test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo}
+ mkfifo ${ctl_ack_fifo}
+ exec {ctl_fd_ack}<>${ctl_ack_fifo}
+
+ perf record -D -1 -e cpu-cycles -a \
+ --control fd:${ctl_fd},${ctl_fd_ack} \
+ -- sleep 30 &
+ perf_pid=$!
+
+ sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})"
+ sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})"
+
+ exec {ctl_fd_ack}>&-
+ unlink ${ctl_ack_fifo}
+
+ exec {ctl_fd}>&-
+ unlink ${ctl_fifo}
+
+ wait -n ${perf_pid}
+ exit $?
+
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index f569b9ea4002..d068103690cc 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -488,6 +488,17 @@ include::itrace.txt[]
This option extends the perf report to show reference callgraphs,
which collected by reference event, in no callgraph event.
+--stitch-lbr::
+ Show callgraph with stitched LBRs, which may have more complete
+ callgraph. The perf.data file must have been obtained using
+ perf record --call-graph lbr.
+ Disabled by default. In common cases with call stack overflows,
+ it can recreate better call stacks than the default lbr call stack
+	output. But this approach is not foolproof. There can be cases
+	where it creates incorrect call stacks from incorrect matches.
+	Known limitations include exception handling such as
+	setjmp/longjmp, whose calls and returns will not match.
+
--socket-filter::
Only report the samples on the processor socket that match with this filter
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 963487e82edc..4f712fb8f175 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -322,6 +322,10 @@ OPTIONS
--show-cgroup-events
Display cgroup events i.e. events of type PERF_RECORD_CGROUP.
+--show-text-poke-events
+ Display text poke events i.e. events of type PERF_RECORD_TEXT_POKE and
+ PERF_RECORD_KSYMBOL.
+
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
@@ -440,6 +444,17 @@ include::itrace.txt[]
--show-on-off-events::
Show the --switch-on/off events too.
+--stitch-lbr::
+ Show callgraph with stitched LBRs, which may have more complete
+ callgraph. The perf.data file must have been obtained using
+ perf record --call-graph lbr.
+ Disabled by default. In common cases with call stack overflows,
+ it can recreate better call stacks than the default lbr call stack
+	output. But this approach is not foolproof. There can be cases
+	where it creates incorrect call stacks from incorrect matches.
+	Known limitations include exception handling such as
+	setjmp/longjmp, whose calls and returns will not match.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 4d56586b2fb9..9f9f29025e49 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -39,6 +39,10 @@ report::
- a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
+ - a symbolic or raw PMU event followed by an optional colon
+ and a list of event modifiers, e.g., cpu-cycles:p. See the
+ linkperf:perf-list[1] man page for details on event modifiers.
+
- a symbolically formed event like 'pmu/param1=0x3,param2/' where
param1 and param2 are defined as formats for the PMU in
/sys/bus/event_source/devices/<pmu>/format/*
@@ -71,6 +75,16 @@ report::
--tid=<tid>::
stat events on existing thread id (comma separated list)
+ifdef::HAVE_LIBPFM[]
+--pfm-events events::
+Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
+including support for event filters. For example '--pfm-events
+inst_retired:any_p:u:c=1:i'. More than one event can be passed to the
+option using the comma separator. Hardware events and generic hardware
+events cannot be mixed together. The latter must be used with the -e
+option. The -e option and this one can be mixed and matched. Events
+can be grouped using the {} notation.
+endif::HAVE_LIBPFM[]
-a::
--all-cpus::
@@ -93,7 +107,9 @@ report::
-B::
--big-num::
- print large numbers with thousands' separators according to locale
+ print large numbers with thousands' separators according to locale.
+ Enabled by default. Use "--no-big-num" to disable.
+ Default setting can be changed with "perf config stat.big-num=false".
-C::
--cpu=::
@@ -150,6 +166,11 @@ use '-e e1 -e e2 -G foo,foo' or just use '-e e1 -e e2 -G foo'.
If wanting to monitor, say, 'cycles' for a cgroup and also for system wide, this
command line can be used: 'perf stat -e cycles -G cgroup_name -a -e cycles'.
+--for-each-cgroup name::
+Expand event list for each cgroup in "name" (allow multiple cgroups separated
+by comma). This has the same effect as repeating the -e and -G options for
+each event x name. This option cannot be used with the -G/--cgroup option.
+
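A usage sketch ('A' and 'B' are placeholder cgroup names):

  perf stat -e cycles,instructions --for-each-cgroup A,B -a sleep 1
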
-o file::
--output file::
Print the output into the designated file.
@@ -164,6 +185,47 @@ with it. --append may be used here. Examples:
3>results perf stat --log-fd 3 -- $cmd
3>>results perf stat --log-fd 3 --append -- $cmd
+--control=fifo:ctl-fifo[,ack-fifo]::
+--control=fd:ctl-fd[,ack-fd]::
+ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows.
+Listen on ctl-fd descriptor for command to control measurement ('enable': enable events,
+'disable': disable events). Measurements can be started with events disabled using
+--delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor
+to synchronize with the controlling process. Example of bash shell script to enable and
+disable events during measurements:
+
+ #!/bin/bash
+
+ ctl_dir=/tmp/
+
+ ctl_fifo=${ctl_dir}perf_ctl.fifo
+ test -p ${ctl_fifo} && unlink ${ctl_fifo}
+ mkfifo ${ctl_fifo}
+ exec {ctl_fd}<>${ctl_fifo}
+
+ ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo
+ test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo}
+ mkfifo ${ctl_ack_fifo}
+ exec {ctl_fd_ack}<>${ctl_ack_fifo}
+
+ perf stat -D -1 -e cpu-cycles -a -I 1000 \
+ --control fd:${ctl_fd},${ctl_fd_ack} \
+ -- sleep 30 &
+ perf_pid=$!
+
+ sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})"
+ sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})"
+
+ exec {ctl_fd_ack}>&-
+ unlink ${ctl_ack_fifo}
+
+ exec {ctl_fd}>&-
+ unlink ${ctl_fifo}
+
+ wait -n ${perf_pid}
+ exit $?
+
+
--pre::
--post::
Pre and post measurement hooks, e.g.:
@@ -176,6 +238,8 @@ Print count deltas every N milliseconds (minimum: 1ms)
The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution.
example: 'perf stat -I 1000 -e cycles -a sleep 5'
+If a metric exists for an event, it is calculated from the counts generated in this interval and printed after '#'.
+
--interval-count times::
Print count deltas for fixed number of times.
This option should be used together with "-I" option.
@@ -224,14 +288,34 @@ mode, use --per-node in addition to -a. (system-wide).
-D msecs::
--delay msecs::
-After starting the program, wait msecs before measuring. This is useful to
-filter out the startup phase of the program, which is often very different.
+After starting the program, wait msecs before measuring (-1: start with events
+disabled). This is useful to filter out the startup phase of the program,
+which is often very different.
-T::
--transaction::
Print statistics of transactional execution if supported.
+--metric-no-group::
+By default, events to compute a metric are placed in weak groups. The
+group tries to enforce scheduling all or none of the events. The
+--metric-no-group option places events outside of groups and may
+increase the chance of the event being scheduled - leading to more
+accuracy. However, as events may not be scheduled together, accuracy
+for metrics like instructions per cycle can be lower, since the
+constituent events may no longer be measured at the same time.
+
+--metric-no-merge::
+By default metric events in different weak groups can be shared if one
+group contains all the events needed by another. In such cases one
+group will be eliminated reducing event multiplexing and making it so
+that certain groups of metrics sum to 100%. A downside to sharing a
+group is that the group may require multiplexing and so accuracy for a
+small group that need not have multiplexing is lowered. This option
+forbids the event merging logic from sharing events between groups and
+may be used to increase accuracy in this case.
+
STAT RECORD
-----------
Stores stat data into perf data file.
@@ -284,6 +368,11 @@ if the workload is actually bound by the CPU and not by something else.
For best results it is usually a good idea to use it with interval
mode like -I 1000, as the bottleneck of workloads can change often.
+This enables --metric-only, unless overridden with --no-metric-only.
+
+The following restrictions only apply to older Intel CPUs and Atom,
+on newer CPUs (IceLake and later) TopDown can be collected for any thread:
+
The top down metrics are collected per core instead of per
CPU thread. Per core mode is automatically enabled
and -a (global monitoring) is needed, requiring root rights or
@@ -295,8 +384,6 @@ echo 0 > /proc/sys/kernel/nmi_watchdog
for best results. Otherwise the bottlenecks may be inconsistent
on workload with changing phases.
-This enables --metric-only, unless overridden with --no-metric-only.
-
To interpret the results it is usually needed to know on which
CPUs the workload runs on. If needed the CPUs can be forced using
taskset.
@@ -343,6 +430,9 @@ counts for all hardware threads in a core but show the sum counts per
hardware thread. This is essentially a replacement for the any bit and
convenient for post processing.
+--summary::
+Print summary for interval mode (-I).
+
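A usage sketch, printing per-interval counts followed by a final summary:

  perf stat -I 1000 --summary -e cycles -a sleep 3
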
EXAMPLES
--------
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 487737a725e9..ee2024691d46 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -319,6 +319,26 @@ Default is to monitor all CPUS.
go straight to the histogram browser, just like 'perf top' with no events
explicitely specified does.
+--stitch-lbr::
+ Show callgraph with stitched LBRs, which may have more complete
+ callgraph. The option must be used with --call-graph lbr recording.
+ Disabled by default. In common cases with call stack overflows,
+ it can recreate better call stacks than the default lbr call stack
+	output. But this approach is not foolproof. There can be cases
+	where it creates incorrect call stacks from incorrect matches.
+	Known limitations include exception handling such as
+	setjmp/longjmp, whose calls and returns will not match.
+
+ifdef::HAVE_LIBPFM[]
+--pfm-events events::
+Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
+including support for event filters. For example '--pfm-events
+inst_retired:any_p:u:c=1:i'. More than one event can be passed to the
+option using the comma separator. Hardware events and generic hardware
+events cannot be mixed together. The latter must be used with the -e
+option. The -e option and this one can be mixed and matched. Events
+can be grouped using the {} notation.
+endif::HAVE_LIBPFM[]
INTERACTIVE PROMPTING KEYS
--------------------------
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index b0152e1095c5..9ee96640744e 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -373,6 +373,35 @@ struct {
Indicates that trace contains records of PERF_RECORD_COMPRESSED type
that have perf_events records in compressed form.
+ HEADER_CPU_PMU_CAPS = 28,
+
+ A list of cpu PMU capabilities. The format of data is as below.
+
+struct {
+ u32 nr_cpu_pmu_caps;
+ {
+ char name[];
+ char value[];
+ } [nr_cpu_pmu_caps]
+};
+
+
+Example:
+ cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake
+
+ HEADER_CLOCK_DATA = 29,
+
+ Contains clock id and its reference time together with wall clock
+	time taken at the 'same time'; both values are in nanoseconds.
+ The format of data is as below.
+
+struct {
+ u32 version; /* version = 1 */
+ u32 clockid;
+ u64 wall_clock_ns;
+ u64 clockid_time_ns;
+};
+
other bits are reserved and should be ignored for now
HEADER_FEAT_BITS = 256,
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
index 3f37ded13f8c..c130a3c46a90 100644
--- a/tools/perf/Documentation/perf.txt
+++ b/tools/perf/Documentation/perf.txt
@@ -12,32 +12,57 @@ SYNOPSIS
OPTIONS
-------
---debug::
- Setup debug variable (see list below) in value
- range (0, 10). Use like:
- --debug verbose # sets verbose = 1
- --debug verbose=2 # sets verbose = 2
-
- List of debug variables allowed to set:
- verbose - general debug messages
- ordered-events - ordered events object debug messages
- data-convert - data convert command debug messages
- stderr - write debug output (option -v) to stderr
- in browser mode
- perf-event-open - Print perf_event_open() arguments and
- return value
-
---buildid-dir::
- Setup buildid cache directory. It has higher priority than
- buildid.dir config file option.
+-h::
+--help::
+ Run perf help command.
-v::
--version::
- Display perf version.
+ Display perf version.
--h::
---help::
- Run perf help command.
+-vv::
+ Print the compiled-in status of libraries.
+
+--exec-path::
+ Display or set exec path.
+
+--html-path::
+ Display html documentation path.
+
+-p::
+--paginate::
+ Set up pager.
+
+--no-pager::
+ Do not set pager.
+
+--buildid-dir::
+ Setup buildid cache directory. It has higher priority
+ than buildid.dir config file option.
+
+--list-cmds::
+ List the most commonly used perf commands.
+
+--list-opts::
+ List available perf options.
+
+--debugfs-dir::
+ Set debugfs directory or set environment variable PERF_DEBUGFS_DIR.
+
+--debug::
+ Setup debug variable (see list below) in value
+ range (0, 10). Use like:
+ --debug verbose # sets verbose = 1
+ --debug verbose=2 # sets verbose = 2
+
+ List of debug variables allowed to set:
+ verbose - general debug messages
+ ordered-events - ordered events object debug messages
+ data-convert - data convert command debug messages
+ stderr - write debug output (option -v) to stderr
+ in browser mode
+ perf-event-open - Print perf_event_open() arguments and
+ return value
DESCRIPTION
-----------
diff --git a/tools/perf/Documentation/security.txt b/tools/perf/Documentation/security.txt
new file mode 100644
index 000000000000..4fe3b8b1958f
--- /dev/null
+++ b/tools/perf/Documentation/security.txt
@@ -0,0 +1,237 @@
+Overview
+========
+
+For general security related questions of perf_event_open() syscall usage,
+performance monitoring and observability operations by Perf see here:
+https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html
+
+Enabling LSM based mandatory access control (MAC) to perf_event_open() syscall
+==============================================================================
+
+LSM hooks for mandatory access control for perf_event_open() syscall can be
+used starting from Linux v5.3. Below are the steps to extend Fedora (v31) with
+Targeted policy with perf_event_open() access control capabilities:
+
+1. Download selinux-policy SRPM package (e.g. selinux-policy-3.14.4-48.fc31.src.rpm on FC31)
+ and install it so rpmbuild directory would exist in the current working directory:
+
+ # rpm -Uhv selinux-policy-3.14.4-48.fc31.src.rpm
+
+2. Get into rpmbuild/SPECS directory and unpack the source code:
+
+ # rpmbuild -bp selinux-policy.spec
+
+3. Place patch below at rpmbuild/BUILD/selinux-policy-b86eaaf4dbcf2d51dd4432df7185c0eaf3cbcc02
+ directory and apply it:
+
+ # patch -p1 < selinux-policy-perf-events-perfmon.patch
+ patching file policy/flask/access_vectors
+ patching file policy/flask/security_classes
+ # cat selinux-policy-perf-events-perfmon.patch
+diff -Nura a/policy/flask/access_vectors b/policy/flask/access_vectors
+--- a/policy/flask/access_vectors 2020-02-04 18:19:53.000000000 +0300
++++ b/policy/flask/access_vectors 2020-02-28 23:37:25.000000000 +0300
+@@ -174,6 +174,7 @@
+ wake_alarm
+ block_suspend
+ audit_read
++ perfmon
+ }
+
+ #
+@@ -1099,3 +1100,15 @@
+
+ class xdp_socket
+ inherits socket
++
++class perf_event
++{
++ open
++ cpu
++ kernel
++ tracepoint
++ read
++ write
++}
++
++
+diff -Nura a/policy/flask/security_classes b/policy/flask/security_classes
+--- a/policy/flask/security_classes 2020-02-04 18:19:53.000000000 +0300
++++ b/policy/flask/security_classes 2020-02-28 21:35:17.000000000 +0300
+@@ -200,4 +200,6 @@
+
+ class xdp_socket
+
++class perf_event
++
+ # FLASK
+
+4. Get into rpmbuild/SPECS directory and build policy packages from patched sources:
+
+ # rpmbuild --noclean --noprep -ba selinux-policy.spec
+
+ so you have this:
+
+ # ls -alh rpmbuild/RPMS/noarch/
+ total 33M
+ drwxr-xr-x. 2 root root 4.0K Mar 20 12:16 .
+ drwxr-xr-x. 3 root root 4.0K Mar 20 12:16 ..
+ -rw-r--r--. 1 root root 112K Mar 20 12:16 selinux-policy-3.14.4-48.fc31.noarch.rpm
+ -rw-r--r--. 1 root root 1.2M Mar 20 12:17 selinux-policy-devel-3.14.4-48.fc31.noarch.rpm
+ -rw-r--r--. 1 root root 2.3M Mar 20 12:17 selinux-policy-doc-3.14.4-48.fc31.noarch.rpm
+ -rw-r--r--. 1 root root 12M Mar 20 12:17 selinux-policy-minimum-3.14.4-48.fc31.noarch.rpm
+ -rw-r--r--. 1 root root 4.5M Mar 20 12:16 selinux-policy-mls-3.14.4-48.fc31.noarch.rpm
+ -rw-r--r--. 1 root root 111K Mar 20 12:16 selinux-policy-sandbox-3.14.4-48.fc31.noarch.rpm
+ -rw-r--r--. 1 root root 14M Mar 20 12:17 selinux-policy-targeted-3.14.4-48.fc31.noarch.rpm
+
+5. Install SELinux packages from Fedora repo, if not already done so, and
+ update with the patched rpms above:
+
+ # rpm -Uhv rpmbuild/RPMS/noarch/selinux-policy-*
+
+6. Enable SELinux Permissive mode for Targeted policy, if not already done so:
+
+ # cat /etc/selinux/config
+
+ # This file controls the state of SELinux on the system.
+ # SELINUX= can take one of these three values:
+ # enforcing - SELinux security policy is enforced.
+ # permissive - SELinux prints warnings instead of enforcing.
+ # disabled - No SELinux policy is loaded.
+ SELINUX=permissive
+ # SELINUXTYPE= can take one of these three values:
+ # targeted - Targeted processes are protected,
+ # minimum - Modification of targeted policy. Only selected processes are protected.
+ # mls - Multi Level Security protection.
+ SELINUXTYPE=targeted
+
+7. Enable filesystem SELinux labeling at the next reboot:
+
+ # touch /.autorelabel
+
+8. Reboot machine and it will label filesystems and load Targeted policy into the kernel;
+
+9. Login and check that dmesg output doesn't mention that perf_event class is unknown to SELinux subsystem;
+
+10. Check that SELinux is enabled and in Permissive mode
+
+ # getenforce
+ Permissive
+
+11. Turn SELinux into Enforcing mode:
+
+ # setenforce 1
+ # getenforce
+ Enforcing
+
+Opening access to perf_event_open() syscall on Fedora with SELinux
+==================================================================
+
+Access to performance monitoring and observability operations by Perf
+can be limited for superuser or CAP_PERFMON or CAP_SYS_ADMIN privileged
+processes. MAC policy settings (e.g. SELinux) can be loaded into the kernel
+and prevent unauthorized access to the perf_event_open() syscall. In such a case
+the Perf tool provides a message similar to the one below:
+
+ # perf stat
+ Error:
+ Access to performance monitoring and observability operations is limited.
+ Enforced MAC policy settings (SELinux) can limit access to performance
+ monitoring and observability operations. Inspect system audit records for
+ more perf_event access control information and adjusting the policy.
+ Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open
+ access to performance monitoring and observability operations for users
+ without CAP_PERFMON or CAP_SYS_ADMIN Linux capability.
+ perf_event_paranoid setting is -1:
+ -1: Allow use of (almost) all events by all users
+ Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
+ >= 0: Disallow raw and ftrace function tracepoint access
+ >= 1: Disallow CPU event access
+ >= 2: Disallow kernel profiling
+ To make the adjusted perf_event_paranoid setting permanent preserve it
+ in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)
+
+To make sure that access is limited by MAC policy settings, inspect the system
+audit records using the journalctl command or /var/log/audit/audit.log; the
+output should contain AVC denied records related to perf_event:
+
+ # journalctl --reverse --no-pager | grep perf_event
+
+ python3[1318099]: SELinux is preventing perf from open access on the perf_event labeled unconfined_t.
+ If you believe that perf should be allowed open access on perf_event labeled unconfined_t by default.
+ setroubleshoot[1318099]: SELinux is preventing perf from open access on the perf_event labeled unconfined_t. For complete SELinux messages run: sealert -l 4595ce5b-e58f-462c-9d86-3bc2074935de
+ audit[1318098]: AVC avc: denied { open } for pid=1318098 comm="perf" scontext=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 tcontext=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 tclass=perf_event permissive=0
+
+In order to open access to the perf_event_open() syscall, the MAC policy
+settings may need to be extended. On an SELinux system this can be done by
+loading a special policy module that extends the base policy settings. A
+perf-related policy module can be generated from the system audit records
+about blocked perf_event access. Run the command below to generate a
+my-perf.te policy extension file with perf_event related rules:
+
+ # ausearch -c 'perf' --raw | audit2allow -M my-perf && cat my-perf.te
+
+ module my-perf 1.0;
+
+ require {
+ type unconfined_t;
+ class perf_event { cpu kernel open read tracepoint write };
+ }
+
+ #============= unconfined_t ==============
+ allow unconfined_t self:perf_event { cpu kernel open read tracepoint write };
+
+Now compile, package and load the my-perf.pp extension module into the kernel:
+
+ # checkmodule -M -m -o my-perf.mod my-perf.te
+ # semodule_package -o my-perf.pp -m my-perf.mod
+ # semodule -X 300 -i my-perf.pp
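+
+To verify that the module was loaded, list the installed policy modules
+and look for it by name (the listing format may vary between SELinux
+releases):
+
+  # semodule -l | grep my-perf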
+
+After the steps above, access to the perf_event_open() syscall should
+now be allowed by the policy settings. Check access by running Perf like this:
+
+ # perf stat
+ ^C
+ Performance counter stats for 'system wide':
+
+ 36,387.41 msec cpu-clock # 7.999 CPUs utilized
+ 2,629 context-switches # 0.072 K/sec
+ 57 cpu-migrations # 0.002 K/sec
+ 1 page-faults # 0.000 K/sec
+ 263,721,559 cycles # 0.007 GHz
+ 175,746,713 instructions # 0.67 insn per cycle
+ 19,628,798 branches # 0.539 M/sec
+ 1,259,201 branch-misses # 6.42% of all branches
+
+ 4.549061439 seconds time elapsed
+
+The generated my-perf.pp policy extension module can be removed
+from the kernel using this command:
+
+ # semodule -X 300 -r my-perf
+
+Alternatively, the module can be temporarily disabled and re-enabled
+using these two commands:
+
+ # semodule -d my-perf
+ # semodule -e my-perf
+
+If something went wrong
+=======================
+
+To turn SELinux into Permissive mode:
+ # setenforce 0
+
+To fully disable SELinux during kernel boot [3], set the kernel command line parameter selinux=0
+
+To remove SELinux labeling from local filesystems:
+ # find / -mount -print0 | xargs -0 setfattr -h -x security.selinux
+
+To fully turn SELinux off on a machine, set SELINUX=disabled in the /etc/selinux/config file and reboot.
+
+Links
+=====
+
+[1] https://download-ib01.fedoraproject.org/pub/fedora/linux/updates/31/Everything/SRPMS/Packages/s/selinux-policy-3.14.4-49.fc31.src.rpm
+[2] https://docs.fedoraproject.org/en-US/Fedora/11/html/Security-Enhanced_Linux/sect-Security-Enhanced_Linux-Working_with_SELinux-Enabling_and_Disabling_SELinux.html
+[3] https://danwalsh.livejournal.com/10972.html
diff --git a/tools/perf/Documentation/topdown.txt b/tools/perf/Documentation/topdown.txt
new file mode 100644
index 000000000000..3c39bb3dc5fa
--- /dev/null
+++ b/tools/perf/Documentation/topdown.txt
@@ -0,0 +1,256 @@
+Using TopDown metrics in user space
+-----------------------------------
+
+Intel CPUs (since Sandy Bridge and Silvermont) support a TopDown
+methodology to break down CPU pipeline execution into 4 bottlenecks:
+frontend bound, backend bound, bad speculation, retiring.
+
+For more details on TopDown see [1][5].
+
+Traditionally this was implemented by events in generic counters
+and specific formulas to compute the bottlenecks.
+
+perf stat --topdown implements this.
+
+The full TopDown methodology includes more levels that can break down
+the bottlenecks further. This is not directly implemented in perf,
+but is available in other tools that can run on top of perf,
+such as toplev[2] or vtune[3].
+
+New TopDown features in Ice Lake
+================================
+
+With Ice Lake CPUs the TopDown metrics are directly available as
+fixed counters and do not require generic counters. This allows
+TopDown to be collected at all times, in addition to other events.
+
+% perf stat -a --topdown -I1000
+# time retiring bad speculation frontend bound backend bound
+ 1.001281330 23.0% 15.3% 29.6% 32.1%
+ 2.003009005 5.0% 6.8% 46.6% 41.6%
+ 3.004646182 6.7% 6.7% 46.0% 40.6%
+ 4.006326375 5.0% 6.4% 47.6% 41.0%
+ 5.007991804 5.1% 6.3% 46.3% 42.3%
+ 6.009626773 6.2% 7.1% 47.3% 39.3%
+ 7.011296356 4.7% 6.7% 46.2% 42.4%
+ 8.012951831 4.7% 6.7% 47.5% 41.1%
+...
+
+This also enables measuring TopDown per thread/process instead
+of only per core.
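+
+For example, a per-process measurement could look like this, with
+./myworkload standing in for any command:
+
+% perf stat --topdown -- ./myworkload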
+
+Using TopDown through RDPMC in applications on Ice Lake
+=======================================================
+
+For more fine grained measurements it can be useful to
+access the new counters directly from user space. This is more
+complicated, but drastically lowers overhead.
+
+On Ice Lake, there is a new fixed counter 3: SLOTS, which reports
+"pipeline SLOTS" (cycles multiplied by core issue width) and a
+metric register that reports slots ratios for the different bottleneck
+categories.
+
+The metrics counter is CPU model specific and is not available on older
+CPUs.
+
+Example code
+============
+
+Library functions providing the functionality described below
+are also available in libjevents [4].
+
+The application opens a group with fixed counter 3 (SLOTS) and any
+metric event, and allows user programs to read the performance counters.
+
+Fixed counter 3 is mapped to the pseudo event event=0x00, umask=0x04,
+so the perf_event_attr structure should be initialized with
+{ .config = 0x0400, .type = PERF_TYPE_RAW }.
+The metric events are mapped to the pseudo event event=0x00, umask=0x8X.
+For example, the perf_event_attr structure can be initialized with
+{ .config = 0x8000, .type = PERF_TYPE_RAW } for the Retiring metric event.
+Fixed counter 3 must be the leader of the group.
+
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+/* Provide our own perf_event_open stub because glibc doesn't provide one. */
+__attribute__((weak))
+int perf_event_open(struct perf_event_attr *attr, pid_t pid,
+ int cpu, int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+/* Open slots counter file descriptor for current task. */
+struct perf_event_attr slots = {
+ .type = PERF_TYPE_RAW,
+ .size = sizeof(struct perf_event_attr),
+ .config = 0x400,
+ .exclude_kernel = 1,
+};
+
+int slots_fd = perf_event_open(&slots, 0, -1, -1, 0);
+if (slots_fd < 0)
+ ... error ...
+
+/*
+ * Open metrics event file descriptor for current task.
+ * Set slots event as the leader of the group.
+ */
+struct perf_event_attr metrics = {
+ .type = PERF_TYPE_RAW,
+ .size = sizeof(struct perf_event_attr),
+ .config = 0x8000,
+ .exclude_kernel = 1,
+};
+
+int metrics_fd = perf_event_open(&metrics, 0, -1, slots_fd, 0);
+if (metrics_fd < 0)
+ ... error ...
+
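+Note that, depending on the kernel's RDPMC policy (for example the
+/sys/devices/cpu/rdpmc setting on x86), user-space RDPMC may only be
+allowed while the event is mmap'ed. A minimal sketch, assuming that
+requirement applies on the target system:
+
+#include <sys/mman.h>
+
+/* Map one page of the group leader so user-space RDPMC is permitted. */
+void *slots_page = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ,
+			MAP_SHARED, slots_fd, 0);
+if (slots_page == MAP_FAILED)
+	... error ...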
+
+The RDPMC instruction (or _rdpmc compiler intrinsic) can now be used
+to read slots and the topdown metrics at different points of the program:
+
+#include <stdint.h>
+#include <x86intrin.h>
+
+#define RDPMC_FIXED (1 << 30) /* return fixed counters */
+#define RDPMC_METRIC (1 << 29) /* return metric counters */
+
+#define FIXED_COUNTER_SLOTS 3
+#define METRIC_COUNTER_TOPDOWN_L1 0
+
+static inline uint64_t read_slots(void)
+{
+ return _rdpmc(RDPMC_FIXED | FIXED_COUNTER_SLOTS);
+}
+
+static inline uint64_t read_metrics(void)
+{
+ return _rdpmc(RDPMC_METRIC | METRIC_COUNTER_TOPDOWN_L1);
+}
+
+Then the program can be instrumented to read these metrics at different
+points.
+
+It's not a good idea to do this with code regions that are too short,
+as the parallelism and overlap in the CPU's execution of the program
+will cause too much measurement inaccuracy. For example, instrumenting
+individual basic blocks is definitely too fine grained.
+
+Decoding metrics values
+=======================
+
+The value reported by read_metrics() contains four 8-bit fields,
+each representing a scaled ratio for one Level 1 bottleneck category.
+All four fields add up to 0xff (= 100%).
+
+The binary ratios in the metric value can be converted to float ratios:
+
+#define GET_METRIC(m, i) (((m) >> (i*8)) & 0xff)
+
+#define TOPDOWN_RETIRING(val) ((float)GET_METRIC(val, 0) / 0xff)
+#define TOPDOWN_BAD_SPEC(val) ((float)GET_METRIC(val, 1) / 0xff)
+#define TOPDOWN_FE_BOUND(val) ((float)GET_METRIC(val, 2) / 0xff)
+#define TOPDOWN_BE_BOUND(val) ((float)GET_METRIC(val, 3) / 0xff)
+
+and then converted to percent for printing.
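+
+For instance, a small helper that prints the four Level 1 ratios from a
+raw metrics value, using the macros above:
+
+#include <stdio.h>
+
+static void print_metrics(uint64_t m)
+{
+	printf("Retiring %.2f%% Bad Speculation %.2f%% FE Bound %.2f%% BE Bound %.2f%%\n",
+	       TOPDOWN_RETIRING(m) * 100.,
+	       TOPDOWN_BAD_SPEC(m) * 100.,
+	       TOPDOWN_FE_BOUND(m) * 100.,
+	       TOPDOWN_BE_BOUND(m) * 100.);
+}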
+
+The ratios in the metric accumulate for the time the counter
+is enabled. For measuring programs it is often useful to measure
+specific sections. For this, deltas of the metrics are needed.
+
+This can be done by scaling the metrics with the slots counter
+read at the same time.
+
+Then it's possible to take deltas of these slots counts
+measured at different points, and determine the metrics
+for that time period.
+
+ slots_a = read_slots();
+ metric_a = read_metrics();
+
+ ... larger code region ...
+
+	slots_b = read_slots();
+	metric_b = read_metrics();
+
+ # compute scaled metrics for measurement a
+ retiring_slots_a = GET_METRIC(metric_a, 0) * slots_a
+ bad_spec_slots_a = GET_METRIC(metric_a, 1) * slots_a
+ fe_bound_slots_a = GET_METRIC(metric_a, 2) * slots_a
+ be_bound_slots_a = GET_METRIC(metric_a, 3) * slots_a
+
+ # compute delta scaled metrics between b and a
+ retiring_slots = GET_METRIC(metric_b, 0) * slots_b - retiring_slots_a
+ bad_spec_slots = GET_METRIC(metric_b, 1) * slots_b - bad_spec_slots_a
+ fe_bound_slots = GET_METRIC(metric_b, 2) * slots_b - fe_bound_slots_a
+ be_bound_slots = GET_METRIC(metric_b, 3) * slots_b - be_bound_slots_a
+
+Later the individual ratios for the measurement period can be recreated
+from these counts.
+
+ slots_delta = slots_b - slots_a
+ retiring_ratio = (float)retiring_slots / slots_delta
+ bad_spec_ratio = (float)bad_spec_slots / slots_delta
+ fe_bound_ratio = (float)fe_bound_slots / slots_delta
+	be_bound_ratio = (float)be_bound_slots / slots_delta
+
+ printf("Retiring %.2f%% Bad Speculation %.2f%% FE Bound %.2f%% BE Bound %.2f%%\n",
+ retiring_ratio * 100.,
+ bad_spec_ratio * 100.,
+ fe_bound_ratio * 100.,
+ be_bound_ratio * 100.);
+
+Resetting metrics counters
+==========================
+
+Since the individual metrics are only 8 bits wide they lose precision
+for short regions over time, because the number of cycles covered by
+each fraction bit shrinks. So the counters need to be reset regularly.
+
+When using the kernel perf API, the kernel resets the metrics on every
+read. So as long as reads happen at reasonable intervals (every few
+seconds) the precision is good.
+
+When using perf stat it is recommended to always use the -I option,
+with an interval no longer than a few seconds:
+
+ perf stat -I 1000 --topdown ...
+
+For user programs using RDPMC directly, the counters can
+be reset explicitly using an ioctl:
+
+ ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0);
+
+This "opens" a new measurement period.
+
+A program using RDPMC for TopDown should schedule such a reset
+regularly, e.g. every few seconds.
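+
+A minimal sketch of such a periodic reset, assuming the slots_fd and
+metrics_fd descriptors from the earlier example:
+
+#include <sys/ioctl.h>
+
+/* Start a fresh measurement period on both counters. */
+static void topdown_reset(int slots_fd, int metrics_fd)
+{
+	ioctl(slots_fd, PERF_EVENT_IOC_RESET, 0);
+	ioctl(metrics_fd, PERF_EVENT_IOC_RESET, 0);
+}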
+
+Limits on Ice Lake
+==================
+
+Four pseudo TopDown metric events are exposed to end users:
+topdown-retiring, topdown-bad-spec, topdown-fe-bound and topdown-be-bound.
+They can be used to collect the TopDown value under the following
+rules:
+- All the TopDown metric events must be in a group with the SLOTS event.
+- The SLOTS event must be the leader of the group.
+- The PERF_FORMAT_GROUP flag must be applied to each TopDown metric
+  event.
+
+The SLOTS event and the TopDown metric events can be counting members of
+a sampling read group. Since the SLOTS event must be the leader of a TopDown
+group, the second event of the group is the sampling event.
+For example, perf record -e '{slots, $sampling_event, topdown-retiring}:S'
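+
+For counting use, reading the whole group at once might look like the
+sketch below, assuming the events were opened with
+.read_format = PERF_FORMAT_GROUP and no other read_format flags set
+(layout as defined in linux/perf_event.h):
+
+struct group_read {
+	uint64_t nr;		/* number of events in the group */
+	uint64_t values[4];	/* SLOTS first, then the metric events */
+};
+
+struct group_read gr;
+if (read(slots_fd, &gr, sizeof(gr)) < 0)
+	... error ...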
+
+
+[1] https://software.intel.com/en-us/top-down-microarchitecture-analysis-method-win
+[2] https://github.com/andikleen/pmu-tools/wiki/toplev-manual
+[3] https://software.intel.com/en-us/intel-vtune-amplifier-xe
+[4] https://github.com/andikleen/pmu-tools/tree/master/jevents
+[5] https://sites.google.com/site/analysismethods/yasin-pubs
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 12a8204d63c6..6890fc4b063a 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -16,19 +16,35 @@ $(shell printf "" > $(OUTPUT).config-detected)
detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected)
detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected)
-CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
+CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
include $(srctree)/tools/scripts/Makefile.arch
$(call detected_var,SRCARCH)
NO_PERF_REGS := 1
-NO_SYSCALL_TABLE := 1
+
+ifneq ($(NO_SYSCALL_TABLE),1)
+ NO_SYSCALL_TABLE := 1
+
+ ifeq ($(SRCARCH),x86)
+ ifeq (${IS_64_BIT}, 1)
+ NO_SYSCALL_TABLE := 0
+ endif
+ else
+ ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390))
+ NO_SYSCALL_TABLE := 0
+ endif
+ endif
+
+ ifneq ($(NO_SYSCALL_TABLE),1)
+ CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT
+ endif
+endif
# Additional ARCH settings for ppc
ifeq ($(SRCARCH),powerpc)
NO_PERF_REGS := 0
- NO_SYSCALL_TABLE := 0
CFLAGS += -I$(OUTPUT)arch/powerpc/include/generated
LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
endif
@@ -37,7 +53,6 @@ endif
ifeq ($(SRCARCH),x86)
$(call detected,CONFIG_X86)
ifeq (${IS_64_BIT}, 1)
- NO_SYSCALL_TABLE := 0
CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -I$(OUTPUT)arch/x86/include/generated
ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma
@@ -55,7 +70,6 @@ endif
ifeq ($(SRCARCH),arm64)
NO_PERF_REGS := 0
- NO_SYSCALL_TABLE := 0
CFLAGS += -I$(OUTPUT)arch/arm64/include/generated
LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
endif
@@ -70,7 +84,6 @@ endif
ifeq ($(ARCH),s390)
NO_PERF_REGS := 0
- NO_SYSCALL_TABLE := 0
CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated
endif
@@ -78,10 +91,6 @@ ifeq ($(NO_PERF_REGS),0)
$(call detected,CONFIG_PERF_REGS)
endif
-ifneq ($(NO_SYSCALL_TABLE),1)
- CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT
-endif
-
# So far there's only x86 and arm libdw unwind support merged in perf.
# Disable it on all other architectures in case libdw unwind
# support is detected in system. Add supported architectures
@@ -214,14 +223,17 @@ endif
# Try different combinations to accommodate systems that only have
# python[2][-config] in weird combinations but always preferring
-# python2 and python2-config as per pep-0394. If we catch a
-# python[-config] in version 3, the version check will kill it.
-PYTHON2 := $(if $(call get-executable,python2),python2,python)
-override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2))
-PYTHON2_CONFIG := \
+# python2 and python2-config as per pep-0394. If python2 or python
+# aren't found, then python3 is used.
+PYTHON_AUTO := python
+PYTHON_AUTO := $(if $(call get-executable,python3),python3,$(PYTHON_AUTO))
+PYTHON_AUTO := $(if $(call get-executable,python),python,$(PYTHON_AUTO))
+PYTHON_AUTO := $(if $(call get-executable,python2),python2,$(PYTHON_AUTO))
+override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON_AUTO))
+PYTHON_AUTO_CONFIG := \
$(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config)
override PYTHON_CONFIG := \
- $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG))
+ $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO_CONFIG))
grep-libs = $(filter -l%,$(1))
strip-libs = $(filter-out -l%,$(1))
@@ -346,7 +358,7 @@ ifndef NO_BIONIC
endif
ifeq ($(feature-eventfd), 1)
- CFLAGS += -DHAVE_EVENTFD
+ CFLAGS += -DHAVE_EVENTFD_SUPPORT
endif
ifeq ($(feature-get_current_dir_name), 1)
@@ -387,6 +399,18 @@ else
NO_LIBBPF := 1
NO_JVMTI := 1
else
+ ifneq ($(filter s% -fsanitize=address%,$(EXTRA_CFLAGS),),)
+ ifneq ($(shell ldconfig -p | grep libasan >/dev/null 2>&1; echo $$?), 0)
+ msg := $(error No libasan found, please install libasan);
+ endif
+ endif
+
+ ifneq ($(filter s% -fsanitize=undefined%,$(EXTRA_CFLAGS),),)
+ ifneq ($(shell ldconfig -p | grep libubsan >/dev/null 2>&1; echo $$?), 0)
+ msg := $(error No libubsan found, please install libubsan);
+ endif
+ endif
+
ifneq ($(filter s% -static%,$(LDFLAGS),),)
msg := $(error No static glibc found, please install glibc-static);
else
@@ -462,10 +486,6 @@ ifndef NO_LIBELF
EXTLIBS += -lelf
$(call detected,CONFIG_LIBELF)
- ifeq ($(feature-libelf-mmap), 1)
- CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
- endif
-
ifeq ($(feature-libelf-getphdrnum), 1)
CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
endif
@@ -480,6 +500,14 @@ ifndef NO_LIBELF
CFLAGS += -DHAVE_ELF_GETSHDRSTRNDX_SUPPORT
endif
+ ifndef NO_LIBDEBUGINFOD
+ $(call feature_check,libdebuginfod)
+ ifeq ($(feature-libdebuginfod), 1)
+ CFLAGS += -DHAVE_DEBUGINFOD_SUPPORT
+ EXTLIBS += -ldebuginfod
+ endif
+ endif
+
ifndef NO_DWARF
ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled);
@@ -651,6 +679,7 @@ ifeq ($(NO_SYSCALL_TABLE),0)
$(call detected,CONFIG_TRACE)
else
ifndef NO_LIBAUDIT
+ $(call feature_check,libaudit)
ifneq ($(feature-libaudit), 1)
msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev);
NO_LIBAUDIT := 1
@@ -694,12 +723,14 @@ ifndef NO_SLANG
endif
endif
-ifndef NO_GTK2
+ifdef GTK2
FLAGS_GTK2=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
+ $(call feature_check,gtk2)
ifneq ($(feature-gtk2), 1)
msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev);
NO_GTK2 := 1
else
+ $(call feature_check,gtk2-infobar)
ifeq ($(feature-gtk2-infobar), 1)
GTK_CFLAGS := -DHAVE_GTK_INFO_BAR_SUPPORT
endif
@@ -795,6 +826,12 @@ else
$(call feature_check,disassembler-four-args)
endif
+ifeq ($(feature-libbfd-buildid), 1)
+ CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT
+else
+ msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available);
+endif
+
ifdef NO_DEMANGLE
CFLAGS += -DNO_DEMANGLE
else
@@ -1012,6 +1049,19 @@ ifdef LIBCLANGLLVM
endif
endif
+ifdef LIBPFM4
+ $(call feature_check,libpfm4)
+ ifeq ($(feature-libpfm4), 1)
+ CFLAGS += -DHAVE_LIBPFM
+ EXTLIBS += -lpfm
+ ASCIIDOC_EXTRA = -aHAVE_LIBPFM=1
+ $(call detected,CONFIG_LIBPFM4)
+ else
+ msg := $(warning libpfm4 not found, disables libpfm4 support. Please install libpfm4-dev);
+ NO_LIBPFM4 := 1
+ endif
+endif
+
# Among the variables below, these:
# perfexecdir
# perf_include_dir
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index d15a311408f1..7ce3f2e8b9c7 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -48,7 +48,7 @@ include ../scripts/utilities.mak
#
# Define NO_SLANG if you do not want TUI support.
#
-# Define NO_GTK2 if you do not want GTK+ GUI support.
+# Define GTK2 if you want GTK+ GUI support.
#
# Define NO_DEMANGLE if you do not want C++ symbol demangling.
#
@@ -118,6 +118,14 @@ include ../scripts/utilities.mak
#
# Define LIBBPF_DYNAMIC to enable libbpf dynamic linking.
#
+# Define NO_SYSCALL_TABLE=1 to disable the use of syscall id to/from name tables
+# generated from the kernel .tbl or unistd.h files and use, if available, libaudit
+# for doing the conversions to/from strings/id.
+#
+# Define LIBPFM4 to enable libpfm4 events extension.
+#
+# Define NO_LIBDEBUGINFOD if you do not want debuginfod support
+#
# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
@@ -188,7 +196,7 @@ AWK = awk
# non-config cases
config := 1
-NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf
+NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help
ifdef MAKECMDGOALS
ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
@@ -278,6 +286,7 @@ strip-libs = $(filter-out -l%,$(1))
ifneq ($(OUTPUT),)
TE_PATH=$(OUTPUT)
+ PLUGINS_PATH=$(OUTPUT)
BPF_PATH=$(OUTPUT)
SUBCMD_PATH=$(OUTPUT)
LIBPERF_PATH=$(OUTPUT)
@@ -288,6 +297,7 @@ else
endif
else
TE_PATH=$(TRACE_EVENT_DIR)
+ PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/
API_PATH=$(LIB_DIR)
BPF_PATH=$(BPF_DIR)
SUBCMD_PATH=$(SUBCMD_DIR)
@@ -297,7 +307,7 @@ endif
LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
export LIBTRACEEVENT
-LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)plugins/libtraceevent-dynamic-list
+LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list
#
# The static build has no dynsym table, so this does not work for
@@ -376,7 +386,7 @@ ifneq ($(OUTPUT),)
CFLAGS += -I$(OUTPUT)
endif
-ifndef NO_GTK2
+ifdef GTK2
ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so
GTK_IN := $(OUTPUT)gtk-in.o
endif
@@ -410,6 +420,7 @@ export INSTALL SHELL_PATH
SHELL = $(SHELL_PATH)
+beauty_linux_dir := $(srctree)/tools/perf/trace/beauty/include/linux/
linux_uapi_dir := $(srctree)/tools/include/uapi/linux
asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/
@@ -493,6 +504,12 @@ socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh
$(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl)
$(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@
+socket_arrays := $(beauty_outdir)/socket_arrays.c
+socket_tbl := $(srctree)/tools/perf/trace/beauty/socket.sh
+
+$(socket_arrays): $(beauty_linux_dir)/socket.h $(socket_tbl)
+ $(Q)$(SHELL) '$(socket_tbl)' $(beauty_linux_dir) > $@
+
vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c
vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux
vhost_virtio_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
@@ -520,6 +537,12 @@ mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh
$(mmap_flags_array): $(linux_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl)
$(Q)$(SHELL) '$(mmap_flags_tbl)' $(linux_uapi_dir) $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
+mremap_flags_array := $(beauty_outdir)/mremap_flags_array.c
+mremap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mremap_flags.sh
+
+$(mremap_flags_array): $(linux_uapi_dir)/mman.h $(mremap_flags_tbl)
+ $(Q)$(SHELL) '$(mremap_flags_tbl)' $(linux_uapi_dir) > $@
+
mount_flags_array := $(beauty_outdir)/mount_flags_array.c
mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh
@@ -532,6 +555,13 @@ move_mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/move_mount_flags.sh
$(move_mount_flags_array): $(linux_uapi_dir)/fs.h $(move_mount_flags_tbl)
$(Q)$(SHELL) '$(move_mount_flags_tbl)' $(linux_uapi_dir) > $@
+
+mmap_prot_array := $(beauty_outdir)/mmap_prot_array.c
+mmap_prot_tbl := $(srctree)/tools/perf/trace/beauty/mmap_prot.sh
+
+$(mmap_prot_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_prot_tbl)
+ $(Q)$(SHELL) '$(mmap_prot_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
+
prctl_option_array := $(beauty_outdir)/prctl_option_array.c
prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/
prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
@@ -689,9 +719,12 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(kcmp_type_array) \
$(kvm_ioctl_array) \
$(socket_ipproto_array) \
+ $(socket_arrays) \
$(vhost_virtio_ioctl_array) \
$(madvise_behavior_array) \
$(mmap_flags_array) \
+ $(mmap_prot_array) \
+ $(mremap_flags_array) \
$(mount_flags_array) \
$(move_mount_flags_array) \
$(perf_ioctl_array) \
@@ -756,10 +789,10 @@ $(LIBTRACEEVENT): FORCE
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
libtraceevent_plugins: FORCE
- $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
+ $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
- $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)plugins/libtraceevent-dynamic-list
+ $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list
$(LIBTRACEEVENT)-clean:
$(call QUIET_CLEAN, libtraceevent)
@@ -832,7 +865,7 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
# 'make doc' should call 'make -C Documentation all'
$(DOC_TARGETS):
- $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:doc=all)
+ $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:doc=all) ASCIIDOC_EXTRA=$(ASCIIDOC_EXTRA)
TAG_FOLDERS= . ../lib ../include
TAG_FILES= ../../include/uapi/linux/perf_event.h
@@ -868,7 +901,7 @@ check: $(OUTPUT)common-cmds.h
### Installation rules
-ifndef NO_GTK2
+ifdef GTK2
install-gtk: $(OUTPUT)libperf-gtk.so
$(call QUIET_INSTALL, 'GTK UI') \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \
@@ -943,6 +976,7 @@ install-tests: all install-gtk
$(call QUIET_INSTALL, tests) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
$(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+ $(INSTALL) tests/pe-file.exe* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
@@ -959,7 +993,7 @@ install-python_ext:
# 'make install-doc' should call 'make -C Documentation install'
$(INSTALL_DOC_TARGETS):
- $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:-doc=)
+ $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:-doc=) ASCIIDOC_EXTRA=$(ASCIIDOC_EXTRA)
### Cleaning rules
@@ -989,6 +1023,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(fspick_arrays) \
$(OUTPUT)$(madvise_behavior_array) \
$(OUTPUT)$(mmap_flags_array) \
+ $(OUTPUT)$(mmap_prot_array) \
+ $(OUTPUT)$(mremap_flags_array) \
$(OUTPUT)$(mount_flags_array) \
$(OUTPUT)$(move_mount_flags_array) \
$(OUTPUT)$(drm_ioctl_array) \
@@ -998,6 +1034,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(kvm_ioctl_array) \
$(OUTPUT)$(kcmp_type_array) \
$(OUTPUT)$(socket_ipproto_array) \
+ $(OUTPUT)$(socket_arrays) \
$(OUTPUT)$(vhost_virtio_ioctl_array) \
$(OUTPUT)$(perf_ioctl_array) \
$(OUTPUT)$(prctl_option_array) \
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
index 0a6e75b8777a..b187bddbd01a 100644
--- a/tools/perf/arch/arm/util/auxtrace.c
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -56,34 +56,33 @@ struct auxtrace_record
struct perf_pmu *cs_etm_pmu;
struct evsel *evsel;
bool found_etm = false;
- bool found_spe = false;
- static struct perf_pmu **arm_spe_pmus = NULL;
- static int nr_spes = 0;
+ struct perf_pmu *found_spe = NULL;
+ struct perf_pmu **arm_spe_pmus = NULL;
+ int nr_spes = 0;
int i = 0;
if (!evlist)
return NULL;
cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
-
- if (!arm_spe_pmus)
- arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
+ arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
evlist__for_each_entry(evlist, evsel) {
if (cs_etm_pmu &&
evsel->core.attr.type == cs_etm_pmu->type)
found_etm = true;
- if (!nr_spes)
+ if (!nr_spes || found_spe)
continue;
for (i = 0; i < nr_spes; i++) {
if (evsel->core.attr.type == arm_spe_pmus[i]->type) {
- found_spe = true;
+ found_spe = arm_spe_pmus[i];
break;
}
}
}
+ free(arm_spe_pmus);
if (found_etm && found_spe) {
pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n");
@@ -96,7 +95,7 @@ struct auxtrace_record
#if defined(__aarch64__)
if (found_spe)
- return arm_spe_recording_init(err, arm_spe_pmus[i]);
+ return arm_spe_recording_init(err, found_spe);
#endif
/*
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 941f814820b8..cad7bf783413 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -23,6 +23,7 @@
#include "../../util/event.h"
#include "../../util/evlist.h"
#include "../../util/evsel.h"
+#include "../../util/perf_api_probe.h"
#include "../../util/evsel_config.h"
#include "../../util/pmu.h"
#include "../../util/cs-etm.h"
@@ -215,7 +216,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
struct evsel *evsel)
{
char msg[BUFSIZ], path[PATH_MAX], *sink;
- struct perf_evsel_config_term *term;
+ struct evsel_config_term *term;
int ret = -EINVAL;
u32 hash;
@@ -223,7 +224,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
return 0;
list_for_each_entry(term, &evsel->config_terms, list) {
- if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG)
+ if (term->type != EVSEL__CONFIG_TERM_DRV_CFG)
continue;
sink = term->val.str;
@@ -232,7 +233,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
ret = perf_pmu__scan_file(pmu, path, "%x", &hash);
if (ret != 1) {
pr_err("failed to set sink \"%s\" on event %s with %d (%s)\n",
- sink, perf_evsel__name(evsel), errno,
+ sink, evsel__name(evsel), errno,
str_error_r(errno, msg, sizeof(msg)));
return ret;
}
@@ -242,10 +243,10 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
}
/*
- * No sink was provided on the command line - for _now_ treat
- * this as an error.
+ * No sink was provided on the command line - allow the CoreSight
+ * system to look for a default
*/
- return ret;
+ return 0;
}
static int cs_etm_recording_options(struct auxtrace_record *itr,
@@ -264,7 +265,8 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
ptr->evlist = evlist;
ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
- if (perf_can_record_switch_events())
+ if (!record_opts__no_switch_events(opts) &&
+ perf_can_record_switch_events())
opts->record_switch_events = true;
evlist__for_each_entry(evlist, evsel) {
@@ -401,7 +403,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
* when a context switch happened.
*/
if (!perf_cpu_map__empty(cpus)) {
- perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
+ evsel__set_sample_bit(cs_etm_evsel, CPU);
err = cs_etm_set_option(itr, cs_etm_evsel,
ETM_OPT_CTXTID | ETM_OPT_TS);
@@ -425,7 +427,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
/* In per-cpu case, always need the time of mmap events etc */
if (!perf_cpu_map__empty(cpus))
- perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ evsel__set_sample_bit(tracking_evsel, TIME);
}
out:
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index 5c13438c7bd4..b53294d74b01 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,6 +1,7 @@
perf-y += header.o
perf-y += machine.o
perf-y += perf_regs.o
+perf-y += tsc.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 27653be24447..e3593063b3d1 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -120,9 +120,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
*/
perf_evlist__to_front(evlist, arm_spe_evsel);
- perf_evsel__set_sample_bit(arm_spe_evsel, CPU);
- perf_evsel__set_sample_bit(arm_spe_evsel, TIME);
- perf_evsel__set_sample_bit(arm_spe_evsel, TID);
+ evsel__set_sample_bit(arm_spe_evsel, CPU);
+ evsel__set_sample_bit(arm_spe_evsel, TIME);
+ evsel__set_sample_bit(arm_spe_evsel, TID);
/* Add dummy event to keep tracking */
err = parse_events(evlist, "dummy:u", NULL);
@@ -134,9 +134,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
tracking_evsel->core.attr.freq = 0;
tracking_evsel->core.attr.sample_period = 1;
- perf_evsel__set_sample_bit(tracking_evsel, TIME);
- perf_evsel__set_sample_bit(tracking_evsel, CPU);
- perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
+ evsel__set_sample_bit(tracking_evsel, TIME);
+ evsel__set_sample_bit(tracking_evsel, CPU);
+ evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
return 0;
}
diff --git a/tools/perf/arch/arm64/util/tsc.c b/tools/perf/arch/arm64/util/tsc.c
new file mode 100644
index 000000000000..cc85bd9e73f1
--- /dev/null
+++ b/tools/perf/arch/arm64/util/tsc.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/types.h>
+
+#include "../../../util/tsc.h"
+
+u64 rdtsc(void)
+{
+ u64 val;
+
+ /*
+ * According to ARM DDI 0487F.c, from Armv8.0 to Armv8.5 inclusive, the
+ * system counter is at least 56 bits wide; from Armv8.6, the counter
+ * must be 64 bits wide. So the system counter could be less than 64
+	 * bits wide, and this case is indicated by the flag
+	 * 'cap_user_time_short' being true.
+ */
+ asm volatile("mrs %0, cntvct_el0" : "=r" (val));
+
+ return val;
+}
diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c b/tools/perf/arch/arm64/util/unwind-libdw.c
index 7623d85e77f3..a50941629649 100644
--- a/tools/perf/arch/arm64/util/unwind-libdw.c
+++ b/tools/perf/arch/arm64/util/unwind-libdw.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <elfutils/libdwfl.h>
-#include "../../util/unwind-libdw.h"
-#include "../../util/perf_regs.h"
-#include "../../util/event.h"
+#include "../../../util/unwind-libdw.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/event.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 35b61bfc1b1a..b168364ac050 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -32,7 +32,7 @@
18 spu oldstat sys_ni_syscall
19 common lseek sys_lseek compat_sys_lseek
20 common getpid sys_getpid
-21 nospu mount sys_mount compat_sys_mount
+21 nospu mount sys_mount
22 32 umount sys_oldumount
22 64 umount sys_ni_syscall
22 spu umount sys_ni_syscall
@@ -189,11 +189,11 @@
142 common _newselect sys_select compat_sys_select
143 common flock sys_flock
144 common msync sys_msync
-145 common readv sys_readv compat_sys_readv
-146 common writev sys_writev compat_sys_writev
+145 common readv sys_readv
+146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 nospu _sysctl sys_sysctl compat_sys_sysctl
+149 nospu _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
@@ -363,7 +363,7 @@
282 common unshare sys_unshare
283 common splice sys_splice
284 common tee sys_tee
-285 common vmsplice sys_vmsplice compat_sys_vmsplice
+285 common vmsplice sys_vmsplice
286 common openat sys_openat compat_sys_openat
287 common mkdirat sys_mkdirat
288 common mknodat sys_mknodat
@@ -427,8 +427,8 @@
336 common recv sys_recv compat_sys_recv
337 common recvfrom sys_recvfrom compat_sys_recvfrom
338 common shutdown sys_shutdown
-339 common setsockopt sys_setsockopt compat_sys_setsockopt
-340 common getsockopt sys_getsockopt compat_sys_getsockopt
+339 common setsockopt sys_setsockopt sys_setsockopt
+340 common getsockopt sys_getsockopt sys_getsockopt
341 common sendmsg sys_sendmsg compat_sys_sendmsg
342 common recvmsg sys_recvmsg compat_sys_recvmsg
343 32 recvmmsg sys_recvmmsg_time32 compat_sys_recvmmsg_time32
@@ -443,8 +443,8 @@
348 common syncfs sys_syncfs
349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
350 common setns sys_setns
-351 nospu process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
-352 nospu process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+351 nospu process_vm_readv sys_process_vm_readv
+352 nospu process_vm_writev sys_process_vm_writev
353 nospu finit_module sys_finit_module
354 nospu kcmp sys_kcmp
355 common sched_setattr sys_sched_setattr
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h
index e18a3556f5e3..63f3ac91049f 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -64,7 +64,13 @@ static const char *reg_names[] = {
[PERF_REG_POWERPC_DAR] = "dar",
[PERF_REG_POWERPC_DSISR] = "dsisr",
[PERF_REG_POWERPC_SIER] = "sier",
- [PERF_REG_POWERPC_MMCRA] = "mmcra"
+ [PERF_REG_POWERPC_MMCRA] = "mmcra",
+ [PERF_REG_POWERPC_MMCR0] = "mmcr0",
+ [PERF_REG_POWERPC_MMCR1] = "mmcr1",
+ [PERF_REG_POWERPC_MMCR2] = "mmcr2",
+ [PERF_REG_POWERPC_MMCR3] = "mmcr3",
+ [PERF_REG_POWERPC_SIER2] = "sier2",
+ [PERF_REG_POWERPC_SIER3] = "sier3",
};
static inline const char *perf_reg_name(int id)
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index e5c9504f8586..e86e210bf514 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -2,6 +2,7 @@ perf-y += header.o
perf-y += kvm-stat.o
perf-y += perf_regs.o
perf-y += mem-events.o
+perf-y += sym-handling.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h
index 54cfa0530e86..488f4339b83c 100644
--- a/tools/perf/arch/powerpc/util/book3s_hcalls.h
+++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h
@@ -84,7 +84,7 @@
{0x1a4, "H_CREATE_RPT"}, \
{0x1a8, "H_REMOVE_RPT"}, \
{0x1ac, "H_REGISTER_RPAGES"}, \
- {0x1b0, "H_DISABLE_AND_GETC"}, \
+ {0x1b0, "H_DISABLE_AND_GET"}, \
{0x1b4, "H_ERROR_DATA"}, \
{0x1b8, "H_GET_HCA_INFO"}, \
{0x1bc, "H_GET_PERF_COUNT"}, \
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index 3b4cdfc5efd6..58b2d610aadb 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -7,14 +7,9 @@
#include <string.h>
#include <linux/stringify.h>
#include "header.h"
-
-#define mfspr(rn) ({unsigned long rval; \
- asm volatile("mfspr %0," __stringify(rn) \
- : "=r" (rval)); rval; })
-
-#define SPRN_PVR 0x11F /* Processor Version Register */
-#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */
-#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */
+#include "utils_header.h"
+#include "metricgroup.h"
+#include <api/fs/fs.h>
int
get_cpuid(char *buffer, size_t sz)
@@ -44,3 +39,12 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
return bufp;
}
+
+int arch_get_runtimeparam(struct pmu_event *pe)
+{
+ int count;
+ char path[PATH_MAX] = "/devices/hv_24x7/interface/";
+
+ atoi(pe->aggr_mode) == PerChip ? strcat(path, "sockets") : strcat(path, "coresperchip");
+ return sysfs__read_int(path, &count) < 0 ? 1 : count;
+}
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c
index 16807269317c..eed9e5a42935 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -39,7 +39,7 @@ static void hcall_event_get_key(struct evsel *evsel,
struct event_key *key)
{
key->info = 0;
- key->key = perf_evsel__intval(evsel, sample, "req");
+ key->key = evsel__intval(evsel, sample, "req");
}
static const char *get_hcall_exit_reason(u64 exit_code)
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
index 0a5242900248..2b6d4704e3aa 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -6,9 +6,16 @@
#include "../../../util/perf_regs.h"
#include "../../../util/debug.h"
+#include "../../../util/event.h"
+#include "../../../util/header.h"
+#include "../../../perf-sys.h"
+#include "utils_header.h"
#include <linux/kernel.h>
+#define PVR_POWER9 0x004E
+#define PVR_POWER10 0x0080
+
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(r0, PERF_REG_POWERPC_R0),
SMPL_REG(r1, PERF_REG_POWERPC_R1),
@@ -55,6 +62,12 @@ const struct sample_reg sample_reg_masks[] = {
SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
SMPL_REG(sier, PERF_REG_POWERPC_SIER),
SMPL_REG(mmcra, PERF_REG_POWERPC_MMCRA),
+ SMPL_REG(mmcr0, PERF_REG_POWERPC_MMCR0),
+ SMPL_REG(mmcr1, PERF_REG_POWERPC_MMCR1),
+ SMPL_REG(mmcr2, PERF_REG_POWERPC_MMCR2),
+ SMPL_REG(mmcr3, PERF_REG_POWERPC_MMCR3),
+ SMPL_REG(sier2, PERF_REG_POWERPC_SIER2),
+ SMPL_REG(sier3, PERF_REG_POWERPC_SIER3),
SMPL_REG_END
};
@@ -163,3 +176,45 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
return SDT_ARG_VALID;
}
+
+uint64_t arch__intr_reg_mask(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .sample_type = PERF_SAMPLE_REGS_INTR,
+ .precise_ip = 1,
+ .disabled = 1,
+ .exclude_kernel = 1,
+ };
+ int fd;
+ u32 version;
+ u64 extended_mask = 0, mask = PERF_REGS_MASK;
+
+ /*
+ * Get the PVR value to set the extended
+ * mask specific to platform.
+ */
+ version = (((mfspr(SPRN_PVR)) >> 16) & 0xFFFF);
+ if (version == PVR_POWER9)
+ extended_mask = PERF_REG_PMU_MASK_300;
+ else if (version == PVR_POWER10)
+ extended_mask = PERF_REG_PMU_MASK_31;
+ else
+ return mask;
+
+ attr.sample_regs_intr = extended_mask;
+ attr.sample_period = 1;
+ event_attr_init(&attr);
+
+ /*
+ * check if the pmu supports perf extended regs, before
+ * returning the register mask to sample.
+ */
+ fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ if (fd != -1) {
+ close(fd);
+ mask |= extended_mask;
+ }
+ return mask;
+}
diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c
index abf2dbc7f829..7b2d96ec28e3 100644
--- a/tools/perf/arch/powerpc/util/unwind-libdw.c
+++ b/tools/perf/arch/powerpc/util/unwind-libdw.c
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <elfutils/libdwfl.h>
#include <linux/kernel.h>
-#include "../../util/unwind-libdw.h"
-#include "../../util/perf_regs.h"
-#include "../../util/event.h"
+#include "../../../util/unwind-libdw.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/event.h"
/* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. */
static const int special_regs[3][2] = {
diff --git a/tools/perf/arch/powerpc/util/utils_header.h b/tools/perf/arch/powerpc/util/utils_header.h
new file mode 100644
index 000000000000..5788eb1f1fe3
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/utils_header.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_UTIL_HEADER_H
+#define __PERF_UTIL_HEADER_H
+
+#include <linux/stringify.h>
+
+#define mfspr(rn) ({unsigned long rval; \
+ asm volatile("mfspr %0," __stringify(rn) \
+ : "=r" (rval)); rval; })
+
+#define SPRN_PVR 0x11F /* Processor Version Register */
+#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */
+#define PVR_REV(pvr)	(((pvr) >> 0) & 0xFFFF)	/* Revision field */
+
+#endif /* __PERF_UTIL_HEADER_H */
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index b38d48464368..d2fa9647ce25 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -26,7 +26,7 @@
16 32 lchown - compat_sys_s390_lchown16
19 common lseek sys_lseek compat_sys_lseek
20 common getpid sys_getpid sys_getpid
-21 common mount sys_mount compat_sys_mount
+21 common mount sys_mount
22 common umount sys_oldumount compat_sys_oldumount
23 32 setuid - compat_sys_s390_setuid16
24 32 getuid - compat_sys_s390_getuid16
@@ -134,11 +134,11 @@
142 64 select sys_select -
143 common flock sys_flock sys_flock
144 common msync sys_msync compat_sys_msync
-145 common readv sys_readv compat_sys_readv
-146 common writev sys_writev compat_sys_writev
+145 common readv sys_readv
+146 common writev sys_writev
147 common getsid sys_getsid sys_getsid
148 common fdatasync sys_fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl compat_sys_sysctl
+149 common _sysctl - -
150 common mlock sys_mlock compat_sys_mlock
151 common munlock sys_munlock compat_sys_munlock
152 common mlockall sys_mlockall sys_mlockall
@@ -316,7 +316,7 @@
306 common splice sys_splice compat_sys_splice
307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range
308 common tee sys_tee compat_sys_tee
-309 common vmsplice sys_vmsplice compat_sys_vmsplice
+309 common vmsplice sys_vmsplice sys_vmsplice
310 common move_pages sys_move_pages compat_sys_move_pages
311 common getcpu sys_getcpu compat_sys_getcpu
312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
@@ -347,8 +347,8 @@
337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
338 common syncfs sys_syncfs sys_syncfs
339 common setns sys_setns sys_setns
-340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
-341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv
+341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev
342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr
343 common kcmp sys_kcmp compat_sys_kcmp
344 common finit_module sys_finit_module compat_sys_finit_module
@@ -372,8 +372,8 @@
362 common connect sys_connect compat_sys_connect
363 common listen sys_listen sys_listen
364 common accept4 sys_accept4 compat_sys_accept4
-365 common getsockopt sys_getsockopt compat_sys_getsockopt
-366 common setsockopt sys_setsockopt compat_sys_setsockopt
+365 common getsockopt sys_getsockopt sys_getsockopt
+366 common setsockopt sys_setsockopt sys_setsockopt
367 common getsockname sys_getsockname compat_sys_getsockname
368 common getpeername sys_getpeername compat_sys_getpeername
369 common sendto sys_sendto compat_sys_sendto
diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c
index 0fd4e9f49ed0..34da89ced29a 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -30,7 +30,7 @@ static void event_icpt_insn_get_key(struct evsel *evsel,
{
unsigned long insn;
- insn = perf_evsel__intval(evsel, sample, "instruction");
+ insn = evsel__intval(evsel, sample, "instruction");
key->key = icpt_insn_decoder(insn);
key->exit_reasons = sie_icpt_insn_codes;
}
@@ -39,7 +39,7 @@ static void event_sigp_get_key(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
{
- key->key = perf_evsel__intval(evsel, sample, "order_code");
+ key->key = evsel__intval(evsel, sample, "order_code");
key->exit_reasons = sie_sigp_order_codes;
}
@@ -47,7 +47,7 @@ static void event_diag_get_key(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
{
- key->key = perf_evsel__intval(evsel, sample, "code");
+ key->key = evsel__intval(evsel, sample, "code");
key->exit_reasons = sie_diagnose_codes;
}
@@ -55,7 +55,7 @@ static void event_icpt_prog_get_key(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
{
- key->key = perf_evsel__intval(evsel, sample, "code");
+ key->key = evsel__intval(evsel, sample, "code");
key->exit_reasons = sie_icpt_prog_codes;
}
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index 7eb5621c021d..24ea12ec7e02 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -110,6 +110,7 @@ static struct ins x86__instructions[] = {
{ .name = "por", .ops = &mov_ops, },
{ .name = "rclb", .ops = &mov_ops, },
{ .name = "rcll", .ops = &mov_ops, },
+ { .name = "ret", .ops = &ret_ops, },
{ .name = "retq", .ops = &ret_ops, },
{ .name = "sbb", .ops = &mov_ops, },
{ .name = "sbbl", .ops = &mov_ops, },
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 37b844f839bc..347809649ba2 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -164,7 +164,7 @@
153 common vhangup sys_vhangup
154 common modify_ldt sys_modify_ldt
155 common pivot_root sys_pivot_root
-156 64 _sysctl sys_sysctl
+156 64 _sysctl sys_ni_syscall
157 common prctl sys_prctl
158 common arch_prctl sys_arch_prctl
159 common adjtimex sys_adjtimex
@@ -357,8 +357,10 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 common clone3 sys_clone3
+436 common close_range sys_close_range
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -369,8 +371,8 @@
512 x32 rt_sigaction compat_sys_rt_sigaction
513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn
514 x32 ioctl compat_sys_ioctl
-515 x32 readv compat_sys_readv
-516 x32 writev compat_sys_writev
+515 x32 readv sys_readv
+516 x32 writev sys_writev
517 x32 recvfrom compat_sys_recvfrom
518 x32 sendmsg compat_sys_sendmsg
519 x32 recvmsg compat_sys_recvmsg
@@ -386,17 +388,17 @@
529 x32 waitid compat_sys_waitid
530 x32 set_robust_list compat_sys_set_robust_list
531 x32 get_robust_list compat_sys_get_robust_list
-532 x32 vmsplice compat_sys_vmsplice
+532 x32 vmsplice sys_vmsplice
533 x32 move_pages compat_sys_move_pages
534 x32 preadv compat_sys_preadv64
535 x32 pwritev compat_sys_pwritev64
536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
537 x32 recvmmsg compat_sys_recvmmsg_time64
538 x32 sendmmsg compat_sys_sendmmsg
-539 x32 process_vm_readv compat_sys_process_vm_readv
-540 x32 process_vm_writev compat_sys_process_vm_writev
-541 x32 setsockopt compat_sys_setsockopt
-542 x32 getsockopt compat_sys_getsockopt
+539 x32 process_vm_readv sys_process_vm_readv
+540 x32 process_vm_writev sys_process_vm_writev
+541 x32 setsockopt sys_setsockopt
+542 x32 getsockopt sys_getsockopt
543 x32 io_setup compat_sys_io_setup
544 x32 io_submit compat_sys_io_submit
545 x32 execveat compat_sys_execveat
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
index ef43be9b6ec2..4e40402a4f81 100644
--- a/tools/perf/arch/x86/tests/dwarf-unwind.c
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -55,6 +55,14 @@ int test__arch_unwind_sample(struct perf_sample *sample,
return -1;
}
+#ifdef MEMORY_SANITIZER
+ /*
+ * Assignments to buf in the assembly function perf_regs_load aren't
+ * seen by memory sanitizer. Zero the memory to convince memory
+ * sanitizer the memory is initialized.
+ */
+ memset(buf, 0, sizeof(u64) * PERF_REGS_MAX);
+#endif
perf_regs_load(buf);
regs->abi = PERF_SAMPLE_REGS_ABI;
regs->regs = buf;
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
index 909ead08a6f6..026d32ed078e 100644
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -130,13 +130,11 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
goto next_event;
if (strcmp(event->comm.comm, comm1) == 0) {
- CHECK__(perf_evsel__parse_sample(evsel, event,
- &sample));
+ CHECK__(evsel__parse_sample(evsel, event, &sample));
comm1_time = sample.time;
}
if (strcmp(event->comm.comm, comm2) == 0) {
- CHECK__(perf_evsel__parse_sample(evsel, event,
- &sample));
+ CHECK__(evsel__parse_sample(evsel, event, &sample));
comm2_time = sample.time;
}
next_event:
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 47f9c56e744f..347c39b960eb 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -3,7 +3,7 @@ perf-y += tsc.o
perf-y += pmu.o
perf-y += kvm-stat.o
perf-y += perf_regs.o
-perf-y += group.o
+perf-y += topdown.o
perf-y += machine.o
perf-y += event.o
diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c
deleted file mode 100644
index e2f8034b8973..000000000000
--- a/tools/perf/arch/x86/util/group.c
+++ /dev/null
@@ -1,28 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include "api/fs/fs.h"
-#include "util/group.h"
-
-/*
- * Check whether we can use a group for top down.
- * Without a group may get bad results due to multiplexing.
- */
-bool arch_topdown_check_group(bool *warn)
-{
- int n;
-
- if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0)
- return false;
- if (n > 0) {
- *warn = true;
- return false;
- }
- return true;
-}
-
-void arch_topdown_group_warn(void)
-{
- fprintf(stderr,
- "nmi_watchdog enabled with topdown. May give wrong results.\n"
- "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n");
-}
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 09f93800bffd..0dc09b5809c1 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -224,7 +224,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
* AUX event.
*/
if (!perf_cpu_map__empty(cpus))
- perf_evsel__set_sample_bit(intel_bts_evsel, CPU);
+ evsel__set_sample_bit(intel_bts_evsel, CPU);
}
/* Add dummy event to keep tracking */
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 1643aed8c4c8..082e5f2a415a 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -25,6 +25,7 @@
#include "../../../util/pmu.h"
#include "../../../util/debug.h"
#include "../../../util/auxtrace.h"
+#include "../../../util/perf_api_probe.h"
#include "../../../util/record.h"
#include "../../../util/target.h"
#include "../../../util/tsc.h"
@@ -58,7 +59,8 @@ struct intel_pt_recording {
size_t priv_size;
};
-static int intel_pt_parse_terms_with_default(struct list_head *formats,
+static int intel_pt_parse_terms_with_default(const char *pmu_name,
+ struct list_head *formats,
const char *str,
u64 *config)
{
@@ -77,7 +79,8 @@ static int intel_pt_parse_terms_with_default(struct list_head *formats,
goto out_free;
attr.config = *config;
- err = perf_pmu__config_terms(formats, &attr, terms, true, NULL);
+ err = perf_pmu__config_terms(pmu_name, formats, &attr, terms, true,
+ NULL);
if (err)
goto out_free;
@@ -87,11 +90,12 @@ out_free:
return err;
}
-static int intel_pt_parse_terms(struct list_head *formats, const char *str,
- u64 *config)
+static int intel_pt_parse_terms(const char *pmu_name, struct list_head *formats,
+ const char *str, u64 *config)
{
*config = 0;
- return intel_pt_parse_terms_with_default(formats, str, config);
+ return intel_pt_parse_terms_with_default(pmu_name, formats, str,
+ config);
}
static u64 intel_pt_masked_bits(u64 mask, u64 bits)
@@ -228,7 +232,8 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
- intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);
+ intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, buf,
+ &config);
return config;
}
@@ -336,13 +341,16 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
if (priv_size != ptr->priv_size)
return -EINVAL;
- intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
- intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
- &noretcomp_bit);
- intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit);
+ intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
+ "tsc", &tsc_bit);
+ intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
+ "noretcomp", &noretcomp_bit);
+ intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
+ "mtc", &mtc_bit);
mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
"mtc_period");
- intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit);
+ intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
+ "cyc", &cyc_bit);
intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);
@@ -420,8 +428,8 @@ static int intel_pt_track_switches(struct evlist *evlist)
evsel = evlist__last(evlist);
- perf_evsel__set_sample_bit(evsel, CPU);
- perf_evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, CPU);
+ evsel__set_sample_bit(evsel, TIME);
evsel->core.system_wide = true;
evsel->no_aux_samples = true;
@@ -555,10 +563,9 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
static void intel_pt_config_sample_mode(struct perf_pmu *intel_pt_pmu,
struct evsel *evsel)
{
- struct perf_evsel_config_term *term;
u64 user_bits = 0, bits;
+ struct evsel_config_term *term = evsel__get_config_term(evsel, CFG_CHG);
- term = perf_evsel__get_config_term(evsel, CFG_CHG);
if (term)
user_bits = term->val.cfg_chg;
@@ -634,6 +641,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
}
evsel->core.attr.freq = 0;
evsel->core.attr.sample_period = 1;
+ evsel->no_aux_samples = true;
intel_pt_evsel = evsel;
opts->full_auxtrace = true;
}
@@ -768,7 +776,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
}
}
- intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
+ intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
+ "tsc", &tsc_bit);
if (opts->full_auxtrace && (intel_pt_evsel->core.attr.config & tsc_bit))
have_timing_info = true;
@@ -779,7 +788,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* Per-cpu recording needs sched_switch events to distinguish different
* threads.
*/
- if (have_timing_info && !perf_cpu_map__empty(cpus)) {
+ if (have_timing_info && !perf_cpu_map__empty(cpus) &&
+ !record_opts__no_switch_events(opts)) {
if (perf_can_record_switch_events()) {
bool cpu_wide = !target__none(&opts->target) &&
!target__has_task(&opts->target);
@@ -801,10 +811,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
switch_evsel->no_aux_samples = true;
switch_evsel->immediate = true;
- perf_evsel__set_sample_bit(switch_evsel, TID);
- perf_evsel__set_sample_bit(switch_evsel, TIME);
- perf_evsel__set_sample_bit(switch_evsel, CPU);
- perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);
+ evsel__set_sample_bit(switch_evsel, TID);
+ evsel__set_sample_bit(switch_evsel, TIME);
+ evsel__set_sample_bit(switch_evsel, CPU);
+ evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);
opts->record_switch_events = false;
ptr->have_sched_switch = 3;
@@ -827,6 +837,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
}
}
+ if (have_timing_info && !intel_pt_evsel->core.attr.exclude_kernel &&
+ perf_can_record_text_poke_events() && perf_can_record_cpu_wide())
+ opts->text_poke = true;
+
if (intel_pt_evsel) {
/*
* To obtain the auxtrace buffer file descriptor, the auxtrace
@@ -838,7 +852,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* AUX event.
*/
if (!perf_cpu_map__empty(cpus))
- perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
+ evsel__set_sample_bit(intel_pt_evsel, CPU);
}
/* Add dummy event to keep tracking */
@@ -862,11 +876,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
/* In per-cpu case, always need the time of mmap events etc */
if (!perf_cpu_map__empty(cpus)) {
- perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ evsel__set_sample_bit(tracking_evsel, TIME);
/* And the CPU for switch events */
- perf_evsel__set_sample_bit(tracking_evsel, CPU);
+ evsel__set_sample_bit(tracking_evsel, CPU);
}
- perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
+ evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
}
/*
@@ -874,7 +888,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* per-cpu with no sched_switch (except workload-only).
*/
if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) &&
- !target__none(&opts->target))
+ !target__none(&opts->target) &&
+ !intel_pt_evsel->core.attr.exclude_user)
ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
return 0;
diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c
index c0775c39227f..072920475b65 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -31,8 +31,8 @@ const char *kvm_exit_trace = "kvm:kvm_exit";
static void mmio_event_get_key(struct evsel *evsel, struct perf_sample *sample,
struct event_key *key)
{
- key->key = perf_evsel__intval(evsel, sample, "gpa");
- key->info = perf_evsel__intval(evsel, sample, "type");
+ key->key = evsel__intval(evsel, sample, "gpa");
+ key->info = evsel__intval(evsel, sample, "type");
}
#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
@@ -48,7 +48,7 @@ static bool mmio_event_begin(struct evsel *evsel,
/* MMIO write begin event in kernel. */
if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
- perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) {
+ evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) {
mmio_event_get_key(evsel, sample, key);
return true;
}
@@ -65,7 +65,7 @@ static bool mmio_event_end(struct evsel *evsel, struct perf_sample *sample,
/* MMIO read end event in kernel. */
if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
- perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) {
+ evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) {
mmio_event_get_key(evsel, sample, key);
return true;
}
@@ -94,8 +94,8 @@ static void ioport_event_get_key(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
{
- key->key = perf_evsel__intval(evsel, sample, "port");
- key->info = perf_evsel__intval(evsel, sample, "rw");
+ key->key = evsel__intval(evsel, sample, "port");
+ key->info = evsel__intval(evsel, sample, "rw");
}
static bool ioport_event_begin(struct evsel *evsel,
diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
new file mode 100644
index 000000000000..2f3d96aa92a5
--- /dev/null
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "api/fs/fs.h"
+#include "util/pmu.h"
+#include "util/topdown.h"
+
+/*
+ * Check whether we can use a group for top down.
+ * Without a group, multiplexing may give bad results.
+ */
+bool arch_topdown_check_group(bool *warn)
+{
+ int n;
+
+ if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0)
+ return false;
+ if (n > 0) {
+ *warn = true;
+ return false;
+ }
+ return true;
+}
+
+void arch_topdown_group_warn(void)
+{
+ fprintf(stderr,
+ "nmi_watchdog enabled with topdown. May give wrong results.\n"
+ "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n");
+}
+
+#define TOPDOWN_SLOTS 0x0400
+
+static bool is_topdown_slots_event(struct evsel *counter)
+{
+ if (!counter->pmu_name)
+ return false;
+
+ if (strcmp(counter->pmu_name, "cpu"))
+ return false;
+
+ if (counter->core.attr.config == TOPDOWN_SLOTS)
+ return true;
+
+ return false;
+}
+
+/*
+ * Check whether a topdown group supports sample-read.
+ *
+ * Only the Topdown metric supports sample-read. The slots
+ * event must be the leader of the topdown group.
+ */
+
+bool arch_topdown_sample_read(struct evsel *leader)
+{
+ if (!pmu_have_event("cpu", "slots"))
+ return false;
+
+ if (is_topdown_slots_event(leader))
+ return true;
+
+ return false;
+}
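The group check above boils down to a single sysctl read: if the NMI watchdog owns a counter, grouping the top-down events is refused. A minimal standalone sketch of the same probe follows; it reads the procfs file directly, since perf's sysctl__read_int() helper is not available outside the tree, and the function name here is illustrative.

#include <stdio.h>

static int nmi_watchdog_enabled(void)
{
	FILE *f = fopen("/proc/sys/kernel/nmi_watchdog", "r");
	int n = 0;

	if (!f)
		return -1;		/* unknown: sysctl not present */
	if (fscanf(f, "%d", &n) != 1)
		n = -1;
	fclose(f);
	return n;			/* 1: enabled, 0: disabled */
}

int main(void)
{
	int n = nmi_watchdog_enabled();

	if (n > 0)
		printf("nmi_watchdog on: top-down grouping would be refused\n");
	else if (n == 0)
		printf("nmi_watchdog off: grouping is safe\n");
	else
		printf("could not read nmi_watchdog state\n");
	return 0;
}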
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index 2f55afb14e1f..559365f8fe52 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -1,45 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
-#include <stdbool.h>
-#include <errno.h>
-
-#include <linux/stddef.h>
-#include <linux/perf_event.h>
-
#include <linux/types.h>
-#include <asm/barrier.h>
-#include "../../../util/debug.h"
-#include "../../../util/event.h"
-#include "../../../util/synthetic-events.h"
-#include "../../../util/tsc.h"
-
-int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
- struct perf_tsc_conversion *tc)
-{
- bool cap_user_time_zero;
- u32 seq;
- int i = 0;
-
- while (1) {
- seq = pc->lock;
- rmb();
- tc->time_mult = pc->time_mult;
- tc->time_shift = pc->time_shift;
- tc->time_zero = pc->time_zero;
- cap_user_time_zero = pc->cap_user_time_zero;
- rmb();
- if (pc->lock == seq && !(seq & 1))
- break;
- if (++i > 10000) {
- pr_debug("failed to get perf_event_mmap_page lock\n");
- return -EINVAL;
- }
- }
- if (!cap_user_time_zero)
- return -EOPNOTSUPP;
-
- return 0;
-}
+#include "../../../util/tsc.h"
u64 rdtsc(void)
{
@@ -49,36 +11,3 @@ u64 rdtsc(void)
return low | ((u64)high) << 32;
}
-
-int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
- struct perf_tool *tool,
- perf_event__handler_t process,
- struct machine *machine)
-{
- union perf_event event = {
- .time_conv = {
- .header = {
- .type = PERF_RECORD_TIME_CONV,
- .size = sizeof(struct perf_record_time_conv),
- },
- },
- };
- struct perf_tsc_conversion tc;
- int err;
-
- if (!pc)
- return 0;
- err = perf_read_tsc_conversion(pc, &tc);
- if (err == -EOPNOTSUPP)
- return 0;
- if (err)
- return err;
-
- pr_debug2("Synthesizing TSC conversion information\n");
-
- event.time_conv.time_mult = tc.time_mult;
- event.time_conv.time_shift = tc.time_shift;
- event.time_conv.time_zero = tc.time_zero;
-
- return process(tool, &event, NULL, machine);
-}
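With this change the x86 tsc.c keeps only rdtsc(); the seqlock reader and the PERF_RECORD_TIME_CONV synthesis move to generic code. For reference, the mult/shift/zero parameters those helpers carry convert a TSC value to perf time roughly as sketched below; the quotient/remainder split mirrors the shape of the conversion in tools/perf/util/tsc.c and avoids overflowing the 64-bit multiply. The struct and the sample values here are illustrative.

#include <stdint.h>
#include <stdio.h>

struct tsc_conv {
	uint16_t time_shift;
	uint32_t time_mult;
	uint64_t time_zero;
};

static uint64_t tsc_to_ns(uint64_t cyc, const struct tsc_conv *tc)
{
	uint64_t quot = cyc >> tc->time_shift;
	uint64_t rem  = cyc & (((uint64_t)1 << tc->time_shift) - 1);

	/* time = zero + cyc * mult / 2^shift, computed without overflow */
	return tc->time_zero + quot * tc->time_mult +
	       ((rem * tc->time_mult) >> tc->time_shift);
}

int main(void)
{
	/* illustrative parameters: mult/2^shift == 0.5 ns per cycle (2 GHz) */
	struct tsc_conv tc = { .time_shift = 10, .time_mult = 512, .time_zero = 0 };

	printf("%llu ns\n", (unsigned long long)tsc_to_ns(4000, &tc));
	return 0;
}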
diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c
index fda8f4206ee4..eea2bf87232b 100644
--- a/tools/perf/arch/x86/util/unwind-libdw.c
+++ b/tools/perf/arch/x86/util/unwind-libdw.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <elfutils/libdwfl.h>
-#include "../../util/unwind-libdw.h"
-#include "../../util/perf_regs.h"
-#include "../../util/event.h"
+#include "../../../util/unwind-libdw.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/event.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index e4e321b6f883..e43f46931b41 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -1,16 +1,19 @@
perf-y += sched-messaging.o
perf-y += sched-pipe.o
+perf-y += syscall.o
perf-y += mem-functions.o
perf-y += futex-hash.o
perf-y += futex-wake.o
perf-y += futex-wake-parallel.o
perf-y += futex-requeue.o
perf-y += futex-lock-pi.o
-
perf-y += epoll-wait.o
perf-y += epoll-ctl.o
+perf-y += synthesize.o
+perf-y += kallsyms-parse.o
+perf-y += find-bit-bench.o
+perf-y += inject-buildid.o
-perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 4aa6de1aa67d..eac36afab2b3 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -33,17 +33,21 @@ extern struct timeval bench__start, bench__end, bench__runtime;
int bench_numa(int argc, const char **argv);
int bench_sched_messaging(int argc, const char **argv);
int bench_sched_pipe(int argc, const char **argv);
+int bench_syscall_basic(int argc, const char **argv);
int bench_mem_memcpy(int argc, const char **argv);
int bench_mem_memset(int argc, const char **argv);
+int bench_mem_find_bit(int argc, const char **argv);
int bench_futex_hash(int argc, const char **argv);
int bench_futex_wake(int argc, const char **argv);
int bench_futex_wake_parallel(int argc, const char **argv);
int bench_futex_requeue(int argc, const char **argv);
/* pi futexes */
int bench_futex_lock_pi(int argc, const char **argv);
-
int bench_epoll_wait(int argc, const char **argv);
int bench_epoll_ctl(int argc, const char **argv);
+int bench_synthesize(int argc, const char **argv);
+int bench_kallsyms_parse(int argc, const char **argv);
+int bench_inject_build_id(int argc, const char **argv);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index cadc18d42aa4..ca2d591aad8a 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -5,7 +5,7 @@
* Benchmark the various operations allowed for epoll_ctl(2).
* The idea is to concurrently stress a single epoll instance
*/
-#ifdef HAVE_EVENTFD
+#ifdef HAVE_EVENTFD_SUPPORT
/* For the CLR_() macros */
#include <string.h>
#include <pthread.h>
@@ -412,4 +412,4 @@ int bench_epoll_ctl(int argc, const char **argv)
errmem:
err(EXIT_FAILURE, "calloc");
}
-#endif // HAVE_EVENTFD
+#endif // HAVE_EVENTFD_SUPPORT
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index f938c585d512..75dca9773186 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#ifdef HAVE_EVENTFD
+#ifdef HAVE_EVENTFD_SUPPORT
/*
* Copyright (C) 2018 Davidlohr Bueso.
*
@@ -519,7 +519,8 @@ int bench_epoll_wait(int argc, const char **argv)
qsort(worker, nthreads, sizeof(struct worker), cmpworker);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+ unsigned long t = bench__runtime.tv_sec > 0 ?
+ worker[i].ops / bench__runtime.tv_sec : 0;
update_stats(&throughput_stats, t);
@@ -539,4 +540,4 @@ int bench_epoll_wait(int argc, const char **argv)
errmem:
err(EXIT_FAILURE, "calloc");
}
-#endif // HAVE_EVENTFD
+#endif // HAVE_EVENTFD_SUPPORT
diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c
new file mode 100644
index 000000000000..73b5bcc5946a
--- /dev/null
+++ b/tools/perf/bench/find-bit-bench.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark find_next_bit and related bit operations.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include <stdlib.h>
+#include "bench.h"
+#include "../util/stat.h"
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+
+static unsigned int outer_iterations = 5;
+static unsigned int inner_iterations = 100000;
+
+static const struct option options[] = {
+ OPT_UINTEGER('i', "outer-iterations", &outer_iterations,
+ "Number of outer iterations used"),
+ OPT_UINTEGER('j', "inner-iterations", &inner_iterations,
+ "Number of inner iterations used"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench mem find_bit <options>",
+ NULL
+};
+
+static unsigned int accumulator;
+static unsigned int use_of_val;
+
+static noinline void workload(int val)
+{
+ use_of_val += val;
+ accumulator++;
+}
+
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__)
+static bool asm_test_bit(long nr, const unsigned long *addr)
+{
+ bool oldbit;
+
+ asm volatile("bt %2,%1"
+ : "=@ccc" (oldbit)
+ : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
+
+ return oldbit;
+}
+#else
+#define asm_test_bit test_bit
+#endif
+
+static int do_for_each_set_bit(unsigned int num_bits)
+{
+ unsigned long *to_test = bitmap_alloc(num_bits);
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ struct stats fb_time_stats, tb_time_stats;
+ double time_average, time_stddev;
+ unsigned int bit, i, j;
+ unsigned int set_bits, skip;
+ unsigned int old;
+
+ init_stats(&fb_time_stats);
+ init_stats(&tb_time_stats);
+
+ for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) {
+ bitmap_zero(to_test, num_bits);
+ skip = num_bits / set_bits;
+ for (i = 0; i < num_bits; i += skip)
+ set_bit(i, to_test);
+
+ for (i = 0; i < outer_iterations; i++) {
+ old = accumulator;
+ gettimeofday(&start, NULL);
+ for (j = 0; j < inner_iterations; j++) {
+ for_each_set_bit(bit, to_test, num_bits)
+ workload(bit);
+ }
+ gettimeofday(&end, NULL);
+ assert(old + (inner_iterations * set_bits) == accumulator);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&fb_time_stats, runtime_us);
+
+ old = accumulator;
+ gettimeofday(&start, NULL);
+ for (j = 0; j < inner_iterations; j++) {
+ for (bit = 0; bit < num_bits; bit++) {
+ if (asm_test_bit(bit, to_test))
+ workload(bit);
+ }
+ }
+ gettimeofday(&end, NULL);
+ assert(old + (inner_iterations * set_bits) == accumulator);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&tb_time_stats, runtime_us);
+ }
+
+ printf("%d operations %d bits set of %d bits\n",
+ inner_iterations, set_bits, num_bits);
+ time_average = avg_stats(&fb_time_stats);
+ time_stddev = stddev_stats(&fb_time_stats);
+ printf(" Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n",
+ time_average, time_stddev);
+ time_average = avg_stats(&tb_time_stats);
+ time_stddev = stddev_stats(&tb_time_stats);
+ printf(" Average test_bit loop took: %.3f usec (+- %.3f usec)\n",
+ time_average, time_stddev);
+
+ if (use_of_val == accumulator) /* Try to avoid compiler tricks. */
+ printf("\n");
+ }
+ bitmap_free(to_test);
+ return 0;
+}
+
+int bench_mem_find_bit(int argc, const char **argv)
+{
+ int err = 0, i;
+
+ argc = parse_options(argc, argv, options, bench_usage, 0);
+ if (argc) {
+ usage_with_options(bench_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ for (i = 1; i <= 2048; i <<= 1)
+ do_for_each_set_bit(i);
+
+ return err;
+}
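The benchmark contrasts for_each_set_bit(), which jumps from one set bit to the next, with a loop that probes every position. On a single word the difference reduces to the count-trailing-zeros trick below; this is a simplified sketch, not the kernel macro, and it assumes a 64-bit unsigned long and the GCC/Clang __builtin_ctzl() builtin.

#include <stdio.h>

static void workload(int bit)
{
	printf("visited bit %d\n", bit);
}

int main(void)
{
	/* assumes 64-bit unsigned long */
	unsigned long bits = (1UL << 0) | (1UL << 3) | (1UL << 63);

	/* test_bit-style loop: probe all 64 positions */
	for (int bit = 0; bit < 64; bit++)
		if (bits & (1UL << bit))
			workload(bit);

	/* for_each_set_bit-style: clear the lowest set bit, jump via ctz */
	for (unsigned long w = bits; w; w &= w - 1)
		workload(__builtin_ctzl(w));
	return 0;
}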
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 65eebe06c04d..915bf3da7ce2 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -205,7 +205,8 @@ int bench_futex_hash(int argc, const char **argv)
pthread_mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+ unsigned long t = bench__runtime.tv_sec > 0 ?
+ worker[i].ops / bench__runtime.tv_sec : 0;
update_stats(&throughput_stats, t);
if (!silent) {
if (nfutexes == 1)
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 89fd8f325f38..bb25d8beb3b8 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -211,7 +211,8 @@ int bench_futex_lock_pi(int argc, const char **argv)
pthread_mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+ unsigned long t = bench__runtime.tv_sec > 0 ?
+ worker[i].ops / bench__runtime.tv_sec : 0;
update_stats(&throughput_stats, t);
if (!silent)
diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c
new file mode 100644
index 000000000000..280227e3ffd7
--- /dev/null
+++ b/tools/perf/bench/inject-buildid.c
@@ -0,0 +1,476 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <stddef.h>
+#include <ftw.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <linux/list.h>
+#include <linux/err.h>
+#include <internal/lib.h>
+#include <subcmd/parse-options.h>
+
+#include "bench.h"
+#include "util/data.h"
+#include "util/stat.h"
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/symbol.h"
+#include "util/session.h"
+#include "util/build-id.h"
+#include "util/synthetic-events.h"
+
+#define MMAP_DEV_MAJOR 8
+#define DSO_MMAP_RATIO 4
+
+static unsigned int iterations = 100;
+static unsigned int nr_mmaps = 100;
+static unsigned int nr_samples = 100; /* samples per mmap */
+
+static u64 bench_sample_type;
+static u16 bench_id_hdr_size;
+
+struct bench_data {
+ int pid;
+ int input_pipe[2];
+ int output_pipe[2];
+ pthread_t th;
+};
+
+struct bench_dso {
+ struct list_head list;
+ char *name;
+ int ino;
+};
+
+static int nr_dsos;
+static struct bench_dso *dsos;
+
+extern int cmd_inject(int argc, const char *argv[]);
+
+static const struct option options[] = {
+ OPT_UINTEGER('i', "iterations", &iterations,
+ "Number of iterations used to compute average (default: 100)"),
+ OPT_UINTEGER('m', "nr-mmaps", &nr_mmaps,
+ "Number of mmap events for each iteration (default: 100)"),
+ OPT_UINTEGER('n', "nr-samples", &nr_samples,
+ "Number of sample events per mmap event (default: 100)"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show iteration count, DSO name, etc)"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench internals inject-build-id <options>",
+ NULL
+};
+
+/*
+ * Helper for collect_dso that adds the given file as a dso to dso_list
+ * if it contains a build-id. Stops after collecting 4 times more than
+ * we need (for MMAP2 events).
+ */
+static int add_dso(const char *fpath, const struct stat *sb __maybe_unused,
+ int typeflag, struct FTW *ftwbuf __maybe_unused)
+{
+ struct bench_dso *dso = &dsos[nr_dsos];
+ struct build_id bid;
+
+ if (typeflag == FTW_D || typeflag == FTW_SL)
+ return 0;
+
+ if (filename__read_build_id(fpath, &bid) < 0)
+ return 0;
+
+ dso->name = realpath(fpath, NULL);
+ if (dso->name == NULL)
+ return -1;
+
+ dso->ino = nr_dsos++;
+ pr_debug2(" Adding DSO: %s\n", fpath);
+
+ /* stop if we collected enough DSOs */
+ if ((unsigned int)nr_dsos == DSO_MMAP_RATIO * nr_mmaps)
+ return 1;
+
+ return 0;
+}
+
+static void collect_dso(void)
+{
+ dsos = calloc(nr_mmaps * DSO_MMAP_RATIO, sizeof(*dsos));
+ if (dsos == NULL) {
+ printf(" Memory allocation failed\n");
+ exit(1);
+ }
+
+ if (nftw("/usr/lib/", add_dso, 10, FTW_PHYS) < 0)
+ return;
+
+ pr_debug(" Collected %d DSOs\n", nr_dsos);
+}
+
+static void release_dso(void)
+{
+ int i;
+
+ for (i = 0; i < nr_dsos; i++) {
+ struct bench_dso *dso = &dsos[i];
+
+ free(dso->name);
+ }
+ free(dsos);
+}
+
+/* Fake address used by mmap and sample events */
+static u64 dso_map_addr(struct bench_dso *dso)
+{
+ return 0x400000ULL + dso->ino * 8192ULL;
+}
+
+static u32 synthesize_attr(struct bench_data *data)
+{
+ union perf_event event;
+
+ memset(&event, 0, sizeof(event.attr) + sizeof(u64));
+
+ event.header.type = PERF_RECORD_HEADER_ATTR;
+ event.header.size = sizeof(event.attr) + sizeof(u64);
+
+ event.attr.attr.type = PERF_TYPE_SOFTWARE;
+ event.attr.attr.config = PERF_COUNT_SW_TASK_CLOCK;
+ event.attr.attr.exclude_kernel = 1;
+ event.attr.attr.sample_id_all = 1;
+ event.attr.attr.sample_type = bench_sample_type;
+
+ return writen(data->input_pipe[1], &event, event.header.size);
+}
+
+static u32 synthesize_fork(struct bench_data *data)
+{
+ union perf_event event;
+
+ memset(&event, 0, sizeof(event.fork) + bench_id_hdr_size);
+
+ event.header.type = PERF_RECORD_FORK;
+ event.header.misc = PERF_RECORD_MISC_FORK_EXEC;
+ event.header.size = sizeof(event.fork) + bench_id_hdr_size;
+
+ event.fork.ppid = 1;
+ event.fork.ptid = 1;
+ event.fork.pid = data->pid;
+ event.fork.tid = data->pid;
+
+ return writen(data->input_pipe[1], &event, event.header.size);
+}
+
+static u32 synthesize_mmap(struct bench_data *data, struct bench_dso *dso,
+ u64 timestamp)
+{
+ union perf_event event;
+ size_t len = offsetof(struct perf_record_mmap2, filename);
+ u64 *id_hdr_ptr = (void *)&event;
+ int ts_idx;
+
+ len += roundup(strlen(dso->name) + 1, 8) + bench_id_hdr_size;
+
+ memset(&event, 0, min(len, sizeof(event.mmap2)));
+
+ event.header.type = PERF_RECORD_MMAP2;
+ event.header.misc = PERF_RECORD_MISC_USER;
+ event.header.size = len;
+
+ event.mmap2.pid = data->pid;
+ event.mmap2.tid = data->pid;
+ event.mmap2.maj = MMAP_DEV_MAJOR;
+ event.mmap2.ino = dso->ino;
+
+ strcpy(event.mmap2.filename, dso->name);
+
+ event.mmap2.start = dso_map_addr(dso);
+ event.mmap2.len = 4096;
+ event.mmap2.prot = PROT_EXEC;
+
+ if (len > sizeof(event.mmap2)) {
+ /* write mmap2 event first */
+ writen(data->input_pipe[1], &event, len - bench_id_hdr_size);
+ /* zero-fill sample id header */
+ memset(id_hdr_ptr, 0, bench_id_hdr_size);
+ /* put timestamp in the right position */
+ ts_idx = (bench_id_hdr_size / sizeof(u64)) - 2;
+ id_hdr_ptr[ts_idx] = timestamp;
+ writen(data->input_pipe[1], id_hdr_ptr, bench_id_hdr_size);
+ } else {
+ ts_idx = (len / sizeof(u64)) - 2;
+ id_hdr_ptr[ts_idx] = timestamp;
+ writen(data->input_pipe[1], &event, len);
+ }
+ return len;
+}
+
+static u32 synthesize_sample(struct bench_data *data, struct bench_dso *dso,
+ u64 timestamp)
+{
+ union perf_event event;
+ struct perf_sample sample = {
+ .tid = data->pid,
+ .pid = data->pid,
+ .ip = dso_map_addr(dso),
+ .time = timestamp,
+ };
+
+ event.header.type = PERF_RECORD_SAMPLE;
+ event.header.misc = PERF_RECORD_MISC_USER;
+ event.header.size = perf_event__sample_event_size(&sample, bench_sample_type, 0);
+
+ perf_event__synthesize_sample(&event, bench_sample_type, 0, &sample);
+
+ return writen(data->input_pipe[1], &event, event.header.size);
+}
+
+static u32 synthesize_flush(struct bench_data *data)
+{
+ struct perf_event_header header = {
+ .size = sizeof(header),
+ .type = PERF_RECORD_FINISHED_ROUND,
+ };
+
+ return writen(data->input_pipe[1], &header, header.size);
+}
+
+static void *data_reader(void *arg)
+{
+ struct bench_data *data = arg;
+ char buf[8192];
+ int flag;
+ int n;
+
+ flag = fcntl(data->output_pipe[0], F_GETFL);
+ fcntl(data->output_pipe[0], F_SETFL, flag | O_NONBLOCK);
+
+ /* read out data from child */
+ while (true) {
+ n = read(data->output_pipe[0], buf, sizeof(buf));
+ if (n > 0)
+ continue;
+ if (n == 0)
+ break;
+
+ if (errno != EINTR && errno != EAGAIN)
+ break;
+
+ usleep(100);
+ }
+
+ close(data->output_pipe[0]);
+ return NULL;
+}
+
+static int setup_injection(struct bench_data *data, bool build_id_all)
+{
+ int ready_pipe[2];
+ int dev_null_fd;
+ char buf;
+
+ if (pipe(ready_pipe) < 0)
+ return -1;
+
+ if (pipe(data->input_pipe) < 0)
+ return -1;
+
+ if (pipe(data->output_pipe) < 0)
+ return -1;
+
+ data->pid = fork();
+ if (data->pid < 0)
+ return -1;
+
+ if (data->pid == 0) {
+ const char **inject_argv;
+ int inject_argc = 2;
+
+ close(data->input_pipe[1]);
+ close(data->output_pipe[0]);
+ close(ready_pipe[0]);
+
+ dup2(data->input_pipe[0], STDIN_FILENO);
+ close(data->input_pipe[0]);
+ dup2(data->output_pipe[1], STDOUT_FILENO);
+ close(data->output_pipe[1]);
+
+ dev_null_fd = open("/dev/null", O_WRONLY);
+ if (dev_null_fd < 0)
+ exit(1);
+
+ dup2(dev_null_fd, STDERR_FILENO);
+
+ if (build_id_all)
+ inject_argc++;
+
+ inject_argv = calloc(inject_argc + 1, sizeof(*inject_argv));
+ if (inject_argv == NULL)
+ exit(1);
+
+ inject_argv[0] = strdup("inject");
+ inject_argv[1] = strdup("-b");
+ if (build_id_all)
+ inject_argv[2] = strdup("--buildid-all");
+
+ /* signal that we're ready to go */
+ close(ready_pipe[1]);
+
+ cmd_inject(inject_argc, inject_argv);
+
+ exit(0);
+ }
+
+ pthread_create(&data->th, NULL, data_reader, data);
+
+ close(ready_pipe[1]);
+ close(data->input_pipe[0]);
+ close(data->output_pipe[1]);
+
+ /* wait for child ready */
+ if (read(ready_pipe[0], &buf, 1) < 0)
+ return -1;
+ close(ready_pipe[0]);
+
+ return 0;
+}
+
+static int inject_build_id(struct bench_data *data, u64 *max_rss)
+{
+ int status;
+ unsigned int i, k;
+ struct rusage rusage;
+ u64 len = 0;
+
+ /* this makes the child run */
+ if (perf_header__write_pipe(data->input_pipe[1]) < 0)
+ return -1;
+
+ len += synthesize_attr(data);
+ len += synthesize_fork(data);
+
+ for (i = 0; i < nr_mmaps; i++) {
+ int idx = rand() % (nr_dsos - 1);
+ struct bench_dso *dso = &dsos[idx];
+ u64 timestamp = rand() % 1000000;
+
+ pr_debug2(" [%d] injecting: %s\n", i+1, dso->name);
+ len += synthesize_mmap(data, dso, timestamp);
+
+ for (k = 0; k < nr_samples; k++)
+ len += synthesize_sample(data, dso, timestamp + k * 1000);
+
+ if ((i + 1) % 10 == 0)
+ len += synthesize_flush(data);
+ }
+
+ /* this makes the child finish */
+ close(data->input_pipe[1]);
+
+ wait4(data->pid, &status, 0, &rusage);
+ *max_rss = rusage.ru_maxrss;
+
+ pr_debug(" Child %d exited with %d\n", data->pid, status);
+
+ return 0;
+}
+
+static void do_inject_loop(struct bench_data *data, bool build_id_all)
+{
+ unsigned int i;
+ struct stats time_stats, mem_stats;
+ double time_average, time_stddev;
+ double mem_average, mem_stddev;
+
+ init_stats(&time_stats);
+ init_stats(&mem_stats);
+
+ pr_debug(" Build-id%s injection benchmark\n", build_id_all ? "-all" : "");
+
+ for (i = 0; i < iterations; i++) {
+ struct timeval start, end, diff;
+ u64 runtime_us, max_rss;
+
+ pr_debug(" Iteration #%d\n", i+1);
+
+ if (setup_injection(data, build_id_all) < 0) {
+ printf(" Build-id injection setup failed\n");
+ break;
+ }
+
+ gettimeofday(&start, NULL);
+ if (inject_build_id(data, &max_rss) < 0) {
+ printf(" Build-id injection failed\n");
+ break;
+ }
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&time_stats, runtime_us);
+ update_stats(&mem_stats, max_rss);
+
+ pthread_join(data->th, NULL);
+ }
+
+ time_average = avg_stats(&time_stats) / USEC_PER_MSEC;
+ time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC;
+ printf(" Average build-id%s injection took: %.3f msec (+- %.3f msec)\n",
+ build_id_all ? "-all" : "", time_average, time_stddev);
+
+ /* each iteration, it processes MMAP2 + BUILD_ID + nr_samples * SAMPLE */
+ time_average = avg_stats(&time_stats) / (nr_mmaps * (nr_samples + 2));
+ time_stddev = stddev_stats(&time_stats) / (nr_mmaps * (nr_samples + 2));
+ printf(" Average time per event: %.3f usec (+- %.3f usec)\n",
+ time_average, time_stddev);
+
+ mem_average = avg_stats(&mem_stats);
+ mem_stddev = stddev_stats(&mem_stats);
+ printf(" Average memory usage: %.0f KB (+- %.0f KB)\n",
+ mem_average, mem_stddev);
+}
+
+static int do_inject_loops(struct bench_data *data)
+{
+ srand(time(NULL));
+ symbol__init(NULL);
+
+ bench_sample_type = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP;
+ bench_sample_type |= PERF_SAMPLE_TID | PERF_SAMPLE_TIME;
+ bench_id_hdr_size = 32;
+
+ collect_dso();
+ if (nr_dsos == 0) {
+ printf(" Cannot collect DSOs for injection\n");
+ return -1;
+ }
+
+ do_inject_loop(data, false);
+ do_inject_loop(data, true);
+
+ release_dso();
+ return 0;
+}
+
+int bench_inject_build_id(int argc, const char **argv)
+{
+ struct bench_data data;
+
+ argc = parse_options(argc, argv, options, bench_usage, 0);
+ if (argc) {
+ usage_with_options(bench_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ return do_inject_loops(&data);
+}
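The subtlest part of the synthesis above is planting the timestamp inside the zero-filled sample id header appended to each event. With the fixed 32-byte header the benchmark uses, the index arithmetic works out as in this small worked check (values copied from the code above):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t id_hdr_size = 32;	/* bench_id_hdr_size above */
	size_t ts_idx = id_hdr_size / sizeof(uint64_t) - 2;

	/* four u64 slots; the timestamp lands in the second-to-last one */
	printf("timestamp occupies u64 slot %zu of %u\n",
	       ts_idx, (unsigned)(id_hdr_size / 8));
	return 0;
}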
diff --git a/tools/perf/bench/kallsyms-parse.c b/tools/perf/bench/kallsyms-parse.c
new file mode 100644
index 000000000000..2b0d0f980ae9
--- /dev/null
+++ b/tools/perf/bench/kallsyms-parse.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark of /proc/kallsyms parsing.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include <stdlib.h>
+#include "bench.h"
+#include "../util/stat.h"
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+#include <symbol/kallsyms.h>
+
+static unsigned int iterations = 100;
+
+static const struct option options[] = {
+ OPT_UINTEGER('i', "iterations", &iterations,
+ "Number of iterations used to compute average"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench internals kallsyms-parse <options>",
+ NULL
+};
+
+static int bench_process_symbol(void *arg __maybe_unused,
+ const char *name __maybe_unused,
+ char type __maybe_unused,
+ u64 start __maybe_unused)
+{
+ return 0;
+}
+
+static int do_kallsyms_parse(void)
+{
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ unsigned int i;
+ double time_average, time_stddev;
+ int err;
+ struct stats time_stats;
+
+ init_stats(&time_stats);
+
+ for (i = 0; i < iterations; i++) {
+ gettimeofday(&start, NULL);
+ err = kallsyms__parse("/proc/kallsyms", NULL,
+ bench_process_symbol);
+ if (err)
+ return err;
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&time_stats, runtime_us);
+ }
+
+ time_average = avg_stats(&time_stats) / USEC_PER_MSEC;
+ time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC;
+ printf(" Average kallsyms__parse took: %.3f ms (+- %.3f ms)\n",
+ time_average, time_stddev);
+ return 0;
+}
+
+int bench_kallsyms_parse(int argc, const char **argv)
+{
+ argc = parse_options(argc, argv, options, bench_usage, 0);
+ if (argc) {
+ usage_with_options(bench_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ return do_kallsyms_parse();
+}
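kallsyms__parse() walks /proc/kallsyms, handing each "address type name [module]" line to the callback, so the benchmark is dominated by raw line parsing. A standalone sketch of an equivalent reader follows; perf's real helper lives in tools/lib/symbol/kallsyms.c, and this loop only approximates it.

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/kallsyms", "r");
	unsigned long long addr;
	unsigned long count = 0;
	char type, name[256];

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* each line: "<hex address> <type char> <symbol name> [module]" */
	while (fscanf(f, "%llx %c %255s%*[^\n]", &addr, &type, name) == 3)
		count++;	/* a real callback would act on addr/type/name */
	fclose(f);
	printf("parsed %lu symbols\n", count);
	return 0;
}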
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 9235b76501be..19d45c377ac1 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -223,12 +223,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
return 0;
}
-static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
{
- u64 cycle_start = 0ULL, cycle_end = 0ULL;
- memcpy_t fn = r->fn.memcpy;
- int i;
-
/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
memset(src, 0, size);
@@ -237,6 +233,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo
* to not measure page fault overhead:
*/
fn(dst, src, size);
+}
+
+static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+{
+ u64 cycle_start = 0ULL, cycle_end = 0ULL;
+ memcpy_t fn = r->fn.memcpy;
+ int i;
+
+ memcpy_prefault(fn, size, src, dst);
cycle_start = get_cycles();
for (i = 0; i < nr_loops; ++i)
@@ -252,11 +257,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void
memcpy_t fn = r->fn.memcpy;
int i;
- /*
- * We prefault the freshly allocated memory range here,
- * to not measure page fault overhead:
- */
- fn(dst, src, size);
+ memcpy_prefault(fn, size, src, dst);
BUG_ON(gettimeofday(&tv_start, NULL));
for (i = 0; i < nr_loops; ++i)
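Factoring the warm-up copy into memcpy_prefault() keeps both timing paths honest: without touching the pages first, the measured loop would charge demand-paging faults to memcpy(). A minimal illustration of the pattern, separate from the benchmark code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

int main(void)
{
	size_t size = 64 * 1024 * 1024;
	char *src = malloc(size), *dst = malloc(size);
	struct timeval a, b, d;

	if (!src || !dst)
		return 1;

	memset(src, 0, size);		/* prefault the source pages */
	memcpy(dst, src, size);		/* first copy also faults in dst */

	gettimeofday(&a, NULL);
	memcpy(dst, src, size);		/* this copy measures only memcpy */
	gettimeofday(&b, NULL);
	timersub(&b, &a, &d);
	printf("prefaulted copy: %ld.%06ld s\n",
	       (long)d.tv_sec, (long)d.tv_usec);
	free(src);
	free(dst);
	return 0;
}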
diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c
deleted file mode 100644
index 4130734dde84..000000000000
--- a/tools/perf/bench/mem-memcpy-x86-64-lib.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
- * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy'
- * happy.
- */
-#include <linux/types.h>
-
-unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
-
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
-{
- for (; len; --len, to++, from++) {
- /*
- * Call the assembly routine back directly since
- * memcpy_mcsafe() may silently fallback to memcpy.
- */
- unsigned long rem = __memcpy_mcsafe(to, from, 1);
-
- if (rem)
- break;
- }
- return len;
-}
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 5797253b9700..11726ec6285f 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -137,12 +137,13 @@ struct global_info {
u8 *data;
pthread_mutex_t startup_mutex;
+ pthread_cond_t startup_cond;
int nr_tasks_started;
- pthread_mutex_t startup_done_mutex;
-
pthread_mutex_t start_work_mutex;
+ pthread_cond_t start_work_cond;
int nr_tasks_working;
+ bool start_work;
pthread_mutex_t stop_work_mutex;
u64 bytes_done;
@@ -247,17 +248,22 @@ static int is_node_present(int node)
*/
static bool node_has_cpus(int node)
{
- struct bitmask *cpu = numa_allocate_cpumask();
- unsigned int i;
+ struct bitmask *cpumask = numa_allocate_cpumask();
+ bool ret = false; /* fall back to nocpus */
+ int cpu;
- if (cpu && !numa_node_to_cpus(node, cpu)) {
- for (i = 0; i < cpu->size; i++) {
- if (numa_bitmask_isbitset(cpu, i))
- return true;
+ BUG_ON(!cpumask);
+ if (!numa_node_to_cpus(node, cpumask)) {
+ for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+ if (numa_bitmask_isbitset(cpumask, cpu)) {
+ ret = true;
+ break;
+ }
}
}
+ numa_free_cpumask(cpumask);
- return false; /* lets fall back to nocpus safely */
+ return ret;
}
static cpu_set_t bind_to_cpu(int target_cpu)
@@ -288,14 +294,10 @@ static cpu_set_t bind_to_cpu(int target_cpu)
static cpu_set_t bind_to_node(int target_node)
{
- int cpus_per_node = g->p.nr_cpus / nr_numa_nodes();
cpu_set_t orig_mask, mask;
int cpu;
int ret;
- BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus);
- BUG_ON(!cpus_per_node);
-
ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
BUG_ON(ret);
@@ -305,13 +307,16 @@ static cpu_set_t bind_to_node(int target_node)
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
CPU_SET(cpu, &mask);
} else {
- int cpu_start = (target_node + 0) * cpus_per_node;
- int cpu_stop = (target_node + 1) * cpus_per_node;
-
- BUG_ON(cpu_stop > g->p.nr_cpus);
+ struct bitmask *cpumask = numa_allocate_cpumask();
- for (cpu = cpu_start; cpu < cpu_stop; cpu++)
- CPU_SET(cpu, &mask);
+ BUG_ON(!cpumask);
+ if (!numa_node_to_cpus(target_node, cpumask)) {
+ for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+ if (numa_bitmask_isbitset(cpumask, cpu))
+ CPU_SET(cpu, &mask);
+ }
+ }
+ numa_free_cpumask(cpumask);
}
ret = sched_setaffinity(0, sizeof(mask), &mask);
@@ -479,6 +484,18 @@ static void init_global_mutex(pthread_mutex_t *mutex)
pthread_mutex_init(mutex, &attr);
}
+/*
+ * Return a process-shared (global) condition variable:
+ */
+static void init_global_cond(pthread_cond_t *cond)
+{
+ pthread_condattr_t attr;
+
+ pthread_condattr_init(&attr);
+ pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
+ pthread_cond_init(cond, &attr);
+}
+
static int parse_cpu_list(const char *arg)
{
p0.cpu_list_str = strdup(arg);
@@ -729,8 +746,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused,
return -1;
return parse_node_list(arg);
-
- return 0;
}
#define BIT(x) (1ul << x)
@@ -813,12 +828,12 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val
}
}
} else if (!g->p.data_backwards || (nr + loop) & 1) {
+ /* Process data forwards: */
d0 = data + off;
d = data + off + 1;
d1 = data + words;
- /* Process data forwards: */
for (;;) {
if (unlikely(d >= d1))
d = data;
@@ -836,7 +851,6 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val
d = data + off - 1;
d1 = data + words;
- /* Process data forwards: */
for (;;) {
if (unlikely(d < data))
d = data + words-1;
@@ -1135,15 +1149,18 @@ static void *worker_thread(void *__tdata)
if (g->p.serialize_startup) {
pthread_mutex_lock(&g->startup_mutex);
g->nr_tasks_started++;
+ /* The last thread wakes the main process. */
+ if (g->nr_tasks_started == g->p.nr_tasks)
+ pthread_cond_signal(&g->startup_cond);
+
pthread_mutex_unlock(&g->startup_mutex);
/* Here we will wait for the main process to start us all at once: */
pthread_mutex_lock(&g->start_work_mutex);
+ g->start_work = false;
g->nr_tasks_working++;
-
- /* Last one wake the main process: */
- if (g->nr_tasks_working == g->p.nr_tasks)
- pthread_mutex_unlock(&g->startup_done_mutex);
+ while (!g->start_work)
+ pthread_cond_wait(&g->start_work_cond, &g->start_work_mutex);
pthread_mutex_unlock(&g->start_work_mutex);
}
@@ -1440,8 +1457,9 @@ static int init(void)
/* Startup serialization: */
init_global_mutex(&g->start_work_mutex);
+ init_global_cond(&g->start_work_cond);
init_global_mutex(&g->startup_mutex);
- init_global_mutex(&g->startup_done_mutex);
+ init_global_cond(&g->startup_cond);
init_global_mutex(&g->stop_work_mutex);
init_thread_data();
@@ -1501,9 +1519,6 @@ static int __bench_numa(const char *name)
pids = zalloc(g->p.nr_proc * sizeof(*pids));
pid = -1;
- /* All threads try to acquire it, this way we can wait for them to start up: */
- pthread_mutex_lock(&g->start_work_mutex);
-
if (g->p.serialize_startup) {
tprintf(" #\n");
tprintf(" # Startup synchronization: ..."); fflush(stdout);
@@ -1525,22 +1540,29 @@ static int __bench_numa(const char *name)
pids[i] = pid;
}
- /* Wait for all the threads to start up: */
- while (g->nr_tasks_started != g->p.nr_tasks)
- usleep(USEC_PER_MSEC);
-
- BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
if (g->p.serialize_startup) {
+ bool threads_ready = false;
double startup_sec;
- pthread_mutex_lock(&g->startup_done_mutex);
+ /*
+ * Wait for all the threads to start up. The last thread will
+ * signal this process.
+ */
+ pthread_mutex_lock(&g->startup_mutex);
+ while (g->nr_tasks_started != g->p.nr_tasks)
+ pthread_cond_wait(&g->startup_cond, &g->startup_mutex);
- /* This will start all threads: */
- pthread_mutex_unlock(&g->start_work_mutex);
+ pthread_mutex_unlock(&g->startup_mutex);
- /* This mutex is locked - the last started thread will wake us: */
- pthread_mutex_lock(&g->startup_done_mutex);
+ /* Wait for all threads to be at the start_work_cond. */
+ while (!threads_ready) {
+ pthread_mutex_lock(&g->start_work_mutex);
+ threads_ready = (g->nr_tasks_working == g->p.nr_tasks);
+ pthread_mutex_unlock(&g->start_work_mutex);
+ if (!threads_ready)
+ usleep(1);
+ }
gettimeofday(&stop, NULL);
@@ -1554,7 +1576,11 @@ static int __bench_numa(const char *name)
tprintf(" #\n");
start = stop;
- pthread_mutex_unlock(&g->startup_done_mutex);
+ /* Start all threads running. */
+ pthread_mutex_lock(&g->start_work_mutex);
+ g->start_work = true;
+ pthread_mutex_unlock(&g->start_work_mutex);
+ pthread_cond_broadcast(&g->start_work_cond);
} else {
gettimeofday(&start, NULL);
}
@@ -1733,12 +1759,12 @@ err:
*/
static const char *tests[][MAX_ARGS] = {
/* Basic single-stream NUMA bandwidth measurements: */
- { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
"-C" , "0", "-M", "0", OPT_BW_RAM },
{ "RAM-bw-local-NOTHP,",
"mem", "-p", "1", "-t", "1", "-P", "1024",
"-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP },
- { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
"-C" , "0", "-M", "1", OPT_BW_RAM },
/* 2-stream NUMA bandwidth measurements: */
@@ -1755,7 +1781,7 @@ static const char *tests[][MAX_ARGS] = {
{ " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV },
{ " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV },
{ " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV },
- { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
+ { " 2x3-convergence,", "mem", "-p", "2", "-t", "3", "-P", "1020", OPT_CONV },
{ " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
{ " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV },
{ " 4x4-convergence-NOTHP,",
@@ -1780,24 +1806,24 @@ static const char *tests[][MAX_ARGS] = {
"mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP },
{ "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW },
- { " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
- { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
- { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
- { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
+ { " 1x4-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
+ { " 1x8-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
+ { "1x16-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
+ { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
- { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
- { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
- { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
- { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
- { " 4x8-bw-thread-NOTHP,",
+ { " 2x3-bw-process,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
+ { " 4x4-bw-process,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
+ { " 4x6-bw-process,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
+ { " 4x8-bw-process,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
+ { " 4x8-bw-process-NOTHP,",
"mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP },
- { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
- { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
+ { " 3x3-bw-process,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
+ { " 5x5-bw-process,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
- { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
- { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
+ { "2x16-bw-process,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
+ { "1x32-bw-process,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
- { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
+ { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
{ "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP },
{ "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW },
{ "numa01-bw-thread-NOTHP,",
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index 97e4a4fb3362..cecce93ccc63 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -40,7 +40,7 @@ struct sender_context {
unsigned int num_fds;
int ready_out;
int wakefd;
- int out_fds[0];
+ int out_fds[];
};
struct receiver_context {
@@ -66,11 +66,10 @@ static void fdpair(int fds[2])
/* Block until we're ready to go */
static void ready(int ready_out, int wakefd)
{
- char dummy;
struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };
/* Tell them we're ready. */
- if (write(ready_out, &dummy, 1) != 1)
+ if (write(ready_out, "R", 1) != 1)
err(EXIT_FAILURE, "CLIENT: ready write");
/* Wait for "GO" signal */
@@ -85,6 +84,7 @@ static void *sender(struct sender_context *ctx)
unsigned int i, j;
ready(ctx->ready_out, ctx->wakefd);
+ memset(data, 'S', sizeof(data));
/* Now pump to every receiver. */
for (i = 0; i < nr_loops; i++) {
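Both hunks exist to silence "write of uninitialized bytes" reports from Valgrind and MemorySanitizer: the ready byte and the pumped payload are now defined before use. A self-contained sketch of the ready/go pipe handshake the function implements (the child side mirrors the code above; the parent side is illustrative):

#include <err.h>
#include <poll.h>
#include <stdlib.h>
#include <unistd.h>

static void ready(int ready_out, int wakefd)
{
	struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };

	/* a defined byte, so the write is never of uninitialized memory */
	if (write(ready_out, "R", 1) != 1)
		err(EXIT_FAILURE, "ready write");
	if (poll(&pollfd, 1, -1) != 1)	/* wait for the "GO" signal */
		err(EXIT_FAILURE, "poll");
}

int main(void)
{
	int ready_pipe[2], wake_pipe[2];
	char c;

	if (pipe(ready_pipe) || pipe(wake_pipe))
		err(EXIT_FAILURE, "pipe");
	if (fork() == 0) {
		ready(ready_pipe[1], wake_pipe[0]);
		_exit(0);
	}
	if (read(ready_pipe[0], &c, 1) != 1)	/* wait for "R" */
		err(EXIT_FAILURE, "read");
	if (write(wake_pipe[1], "G", 1) != 1)	/* release the child */
		err(EXIT_FAILURE, "write");
	return 0;
}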
diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
new file mode 100644
index 000000000000..b2924e3181dc
--- /dev/null
+++ b/tools/perf/bench/synthesize.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark synthesis of perf events such as at the start of a 'perf
+ * record'. Synthesis is done on the current process, and 'dummy' event
+ * handlers are invoked; they support dump_trace but otherwise do nothing.
+ *
+ * Copyright 2019 Google LLC.
+ */
+#include <stdio.h>
+#include "bench.h"
+#include "../util/debug.h"
+#include "../util/session.h"
+#include "../util/stat.h"
+#include "../util/synthetic-events.h"
+#include "../util/target.h"
+#include "../util/thread_map.h"
+#include "../util/tool.h"
+#include "../util/util.h"
+#include <linux/atomic.h>
+#include <linux/err.h>
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+
+static unsigned int min_threads = 1;
+static unsigned int max_threads = UINT_MAX;
+static unsigned int single_iterations = 10000;
+static unsigned int multi_iterations = 10;
+static bool run_st;
+static bool run_mt;
+
+static const struct option options[] = {
+ OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
+ OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
+ OPT_UINTEGER('m', "min-threads", &min_threads,
+ "Minimum number of threads in multithreaded bench"),
+ OPT_UINTEGER('M', "max-threads", &max_threads,
+ "Maximum number of threads in multithreaded bench"),
+ OPT_UINTEGER('i', "single-iterations", &single_iterations,
+ "Number of iterations used to compute single-threaded average"),
+ OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
+ "Number of iterations used to compute multi-threaded average"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench internals synthesize <options>",
+ NULL
+};
+
+static atomic_t event_count;
+
+static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ atomic_inc(&event_count);
+ return 0;
+}
+
+static int do_run_single_threaded(struct perf_session *session,
+ struct perf_thread_map *threads,
+ struct target *target, bool data_mmap)
+{
+ const unsigned int nr_threads_synthesize = 1;
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ unsigned int i;
+ double time_average, time_stddev, event_average, event_stddev;
+ int err;
+ struct stats time_stats, event_stats;
+
+ init_stats(&time_stats);
+ init_stats(&event_stats);
+
+ for (i = 0; i < single_iterations; i++) {
+ atomic_set(&event_count, 0);
+ gettimeofday(&start, NULL);
+ err = __machine__synthesize_threads(&session->machines.host,
+ NULL,
+ target, threads,
+ process_synthesized_event,
+ data_mmap,
+ nr_threads_synthesize);
+ if (err)
+ return err;
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&time_stats, runtime_us);
+ update_stats(&event_stats, atomic_read(&event_count));
+ }
+
+ time_average = avg_stats(&time_stats);
+ time_stddev = stddev_stats(&time_stats);
+ printf(" Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
+ data_mmap ? "data " : "", time_average, time_stddev);
+
+ event_average = avg_stats(&event_stats);
+ event_stddev = stddev_stats(&event_stats);
+ printf(" Average num. events: %.3f (+- %.3f)\n",
+ event_average, event_stddev);
+
+ printf(" Average time per event %.3f usec\n",
+ time_average / event_average);
+ return 0;
+}
+
+static int run_single_threaded(void)
+{
+ struct perf_session *session;
+ struct target target = {
+ .pid = "self",
+ };
+ struct perf_thread_map *threads;
+ int err;
+
+ perf_set_singlethreaded();
+ session = perf_session__new(NULL, false, NULL);
+ if (IS_ERR(session)) {
+ pr_err("Session creation failed.\n");
+ return PTR_ERR(session);
+ }
+ threads = thread_map__new_by_pid(getpid());
+ if (!threads) {
+ pr_err("Thread map creation failed.\n");
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ puts(
+"Computing performance of single threaded perf event synthesis by\n"
+"synthesizing events on the perf process itself:");
+
+ err = do_run_single_threaded(session, threads, &target, false);
+ if (err)
+ goto err_out;
+
+ err = do_run_single_threaded(session, threads, &target, true);
+
+err_out:
+ if (threads)
+ perf_thread_map__put(threads);
+
+ perf_session__delete(session);
+ return err;
+}
+
+static int do_run_multi_threaded(struct target *target,
+ unsigned int nr_threads_synthesize)
+{
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ unsigned int i;
+ double time_average, time_stddev, event_average, event_stddev;
+ int err;
+ struct stats time_stats, event_stats;
+ struct perf_session *session;
+
+ init_stats(&time_stats);
+ init_stats(&event_stats);
+ for (i = 0; i < multi_iterations; i++) {
+ session = perf_session__new(NULL, false, NULL);
+ if (IS_ERR(session))
+ return PTR_ERR(session);
+
+ atomic_set(&event_count, 0);
+ gettimeofday(&start, NULL);
+ err = __machine__synthesize_threads(&session->machines.host,
+ NULL,
+ target, NULL,
+ process_synthesized_event,
+ false,
+ nr_threads_synthesize);
+ if (err) {
+ perf_session__delete(session);
+ return err;
+ }
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&time_stats, runtime_us);
+ update_stats(&event_stats, atomic_read(&event_count));
+ perf_session__delete(session);
+ }
+
+ time_average = avg_stats(&time_stats);
+ time_stddev = stddev_stats(&time_stats);
+ printf(" Average synthesis took: %.3f usec (+- %.3f usec)\n",
+ time_average, time_stddev);
+
+ event_average = avg_stats(&event_stats);
+ event_stddev = stddev_stats(&event_stats);
+ printf(" Average num. events: %.3f (+- %.3f)\n",
+ event_average, event_stddev);
+
+ printf(" Average time per event %.3f usec\n",
+ time_average / event_average);
+ return 0;
+}
+
+static int run_multi_threaded(void)
+{
+ struct target target = {
+ .cpu_list = "0"
+ };
+ unsigned int nr_threads_synthesize;
+ int err;
+
+ if (max_threads == UINT_MAX)
+ max_threads = sysconf(_SC_NPROCESSORS_ONLN);
+
+ puts(
+"Computing performance of multi threaded perf event synthesis by\n"
+"synthesizing events on CPU 0:");
+
+ for (nr_threads_synthesize = min_threads;
+ nr_threads_synthesize <= max_threads;
+ nr_threads_synthesize++) {
+ if (nr_threads_synthesize == 1)
+ perf_set_singlethreaded();
+ else
+ perf_set_multithreaded();
+
+ printf(" Number of synthesis threads: %u\n",
+ nr_threads_synthesize);
+
+ err = do_run_multi_threaded(&target, nr_threads_synthesize);
+ if (err)
+ return err;
+ }
+ perf_set_singlethreaded();
+ return 0;
+}
+
+int bench_synthesize(int argc, const char **argv)
+{
+ int err = 0;
+
+ argc = parse_options(argc, argv, options, bench_usage, 0);
+ if (argc) {
+ usage_with_options(bench_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * If neither single-threaded nor multi-threaded is specified, default
+ * to running just the single-threaded benchmark.
+ */
+ if (!run_st && !run_mt)
+ run_st = true;
+
+ if (run_st)
+ err = run_single_threaded();
+
+ if (!err && run_mt)
+ err = run_multi_threaded();
+
+ return err;
+}
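Like the other benchmarks in this series, timings here are funneled through init_stats()/update_stats()/avg_stats()/stddev_stats(). Those helpers in util/stat.c are built on Welford's online recurrence; below is a simplified standalone sketch of that recurrence (link with -lm), with the caveat that the in-tree helpers add their own normalization details.

#include <math.h>
#include <stdio.h>

/* Simplified Welford running statistics: one pass, no stored samples. */
struct stats { double n, mean, M2; };

static void update_stats(struct stats *s, double val)
{
	double delta = val - s->mean;

	s->n += 1.0;
	s->mean += delta / s->n;
	s->M2 += delta * (val - s->mean);
}

int main(void)
{
	struct stats s = { 0 };
	double samples[] = { 100.0, 102.0, 98.0, 101.0 };

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		update_stats(&s, samples[i]);

	printf("avg %.3f stddev %.3f\n", s.mean,
	       sqrt(s.M2 / (s.n - 1)));	/* sample standard deviation */
	return 0;
}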
diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c
new file mode 100644
index 000000000000..5fe621cff8e9
--- /dev/null
+++ b/tools/perf/bench/syscall.c
@@ -0,0 +1,81 @@
+/*
+ *
+ * syscall.c
+ *
+ * syscall: Benchmark for system call performance
+ */
+#include "../perf.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../builtin.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#define LOOPS_DEFAULT 10000000
+static int loops = LOOPS_DEFAULT;
+
+static const struct option options[] = {
+ OPT_INTEGER('l', "loop", &loops, "Specify number of loops"),
+ OPT_END()
+};
+
+static const char * const bench_syscall_usage[] = {
+ "perf bench syscall <options>",
+ NULL
+};
+
+int bench_syscall_basic(int argc, const char **argv)
+{
+ struct timeval start, stop, diff;
+ unsigned long long result_usec = 0;
+ int i;
+
+ argc = parse_options(argc, argv, options, bench_syscall_usage, 0);
+
+ gettimeofday(&start, NULL);
+
+ for (i = 0; i < loops; i++)
+ getppid();
+
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ printf("# Executed %'d getppid() calls\n", loops);
+
+ result_usec = diff.tv_sec * 1000000;
+ result_usec += diff.tv_usec;
+
+ printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+ diff.tv_sec,
+ (unsigned long) (diff.tv_usec/1000));
+
+ printf(" %14lf usecs/op\n",
+ (double)result_usec / (double)loops);
+ printf(" %'14d ops/sec\n",
+ (int)((double)loops /
+ ((double)result_usec / (double)1000000)));
+ break;
+
+ case BENCH_FORMAT_SIMPLE:
+ printf("%lu.%03lu\n",
+ diff.tv_sec,
+ (unsigned long) (diff.tv_usec / 1000));
+ break;
+
+ default:
+ /* reaching here should never happen */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(1);
+ break;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 6c0a0412502e..4940d10074c3 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -212,11 +212,9 @@ static bool has_annotation(struct perf_annotate *ann)
return ui__has_annotation() || ann->use_stdio2;
}
-static int perf_evsel__add_sample(struct evsel *evsel,
- struct perf_sample *sample,
- struct addr_location *al,
- struct perf_annotate *ann,
- struct machine *machine)
+static int evsel__add_sample(struct evsel *evsel, struct perf_sample *sample,
+ struct addr_location *al, struct perf_annotate *ann,
+ struct machine *machine)
{
struct hists *hists = evsel__hists(evsel);
struct hist_entry *he;
@@ -278,7 +276,7 @@ static int process_sample_event(struct perf_tool *tool,
goto out_put;
if (!al.filtered &&
- perf_evsel__add_sample(evsel, sample, &al, ann, machine)) {
+ evsel__add_sample(evsel, sample, &al, ann, machine)) {
pr_warning("problem incrementing symbol count, "
"skipping event\n");
ret = -1;
@@ -433,11 +431,10 @@ static int __cmd_annotate(struct perf_annotate *ann)
total_nr_samples += nr_samples;
hists__collapse_resort(hists, NULL);
/* Don't sort callchain */
- perf_evsel__reset_sample_bit(pos, CALLCHAIN);
- perf_evsel__output_resort(pos, NULL);
+ evsel__reset_sample_bit(pos, CALLCHAIN);
+ evsel__output_resort(pos, NULL);
- if (symbol_conf.event_group &&
- !perf_evsel__is_group_leader(pos))
+ if (symbol_conf.event_group && !evsel__is_group_leader(pos))
continue;
hists__find_annotations(hists, pos, ann);
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index c06fe21c8613..62a7b7420a44 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -11,6 +11,7 @@
* Available benchmark collection list:
*
* sched ... scheduler and IPC performance
+ * syscall ... system call performance
* mem ... memory access performance
* numa ... NUMA scheduling and MM performance
* futex ... Futex performance
@@ -49,9 +50,16 @@ static struct bench sched_benchmarks[] = {
{ NULL, NULL, NULL }
};
+static struct bench syscall_benchmarks[] = {
+ { "basic", "Benchmark for basic getppid(2) calls", bench_syscall_basic },
+ { "all", "Run all syscall benchmarks", NULL },
+ { NULL, NULL, NULL },
+};
+
static struct bench mem_benchmarks[] = {
{ "memcpy", "Benchmark for memcpy() functions", bench_mem_memcpy },
{ "memset", "Benchmark for memset() functions", bench_mem_memset },
+ { "find_bit", "Benchmark for find_bit() functions", bench_mem_find_bit },
{ "all", "Run all memory access benchmarks", NULL },
{ NULL, NULL, NULL }
};
@@ -67,14 +75,21 @@ static struct bench futex_benchmarks[] = {
{ NULL, NULL, NULL }
};
-#ifdef HAVE_EVENTFD
+#ifdef HAVE_EVENTFD_SUPPORT
static struct bench epoll_benchmarks[] = {
{ "wait", "Benchmark epoll concurrent epoll_waits", bench_epoll_wait },
{ "ctl", "Benchmark epoll concurrent epoll_ctls", bench_epoll_ctl },
{ "all", "Run all futex benchmarks", NULL },
{ NULL, NULL, NULL }
};
-#endif // HAVE_EVENTFD
+#endif // HAVE_EVENTFD_SUPPORT
+
+static struct bench internals_benchmarks[] = {
+ { "synthesize", "Benchmark perf event synthesis", bench_synthesize },
+ { "kallsyms-parse", "Benchmark kallsyms parsing", bench_kallsyms_parse },
+ { "inject-build-id", "Benchmark build-id injection", bench_inject_build_id },
+ { NULL, NULL, NULL }
+};
struct collection {
const char *name;
@@ -84,14 +99,16 @@ struct collection {
static struct collection collections[] = {
{ "sched", "Scheduler and IPC benchmarks", sched_benchmarks },
+ { "syscall", "System call benchmarks", syscall_benchmarks },
{ "mem", "Memory access benchmarks", mem_benchmarks },
#ifdef HAVE_LIBNUMA_SUPPORT
{ "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks },
#endif
{"futex", "Futex stressing benchmarks", futex_benchmarks },
-#ifdef HAVE_EVENTFD
+#ifdef HAVE_EVENTFD_SUPPORT
{"epoll", "Epoll stressing benchmarks", epoll_benchmarks },
#endif
+ { "internals", "Perf-internals benchmarks", internals_benchmarks },
{ "all", "All benchmarks", NULL },
{ NULL, NULL, NULL }
};
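
The new "syscall" collection's "basic" entry benchmarks plain getppid(2) calls. Below is a standalone sketch of the kind of loop such a benchmark runs; the iteration count and the gettimeofday()-based timing are assumptions for illustration, not the perf bench implementation.

/*
 * Standalone sketch of a getppid(2) micro-benchmark, illustrating the idea
 * behind the new "perf bench syscall basic" entry.  Loop count and timing
 * method are demo assumptions.
 */
#include <stdio.h>
#include <sys/time.h>
#include <unistd.h>

#define LOOPS 10000000

int main(void)
{
	struct timeval start, stop;
	double secs;
	int i;

	gettimeofday(&start, NULL);
	for (i = 0; i < LOOPS; i++)
		getppid();	/* cheap syscall that cannot fail */
	gettimeofday(&stop, NULL);

	secs = (stop.tv_sec - start.tv_sec) +
	       (stop.tv_usec - start.tv_usec) / 1e6;
	printf("%d getppid() calls in %.3f s (%.1f ns/call)\n",
	       LOOPS, secs, secs * 1e9 / LOOPS);
	return 0;
}
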
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 39efa51d7fb3..a25411926e48 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -174,19 +174,19 @@ static int build_id_cache__add_kcore(const char *filename, bool force)
static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi)
{
char sbuild_id[SBUILD_ID_SIZE];
- u8 build_id[BUILD_ID_SIZE];
+ struct build_id bid;
int err;
struct nscookie nsc;
nsinfo__mountns_enter(nsi, &nsc);
- err = filename__read_build_id(filename, &build_id, sizeof(build_id));
+ err = filename__read_build_id(filename, &bid);
nsinfo__mountns_exit(&nsc);
if (err < 0) {
pr_debug("Couldn't read a build-id in %s\n", filename);
return -1;
}
- build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ build_id__sprintf(&bid, sbuild_id);
err = build_id_cache__add_s(sbuild_id, filename, nsi,
false, false);
pr_debug("Adding %s %s: %s\n", sbuild_id, filename,
@@ -196,21 +196,21 @@ static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi)
static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi)
{
- u8 build_id[BUILD_ID_SIZE];
char sbuild_id[SBUILD_ID_SIZE];
+ struct build_id bid;
struct nscookie nsc;
int err;
nsinfo__mountns_enter(nsi, &nsc);
- err = filename__read_build_id(filename, &build_id, sizeof(build_id));
+ err = filename__read_build_id(filename, &bid);
nsinfo__mountns_exit(&nsc);
if (err < 0) {
pr_debug("Couldn't read a build-id in %s\n", filename);
return -1;
}
- build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ build_id__sprintf(&bid, sbuild_id);
err = build_id_cache__remove_s(sbuild_id);
pr_debug("Removing %s %s: %s\n", sbuild_id, filename,
err ? "FAIL" : "Ok");
@@ -274,17 +274,16 @@ static int build_id_cache__purge_all(void)
static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
{
char filename[PATH_MAX];
- u8 build_id[BUILD_ID_SIZE];
+ struct build_id bid;
if (dso__build_id_filename(dso, filename, sizeof(filename), false) &&
- filename__read_build_id(filename, build_id,
- sizeof(build_id)) != sizeof(build_id)) {
+ filename__read_build_id(filename, &bid) == -1) {
if (errno == ENOENT)
return false;
pr_warning("Problems with %s file, consider removing it from the cache\n",
filename);
- } else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) {
+ } else if (memcmp(dso->bid.data, bid.data, bid.size)) {
pr_warning("Problems with %s file, consider removing it from the cache\n",
filename);
}
@@ -300,14 +299,14 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f
static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi)
{
- u8 build_id[BUILD_ID_SIZE];
char sbuild_id[SBUILD_ID_SIZE];
+ struct build_id bid;
struct nscookie nsc;
int err;
nsinfo__mountns_enter(nsi, &nsc);
- err = filename__read_build_id(filename, &build_id, sizeof(build_id));
+ err = filename__read_build_id(filename, &bid);
nsinfo__mountns_exit(&nsc);
if (err < 0) {
pr_debug("Couldn't read a build-id in %s\n", filename);
@@ -315,7 +314,7 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi)
}
err = 0;
- build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ build_id__sprintf(&bid, sbuild_id);
if (build_id_cache__cached(sbuild_id))
err = build_id_cache__remove_s(sbuild_id);
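
These hunks replace the fixed u8 build_id[BUILD_ID_SIZE] buffers with a struct build_id, so the length travels with the bytes and build_id__sprintf() no longer needs a size argument. The following is a minimal standalone sketch of that idea; the field names, the 20-byte maximum and the helper itself are illustrative assumptions, not perf's implementation.

/*
 * Sketch of a build-id container whose length travels with the bytes, so a
 * hex formatter needs no separate size argument.  Names and sizes here are
 * assumptions for illustration.
 */
#include <stdio.h>
#include <stddef.h>

#define BUILD_ID_MAX 20		/* SHA-1 note payload */

struct build_id_example {
	unsigned char data[BUILD_ID_MAX];
	size_t size;
};

static int build_id_example__sprintf(const struct build_id_example *bid, char *bf)
{
	char *p = bf;
	size_t i;

	for (i = 0; i < bid->size; i++)
		p += sprintf(p, "%02x", bid->data[i]);
	return (int)(p - bf);
}

int main(void)
{
	struct build_id_example bid = {
		.data = { 0xde, 0xad, 0xbe, 0xef },
		.size = 4,
	};
	char sbuild_id[2 * BUILD_ID_MAX + 1];

	build_id_example__sprintf(&bid, sbuild_id);
	printf("%s\n", sbuild_id);	/* prints "deadbeef" */
	return 0;
}
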
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 246ac0b4d54f..d5bea5d3cd51 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -95,6 +95,7 @@ struct perf_c2c {
bool use_stdio;
bool stats_only;
bool symbol_full;
+ bool stitch_lbr;
/* HITM shared clines stats */
struct c2c_stats hitm_stats;
@@ -273,6 +274,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
return -1;
}
+ if (c2c.stitch_lbr)
+ al.thread->lbr_stitch_enable = true;
+
ret = sample__resolve_callchain(sample, &callchain_cursor, NULL,
evsel, &al, sysctl_perf_event_max_stack);
if (ret)
@@ -648,45 +652,6 @@ STAT_FN(ld_l2hit)
STAT_FN(ld_llchit)
STAT_FN(rmt_hit)
-static uint64_t llc_miss(struct c2c_stats *stats)
-{
- uint64_t llcmiss;
-
- llcmiss = stats->lcl_dram +
- stats->rmt_dram +
- stats->rmt_hitm +
- stats->rmt_hit;
-
- return llcmiss;
-}
-
-static int
-ld_llcmiss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct hist_entry *he)
-{
- struct c2c_hist_entry *c2c_he;
- int width = c2c_width(fmt, hpp, he->hists);
-
- c2c_he = container_of(he, struct c2c_hist_entry, he);
-
- return scnprintf(hpp->buf, hpp->size, "%*lu", width,
- llc_miss(&c2c_he->stats));
-}
-
-static int64_t
-ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
- struct hist_entry *left, struct hist_entry *right)
-{
- struct c2c_hist_entry *c2c_left;
- struct c2c_hist_entry *c2c_right;
-
- c2c_left = container_of(left, struct c2c_hist_entry, he);
- c2c_right = container_of(right, struct c2c_hist_entry, he);
-
- return (uint64_t) llc_miss(&c2c_left->stats) -
- (uint64_t) llc_miss(&c2c_right->stats);
-}
-
static uint64_t total_records(struct c2c_stats *stats)
{
uint64_t lclmiss, ldcnt, total;
@@ -1324,7 +1289,7 @@ static struct c2c_dimension dim_iaddr = {
};
static struct c2c_dimension dim_tot_hitm = {
- .header = HEADER_SPAN("----- LLC Load Hitm -----", "Total", 2),
+ .header = HEADER_SPAN("------- Load Hitm -------", "Total", 2),
.name = "tot_hitm",
.cmp = tot_hitm_cmp,
.entry = tot_hitm_entry,
@@ -1332,7 +1297,7 @@ static struct c2c_dimension dim_tot_hitm = {
};
static struct c2c_dimension dim_lcl_hitm = {
- .header = HEADER_SPAN_LOW("Lcl"),
+ .header = HEADER_SPAN_LOW("LclHitm"),
.name = "lcl_hitm",
.cmp = lcl_hitm_cmp,
.entry = lcl_hitm_entry,
@@ -1340,7 +1305,7 @@ static struct c2c_dimension dim_lcl_hitm = {
};
static struct c2c_dimension dim_rmt_hitm = {
- .header = HEADER_SPAN_LOW("Rmt"),
+ .header = HEADER_SPAN_LOW("RmtHitm"),
.name = "rmt_hitm",
.cmp = rmt_hitm_cmp,
.entry = rmt_hitm_entry,
@@ -1363,16 +1328,16 @@ static struct c2c_dimension dim_cl_lcl_hitm = {
.width = 7,
};
-static struct c2c_dimension dim_stores = {
- .header = HEADER_SPAN("---- Store Reference ----", "Total", 2),
- .name = "stores",
+static struct c2c_dimension dim_tot_stores = {
+ .header = HEADER_BOTH("Total", "Stores"),
+ .name = "tot_stores",
.cmp = store_cmp,
.entry = store_entry,
.width = 7,
};
static struct c2c_dimension dim_stores_l1hit = {
- .header = HEADER_SPAN_LOW("L1Hit"),
+ .header = HEADER_SPAN("---- Stores ----", "L1Hit", 1),
.name = "stores_l1hit",
.cmp = st_l1hit_cmp,
.entry = st_l1hit_entry,
@@ -1428,7 +1393,7 @@ static struct c2c_dimension dim_ld_l2hit = {
};
static struct c2c_dimension dim_ld_llchit = {
- .header = HEADER_SPAN("-- LLC Load Hit --", "Llc", 1),
+ .header = HEADER_SPAN("- LLC Load Hit --", "LclHit", 1),
.name = "ld_lclhit",
.cmp = ld_llchit_cmp,
.entry = ld_llchit_entry,
@@ -1436,21 +1401,13 @@ static struct c2c_dimension dim_ld_llchit = {
};
static struct c2c_dimension dim_ld_rmthit = {
- .header = HEADER_SPAN_LOW("Rmt"),
+ .header = HEADER_SPAN("- RMT Load Hit --", "RmtHit", 1),
.name = "ld_rmthit",
.cmp = rmt_hit_cmp,
.entry = rmt_hit_entry,
.width = 8,
};
-static struct c2c_dimension dim_ld_llcmiss = {
- .header = HEADER_BOTH("LLC", "Ld Miss"),
- .name = "ld_llcmiss",
- .cmp = ld_llcmiss_cmp,
- .entry = ld_llcmiss_entry,
- .width = 7,
-};
-
static struct c2c_dimension dim_tot_recs = {
.header = HEADER_BOTH("Total", "records"),
.name = "tot_recs",
@@ -1482,7 +1439,7 @@ static struct c2c_dimension dim_percent_hitm = {
};
static struct c2c_dimension dim_percent_rmt_hitm = {
- .header = HEADER_SPAN("----- HITM -----", "Rmt", 1),
+ .header = HEADER_SPAN("----- HITM -----", "RmtHitm", 1),
.name = "percent_rmt_hitm",
.cmp = percent_rmt_hitm_cmp,
.entry = percent_rmt_hitm_entry,
@@ -1491,7 +1448,7 @@ static struct c2c_dimension dim_percent_rmt_hitm = {
};
static struct c2c_dimension dim_percent_lcl_hitm = {
- .header = HEADER_SPAN_LOW("Lcl"),
+ .header = HEADER_SPAN_LOW("LclHitm"),
.name = "percent_lcl_hitm",
.cmp = percent_lcl_hitm_cmp,
.entry = percent_lcl_hitm_entry,
@@ -1644,7 +1601,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_rmt_hitm,
&dim_cl_lcl_hitm,
&dim_cl_rmt_hitm,
- &dim_stores,
+ &dim_tot_stores,
&dim_stores_l1hit,
&dim_stores_l1miss,
&dim_cl_stores_l1hit,
@@ -1654,7 +1611,6 @@ static struct c2c_dimension *dimensions[] = {
&dim_ld_l2hit,
&dim_ld_llchit,
&dim_ld_rmthit,
- &dim_ld_llcmiss,
&dim_tot_recs,
&dim_tot_loads,
&dim_percent_hitm,
@@ -1705,7 +1661,7 @@ static struct c2c_dimension *get_dimension(const char *name)
if (!strcmp(dim->name, name))
return dim;
- };
+ }
return NULL;
}
@@ -1921,7 +1877,7 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
FILTER_HITM(tot_hitm);
default:
break;
- };
+ }
#undef FILTER_HITM
@@ -2255,8 +2211,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session)
fprintf(out, "=================================================\n");
evlist__for_each_entry(evlist, evsel) {
- fprintf(out, "%-36s: %s\n", first ? " Events" : "",
- perf_evsel__name(evsel));
+ fprintf(out, "%-36s: %s\n", first ? " Events" : "", evsel__name(evsel));
first = false;
}
fprintf(out, " Cachelines sort on : %s HITMs\n",
@@ -2579,7 +2534,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
static int setup_callchain(struct evlist *evlist)
{
- u64 sample_type = perf_evlist__combined_sample_type(evlist);
+ u64 sample_type = evlist__combined_sample_type(evlist);
enum perf_call_graph_mode mode = CALLCHAIN_NONE;
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
@@ -2601,6 +2556,12 @@ static int setup_callchain(struct evlist *evlist)
}
}
+ if (c2c.stitch_lbr && (mode != CALLCHAIN_LBR)) {
+ ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
+ "Please apply --call-graph lbr when recording.\n");
+ c2c.stitch_lbr = false;
+ }
+
callchain_param.record_mode = mode;
callchain_param.min_percent = 0;
return 0;
@@ -2752,6 +2713,8 @@ static int perf_c2c__report(int argc, const char **argv)
OPT_STRING('c', "coalesce", &coalesce, "coalesce fields",
"coalesce fields: pid,tid,iaddr,dso"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
+ OPT_BOOLEAN(0, "stitch-lbr", &c2c.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
OPT_PARENT(c2c_options),
OPT_END()
};
@@ -2835,15 +2798,16 @@ static int perf_c2c__report(int argc, const char **argv)
"dcacheline,"
"dcacheline_node,"
"dcacheline_count,"
- "tot_recs,"
"percent_hitm,"
"tot_hitm,lcl_hitm,rmt_hitm,"
- "stores,stores_l1hit,stores_l1miss,"
- "dram_lcl,dram_rmt,"
- "ld_llcmiss,"
+ "tot_recs,"
"tot_loads,"
+ "tot_stores,"
+ "stores_l1hit,stores_l1miss,"
"ld_fbhit,ld_l1hit,ld_l2hit,"
- "ld_lclhit,ld_rmthit",
+ "ld_lclhit,lcl_hitm,"
+ "ld_rmthit,rmt_hitm,"
+ "dram_lcl,dram_rmt",
c2c.display == DISPLAY_TOT ? "tot_hitm" :
c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm"
);
@@ -2876,8 +2840,15 @@ static int parse_record_events(const struct option *opt,
{
bool *event_set = (bool *) opt->value;
+ if (!strcmp(str, "list")) {
+ perf_mem_events__list();
+ exit(0);
+ }
+ if (perf_mem_events__parse(str))
+ exit(-1);
+
*event_set = true;
- return perf_mem_events__parse(str);
+ return 0;
}
@@ -2898,7 +2869,7 @@ static int perf_c2c__record(int argc, const char **argv)
bool event_set = false;
struct option options[] = {
OPT_CALLBACK('e', "event", &event_set, "event",
- "event selector. Use 'perf mem record -e list' to list available events",
+ "event selector. Use 'perf c2c record -e list' to list available events",
parse_record_events),
OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"),
OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"),
@@ -2947,7 +2918,7 @@ static int perf_c2c__record(int argc, const char **argv)
rec_argv[i++] = "-e";
rec_argv[i++] = perf_mem_events__name(j);
- };
+ }
if (all_user)
rec_argv[i++] = "--all-user";
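
parse_record_events() now special-cases the literal argument "list" (print the available memory events and exit) and exits outright when parsing fails instead of returning the error. The sketch below shows only that control-flow shape; the two helper functions are stand-ins, not perf_mem_events__list()/__parse().

/*
 * Illustrative shape of an option callback that treats the argument "list"
 * as "print what is available and exit".  The helpers are stand-ins; only
 * the control flow mirrors the hunk above.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void list_available_events(void)
{
	puts("ldlat-loads");
	puts("ldlat-stores");
}

static int parse_events(const char *str)
{
	return strcmp(str, "ldlat-loads") && strcmp(str, "ldlat-stores") ? -1 : 0;
}

static int parse_record_events_cb(const char *str, int *event_set)
{
	if (!strcmp(str, "list")) {
		list_available_events();
		exit(0);
	}
	if (parse_events(str))
		exit(-1);	/* hard error: nothing sensible to record */

	*event_set = 1;
	return 0;
}

int main(int argc, char **argv)
{
	int event_set = 0;

	if (argc > 1)
		parse_record_events_cb(argv[1], &event_set);
	printf("event_set=%d\n", event_set);
	return 0;
}
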
diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c
index ca2fb44874e4..8d23b8d6ee8e 100644
--- a/tools/perf/builtin-data.c
+++ b/tools/perf/builtin-data.c
@@ -65,6 +65,7 @@ static int cmd_data_convert(int argc, const char **argv)
OPT_STRING('i', "input", &input_name, "file", "input file name"),
#ifdef HAVE_LIBBABELTRACE_SUPPORT
OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"),
+ OPT_BOOLEAN(0, "tod", &opts.tod, "Convert time to wall clock time"),
#endif
OPT_BOOLEAN('f', "force", &opts.force, "don't complain, do it"),
OPT_BOOLEAN(0, "all", &opts.all, "Convert all events"),
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index c94a002f295e..584e2e1a3793 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -25,6 +25,7 @@
#include "util/map.h"
#include "util/spark.h"
#include "util/block-info.h"
+#include "util/stream.h"
#include <linux/err.h>
#include <linux/zalloc.h>
#include <subcmd/pager.h>
@@ -42,6 +43,7 @@ struct perf_diff {
int range_size;
int range_num;
bool has_br_stack;
+ bool stream;
};
/* Diff command specific HPP columns. */
@@ -72,6 +74,7 @@ struct data__file {
struct perf_data data;
int idx;
struct hists *hists;
+ struct evlist_streams *evlist_streams;
struct diff_hpp_fmt fmt[PERF_HPP_DIFF__MAX_INDEX];
};
@@ -106,6 +109,7 @@ enum {
COMPUTE_DELTA_ABS,
COMPUTE_CYCLES,
COMPUTE_MAX,
+ COMPUTE_STREAM, /* After COMPUTE_MAX to avoid use current compute arrays */
};
const char *compute_names[COMPUTE_MAX] = {
@@ -393,6 +397,11 @@ static int diff__process_sample_event(struct perf_tool *tool,
struct perf_diff *pdiff = container_of(tool, struct perf_diff, tool);
struct addr_location al;
struct hists *hists = evsel__hists(evsel);
+ struct hist_entry_iter iter = {
+ .evsel = evsel,
+ .sample = sample,
+ .ops = &hist_iter_normal,
+ };
int ret = -1;
if (perf_time__ranges_skip_sample(pdiff->ptime_range, pdiff->range_num,
@@ -411,14 +420,8 @@ static int diff__process_sample_event(struct perf_tool *tool,
goto out_put;
}
- if (compute != COMPUTE_CYCLES) {
- if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample,
- true)) {
- pr_warning("problem incrementing symbol period, "
- "skipping event\n");
- goto out_put;
- }
- } else {
+ switch (compute) {
+ case COMPUTE_CYCLES:
if (!hists__add_entry_ops(hists, &block_hist_ops, &al, NULL,
NULL, NULL, sample, true)) {
pr_warning("problem incrementing symbol period, "
@@ -428,6 +431,23 @@ static int diff__process_sample_event(struct perf_tool *tool,
hist__account_cycles(sample->branch_stack, &al, sample, false,
NULL);
+ break;
+
+ case COMPUTE_STREAM:
+ if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+ NULL)) {
+ pr_debug("problem adding hist entry, skipping event\n");
+ goto out_put;
+ }
+ break;
+
+ default:
+ if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample,
+ true)) {
+ pr_warning("problem incrementing symbol period, "
+ "skipping event\n");
+ goto out_put;
+ }
}
/*
@@ -467,7 +487,7 @@ static struct evsel *evsel_match(struct evsel *evsel,
struct evsel *e;
evlist__for_each_entry(evlist, e) {
- if (perf_evsel__match2(evsel, e))
+ if (evsel__match2(evsel, e))
return e;
}
@@ -981,7 +1001,7 @@ static void data_process(void)
if (!quiet) {
fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n",
- perf_evsel__name(evsel_base));
+ evsel__name(evsel_base));
}
first = false;
@@ -990,16 +1010,61 @@ static void data_process(void)
data__fprintf();
/* Don't sort callchain for perf diff */
- perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN);
+ evsel__reset_sample_bit(evsel_base, CALLCHAIN);
hists__process(hists_base);
}
}
+static int process_base_stream(struct data__file *data_base,
+ struct data__file *data_pair,
+ const char *title __maybe_unused)
+{
+ struct evlist *evlist_base = data_base->session->evlist;
+ struct evlist *evlist_pair = data_pair->session->evlist;
+ struct evsel *evsel_base, *evsel_pair;
+ struct evsel_streams *es_base, *es_pair;
+
+ evlist__for_each_entry(evlist_base, evsel_base) {
+ evsel_pair = evsel_match(evsel_base, evlist_pair);
+ if (!evsel_pair)
+ continue;
+
+ es_base = evsel_streams__entry(data_base->evlist_streams,
+ evsel_base->idx);
+ if (!es_base)
+ return -1;
+
+ es_pair = evsel_streams__entry(data_pair->evlist_streams,
+ evsel_pair->idx);
+ if (!es_pair)
+ return -1;
+
+ evsel_streams__match(es_base, es_pair);
+ evsel_streams__report(es_base, es_pair);
+ }
+
+ return 0;
+}
+
+static void stream_process(void)
+{
+ /*
+ * Stream comparison only supports two data files.
+ * perf.data.old and perf.data. data__files[0] is perf.data.old,
+ * data__files[1] is perf.data.
+ */
+ process_base_stream(&data__files[0], &data__files[1],
+ "# Output based on old perf data:\n#\n");
+}
+
static void data__free(struct data__file *d)
{
int col;
+ if (d->evlist_streams)
+ evlist_streams__delete(d->evlist_streams);
+
for (col = 0; col < PERF_HPP_DIFF__MAX_INDEX; col++) {
struct diff_hpp_fmt *fmt = &d->fmt[col];
@@ -1153,9 +1218,19 @@ static int __cmd_diff(void)
if (pdiff.ptime_range)
zfree(&pdiff.ptime_range);
+
+ if (compute == COMPUTE_STREAM) {
+ d->evlist_streams = evlist__create_streams(
+ d->session->evlist, 5);
+ if (!d->evlist_streams)
+ goto out_delete;
+ }
}
- data_process();
+ if (compute == COMPUTE_STREAM)
+ stream_process();
+ else
+ data_process();
out_delete:
data__for_each_file(i, d) {
@@ -1228,6 +1303,8 @@ static const struct option options[] = {
"only consider symbols in these pids"),
OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
"only consider symbols in these tids"),
+ OPT_BOOLEAN(0, "stream", &pdiff.stream,
+ "Enable hot streams comparison."),
OPT_END()
};
@@ -1562,7 +1639,7 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair,
default:
BUG_ON(1);
- };
+ }
}
static void
@@ -1887,6 +1964,9 @@ int cmd_diff(int argc, const char **argv)
if (cycles_hist && (compute != COMPUTE_CYCLES))
usage_with_options(diff_usage, options);
+ if (pdiff.stream)
+ compute = COMPUTE_STREAM;
+
symbol__annotation_init();
if (symbol__init(NULL) < 0)
@@ -1898,13 +1978,26 @@ int cmd_diff(int argc, const char **argv)
if (check_file_brstack() < 0)
return -1;
- if (compute == COMPUTE_CYCLES && !pdiff.has_br_stack)
+ if ((compute == COMPUTE_CYCLES || compute == COMPUTE_STREAM)
+ && !pdiff.has_br_stack) {
return -1;
+ }
- if (ui_init() < 0)
- return -1;
+ if (compute == COMPUTE_STREAM) {
+ symbol_conf.show_branchflag_count = true;
+ symbol_conf.disable_add2line_warn = true;
+ callchain_param.mode = CHAIN_FLAT;
+ callchain_param.key = CCKEY_SRCLINE;
+ callchain_param.branch_callstack = 1;
+ symbol_conf.use_callchain = true;
+ callchain_register_param(&callchain_param);
+ sort_order = "srcline,symbol,dso";
+ } else {
+ if (ui_init() < 0)
+ return -1;
- sort__mode = SORT_MODE__DIFF;
+ sort__mode = SORT_MODE__DIFF;
+ }
if (setup_sorting(NULL) < 0)
usage_with_options(diff_usage, options);
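
perf diff --stream switches the compute mode to COMPUTE_STREAM: each data file gets per-evsel "hot streams" (top callchains), which are then matched and reported pairwise. The toy model below illustrates one plausible matching idea, pairing streams by longest common prefix; all types, sizes and the scoring are assumptions and not the util/stream.c API.

/*
 * Toy model of "hot streams comparison": a stream is a hot callchain with a
 * hit count, and streams from the old and new data are paired by longest
 * common prefix.  Everything here is an assumption for illustration.
 */
#include <stdio.h>

#define MAX_DEPTH 8

struct toy_stream {
	unsigned long entries[MAX_DEPTH];
	int depth;
	unsigned long hits;
};

static int common_prefix(const struct toy_stream *a, const struct toy_stream *b)
{
	int n = a->depth < b->depth ? a->depth : b->depth;
	int i;

	for (i = 0; i < n; i++)
		if (a->entries[i] != b->entries[i])
			break;
	return i;
}

static const struct toy_stream *best_match(const struct toy_stream *base,
					   const struct toy_stream *pair, int npair)
{
	const struct toy_stream *best = NULL;
	int best_len = 0, i;

	for (i = 0; i < npair; i++) {
		int len = common_prefix(base, &pair[i]);

		if (len > best_len) {
			best_len = len;
			best = &pair[i];
		}
	}
	return best;
}

int main(void)
{
	struct toy_stream base[] = { { { 1, 2, 3 }, 3, 100 } };
	struct toy_stream cur[]  = { { { 1, 2, 4 }, 3, 250 }, { { 9 }, 1, 10 } };
	const struct toy_stream *m = best_match(&base[0], cur, 2);

	if (m)
		printf("matched stream, hits %lu -> %lu\n", base[0].hits, m->hits);
	return 0;
}
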
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index 440501994931..98e992801251 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -34,7 +34,7 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
return PTR_ERR(session);
evlist__for_each_entry(session->evlist, pos) {
- perf_evsel__fprintf(pos, details, stdout);
+ evsel__fprintf(pos, details, stdout);
if (pos->core.attr.type == PERF_TYPE_TRACEPOINT)
has_tracepoint = true;
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index d5adc417a4ca..9366fad591dc 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -3,6 +3,7 @@
* builtin-ftrace.c
*
* Copyright (c) 2013 LG Electronics, Namhyung Kim <namhyung@kernel.org>
+ * Copyright (c) 2020 Changbin Du <changbin.du@gmail.com>, significant enhancement.
*/
#include "builtin.h"
@@ -24,8 +25,11 @@
#include "target.h"
#include "cpumap.h"
#include "thread_map.h"
+#include "strfilter.h"
#include "util/cap.h"
#include "util/config.h"
+#include "util/units.h"
+#include "util/parse-sublevel-options.h"
#define DEFAULT_TRACER "function_graph"
@@ -38,6 +42,15 @@ struct perf_ftrace {
struct list_head graph_funcs;
struct list_head nograph_funcs;
int graph_depth;
+ unsigned long percpu_buffer_size;
+ bool inherit;
+ int func_stack_trace;
+ int func_irq_info;
+ int graph_nosleep_time;
+ int graph_noirqs;
+ int graph_verbose;
+ int graph_thresh;
+ unsigned int initial_delay;
};
struct filter_entry {
@@ -45,6 +58,7 @@ struct filter_entry {
char name[];
};
+static volatile int workload_exec_errno;
static bool done;
static void sig_handler(int sig __maybe_unused)
@@ -63,7 +77,7 @@ static void ftrace__workload_exec_failed_signal(int signo __maybe_unused,
siginfo_t *info __maybe_unused,
void *ucontext __maybe_unused)
{
- /* workload_exec_errno = info->si_value.sival_int; */
+ workload_exec_errno = info->si_value.sival_int;
done = true;
}
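
The handler now stores the errno delivered with the signal instead of only setting done, so __cmd_ftrace() can report why the workload failed to exec. The standalone demo below shows the underlying POSIX mechanism: the child posts an errno via sigqueue() and the parent's SA_SIGINFO handler reads it from si_value.sival_int. The signal number and errno value used here are arbitrary demo choices.

/*
 * Demo of passing an errno from a forked child back to the parent through
 * sigqueue()/SA_SIGINFO, the mechanism the handler above relies on.
 */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>

static volatile int workload_exec_errno;

static void exec_failed_signal(int sig, siginfo_t *info, void *ucontext)
{
	(void)sig;
	(void)ucontext;
	workload_exec_errno = info->si_value.sival_int;
}

int main(void)
{
	struct sigaction act = { .sa_sigaction = exec_failed_signal,
				 .sa_flags = SA_SIGINFO };
	pid_t child;

	sigaction(SIGUSR1, &act, NULL);

	child = fork();
	if (child == 0) {
		/* pretend execvp() failed with ENOENT */
		union sigval val = { .sival_int = ENOENT };

		sigqueue(getppid(), SIGUSR1, val);
		_exit(127);
	}

	waitpid(child, NULL, 0);
	if (workload_exec_errno)
		printf("workload failed: %s\n", strerror(workload_exec_errno));
	return 0;
}
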
@@ -127,9 +141,119 @@ static int append_tracing_file(const char *name, const char *val)
return __write_tracing_file(name, val, true);
}
+static int read_tracing_file_to_stdout(const char *name)
+{
+ char buf[4096];
+ char *file;
+ int fd;
+ int ret = -1;
+
+ file = get_tracing_file(name);
+ if (!file) {
+ pr_debug("cannot get tracing file: %s\n", name);
+ return -1;
+ }
+
+ fd = open(file, O_RDONLY);
+ if (fd < 0) {
+ pr_debug("cannot open tracing file: %s: %s\n",
+ name, str_error_r(errno, buf, sizeof(buf)));
+ goto out;
+ }
+
+ /* read contents to stdout */
+ while (true) {
+ int n = read(fd, buf, sizeof(buf));
+ if (n == 0)
+ break;
+ else if (n < 0)
+ goto out_close;
+
+ if (fwrite(buf, n, 1, stdout) != 1)
+ goto out_close;
+ }
+ ret = 0;
+
+out_close:
+ close(fd);
+out:
+ put_tracing_file(file);
+ return ret;
+}
+
+static int read_tracing_file_by_line(const char *name,
+ void (*cb)(char *str, void *arg),
+ void *cb_arg)
+{
+ char *line = NULL;
+ size_t len = 0;
+ char *file;
+ FILE *fp;
+
+ file = get_tracing_file(name);
+ if (!file) {
+ pr_debug("cannot get tracing file: %s\n", name);
+ return -1;
+ }
+
+ fp = fopen(file, "r");
+ if (fp == NULL) {
+ pr_debug("cannot open tracing file: %s\n", name);
+ put_tracing_file(file);
+ return -1;
+ }
+
+ while (getline(&line, &len, fp) != -1) {
+ cb(line, cb_arg);
+ }
+
+ if (line)
+ free(line);
+
+ fclose(fp);
+ put_tracing_file(file);
+ return 0;
+}
+
+static int write_tracing_file_int(const char *name, int value)
+{
+ char buf[16];
+
+ snprintf(buf, sizeof(buf), "%d", value);
+ if (write_tracing_file(name, buf) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int write_tracing_option_file(const char *name, const char *val)
+{
+ char *file;
+ int ret;
+
+ if (asprintf(&file, "options/%s", name) < 0)
+ return -1;
+
+ ret = __write_tracing_file(file, val, false);
+ free(file);
+ return ret;
+}
+
static int reset_tracing_cpu(void);
static void reset_tracing_filters(void);
+static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused)
+{
+ write_tracing_option_file("function-fork", "0");
+ write_tracing_option_file("func_stack_trace", "0");
+ write_tracing_option_file("sleep-time", "1");
+ write_tracing_option_file("funcgraph-irqs", "1");
+ write_tracing_option_file("funcgraph-proc", "0");
+ write_tracing_option_file("funcgraph-abstime", "0");
+ write_tracing_option_file("latency-format", "0");
+ write_tracing_option_file("irq-info", "0");
+}
+
static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused)
{
if (write_tracing_file("tracing_on", "0") < 0)
@@ -147,7 +271,11 @@ static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused)
if (write_tracing_file("max_graph_depth", "0") < 0)
return -1;
+ if (write_tracing_file("tracing_thresh", "0") < 0)
+ return -1;
+
reset_tracing_filters();
+ reset_tracing_options(ftrace);
return 0;
}
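
The new helpers build paths under the tracefs options/ directory and write plain "0"/"1" strings (or decimal integers) into them. A standalone sketch of that pattern follows; the hardcoded /sys/kernel/tracing mount point is a demo assumption, since perf resolves the tracing directory at runtime, and the option names simply mirror reset_tracing_options().

/*
 * Standalone sketch of toggling a tracefs option: build "options/<name>"
 * and write "0" or "1" into it.  The mount point is hardcoded for the demo;
 * perf resolves the tracing directory dynamically.  Needs root.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_tracing_option(const char *name, const char *val)
{
	char path[256];
	int fd, ret = 0;

	snprintf(path, sizeof(path), "/sys/kernel/tracing/options/%s", name);

	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;

	if (write(fd, val, strlen(val)) < 0)
		ret = -1;

	close(fd);
	return ret;
}

int main(void)
{
	/* mirrors two of the reset_tracing_options() defaults */
	if (write_tracing_option("funcgraph-irqs", "1") < 0)
		perror("funcgraph-irqs");
	if (write_tracing_option("function-fork", "0") < 0)
		perror("function-fork");
	return 0;
}
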
@@ -203,6 +331,28 @@ static int set_tracing_cpu(struct perf_ftrace *ftrace)
return set_tracing_cpumask(cpumap);
}
+static int set_tracing_func_stack_trace(struct perf_ftrace *ftrace)
+{
+ if (!ftrace->func_stack_trace)
+ return 0;
+
+ if (write_tracing_option_file("func_stack_trace", "1") < 0)
+ return -1;
+
+ return 0;
+}
+
+static int set_tracing_func_irqinfo(struct perf_ftrace *ftrace)
+{
+ if (!ftrace->func_irq_info)
+ return 0;
+
+ if (write_tracing_option_file("irq-info", "1") < 0)
+ return -1;
+
+ return 0;
+}
+
static int reset_tracing_cpu(void)
{
struct perf_cpu_map *cpumap = perf_cpu_map__new(NULL);
@@ -257,8 +407,6 @@ static void reset_tracing_filters(void)
static int set_tracing_depth(struct perf_ftrace *ftrace)
{
- char buf[16];
-
if (ftrace->graph_depth == 0)
return 0;
@@ -267,14 +415,156 @@ static int set_tracing_depth(struct perf_ftrace *ftrace)
return -1;
}
- snprintf(buf, sizeof(buf), "%d", ftrace->graph_depth);
+ if (write_tracing_file_int("max_graph_depth", ftrace->graph_depth) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int set_tracing_percpu_buffer_size(struct perf_ftrace *ftrace)
+{
+ int ret;
+
+ if (ftrace->percpu_buffer_size == 0)
+ return 0;
+
+ ret = write_tracing_file_int("buffer_size_kb",
+ ftrace->percpu_buffer_size / 1024);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int set_tracing_trace_inherit(struct perf_ftrace *ftrace)
+{
+ if (!ftrace->inherit)
+ return 0;
- if (write_tracing_file("max_graph_depth", buf) < 0)
+ if (write_tracing_option_file("function-fork", "1") < 0)
return -1;
return 0;
}
+static int set_tracing_sleep_time(struct perf_ftrace *ftrace)
+{
+ if (!ftrace->graph_nosleep_time)
+ return 0;
+
+ if (write_tracing_option_file("sleep-time", "0") < 0)
+ return -1;
+
+ return 0;
+}
+
+static int set_tracing_funcgraph_irqs(struct perf_ftrace *ftrace)
+{
+ if (!ftrace->graph_noirqs)
+ return 0;
+
+ if (write_tracing_option_file("funcgraph-irqs", "0") < 0)
+ return -1;
+
+ return 0;
+}
+
+static int set_tracing_funcgraph_verbose(struct perf_ftrace *ftrace)
+{
+ if (!ftrace->graph_verbose)
+ return 0;
+
+ if (write_tracing_option_file("funcgraph-proc", "1") < 0)
+ return -1;
+
+ if (write_tracing_option_file("funcgraph-abstime", "1") < 0)
+ return -1;
+
+ if (write_tracing_option_file("latency-format", "1") < 0)
+ return -1;
+
+ return 0;
+}
+
+static int set_tracing_thresh(struct perf_ftrace *ftrace)
+{
+ int ret;
+
+ if (ftrace->graph_thresh == 0)
+ return 0;
+
+ ret = write_tracing_file_int("tracing_thresh", ftrace->graph_thresh);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int set_tracing_options(struct perf_ftrace *ftrace)
+{
+ if (set_tracing_pid(ftrace) < 0) {
+ pr_err("failed to set ftrace pid\n");
+ return -1;
+ }
+
+ if (set_tracing_cpu(ftrace) < 0) {
+ pr_err("failed to set tracing cpumask\n");
+ return -1;
+ }
+
+ if (set_tracing_func_stack_trace(ftrace) < 0) {
+ pr_err("failed to set tracing option func_stack_trace\n");
+ return -1;
+ }
+
+ if (set_tracing_func_irqinfo(ftrace) < 0) {
+ pr_err("failed to set tracing option irq-info\n");
+ return -1;
+ }
+
+ if (set_tracing_filters(ftrace) < 0) {
+ pr_err("failed to set tracing filters\n");
+ return -1;
+ }
+
+ if (set_tracing_depth(ftrace) < 0) {
+ pr_err("failed to set graph depth\n");
+ return -1;
+ }
+
+ if (set_tracing_percpu_buffer_size(ftrace) < 0) {
+ pr_err("failed to set tracing per-cpu buffer size\n");
+ return -1;
+ }
+
+ if (set_tracing_trace_inherit(ftrace) < 0) {
+ pr_err("failed to set tracing option function-fork\n");
+ return -1;
+ }
+
+ if (set_tracing_sleep_time(ftrace) < 0) {
+ pr_err("failed to set tracing option sleep-time\n");
+ return -1;
+ }
+
+ if (set_tracing_funcgraph_irqs(ftrace) < 0) {
+ pr_err("failed to set tracing option funcgraph-irqs\n");
+ return -1;
+ }
+
+ if (set_tracing_funcgraph_verbose(ftrace) < 0) {
+ pr_err("failed to set tracing option funcgraph-proc/funcgraph-abstime\n");
+ return -1;
+ }
+
+ if (set_tracing_thresh(ftrace) < 0) {
+ pr_err("failed to set tracing thresh\n");
+ return -1;
+ }
+
+ return 0;
+}
+
static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
{
char *trace_file;
@@ -284,10 +574,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
.events = POLLIN,
};
- if (!perf_cap__capable(CAP_SYS_ADMIN)) {
+ if (!(perf_cap__capable(CAP_PERFMON) ||
+ perf_cap__capable(CAP_SYS_ADMIN))) {
pr_err("ftrace only works for %s!\n",
#ifdef HAVE_LIBCAP_SUPPORT
- "users with the SYS_ADMIN capability"
+ "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
#else
"root"
#endif
@@ -315,25 +606,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
goto out;
}
- if (set_tracing_pid(ftrace) < 0) {
- pr_err("failed to set ftrace pid\n");
- goto out_reset;
- }
-
- if (set_tracing_cpu(ftrace) < 0) {
- pr_err("failed to set tracing cpumask\n");
- goto out_reset;
- }
-
- if (set_tracing_filters(ftrace) < 0) {
- pr_err("failed to set tracing filters\n");
- goto out_reset;
- }
-
- if (set_tracing_depth(ftrace) < 0) {
- pr_err("failed to set graph depth\n");
+ if (set_tracing_options(ftrace) < 0)
goto out_reset;
- }
if (write_tracing_file("current_tracer", ftrace->tracer) < 0) {
pr_err("failed to set current_tracer to %s\n", ftrace->tracer);
@@ -360,13 +634,26 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
fcntl(trace_fd, F_SETFL, O_NONBLOCK);
pollfd.fd = trace_fd;
- if (write_tracing_file("tracing_on", "1") < 0) {
- pr_err("can't enable tracing\n");
- goto out_close_fd;
+ /* display column headers */
+ read_tracing_file_to_stdout("trace");
+
+ if (!ftrace->initial_delay) {
+ if (write_tracing_file("tracing_on", "1") < 0) {
+ pr_err("can't enable tracing\n");
+ goto out_close_fd;
+ }
}
perf_evlist__start_workload(ftrace->evlist);
+ if (ftrace->initial_delay) {
+ usleep(ftrace->initial_delay * 1000);
+ if (write_tracing_file("tracing_on", "1") < 0) {
+ pr_err("can't enable tracing\n");
+ goto out_close_fd;
+ }
+ }
+
while (!done) {
if (poll(&pollfd, 1, -1) < 0)
break;
@@ -382,6 +669,14 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
write_tracing_file("tracing_on", "0");
+ if (workload_exec_errno) {
+ const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
+ /* flush stdout first so below error msg appears at the end. */
+ fflush(stdout);
+ pr_err("workload failed: %s\n", emsg);
+ goto out_close_fd;
+ }
+
/* read remaining buffer contents */
while (true) {
int n = read(trace_fd, buf, sizeof(buf));
@@ -396,7 +691,7 @@ out_close_fd:
out_reset:
reset_tracing_files(ftrace);
out:
- return done ? 0 : -1;
+ return (done && !workload_exec_errno) ? 0 : -1;
}
static int perf_ftrace_config(const char *var, const char *value, void *cb)
@@ -419,6 +714,46 @@ static int perf_ftrace_config(const char *var, const char *value, void *cb)
return -1;
}
+static void list_function_cb(char *str, void *arg)
+{
+ struct strfilter *filter = (struct strfilter *)arg;
+
+ if (strfilter__compare(filter, str))
+ printf("%s", str);
+}
+
+static int opt_list_avail_functions(const struct option *opt __maybe_unused,
+ const char *str, int unset)
+{
+ struct strfilter *filter;
+ const char *err = NULL;
+ int ret;
+
+ if (unset || !str)
+ return -1;
+
+ filter = strfilter__new(str, &err);
+ if (!filter)
+ return err ? -EINVAL : -ENOMEM;
+
+ ret = strfilter__or(filter, str, &err);
+ if (ret == -EINVAL) {
+ pr_err("Filter parse error at %td.\n", err - str + 1);
+ pr_err("Source: \"%s\"\n", str);
+ pr_err(" %*c\n", (int)(err - str + 1), '^');
+ strfilter__delete(filter);
+ return ret;
+ }
+
+ ret = read_tracing_file_by_line("available_filter_functions",
+ list_function_cb, filter);
+ strfilter__delete(filter);
+ if (ret < 0)
+ return ret;
+
+ exit(0);
+}
+
static int parse_filter_func(const struct option *opt, const char *str,
int unset __maybe_unused)
{
@@ -445,6 +780,99 @@ static void delete_filter_func(struct list_head *head)
}
}
+static int parse_buffer_size(const struct option *opt,
+ const char *str, int unset)
+{
+ unsigned long *s = (unsigned long *)opt->value;
+ static struct parse_tag tags_size[] = {
+ { .tag = 'B', .mult = 1 },
+ { .tag = 'K', .mult = 1 << 10 },
+ { .tag = 'M', .mult = 1 << 20 },
+ { .tag = 'G', .mult = 1 << 30 },
+ { .tag = 0 },
+ };
+ unsigned long val;
+
+ if (unset) {
+ *s = 0;
+ return 0;
+ }
+
+ val = parse_tag_value(str, tags_size);
+ if (val != (unsigned long) -1) {
+ if (val < 1024) {
+ pr_err("buffer size too small, must larger than 1KB.");
+ return -1;
+ }
+ *s = val;
+ return 0;
+ }
+
+ return -1;
+}
+
+static int parse_func_tracer_opts(const struct option *opt,
+ const char *str, int unset)
+{
+ int ret;
+ struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value;
+ struct sublevel_option func_tracer_opts[] = {
+ { .name = "call-graph", .value_ptr = &ftrace->func_stack_trace },
+ { .name = "irq-info", .value_ptr = &ftrace->func_irq_info },
+ { .name = NULL, }
+ };
+
+ if (unset)
+ return 0;
+
+ ret = perf_parse_sublevel_options(str, func_tracer_opts);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int parse_graph_tracer_opts(const struct option *opt,
+ const char *str, int unset)
+{
+ int ret;
+ struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value;
+ struct sublevel_option graph_tracer_opts[] = {
+ { .name = "nosleep-time", .value_ptr = &ftrace->graph_nosleep_time },
+ { .name = "noirqs", .value_ptr = &ftrace->graph_noirqs },
+ { .name = "verbose", .value_ptr = &ftrace->graph_verbose },
+ { .name = "thresh", .value_ptr = &ftrace->graph_thresh },
+ { .name = "depth", .value_ptr = &ftrace->graph_depth },
+ { .name = NULL, }
+ };
+
+ if (unset)
+ return 0;
+
+ ret = perf_parse_sublevel_options(str, graph_tracer_opts);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static void select_tracer(struct perf_ftrace *ftrace)
+{
+ bool graph = !list_empty(&ftrace->graph_funcs) ||
+ !list_empty(&ftrace->nograph_funcs);
+ bool func = !list_empty(&ftrace->filters) ||
+ !list_empty(&ftrace->notrace);
+
+ /* The function_graph has priority over function tracer. */
+ if (graph)
+ ftrace->tracer = "function_graph";
+ else if (func)
+ ftrace->tracer = "function";
+ /* Otherwise, the default tracer is used. */
+
+ pr_debug("%s tracer is used\n", ftrace->tracer);
+}
+
int cmd_ftrace(int argc, const char **argv)
{
int ret;
@@ -459,25 +887,43 @@ int cmd_ftrace(int argc, const char **argv)
};
const struct option ftrace_options[] = {
OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
- "tracer to use: function_graph(default) or function"),
+ "Tracer to use: function_graph(default) or function"),
+ OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
+ "Show available functions to filter",
+ opt_list_avail_functions, "*"),
OPT_STRING('p', "pid", &ftrace.target.pid, "pid",
- "trace on existing process id"),
+ "Trace on existing process id"),
+ /* TODO: Add short option -t after -t/--tracer can be removed. */
+ OPT_STRING(0, "tid", &ftrace.target.tid, "tid",
+ "Trace on existing thread id (exclusive to --pid)"),
OPT_INCR('v', "verbose", &verbose,
- "be more verbose"),
+ "Be more verbose"),
OPT_BOOLEAN('a', "all-cpus", &ftrace.target.system_wide,
- "system-wide collection from all CPUs"),
+ "System-wide collection from all CPUs"),
OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
- "list of cpus to monitor"),
+ "List of cpus to monitor"),
OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
- "trace given functions only", parse_filter_func),
+ "Trace given functions using function tracer",
+ parse_filter_func),
OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func",
- "do not trace given functions", parse_filter_func),
+ "Do not trace given functions", parse_filter_func),
+ OPT_CALLBACK(0, "func-opts", &ftrace, "options",
+ "Function tracer options, available options: call-graph,irq-info",
+ parse_func_tracer_opts),
OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func",
- "Set graph filter on given functions", parse_filter_func),
+ "Trace given functions using function_graph tracer",
+ parse_filter_func),
OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func",
"Set nograph filter on given functions", parse_filter_func),
- OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth,
- "Max depth for function graph tracer"),
+ OPT_CALLBACK(0, "graph-opts", &ftrace, "options",
+ "Graph tracer options, available options: nosleep-time,noirqs,verbose,thresh=<n>,depth=<n>",
+ parse_graph_tracer_opts),
+ OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size",
+ "Size of per cpu buffer, needs to use a B, K, M or G suffix.", parse_buffer_size),
+ OPT_BOOLEAN(0, "inherit", &ftrace.inherit,
+ "Trace children processes"),
+ OPT_UINTEGER('D', "delay", &ftrace.initial_delay,
+ "Number of milliseconds to wait before starting tracing after program start"),
OPT_END()
};
@@ -493,7 +939,9 @@ int cmd_ftrace(int argc, const char **argv)
argc = parse_options(argc, argv, ftrace_options, ftrace_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc && target__none(&ftrace.target))
- usage_with_options(ftrace_usage, ftrace_options);
+ ftrace.target.system_wide = true;
+
+ select_tracer(&ftrace);
ret = target__validate(&ftrace.target);
if (ret) {
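
-m/--buffer-size accepts a B/K/M/G suffix and rejects anything under 1KB before the value is divided by 1024 and written to buffer_size_kb. perf does this with a parse_tag_value() tag table; the strtoul()-based sketch below only reproduces the suffix semantics and the 1KB floor.

/*
 * Sketch of the B/K/M/G suffix parsing behind --buffer-size.  Returns the
 * size in bytes, or -1 for trailing junk or values under 1KB.  This is a
 * simplified stand-in for parse_tag_value(), not the perf code.
 */
#include <stdio.h>
#include <stdlib.h>

static long parse_buffer_size(const char *str)
{
	char *end;
	unsigned long val = strtoul(str, &end, 0);
	unsigned long mult = 1;

	switch (*end) {
	case 'B': case 'b': mult = 1UL;       end++; break;
	case 'K': case 'k': mult = 1UL << 10; end++; break;
	case 'M': case 'm': mult = 1UL << 20; end++; break;
	case 'G': case 'g': mult = 1UL << 30; end++; break;
	}
	if (*end != '\0')
		return -1;		/* trailing junk */

	val *= mult;
	if (val < 1024)
		return -1;		/* smaller than 1KB */
	return (long)val;
}

int main(void)
{
	printf("%ld\n", parse_buffer_size("4M"));	/* 4194304 */
	printf("%ld\n", parse_buffer_size("512"));	/* -1, too small */
	return 0;
}
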
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 7e124a7b8bfd..452a75fe68e5 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -10,6 +10,7 @@
#include "util/color.h"
#include "util/dso.h"
+#include "util/vdso.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/map.h"
@@ -23,9 +24,11 @@
#include "util/symbol.h"
#include "util/synthetic-events.h"
#include "util/thread.h"
-#include <linux/err.h>
+#include "util/namespaces.h"
+#include <linux/err.h>
#include <subcmd/parse-options.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
#include <linux/list.h>
#include <errno.h>
@@ -35,6 +38,7 @@ struct perf_inject {
struct perf_tool tool;
struct perf_session *session;
bool build_ids;
+ bool build_id_all;
bool sched_stat;
bool have_auxtrace;
bool strip;
@@ -51,9 +55,12 @@ struct perf_inject {
struct event_entry {
struct list_head node;
u32 tid;
- union perf_event event[0];
+ union perf_event event[];
};
+static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
+ struct machine *machine, u8 cpumode, u32 flags);
+
static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
{
ssize_t size;
@@ -97,6 +104,13 @@ static int perf_event__repipe_op2_synth(struct perf_session *session,
return perf_event__repipe_synth(session->tool, event);
}
+static int perf_event__repipe_op4_synth(struct perf_session *session,
+ union perf_event *event,
+ u64 data __maybe_unused)
+{
+ return perf_event__repipe_synth(session->tool, event);
+}
+
static int perf_event__repipe_attr(struct perf_tool *tool,
union perf_event *event,
struct evlist **pevlist)
@@ -115,6 +129,13 @@ static int perf_event__repipe_attr(struct perf_tool *tool,
return perf_event__repipe_synth(tool, event);
}
+static int perf_event__repipe_event_update(struct perf_tool *tool,
+ union perf_event *event,
+ struct evlist **pevlist __maybe_unused)
+{
+ return perf_event__repipe_synth(tool, event);
+}
+
#ifdef HAVE_AUXTRACE_SUPPORT
static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
@@ -292,7 +313,7 @@ static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
* if jit marker, then inject jit mmaps and generate ELF images
*/
ret = jit_process(inject->session, &inject->output, machine,
- event->mmap.filename, sample->pid, &n);
+ event->mmap.filename, event->mmap.pid, &n);
if (ret < 0)
return ret;
if (ret) {
@@ -303,6 +324,68 @@ static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
}
#endif
+static struct dso *findnew_dso(int pid, int tid, const char *filename,
+ struct dso_id *id, struct machine *machine)
+{
+ struct thread *thread;
+ struct nsinfo *nsi = NULL;
+ struct nsinfo *nnsi;
+ struct dso *dso;
+ bool vdso;
+
+ thread = machine__findnew_thread(machine, pid, tid);
+ if (thread == NULL) {
+ pr_err("cannot find or create a task %d/%d.\n", tid, pid);
+ return NULL;
+ }
+
+ vdso = is_vdso_map(filename);
+ nsi = nsinfo__get(thread->nsinfo);
+
+ if (vdso) {
+ /* The vdso maps are always on the host and not the
+ * container. Ensure that we don't use setns to look
+ * them up.
+ */
+ nnsi = nsinfo__copy(nsi);
+ if (nnsi) {
+ nsinfo__put(nsi);
+ nnsi->need_setns = false;
+ nsi = nnsi;
+ }
+ dso = machine__findnew_vdso(machine, thread);
+ } else {
+ dso = machine__findnew_dso_id(machine, filename, id);
+ }
+
+ if (dso)
+ dso->nsinfo = nsi;
+ else
+ nsinfo__put(nsi);
+
+ thread__put(thread);
+ return dso;
+}
+
+static int perf_event__repipe_buildid_mmap(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct dso *dso;
+
+ dso = findnew_dso(event->mmap.pid, event->mmap.tid,
+ event->mmap.filename, NULL, machine);
+
+ if (dso && !dso->hit) {
+ dso->hit = 1;
+ dso__inject_build_id(dso, tool, machine, sample->cpumode, 0);
+ dso__put(dso);
+ }
+
+ return perf_event__repipe(tool, event, sample, machine);
+}
+
static int perf_event__repipe_mmap2(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -330,7 +413,7 @@ static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
* if jit marker, then inject jit mmaps and generate ELF images
*/
ret = jit_process(inject->session, &inject->output, machine,
- event->mmap2.filename, sample->pid, &n);
+ event->mmap2.filename, event->mmap2.pid, &n);
if (ret < 0)
return ret;
if (ret) {
@@ -341,6 +424,34 @@ static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
}
#endif
+static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct dso_id dso_id = {
+ .maj = event->mmap2.maj,
+ .min = event->mmap2.min,
+ .ino = event->mmap2.ino,
+ .ino_generation = event->mmap2.ino_generation,
+ };
+ struct dso *dso;
+
+ dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
+ event->mmap2.filename, &dso_id, machine);
+
+ if (dso && !dso->hit) {
+ dso->hit = 1;
+ dso__inject_build_id(dso, tool, machine, sample->cpumode,
+ event->mmap2.flags);
+ dso__put(dso);
+ }
+
+ perf_event__repipe(tool, event, sample, machine);
+
+ return 0;
+}
+
static int perf_event__repipe_fork(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -405,34 +516,36 @@ static int perf_event__repipe_tracing_data(struct perf_session *session,
static int dso__read_build_id(struct dso *dso)
{
+ struct nscookie nsc;
+
if (dso->has_build_id)
return 0;
- if (filename__read_build_id(dso->long_name, dso->build_id,
- sizeof(dso->build_id)) > 0) {
+ nsinfo__mountns_enter(dso->nsinfo, &nsc);
+ if (filename__read_build_id(dso->long_name, &dso->bid) > 0)
dso->has_build_id = true;
- return 0;
- }
+ nsinfo__mountns_exit(&nsc);
- return -1;
+ return dso->has_build_id ? 0 : -1;
}
static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
- struct machine *machine)
+ struct machine *machine, u8 cpumode, u32 flags)
{
- u16 misc = PERF_RECORD_MISC_USER;
int err;
+ if (is_anon_memory(dso->long_name) || flags & MAP_HUGETLB)
+ return 0;
+ if (is_no_dso_memory(dso->long_name))
+ return 0;
+
if (dso__read_build_id(dso) < 0) {
pr_debug("no build_id found for %s\n", dso->long_name);
return -1;
}
- if (dso->kernel)
- misc = PERF_RECORD_MISC_KERNEL;
-
- err = perf_event__synthesize_build_id(tool, dso, misc, perf_event__repipe,
- machine);
+ err = perf_event__synthesize_build_id(tool, dso, cpumode,
+ perf_event__repipe, machine);
if (err) {
pr_err("Can't synthesize build_id event for %s\n", dso->long_name);
return -1;
@@ -441,11 +554,10 @@ static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
return 0;
}
-static int perf_event__inject_buildid(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample,
- struct evsel *evsel __maybe_unused,
- struct machine *machine)
+int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel __maybe_unused,
+ struct machine *machine)
{
struct addr_location al;
struct thread *thread;
@@ -460,19 +572,8 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
if (!al.map->dso->hit) {
al.map->dso->hit = 1;
- if (map__load(al.map) >= 0) {
- dso__inject_build_id(al.map->dso, tool, machine);
- /*
- * If this fails, too bad, let the other side
- * account this as unresolved.
- */
- } else {
-#ifdef HAVE_LIBELF_SUPPORT
- pr_warning("no symbols found in %s, maybe "
- "install a debug package?\n",
- al.map->dso->long_name);
-#endif
- }
+ dso__inject_build_id(al.map->dso, tool, machine,
+ sample->cpumode, al.map->flags);
}
}
@@ -536,7 +637,7 @@ static int perf_inject__sched_stat(struct perf_tool *tool,
union perf_event *event_sw;
struct perf_sample sample_sw;
struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
- u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ u32 pid = evsel__intval(evsel, sample, "pid");
list_for_each_entry(ent, &inject->samples, node) {
if (pid == ent->tid)
@@ -546,7 +647,7 @@ static int perf_inject__sched_stat(struct perf_tool *tool,
return 0;
found:
event_sw = &ent->event[0];
- perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
+ evsel__parse_sample(evsel, event_sw, &sample_sw);
sample_sw.period = sample->period;
sample_sw.time = sample->time;
@@ -561,11 +662,10 @@ static void sig_handler(int sig __maybe_unused)
session_done = 1;
}
-static int perf_evsel__check_stype(struct evsel *evsel,
- u64 sample_type, const char *sample_msg)
+static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
{
struct perf_event_attr *attr = &evsel->core.attr;
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
if (!(attr->sample_type & sample_type)) {
pr_err("Samples for %s event do not have %s attribute set.",
@@ -607,7 +707,7 @@ static int __cmd_inject(struct perf_inject *inject)
signal(SIGINT, sig_handler);
if (inject->build_ids || inject->sched_stat ||
- inject->itrace_synth_opts.set) {
+ inject->itrace_synth_opts.set || inject->build_id_all) {
inject->tool.mmap = perf_event__repipe_mmap;
inject->tool.mmap2 = perf_event__repipe_mmap2;
inject->tool.fork = perf_event__repipe_fork;
@@ -616,16 +716,19 @@ static int __cmd_inject(struct perf_inject *inject)
output_data_offset = session->header.data_offset;
- if (inject->build_ids) {
+ if (inject->build_id_all) {
+ inject->tool.mmap = perf_event__repipe_buildid_mmap;
+ inject->tool.mmap2 = perf_event__repipe_buildid_mmap2;
+ } else if (inject->build_ids) {
inject->tool.sample = perf_event__inject_buildid;
} else if (inject->sched_stat) {
struct evsel *evsel;
evlist__for_each_entry(session->evlist, evsel) {
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
if (!strcmp(name, "sched:sched_switch")) {
- if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+ if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
return -EINVAL;
evsel->handler = perf_inject__sched_switch;
@@ -684,14 +787,14 @@ static int __cmd_inject(struct perf_inject *inject)
perf_header__clear_feat(&session->header,
HEADER_AUXTRACE);
- if (inject->itrace_synth_opts.last_branch)
+ if (inject->itrace_synth_opts.last_branch ||
+ inject->itrace_synth_opts.add_last_branch)
perf_header__set_feat(&session->header,
HEADER_BRANCH_STACK);
evsel = perf_evlist__id2evsel_strict(session->evlist,
inject->aux_id);
if (evsel) {
- pr_debug("Deleting %s\n",
- perf_evsel__name(evsel));
+ pr_debug("Deleting %s\n", evsel__name(evsel));
evlist__remove(session->evlist, evsel);
evsel__delete(evsel);
}
@@ -709,9 +812,12 @@ int cmd_inject(int argc, const char **argv)
struct perf_inject inject = {
.tool = {
.sample = perf_event__repipe_sample,
+ .read = perf_event__repipe_sample,
.mmap = perf_event__repipe,
.mmap2 = perf_event__repipe,
.comm = perf_event__repipe,
+ .namespaces = perf_event__repipe,
+ .cgroup = perf_event__repipe,
.fork = perf_event__repipe,
.exit = perf_event__repipe,
.lost = perf_event__repipe,
@@ -719,19 +825,28 @@ int cmd_inject(int argc, const char **argv)
.aux = perf_event__repipe,
.itrace_start = perf_event__repipe,
.context_switch = perf_event__repipe,
- .read = perf_event__repipe_sample,
.throttle = perf_event__repipe,
.unthrottle = perf_event__repipe,
+ .ksymbol = perf_event__repipe,
+ .bpf = perf_event__repipe,
+ .text_poke = perf_event__repipe,
.attr = perf_event__repipe_attr,
+ .event_update = perf_event__repipe_event_update,
.tracing_data = perf_event__repipe_op2_synth,
- .auxtrace_info = perf_event__repipe_op2_synth,
- .auxtrace = perf_event__repipe_auxtrace,
- .auxtrace_error = perf_event__repipe_op2_synth,
- .time_conv = perf_event__repipe_op2_synth,
.finished_round = perf_event__repipe_oe_synth,
.build_id = perf_event__repipe_op2_synth,
.id_index = perf_event__repipe_op2_synth,
+ .auxtrace_info = perf_event__repipe_op2_synth,
+ .auxtrace_error = perf_event__repipe_op2_synth,
+ .time_conv = perf_event__repipe_op2_synth,
+ .thread_map = perf_event__repipe_op2_synth,
+ .cpu_map = perf_event__repipe_op2_synth,
+ .stat_config = perf_event__repipe_op2_synth,
+ .stat = perf_event__repipe_op2_synth,
+ .stat_round = perf_event__repipe_op2_synth,
.feature = perf_event__repipe_op2_synth,
+ .compressed = perf_event__repipe_op4_synth,
+ .auxtrace = perf_event__repipe_auxtrace,
},
.input_name = "-",
.samples = LIST_HEAD_INIT(inject.samples),
@@ -748,6 +863,8 @@ int cmd_inject(int argc, const char **argv)
struct option options[] = {
OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
"Inject build-ids into the output stream"),
+ OPT_BOOLEAN(0, "buildid-all", &inject.build_id_all,
+ "Inject build-ids of all DSOs into the output stream"),
OPT_STRING('i', "input", &inject.input_name, "file",
"input file name"),
OPT_STRING('o', "output", &inject.output.path, "file",
@@ -796,8 +913,6 @@ int cmd_inject(int argc, const char **argv)
return -1;
}
- inject.tool.ordered_events = inject.sched_stat;
-
data.path = inject.input_name;
inject.session = perf_session__new(&data, true, &inject.tool);
if (IS_ERR(inject.session))
@@ -806,7 +921,7 @@ int cmd_inject(int argc, const char **argv)
if (zstd_init(&(inject.session->zstd_data), 0) < 0)
pr_warning("Decompression initialization failed.\n");
- if (inject.build_ids) {
+ if (inject.build_ids && !inject.build_id_all) {
/*
* to make sure the mmap records are ordered correctly
* and so that the correct especially due to jitted code
@@ -816,6 +931,11 @@ int cmd_inject(int argc, const char **argv)
inject.tool.ordered_events = true;
inject.tool.ordering_requires_timestamps = true;
}
+
+ if (inject.sched_stat) {
+ inject.tool.ordered_events = true;
+ }
+
#ifdef HAVE_JITDUMP
if (inject.jit_mode) {
inject.tool.mmap2 = perf_event__jit_repipe_mmap2;
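
With --buildid-all, build-ids are injected eagerly as the mmap/mmap2 events stream past (once per DSO) instead of lazily per sample, and mappings that cannot carry one are skipped. The sketch below models only that skip decision; the string checks are simplified stand-ins for is_anon_memory()/is_no_dso_memory(), and MAP_HUGETLB is taken from the mmap2 flags as in the hunks above.

/*
 * Sketch of the "should we even try to inject a build-id?" decision for a
 * mapping: anonymous and hugetlbfs-backed maps have no backing DSO.  The
 * name checks are simplified stand-ins for perf's helpers.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000	/* common Linux value; arch-dependent */
#endif

static bool want_build_id(const char *filename, unsigned int flags)
{
	if (flags & MAP_HUGETLB)
		return false;
	if (!strcmp(filename, "//anon") || !strncmp(filename, "/dev/zero", 9))
		return false;		/* anonymous memory */
	if (!strncmp(filename, "[stack", 6) || !strcmp(filename, "[heap]"))
		return false;		/* no backing DSO */
	return true;
}

int main(void)
{
	printf("%d\n", want_build_id("/usr/lib64/libc.so.6", 0));	    /* 1 */
	printf("%d\n", want_build_id("//anon", 0));			    /* 0 */
	printf("%d\n", want_build_id("/usr/lib64/libc.so.6", MAP_HUGETLB)); /* 0 */
	return 0;
}
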
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 003c85f5f56c..a50dae2c4ae9 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -169,13 +169,12 @@ static int insert_caller_stat(unsigned long call_site,
return 0;
}
-static int perf_evsel__process_alloc_event(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_alloc_event(struct evsel *evsel, struct perf_sample *sample)
{
- unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"),
- call_site = perf_evsel__intval(evsel, sample, "call_site");
- int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"),
- bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");
+ unsigned long ptr = evsel__intval(evsel, sample, "ptr"),
+ call_site = evsel__intval(evsel, sample, "call_site");
+ int bytes_req = evsel__intval(evsel, sample, "bytes_req"),
+ bytes_alloc = evsel__intval(evsel, sample, "bytes_alloc");
if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
insert_caller_stat(call_site, bytes_req, bytes_alloc))
@@ -188,14 +187,13 @@ static int perf_evsel__process_alloc_event(struct evsel *evsel,
return 0;
}
-static int perf_evsel__process_alloc_node_event(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_sample *sample)
{
- int ret = perf_evsel__process_alloc_event(evsel, sample);
+ int ret = evsel__process_alloc_event(evsel, sample);
if (!ret) {
int node1 = cpu__get_node(sample->cpu),
- node2 = perf_evsel__intval(evsel, sample, "node");
+ node2 = evsel__intval(evsel, sample, "node");
if (node1 != node2)
nr_cross_allocs++;
@@ -232,10 +230,9 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr,
return NULL;
}
-static int perf_evsel__process_free_event(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_free_event(struct evsel *evsel, struct perf_sample *sample)
{
- unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
+ unsigned long ptr = evsel__intval(evsel, sample, "ptr");
struct alloc_stat *s_alloc, *s_caller;
s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
@@ -784,13 +781,12 @@ static int parse_gfp_flags(struct evsel *evsel, struct perf_sample *sample,
return 0;
}
-static int perf_evsel__process_page_alloc_event(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_page_alloc_event(struct evsel *evsel, struct perf_sample *sample)
{
u64 page;
- unsigned int order = perf_evsel__intval(evsel, sample, "order");
- unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags");
- unsigned int migrate_type = perf_evsel__intval(evsel, sample,
+ unsigned int order = evsel__intval(evsel, sample, "order");
+ unsigned int gfp_flags = evsel__intval(evsel, sample, "gfp_flags");
+ unsigned int migrate_type = evsel__intval(evsel, sample,
"migratetype");
u64 bytes = kmem_page_size << order;
u64 callsite;
@@ -802,9 +798,9 @@ static int perf_evsel__process_page_alloc_event(struct evsel *evsel,
};
if (use_pfn)
- page = perf_evsel__intval(evsel, sample, "pfn");
+ page = evsel__intval(evsel, sample, "pfn");
else
- page = perf_evsel__intval(evsel, sample, "page");
+ page = evsel__intval(evsel, sample, "page");
nr_page_allocs++;
total_page_alloc_bytes += bytes;
@@ -857,11 +853,10 @@ static int perf_evsel__process_page_alloc_event(struct evsel *evsel,
return 0;
}
-static int perf_evsel__process_page_free_event(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_page_free_event(struct evsel *evsel, struct perf_sample *sample)
{
u64 page;
- unsigned int order = perf_evsel__intval(evsel, sample, "order");
+ unsigned int order = evsel__intval(evsel, sample, "order");
u64 bytes = kmem_page_size << order;
struct page_stat *pstat;
struct page_stat this = {
@@ -869,9 +864,9 @@ static int perf_evsel__process_page_free_event(struct evsel *evsel,
};
if (use_pfn)
- page = perf_evsel__intval(evsel, sample, "pfn");
+ page = evsel__intval(evsel, sample, "pfn");
else
- page = perf_evsel__intval(evsel, sample, "page");
+ page = evsel__intval(evsel, sample, "page");
nr_page_frees++;
total_page_free_bytes += bytes;
@@ -1371,15 +1366,15 @@ static int __cmd_kmem(struct perf_session *session)
struct evsel *evsel;
const struct evsel_str_handler kmem_tracepoints[] = {
/* slab allocator */
- { "kmem:kmalloc", perf_evsel__process_alloc_event, },
- { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, },
- { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, },
- { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
- { "kmem:kfree", perf_evsel__process_free_event, },
- { "kmem:kmem_cache_free", perf_evsel__process_free_event, },
+ { "kmem:kmalloc", evsel__process_alloc_event, },
+ { "kmem:kmem_cache_alloc", evsel__process_alloc_event, },
+ { "kmem:kmalloc_node", evsel__process_alloc_node_event, },
+ { "kmem:kmem_cache_alloc_node", evsel__process_alloc_node_event, },
+ { "kmem:kfree", evsel__process_free_event, },
+ { "kmem:kmem_cache_free", evsel__process_free_event, },
/* page allocator */
- { "kmem:mm_page_alloc", perf_evsel__process_page_alloc_event, },
- { "kmem:mm_page_free", perf_evsel__process_page_free_event, },
+ { "kmem:mm_page_alloc", evsel__process_page_alloc_event, },
+ { "kmem:mm_page_free", evsel__process_page_free_event, },
};
if (!perf_session__has_traces(session, "kmem record"))
@@ -1391,8 +1386,8 @@ static int __cmd_kmem(struct perf_session *session)
}
evlist__for_each_entry(session->evlist, evsel) {
- if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") &&
- perf_evsel__field(evsel, "pfn")) {
+ if (!strcmp(evsel__name(evsel), "kmem:mm_page_alloc") &&
+ evsel__field(evsel, "pfn")) {
use_pfn = true;
break;
}
@@ -1938,7 +1933,8 @@ int cmd_kmem(int argc, const char **argv)
return ret;
argc = parse_options_subcommand(argc, argv, kmem_options,
- kmem_subcommands, kmem_usage, 0);
+ kmem_subcommands, kmem_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
usage_with_options(kmem_usage, kmem_options);
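
The kmem hunks are mostly the perf_evsel__ to evsel__ rename, but they show the tool's pattern: pull fields such as bytes_req/bytes_alloc out of the tracepoint sample and accumulate per-call-site statistics. The toy below keeps only that accounting idea, with the evsel__intval() plumbing replaced by plain parameters and a small fixed table instead of perf's rbtrees.

/*
 * Toy version of the accounting evsel__process_alloc_event() feeds: record
 * requested vs. actually allocated bytes per call site so internal
 * fragmentation can be reported later.  Table size and types are demo
 * assumptions.
 */
#include <stdio.h>

struct alloc_stat_toy {
	unsigned long long call_site;
	unsigned long bytes_req;
	unsigned long bytes_alloc;
	unsigned long hits;
};

static struct alloc_stat_toy stats[64];
static int nr_stats;

static void account_alloc(unsigned long long call_site, int bytes_req, int bytes_alloc)
{
	int i;

	for (i = 0; i < nr_stats; i++)
		if (stats[i].call_site == call_site)
			break;
	if (i == nr_stats) {
		if (nr_stats == 64)
			return;		/* table full, drop the sample */
		stats[nr_stats++].call_site = call_site;
	}
	stats[i].bytes_req += bytes_req;
	stats[i].bytes_alloc += bytes_alloc;
	stats[i].hits++;
}

int main(void)
{
	account_alloc(0xffffffff81123456ULL, 24, 32);
	account_alloc(0xffffffff81123456ULL, 24, 32);

	printf("site %#llx: %lu hits, %lu req, %lu alloc (%lu wasted)\n",
	       stats[0].call_site, stats[0].hits, stats[0].bytes_req,
	       stats[0].bytes_alloc, stats[0].bytes_alloc - stats[0].bytes_req);
	return 0;
}
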
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 577af4f3297a..460945ded6dd 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -69,7 +69,7 @@ void exit_event_get_key(struct evsel *evsel,
struct event_key *key)
{
key->info = 0;
- key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
+ key->key = evsel__intval(evsel, sample, kvm_exit_reason);
}
bool kvm_exit_event(struct evsel *evsel)
@@ -416,8 +416,7 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread,
return NULL;
}
- vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample,
- vcpu_id_str);
+ vcpu_record->vcpu_id = evsel__intval(evsel, sample, vcpu_id_str);
thread__set_priv(thread, vcpu_record);
}
@@ -1033,16 +1032,16 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
struct perf_event_attr *attr = &pos->core.attr;
/* make sure these *are* set */
- perf_evsel__set_sample_bit(pos, TID);
- perf_evsel__set_sample_bit(pos, TIME);
- perf_evsel__set_sample_bit(pos, CPU);
- perf_evsel__set_sample_bit(pos, RAW);
+ evsel__set_sample_bit(pos, TID);
+ evsel__set_sample_bit(pos, TIME);
+ evsel__set_sample_bit(pos, CPU);
+ evsel__set_sample_bit(pos, RAW);
/* make sure these are *not*; want as small a sample as possible */
- perf_evsel__reset_sample_bit(pos, PERIOD);
- perf_evsel__reset_sample_bit(pos, IP);
- perf_evsel__reset_sample_bit(pos, CALLCHAIN);
- perf_evsel__reset_sample_bit(pos, ADDR);
- perf_evsel__reset_sample_bit(pos, READ);
+ evsel__reset_sample_bit(pos, PERIOD);
+ evsel__reset_sample_bit(pos, IP);
+ evsel__reset_sample_bit(pos, CALLCHAIN);
+ evsel__reset_sample_bit(pos, ADDR);
+ evsel__reset_sample_bit(pos, READ);
attr->mmap = 0;
attr->comm = 0;
attr->task = 0;
@@ -1320,7 +1319,7 @@ static struct evlist *kvm_live_event_list(void)
*name = '\0';
name++;
- if (perf_evlist__add_newtp(evlist, sys, name, NULL)) {
+ if (evlist__add_newtp(evlist, sys, name, NULL)) {
pr_err("Failed to add %s tracepoint to the list\n", *events_tp);
free(tp);
goto out;
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 965ef017496f..10ab5e40a34f 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -42,7 +42,7 @@ int cmd_list(int argc, const char **argv)
OPT_END()
};
const char * const list_usage[] = {
- "perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]",
+ "perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob]",
NULL
};
@@ -92,13 +92,6 @@ int cmd_list(int argc, const char **argv)
else if ((sep = strchr(argv[i], ':')) != NULL) {
int sep_idx;
- if (sep == NULL) {
- print_events(argv[i], raw_dump, !desc_flag,
- long_desc_flag,
- details_flag,
- deprecated);
- continue;
- }
sep_idx = sep - argv[i];
s = strdup(argv[i]);
if (s == NULL)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 474dfd59d7eb..f0a1dbacb46c 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -48,7 +48,7 @@ struct lock_stat {
struct rb_node rb; /* used for sorting */
/*
- * FIXME: perf_evsel__intval() returns u64,
+ * FIXME: evsel__intval() returns u64,
 * so the address of lockdep_map should be treated as 64-bit.
 * Is there a better solution?
*/
@@ -404,9 +404,9 @@ static int report_lock_acquire_event(struct evsel *evsel,
struct lock_stat *ls;
struct thread_stat *ts;
struct lock_seq_stat *seq;
- const char *name = perf_evsel__strval(evsel, sample, "name");
- u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
- int flag = perf_evsel__intval(evsel, sample, "flag");
+ const char *name = evsel__strval(evsel, sample, "name");
+ u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
+ int flag = evsel__intval(evsel, sample, "flag");
memcpy(&addr, &tmp, sizeof(void *));
@@ -477,8 +477,8 @@ static int report_lock_acquired_event(struct evsel *evsel,
struct thread_stat *ts;
struct lock_seq_stat *seq;
u64 contended_term;
- const char *name = perf_evsel__strval(evsel, sample, "name");
- u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+ const char *name = evsel__strval(evsel, sample, "name");
+ u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
memcpy(&addr, &tmp, sizeof(void *));
@@ -539,8 +539,8 @@ static int report_lock_contended_event(struct evsel *evsel,
struct lock_stat *ls;
struct thread_stat *ts;
struct lock_seq_stat *seq;
- const char *name = perf_evsel__strval(evsel, sample, "name");
- u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+ const char *name = evsel__strval(evsel, sample, "name");
+ u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
memcpy(&addr, &tmp, sizeof(void *));
@@ -594,8 +594,8 @@ static int report_lock_release_event(struct evsel *evsel,
struct lock_stat *ls;
struct thread_stat *ts;
struct lock_seq_stat *seq;
- const char *name = perf_evsel__strval(evsel, sample, "name");
- u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+ const char *name = evsel__strval(evsel, sample, "name");
+ u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
memcpy(&addr, &tmp, sizeof(void *));
@@ -657,32 +657,28 @@ static struct trace_lock_handler report_lock_ops = {
static struct trace_lock_handler *trace_handler;
-static int perf_evsel__process_lock_acquire(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
{
if (trace_handler->acquire_event)
return trace_handler->acquire_event(evsel, sample);
return 0;
}
-static int perf_evsel__process_lock_acquired(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
{
if (trace_handler->acquired_event)
return trace_handler->acquired_event(evsel, sample);
return 0;
}
-static int perf_evsel__process_lock_contended(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
{
if (trace_handler->contended_event)
return trace_handler->contended_event(evsel, sample);
return 0;
}
-static int perf_evsel__process_lock_release(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
{
if (trace_handler->release_event)
return trace_handler->release_event(evsel, sample);
@@ -775,7 +771,7 @@ static void dump_threads(void)
pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
node = rb_next(node);
thread__put(t);
- };
+ }
}
static void dump_map(void)
@@ -849,10 +845,10 @@ static void sort_result(void)
}
static const struct evsel_str_handler lock_tracepoints[] = {
- { "lock:lock_acquire", perf_evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
- { "lock:lock_acquired", perf_evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
- { "lock:lock_contended", perf_evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
- { "lock:lock_release", perf_evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
+ { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
+ { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
+ { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
+ { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
};
static bool force;
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index a13f5817d6fc..3523279af6af 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -38,26 +38,16 @@ static int parse_record_events(const struct option *opt,
const char *str, int unset __maybe_unused)
{
struct perf_mem *mem = *(struct perf_mem **)opt->value;
- int j;
- if (strcmp(str, "list")) {
- if (!perf_mem_events__parse(str)) {
- mem->operation = 0;
- return 0;
- }
- exit(-1);
+ if (!strcmp(str, "list")) {
+ perf_mem_events__list();
+ exit(0);
}
+ if (perf_mem_events__parse(str))
+ exit(-1);
- for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
- struct perf_mem_event *e = &perf_mem_events[j];
-
- fprintf(stderr, "%-13s%-*s%s\n",
- e->tag,
- verbose > 0 ? 25 : 0,
- verbose > 0 ? perf_mem_events__name(j) : "",
- e->supported ? ": available" : "");
- }
- exit(0);
+ mem->operation = 0;
+ return 0;
}
static const char * const __usage[] = {
@@ -123,7 +113,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
rec_argv[i++] = "-e";
rec_argv[i++] = perf_mem_events__name(j);
- };
+ }
if (all_user)
rec_argv[i++] = "--all-user";
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 70548df2abb9..6b1507566770 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -364,6 +364,9 @@ static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs)
for (k = 0; k < pev->ntevs; k++) {
struct probe_trace_event *tev = &pev->tevs[k];
+ /* Skipped events have no event name */
+ if (!tev->event)
+ continue;
/* We use tev's name for showing new events */
show_perf_probe_event(tev->group, tev->event, pev,
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 1ab349abe904..adf311d15d3d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -34,6 +34,7 @@
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
+#include "util/perf_api_probe.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
@@ -43,6 +44,9 @@
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
+#include "util/util.h"
+#include "util/pfm.h"
+#include "util/clockid.h"
#include "asm/bug.h"
#include "perf.h"
@@ -50,9 +54,13 @@
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
+#include <pthread.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
+#ifdef HAVE_EVENTFD_SUPPORT
+#include <sys/eventfd.h>
+#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
@@ -63,6 +71,7 @@
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
+#include <sys/time.h>
struct switch_output {
bool enabled;
@@ -84,7 +93,10 @@ struct record {
struct auxtrace_record *itr;
struct evlist *evlist;
struct perf_session *session;
+ struct evlist *sb_evlist;
+ pthread_t thread_id;
int realtime_prio;
+ bool switch_output_event_set;
bool no_buildid;
bool no_buildid_set;
bool no_buildid_cache;
@@ -503,6 +515,20 @@ static int process_synthesized_event(struct perf_tool *tool,
return record__write(rec, NULL, event, event->header.size);
}
+static int process_locked_synthesized_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
+ int ret;
+
+ pthread_mutex_lock(&synth_lock);
+ ret = process_synthesized_event(tool, event, sample, machine);
+ pthread_mutex_unlock(&synth_lock);
+ return ret;
+}
+
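
The wrapper above leaves the single-threaded path untouched: as the later record__synthesize() hunk shows, the locked variant is only substituted when nr_threads_synthesize > 1. The same shape in miniature (illustrative, not perf code):

#include <pthread.h>
#include <stdio.h>

/* Not thread-safe on its own; used directly in the single-threaded case. */
static int emit(const char *rec)
{
	return fputs(rec, stdout) == EOF ? -1 : 0;
}

/* Drop-in replacement that serializes concurrent callers. */
static int emit_locked(const char *rec)
{
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	int ret;

	pthread_mutex_lock(&lock);
	ret = emit(rec);
	pthread_mutex_unlock(&lock);
	return ret;
}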
static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
struct record *rec = to;
@@ -518,6 +544,9 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
static volatile int signr = -1;
static volatile int child_finished;
+#ifdef HAVE_EVENTFD_SUPPORT
+static int done_fd = -1;
+#endif
static void sig_handler(int sig)
{
@@ -527,6 +556,21 @@ static void sig_handler(int sig)
signr = sig;
done = 1;
+#ifdef HAVE_EVENTFD_SUPPORT
+{
+ u64 tmp = 1;
+ /*
+ * It is possible for this signal handler to run after done is checked
+ * in the main loop, but before the perf counter fds are polled. If this
+ * happens, the poll() will continue to wait even though done is set,
+ * and will only break out if either another signal is received, or the
+ * counters are ready for read. To ensure the poll() doesn't sleep when
+ * done is set, use an eventfd (done_fd) to wake up the poll().
+ */
+ if (write(done_fd, &tmp, sizeof(tmp)) < 0)
+ pr_err("failed to signal wakeup fd, error: %m\n");
+}
+#endif // HAVE_EVENTFD_SUPPORT
}
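
A standalone sketch of the wake-up pattern the comment above describes, assuming Linux eventfd support (names are illustrative, not perf code):

#include <poll.h>
#include <signal.h>
#include <stdint.h>
#include <sys/eventfd.h>
#include <unistd.h>

static int wake_fd = -1;
static volatile sig_atomic_t quit;

static void on_sigint(int sig)
{
	uint64_t one = 1;

	(void)sig;
	quit = 1;
	/* write() is async-signal-safe; the fd turning readable ends poll() */
	if (write(wake_fd, &one, sizeof(one)) < 0)
		_exit(1);
}

int main(void)
{
	struct pollfd pfd;

	wake_fd = eventfd(0, EFD_NONBLOCK);
	signal(SIGINT, on_sigint);
	pfd.fd = wake_fd;
	pfd.events = POLLIN;
	while (!quit)
		poll(&pfd, 1, -1);	/* cannot sleep past a missed 'quit' */
	close(wake_fd);
	return 0;
}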
static void sigsegv_handler(int sig)
@@ -723,6 +767,43 @@ static int record__auxtrace_init(struct record *rec __maybe_unused)
#endif
+static int record__config_text_poke(struct evlist *evlist)
+{
+ struct evsel *evsel;
+ int err;
+
+ /* Nothing to do if text poke is already configured */
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.text_poke)
+ return 0;
+ }
+
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err)
+ return err;
+
+ evsel = evlist__last(evlist);
+
+ evsel->core.attr.freq = 0;
+ evsel->core.attr.sample_period = 1;
+ evsel->core.attr.text_poke = 1;
+ evsel->core.attr.ksymbol = 1;
+
+ evsel->core.system_wide = true;
+ evsel->no_aux_samples = true;
+ evsel->immediate = true;
+
+ /* Text poke must be collected on all CPUs */
+ perf_cpu_map__put(evsel->core.own_cpus);
+ evsel->core.own_cpus = perf_cpu_map__new(NULL);
+ perf_cpu_map__put(evsel->core.cpus);
+ evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
+
+ evsel__set_sample_bit(evsel, TIME);
+
+ return 0;
+}
+
static bool record__kcore_readable(struct machine *machine)
{
char kcore[PATH_MAX];
@@ -805,19 +886,28 @@ static int record__open(struct record *rec)
int rc = 0;
/*
- * For initial_delay we need to add a dummy event so that we can track
- * PERF_RECORD_MMAP while we wait for the initial delay to enable the
- * real events, the ones asked by the user.
+ * For initial_delay or system wide, we need to add a dummy event so
+ * that we can track PERF_RECORD_MMAP while we wait out the initial
+ * delay or while event synthesis runs.
*/
- if (opts->initial_delay) {
- if (perf_evlist__add_dummy(evlist))
- return -ENOMEM;
+ if (opts->initial_delay || target__has_cpu(&opts->target)) {
+ pos = perf_evlist__get_tracking_event(evlist);
+ if (!evsel__is_dummy_event(pos)) {
+ /* Set up dummy event. */
+ if (evlist__add_dummy(evlist))
+ return -ENOMEM;
+ pos = evlist__last(evlist);
+ perf_evlist__set_tracking_event(evlist, pos);
+ }
- pos = evlist__first(evlist);
- pos->tracking = 0;
- pos = evlist__last(evlist);
- pos->tracking = 1;
- pos->core.attr.enable_on_exec = 1;
+ /*
+ * Enable the dummy event when the process is forked for
+ * initial_delay, immediately for system wide.
+ */
+ if (opts->initial_delay && !pos->immediate)
+ pos->core.attr.enable_on_exec = 1;
+ else
+ pos->immediate = 1;
}
perf_evlist__config(evlist, opts, &callchain_param);
@@ -825,7 +915,7 @@ static int record__open(struct record *rec)
evlist__for_each_entry(evlist, pos) {
try_again:
if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
- if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
+ if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
goto try_again;
@@ -837,8 +927,7 @@ try_again:
goto try_again;
}
rc = -errno;
- perf_evsel__open_strerror(pos, &opts->target,
- errno, msg, sizeof(msg));
+ evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
ui__error("%s\n", msg);
goto out;
}
@@ -859,7 +948,7 @@ try_again:
if (perf_evlist__apply_filters(evlist, &pos)) {
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
- pos->filter, perf_evsel__name(pos), errno,
+ pos->filter, evsel__name(pos), errno,
str_error_r(errno, msg, sizeof(msg)));
rc = -1;
goto out;
@@ -1116,6 +1205,9 @@ static void record__init_features(struct record *rec)
if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
perf_header__clear_feat(&session->header, HEADER_CLOCKID);
+ if (!rec->opts.use_clockid)
+ perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
+
perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
if (!record__comp_enabled(rec))
perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
@@ -1288,6 +1380,7 @@ static int record__synthesize(struct record *rec, bool tail)
struct perf_tool *tool = &rec->tool;
int fd = perf_data__fd(data);
int err = 0;
+ event_op f = process_synthesized_event;
if (rec->opts.tail_synthesize != tail)
return 0;
@@ -1402,13 +1495,114 @@ static int record__synthesize(struct record *rec, bool tail)
if (err < 0)
pr_warning("Couldn't synthesize cgroup events.\n");
+ if (rec->opts.nr_threads_synthesize > 1) {
+ perf_set_multithreaded();
+ f = process_locked_synthesized_event;
+ }
+
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
- process_synthesized_event, opts->sample_address,
- 1);
+ f, opts->sample_address,
+ rec->opts.nr_threads_synthesize);
+
+ if (rec->opts.nr_threads_synthesize > 1)
+ perf_set_singlethreaded();
+
out:
return err;
}
+static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
+{
+ struct record *rec = data;
+ pthread_kill(rec->thread_id, SIGUSR2);
+ return 0;
+}
+
+static int record__setup_sb_evlist(struct record *rec)
+{
+ struct record_opts *opts = &rec->opts;
+
+ if (rec->sb_evlist != NULL) {
+ /*
+ * We get here if --switch-output-event populated the
+ * sb_evlist, so associate a callback that will send a SIGUSR2
+ * to the main thread.
+ */
+ evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
+ rec->thread_id = pthread_self();
+ }
+#ifdef HAVE_LIBBPF_SUPPORT
+ if (!opts->no_bpf_event) {
+ if (rec->sb_evlist == NULL) {
+ rec->sb_evlist = evlist__new();
+
+ if (rec->sb_evlist == NULL) {
+ pr_err("Couldn't create side band evlist.\n.");
+ return -1;
+ }
+ }
+
+ if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
+ pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
+ return -1;
+ }
+ }
+#endif
+ if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
+ pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
+ opts->no_bpf_event = true;
+ }
+
+ return 0;
+}
+
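
record__process_signal_event() above only works because the main thread parked its pthread id in rec->thread_id; the self-signalling pattern in isolation (illustrative, not perf code):

#include <pthread.h>
#include <signal.h>
#include <unistd.h>

static pthread_t main_id;

static void on_usr2(int sig)
{
	(void)sig;	/* e.g. trigger an output switch */
}

static void *side_band(void *arg)
{
	(void)arg;
	sleep(1);			/* stand-in for "interesting event seen" */
	pthread_kill(main_id, SIGUSR2);	/* deliver to the main thread only */
	return NULL;
}

int main(void)
{
	pthread_t tid;

	main_id = pthread_self();
	signal(SIGUSR2, on_usr2);
	pthread_create(&tid, NULL, side_band, NULL);
	pause();			/* returns once SIGUSR2 is handled */
	pthread_join(tid, NULL);
	return 0;
}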
+static int record__init_clock(struct record *rec)
+{
+ struct perf_session *session = rec->session;
+ struct timespec ref_clockid;
+ struct timeval ref_tod;
+ u64 ref;
+
+ if (!rec->opts.use_clockid)
+ return 0;
+
+ if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
+ session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
+
+ session->header.env.clock.clockid = rec->opts.clockid;
+
+ if (gettimeofday(&ref_tod, NULL) != 0) {
+ pr_err("gettimeofday failed, cannot set reference time.\n");
+ return -1;
+ }
+
+ if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
+ pr_err("clock_gettime failed, cannot set reference time.\n");
+ return -1;
+ }
+
+ ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
+ (u64) ref_tod.tv_usec * NSEC_PER_USEC;
+
+ session->header.env.clock.tod_ns = ref;
+
+ ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
+ (u64) ref_clockid.tv_nsec;
+
+ session->header.env.clock.clockid_ns = ref;
+ return 0;
+}
+
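
Because tod_ns and clockid_ns are captured back to back, a later consumer can translate session-clock timestamps to wall-clock time; a hedged sketch of that conversion (the helper is hypothetical, not part of this patch):

#include <stdint.h>

/* Hypothetical helper: t was taken on the session clockid; tod_ns and
 * clockid_ns are the reference pair recorded by record__init_clock(). */
static inline uint64_t session_ns_to_tod_ns(uint64_t t,
					    uint64_t tod_ns,
					    uint64_t clockid_ns)
{
	return tod_ns + (t - clockid_ns);
}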
+static void hit_auxtrace_snapshot_trigger(struct record *rec)
+{
+ if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
+ trigger_hit(&auxtrace_snapshot_trigger);
+ auxtrace_record__snapshot_started = 1;
+ if (auxtrace_record__snapshot_start(rec->itr))
+ trigger_error(&auxtrace_snapshot_trigger);
+ }
+}
+
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
int err;
@@ -1420,9 +1614,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
struct perf_data *data = &rec->data;
struct perf_session *session;
bool disabled = false, draining = false;
- struct evlist *sb_evlist = NULL;
int fd;
float ratio = 0;
+ enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
atexit(record__sig_exit);
signal(SIGCHLD, sig_handler);
@@ -1465,6 +1659,20 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
pr_err("Compression initialization failed.\n");
return -1;
}
+#ifdef HAVE_EVENTFD_SUPPORT
+ done_fd = eventfd(0, EFD_NONBLOCK);
+ if (done_fd < 0) {
+ pr_err("Failed to create wakeup eventfd, error: %m\n");
+ status = -1;
+ goto out_delete_session;
+ }
+ err = evlist__add_pollfd(rec->evlist, done_fd);
+ if (err < 0) {
+ pr_err("Failed to add wakeup eventfd to poll list\n");
+ status = err;
+ goto out_delete_session;
+ }
+#endif // HAVE_EVENTFD_SUPPORT
session->header.env.comp_type = PERF_COMP_ZSTD;
session->header.env.comp_level = rec->opts.comp_level;
@@ -1475,10 +1683,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
return -1;
}
- record__init_features(rec);
+ if (record__init_clock(rec))
+ return -1;
- if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
- session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
+ record__init_features(rec);
if (forks) {
err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
@@ -1528,7 +1736,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
* Normally perf_session__new would do this, but it doesn't have the
* evlist.
*/
- if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
+ if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
rec->tool.ordered_events = false;
}
@@ -1546,21 +1754,17 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
goto out_child;
}
+ err = -1;
if (!rec->no_buildid
&& !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
pr_err("Couldn't generate buildids. "
"Use --no-buildid to profile anyway.\n");
- err = -1;
goto out_child;
}
- if (!opts->no_bpf_event)
- bpf_event__add_sb_event(&sb_evlist, &session->header.env);
-
- if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
- pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
- opts->no_bpf_event = true;
- }
+ err = record__setup_sb_evlist(rec);
+ if (err)
+ goto out_child;
err = record__synthesize(rec, false);
if (err < 0)
@@ -1634,9 +1838,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
perf_evlist__start_workload(rec->evlist);
}
+ if (evlist__initialize_ctlfd(rec->evlist, opts->ctl_fd, opts->ctl_fd_ack))
+ goto out_child;
+
if (opts->initial_delay) {
- usleep(opts->initial_delay * USEC_PER_MSEC);
- evlist__enable(rec->evlist);
+ pr_info(EVLIST_DISABLED_MSG);
+ if (opts->initial_delay > 0) {
+ usleep(opts->initial_delay * USEC_PER_MSEC);
+ evlist__enable(rec->evlist);
+ pr_info(EVLIST_ENABLED_MSG);
+ }
}
trigger_ready(&auxtrace_snapshot_trigger);
@@ -1728,6 +1939,25 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
draining = true;
}
+ if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
+ switch (cmd) {
+ case EVLIST_CTL_CMD_ENABLE:
+ pr_info(EVLIST_ENABLED_MSG);
+ break;
+ case EVLIST_CTL_CMD_DISABLE:
+ pr_info(EVLIST_DISABLED_MSG);
+ break;
+ case EVLIST_CTL_CMD_SNAPSHOT:
+ hit_auxtrace_snapshot_trigger(rec);
+ evlist__ctlfd_ack(rec->evlist);
+ break;
+ case EVLIST_CTL_CMD_ACK:
+ case EVLIST_CTL_CMD_UNSUPPORTED:
+ default:
+ break;
+ }
+ }
+
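
The other end of this switch is a controlling process writing command strings to the ctl descriptor. A hedged sketch of one, assuming perf record was started with --control fifo:/tmp/ctl,/tmp/ack (paths illustrative; per the option help below, the "ack\n" reply arrives only when an ack descriptor is configured):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[8] = { 0 };
	int ctl = open("/tmp/ctl", O_WRONLY);	/* blocks until perf opens it */
	int ack = open("/tmp/ack", O_RDONLY);

	if (ctl < 0 || ack < 0)
		return 1;
	write(ctl, "enable", strlen("enable"));	/* start counting */
	if (read(ack, buf, sizeof(buf) - 1) > 0)
		printf("perf replied: %s", buf);	/* expect "ack\n" */
	write(ctl, "disable", strlen("disable"));
	close(ctl);
	close(ack);
	return 0;
}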
/*
* When perf is starting the traced process, at the end events
* die with the process and we wait for that. Thus no need to
@@ -1761,6 +1991,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
record__synthesize_workload(rec, true);
out_child:
+ evlist__finalize_ctlfd(rec->evlist);
record__mmap_read_all(rec, true);
record__aio_mmap_read_sync(rec);
@@ -1827,11 +2058,15 @@ out_child:
}
out_delete_session:
+#ifdef HAVE_EVENTFD_SUPPORT
+ if (done_fd >= 0)
+ close(done_fd);
+#endif
zstd_fini(&session->zstd_data);
perf_session__delete(session);
if (!opts->no_bpf_event)
- perf_evlist__stop_sb_thread(sb_evlist);
+ perf_evlist__stop_sb_thread(rec->sb_evlist);
return status;
}
@@ -1923,103 +2158,6 @@ static int perf_record_config(const char *var, const char *value, void *cb)
return 0;
}
-struct clockid_map {
- const char *name;
- int clockid;
-};
-
-#define CLOCKID_MAP(n, c) \
- { .name = n, .clockid = (c), }
-
-#define CLOCKID_END { .name = NULL, }
-
-
-/*
- * Add the missing ones, we need to build on many distros...
- */
-#ifndef CLOCK_MONOTONIC_RAW
-#define CLOCK_MONOTONIC_RAW 4
-#endif
-#ifndef CLOCK_BOOTTIME
-#define CLOCK_BOOTTIME 7
-#endif
-#ifndef CLOCK_TAI
-#define CLOCK_TAI 11
-#endif
-
-static const struct clockid_map clockids[] = {
- /* available for all events, NMI safe */
- CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
- CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
-
- /* available for some events */
- CLOCKID_MAP("realtime", CLOCK_REALTIME),
- CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
- CLOCKID_MAP("tai", CLOCK_TAI),
-
- /* available for the lazy */
- CLOCKID_MAP("mono", CLOCK_MONOTONIC),
- CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
- CLOCKID_MAP("real", CLOCK_REALTIME),
- CLOCKID_MAP("boot", CLOCK_BOOTTIME),
-
- CLOCKID_END,
-};
-
-static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
-{
- struct timespec res;
-
- *res_ns = 0;
- if (!clock_getres(clk_id, &res))
- *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
- else
- pr_warning("WARNING: Failed to determine specified clock resolution.\n");
-
- return 0;
-}
-
-static int parse_clockid(const struct option *opt, const char *str, int unset)
-{
- struct record_opts *opts = (struct record_opts *)opt->value;
- const struct clockid_map *cm;
- const char *ostr = str;
-
- if (unset) {
- opts->use_clockid = 0;
- return 0;
- }
-
- /* no arg passed */
- if (!str)
- return 0;
-
- /* no setting it twice */
- if (opts->use_clockid)
- return -1;
-
- opts->use_clockid = true;
-
- /* if its a number, we're done */
- if (sscanf(str, "%d", &opts->clockid) == 1)
- return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
-
- /* allow a "CLOCK_" prefix to the name */
- if (!strncasecmp(str, "CLOCK_", 6))
- str += 6;
-
- for (cm = clockids; cm->name; cm++) {
- if (!strcasecmp(str, cm->name)) {
- opts->clockid = cm->clockid;
- return get_clockid_res(opts->clockid,
- &opts->clockid_res_ns);
- }
- }
-
- opts->use_clockid = false;
- ui__warning("unknown clockid %s, check man page\n", ostr);
- return -1;
-}
static int record__parse_affinity(const struct option *opt, const char *str, int unset)
{
@@ -2106,6 +2244,15 @@ out_free:
return ret;
}
+static int parse_control_option(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ struct record_opts *opts = opt->value;
+
+ return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
+}
+
static void switch_output_size_warn(struct record *rec)
{
u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
@@ -2142,10 +2289,19 @@ static int switch_output_setup(struct record *rec)
};
unsigned long val;
+ /*
+ * If we're using --switch-output-event, it implies
+ * --switch-output=signal, as we'll send a SIGUSR2 from the side band
+ * thread to its parent.
+ */
+ if (rec->switch_output_event_set)
+ goto do_signal;
+
if (!s->set)
return 0;
if (!strcmp(s->str, "signal")) {
+do_signal:
s->signal = true;
pr_debug("switch-output with SIGUSR2 signal\n");
goto enabled;
@@ -2232,6 +2388,9 @@ static struct record record = {
.default_per_cpu = true,
},
.mmap_flush = MMAP_FLUSH_DEFAULT,
+ .nr_threads_synthesize = 1,
+ .ctl_fd = -1,
+ .ctl_fd_ack = -1,
},
.tool = {
.sample = process_sample_event,
@@ -2289,7 +2448,7 @@ static struct option __record_options[] = {
OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
"synthesize non-sample events at the end of output"),
OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
- OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
+ OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
"Fail if the specified frequency can't be used"),
OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
@@ -2334,8 +2493,8 @@ static struct option __record_options[] = {
OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
"monitor event in cgroup name only",
parse_cgroups),
- OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
- "ms to wait before starting measurement after program start"),
+ OPT_INTEGER('D', "delay", &record.opts.initial_delay,
+ "ms to wait before starting measurement after program start (-1: start with events disabled)"),
OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
"user to profile"),
@@ -2374,8 +2533,9 @@ static struct option __record_options[] = {
"Record namespaces events"),
OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
"Record cgroup events"),
- OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
- "Record context switch events"),
+ OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
+ &record.opts.record_switch_events_set,
+ "Record context switch events"),
OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
"Configure all used events to run in kernel space.",
PARSE_OPT_EXCLUSIVE),
@@ -2402,6 +2562,9 @@ static struct option __record_options[] = {
&record.switch_output.set, "signal or size[BKMG] or time[smhd]",
"Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
"signal"),
+ OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
+ "switch output event selector. use 'perf list' to list available events",
+ parse_events_option_new_evlist),
OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
"Limit number of switch output generated files"),
OPT_BOOLEAN(0, "dry-run", &dry_run,
@@ -2421,6 +2584,20 @@ static struct option __record_options[] = {
#endif
OPT_CALLBACK(0, "max-size", &record.output_max_size,
"size", "Limit the maximum size of the output file", parse_output_max_size),
+ OPT_UINTEGER(0, "num-thread-synthesize",
+ &record.opts.nr_threads_synthesize,
+ "number of threads to run for event synthesis"),
+#ifdef HAVE_LIBPFM
+ OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
+ "libpfm4 event selector. use 'perf list' to list available events",
+ parse_libpfm_events_option),
+#endif
+ OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
+ "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
+ "\t\t\t 'snapshot': AUX area tracing snapshot).\n"
+ "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
+ "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
+ parse_control_option),
OPT_END()
};
@@ -2492,12 +2669,14 @@ int cmd_record(int argc, const char **argv)
!perf_can_record_switch_events()) {
ui__error("kernel does not support recording context switch events\n");
parse_options_usage(record_usage, record_options, "switch-events", 0);
- return -EINVAL;
+ err = -EINVAL;
+ goto out_opts;
}
if (switch_output_setup(rec)) {
parse_options_usage(record_usage, record_options, "switch-output", 0);
- return -EINVAL;
+ err = -EINVAL;
+ goto out_opts;
}
if (rec->switch_output.time) {
@@ -2508,8 +2687,10 @@ int cmd_record(int argc, const char **argv)
if (rec->switch_output.num_files) {
rec->switch_output.filenames = calloc(sizeof(char *),
rec->switch_output.num_files);
- if (!rec->switch_output.filenames)
- return -EINVAL;
+ if (!rec->switch_output.filenames) {
+ err = -EINVAL;
+ goto out_opts;
+ }
}
/*
@@ -2525,7 +2706,8 @@ int cmd_record(int argc, const char **argv)
rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
if (!rec->affinity_mask.bits) {
pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto out_opts;
}
pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
}
@@ -2582,7 +2764,7 @@ int cmd_record(int argc, const char **argv)
record.opts.tail_synthesize = true;
if (rec->evlist->core.nr_entries == 0 &&
- __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
+ __evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
pr_err("Not enough memory for event selector list\n");
goto out;
}
@@ -2626,6 +2808,14 @@ int cmd_record(int argc, const char **argv)
if (rec->opts.full_auxtrace)
rec->buildid_all = true;
+ if (rec->opts.text_poke) {
+ err = record__config_text_poke(rec->evlist);
+ if (err) {
+ pr_err("record__config_text_poke failed, error %d\n", err);
+ goto out;
+ }
+ }
+
if (record_opts__config(&rec->opts)) {
err = -EINVAL;
goto out;
@@ -2648,6 +2838,8 @@ out:
evlist__delete(rec->evlist);
symbol__exit();
auxtrace_record__free(rec->itr);
+out_opts:
+ evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
return err;
}
@@ -2655,12 +2847,7 @@ static void snapshot_sig_handler(int sig __maybe_unused)
{
struct record *rec = &record;
- if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
- trigger_hit(&auxtrace_snapshot_trigger);
- auxtrace_record__snapshot_started = 1;
- if (auxtrace_record__snapshot_start(record.itr))
- trigger_error(&auxtrace_snapshot_trigger);
- }
+ hit_auxtrace_snapshot_trigger(rec);
if (switch_output_signal(rec))
trigger_hit(&switch_output_trigger);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 26d8fc27e427..3c74c9c0f3c3 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -47,7 +47,6 @@
#include "util/time-utils.h"
#include "util/auxtrace.h"
#include "util/units.h"
-#include "util/branch.h"
#include "util/util.h" // perf_tip()
#include "ui/ui.h"
#include "ui/progress.h"
@@ -84,6 +83,7 @@ struct report {
bool header_only;
bool nonany_branch_mode;
bool group_set;
+ bool stitch_lbr;
int max_stack;
struct perf_read_values show_threads_values;
struct annotation_options annotation_opts;
@@ -267,6 +267,9 @@ static int process_sample_event(struct perf_tool *tool,
return -1;
}
+ if (rep->stitch_lbr)
+ al.thread->lbr_stitch_enable = true;
+
if (symbol_conf.hide_unresolved && al.sym == NULL)
goto out_put;
@@ -317,7 +320,7 @@ static int process_read_event(struct perf_tool *tool,
struct report *rep = container_of(tool, struct report, tool);
if (rep->show_threads) {
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
int err = perf_read_values_add_value(&rep->show_threads_values,
event->read.pid, event->read.tid,
evsel->idx,
@@ -335,16 +338,18 @@ static int process_read_event(struct perf_tool *tool,
static int report__setup_sample_type(struct report *rep)
{
struct perf_session *session = rep->session;
- u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
+ u64 sample_type = evlist__combined_sample_type(session->evlist);
bool is_pipe = perf_data__is_pipe(session->data);
if (session->itrace_synth_opts->callchain ||
+ session->itrace_synth_opts->add_callchain ||
(!is_pipe &&
perf_header__has_feat(&session->header, HEADER_AUXTRACE) &&
!session->itrace_synth_opts->set))
sample_type |= PERF_SAMPLE_CALLCHAIN;
- if (session->itrace_synth_opts->last_branch)
+ if (session->itrace_synth_opts->last_branch ||
+ session->itrace_synth_opts->add_last_branch)
sample_type |= PERF_SAMPLE_BRANCH_STACK;
if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
@@ -396,20 +401,16 @@ static int report__setup_sample_type(struct report *rep)
}
}
- if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
- if ((sample_type & PERF_SAMPLE_REGS_USER) &&
- (sample_type & PERF_SAMPLE_STACK_USER)) {
- callchain_param.record_mode = CALLCHAIN_DWARF;
- dwarf_callchain_users = true;
- } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
- callchain_param.record_mode = CALLCHAIN_LBR;
- else
- callchain_param.record_mode = CALLCHAIN_FP;
+ callchain_param_setup(sample_type);
+
+ if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
+ ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
+ "Please apply --call-graph lbr when recording.\n");
+ rep->stitch_lbr = false;
}
/* ??? handle more cases than just ANY? */
- if (!(perf_evlist__combined_branch_type(session->evlist) &
- PERF_SAMPLE_BRANCH_ANY))
+ if (!(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY))
rep->nonany_branch_mode = true;
#if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_DWARF_SUPPORT)
@@ -447,10 +448,10 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
nr_events = hists->stats.total_non_filtered_period;
}
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
struct evsel *pos;
- perf_evsel__group_desc(evsel, buf, size);
+ evsel__group_desc(evsel, buf, size);
evname = buf;
for_each_group_member(pos, evsel) {
@@ -476,8 +477,7 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
if (rep->time_str)
ret += fprintf(fp, " (time slices: %s)", rep->time_str);
- if (symbol_conf.show_ref_callgraph &&
- strstr(evname, "call-graph=no")) {
+ if (symbol_conf.show_ref_callgraph && evname && strstr(evname, "call-graph=no")) {
ret += fprintf(fp, ", show reference callgraph");
}
@@ -525,10 +525,9 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist,
evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
- const char *evname = perf_evsel__name(pos);
+ const char *evname = evsel__name(pos);
- if (symbol_conf.event_group &&
- !perf_evsel__is_group_leader(pos))
+ if (symbol_conf.event_group && !evsel__is_group_leader(pos))
continue;
hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
@@ -670,8 +669,7 @@ static int report__collapse_hists(struct report *rep)
break;
/* Non-group events are considered as leader */
- if (symbol_conf.event_group &&
- !perf_evsel__is_group_leader(pos)) {
+ if (symbol_conf.event_group && !evsel__is_group_leader(pos)) {
struct hists *leader_hists = evsel__hists(pos->leader);
hists__match(leader_hists, hists);
@@ -706,8 +704,7 @@ static void report__output_resort(struct report *rep)
ui_progress__init(&prog, rep->nr_entries, "Sorting events for output...");
evlist__for_each_entry(rep->session->evlist, pos) {
- perf_evsel__output_resort_cb(pos, &prog,
- hists__resort_cb, rep);
+ evsel__output_resort_cb(pos, &prog, hists__resort_cb, rep);
}
ui_progress__finish();
@@ -1080,6 +1077,26 @@ parse_percent_limit(const struct option *opt, const char *str,
return 0;
}
+static int process_attr(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct evlist **pevlist)
+{
+ u64 sample_type;
+ int err;
+
+ err = perf_event__process_attr(tool, event, pevlist);
+ if (err)
+ return err;
+
+ /*
+ * Check if we need to enable callchains based
+ * on events sample_type.
+ */
+ sample_type = evlist__combined_sample_type(*pevlist);
+ callchain_param_setup(sample_type);
+ return 0;
+}
+
int cmd_report(int argc, const char **argv)
{
struct perf_session *session;
@@ -1110,7 +1127,7 @@ int cmd_report(int argc, const char **argv)
.fork = perf_event__process_fork,
.lost = perf_event__process_lost,
.read = process_read_event,
- .attr = perf_event__process_attr,
+ .attr = process_attr,
.tracing_data = perf_event__process_tracing_data,
.build_id = perf_event__process_build_id,
.id_index = perf_event__process_id_index,
@@ -1257,6 +1274,8 @@ int cmd_report(int argc, const char **argv)
"Show full source file name path for source lines"),
OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph,
"Show callgraph from reference event"),
+ OPT_BOOLEAN(0, "stitch-lbr", &report.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
OPT_INTEGER(0, "socket-filter", &report.socket_filter,
"only show processor socket that match with this filter"),
OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
@@ -1313,6 +1332,9 @@ int cmd_report(int argc, const char **argv)
if (report.mmaps_mode)
report.tasks_mode = true;
+ if (dump_trace)
+ report.tool.ordered_events = false;
+
if (quiet)
perf_quiet_option();
@@ -1332,7 +1354,7 @@ int cmd_report(int argc, const char **argv)
if (symbol_conf.cumulate_callchain && !callchain_param.order_set)
callchain_param.order = ORDER_CALLER;
- if (itrace_synth_opts.callchain &&
+ if ((itrace_synth_opts.callchain || itrace_synth_opts.add_callchain) &&
(int)itrace_synth_opts.callchain_sz > report.max_stack)
report.max_stack = itrace_synth_opts.callchain_sz;
@@ -1369,7 +1391,7 @@ repeat:
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);
- if (perf_evlist__combined_sample_type(session->evlist) & PERF_SAMPLE_STACK_USER)
+ if (evlist__combined_sample_type(session->evlist) & PERF_SAMPLE_STACK_USER)
has_br_stack = false;
setup_forced_leader(&report, session->evlist);
@@ -1380,7 +1402,7 @@ repeat:
goto error;
}
- if (itrace_synth_opts.last_branch)
+ if (itrace_synth_opts.last_branch || itrace_synth_opts.add_last_branch)
has_br_stack = true;
if (has_br_stack && branch_call_mode)
@@ -1400,7 +1422,7 @@ repeat:
}
if (branch_call_mode) {
callchain_param.key = CCKEY_ADDRESS;
- callchain_param.branch_callstack = 1;
+ callchain_param.branch_callstack = true;
symbol_conf.use_callchain = true;
callchain_register_param(&callchain_param);
if (sort_order == NULL)
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 82fcc2c15fe4..0e16f9d5a947 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -130,7 +130,8 @@ struct work_atoms {
struct thread *thread;
struct rb_node node;
u64 max_lat;
- u64 max_lat_at;
+ u64 max_lat_start;
+ u64 max_lat_end;
u64 total_lat;
u64 nb_atoms;
u64 total_runtime;
@@ -811,8 +812,8 @@ replay_wakeup_event(struct perf_sched *sched,
struct evsel *evsel, struct perf_sample *sample,
struct machine *machine __maybe_unused)
{
- const char *comm = perf_evsel__strval(evsel, sample, "comm");
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const char *comm = evsel__strval(evsel, sample, "comm");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
struct task_desc *waker, *wakee;
if (verbose > 0) {
@@ -833,11 +834,11 @@ static int replay_switch_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine __maybe_unused)
{
- const char *prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"),
- *next_comm = perf_evsel__strval(evsel, sample, "next_comm");
- const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
- next_pid = perf_evsel__intval(evsel, sample, "next_pid");
- const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
+ const char *prev_comm = evsel__strval(evsel, sample, "prev_comm"),
+ *next_comm = evsel__strval(evsel, sample, "next_comm");
+ const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
+ next_pid = evsel__intval(evsel, sample, "next_pid");
+ const u64 prev_state = evsel__intval(evsel, sample, "prev_state");
struct task_desc *prev, __maybe_unused *next;
u64 timestamp0, timestamp = sample->time;
int cpu = sample->cpu;
@@ -1096,7 +1097,8 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
atoms->total_lat += delta;
if (delta > atoms->max_lat) {
atoms->max_lat = delta;
- atoms->max_lat_at = timestamp;
+ atoms->max_lat_start = atom->wake_up_time;
+ atoms->max_lat_end = timestamp;
}
atoms->nb_atoms++;
}
@@ -1106,9 +1108,9 @@ static int latency_switch_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
- const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
- next_pid = perf_evsel__intval(evsel, sample, "next_pid");
- const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
+ const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
+ next_pid = evsel__intval(evsel, sample, "next_pid");
+ const u64 prev_state = evsel__intval(evsel, sample, "prev_state");
struct work_atoms *out_events, *in_events;
struct thread *sched_out, *sched_in;
u64 timestamp0, timestamp = sample->time;
@@ -1176,8 +1178,8 @@ static int latency_runtime_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
- const u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
+ const u64 runtime = evsel__intval(evsel, sample, "runtime");
struct thread *thread = machine__findnew_thread(machine, -1, pid);
struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
u64 timestamp = sample->time;
@@ -1211,7 +1213,7 @@ static int latency_wakeup_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
struct work_atoms *atoms;
struct work_atom *atom;
struct thread *wakee;
@@ -1272,7 +1274,7 @@ static int latency_migrate_task_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
u64 timestamp = sample->time;
struct work_atoms *atoms;
struct work_atom *atom;
@@ -1322,7 +1324,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
int i;
int ret;
u64 avg;
- char max_lat_at[32];
+ char max_lat_start[32], max_lat_end[32];
if (!work_list->nb_atoms)
return;
@@ -1344,13 +1346,14 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
printf(" ");
avg = work_list->total_lat / work_list->nb_atoms;
- timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at));
+ timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start));
+ timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end));
- printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n",
+ printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n",
(double)work_list->total_runtime / NSEC_PER_MSEC,
work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
(double)work_list->max_lat / NSEC_PER_MSEC,
- max_lat_at);
+ max_lat_start, max_lat_end);
}
static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
@@ -1526,7 +1529,7 @@ map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid
static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
struct perf_sample *sample, struct machine *machine)
{
- const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+ const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
struct thread *sched_in;
struct thread_runtime *tr;
int new_shortname;
@@ -1670,8 +1673,8 @@ static int process_sched_switch_event(struct perf_tool *tool,
{
struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
int this_cpu = sample->cpu, err = 0;
- u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
- next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+ u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
+ next_pid = evsel__intval(evsel, sample, "next_pid");
if (sched->curr_pid[this_cpu] != (u32)-1) {
/*
@@ -1848,7 +1851,7 @@ static inline void print_sched_time(unsigned long long nsecs, int width)
* returns runtime data for event, allocating memory for it the
* first time it is used.
*/
-static struct evsel_runtime *perf_evsel__get_runtime(struct evsel *evsel)
+static struct evsel_runtime *evsel__get_runtime(struct evsel *evsel)
{
struct evsel_runtime *r = evsel->priv;
@@ -1863,10 +1866,9 @@ static struct evsel_runtime *perf_evsel__get_runtime(struct evsel *evsel)
/*
* save last time event was seen per cpu
*/
-static void perf_evsel__save_time(struct evsel *evsel,
- u64 timestamp, u32 cpu)
+static void evsel__save_time(struct evsel *evsel, u64 timestamp, u32 cpu)
{
- struct evsel_runtime *r = perf_evsel__get_runtime(evsel);
+ struct evsel_runtime *r = evsel__get_runtime(evsel);
if (r == NULL)
return;
@@ -1890,9 +1892,9 @@ static void perf_evsel__save_time(struct evsel *evsel,
}
/* returns last time this event was seen on the given cpu */
-static u64 perf_evsel__get_time(struct evsel *evsel, u32 cpu)
+static u64 evsel__get_time(struct evsel *evsel, u32 cpu)
{
- struct evsel_runtime *r = perf_evsel__get_runtime(evsel);
+ struct evsel_runtime *r = evsel__get_runtime(evsel);
if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu))
return 0;
@@ -2004,8 +2006,8 @@ static void timehist_print_sample(struct perf_sched *sched,
u64 t, int state)
{
struct thread_runtime *tr = thread__priv(thread);
- const char *next_comm = perf_evsel__strval(evsel, sample, "next_comm");
- const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+ const char *next_comm = evsel__strval(evsel, sample, "next_comm");
+ const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
u32 max_cpus = sched->max_cpu + 1;
char tstr[64];
char nstr[30];
@@ -2136,8 +2138,8 @@ static bool is_idle_sample(struct perf_sample *sample,
struct evsel *evsel)
{
/* pid 0 == swapper == idle task */
- if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0)
- return perf_evsel__intval(evsel, sample, "prev_pid") == 0;
+ if (strcmp(evsel__name(evsel), "sched:sched_switch") == 0)
+ return evsel__intval(evsel, sample, "prev_pid") == 0;
return sample->pid == 0;
}
@@ -2334,7 +2336,7 @@ static struct thread *timehist_get_thread(struct perf_sched *sched,
itr->last_thread = thread;
/* copy task callchain when entering to idle */
- if (perf_evsel__intval(evsel, sample, "next_pid") == 0)
+ if (evsel__intval(evsel, sample, "next_pid") == 0)
save_idle_callchain(sched, itr, sample);
}
}
@@ -2355,10 +2357,10 @@ static bool timehist_skip_sample(struct perf_sched *sched,
}
if (sched->idle_hist) {
- if (strcmp(perf_evsel__name(evsel), "sched:sched_switch"))
+ if (strcmp(evsel__name(evsel), "sched:sched_switch"))
rc = true;
- else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 &&
- perf_evsel__intval(evsel, sample, "next_pid") != 0)
+ else if (evsel__intval(evsel, sample, "prev_pid") != 0 &&
+ evsel__intval(evsel, sample, "next_pid") != 0)
rc = true;
}
@@ -2399,6 +2401,15 @@ static void timehist_print_wakeup_event(struct perf_sched *sched,
printf("\n");
}
+static int timehist_sched_wakeup_ignore(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return 0;
+}
+
static int timehist_sched_wakeup_event(struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct evsel *evsel,
@@ -2409,7 +2420,7 @@ static int timehist_sched_wakeup_event(struct perf_tool *tool,
struct thread *thread;
struct thread_runtime *tr = NULL;
/* want pid of awakened task not pid in sample */
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
thread = machine__findnew_thread(machine, 0, pid);
if (thread == NULL)
@@ -2445,8 +2456,8 @@ static void timehist_print_migration_event(struct perf_sched *sched,
return;
max_cpus = sched->max_cpu + 1;
- ocpu = perf_evsel__intval(evsel, sample, "orig_cpu");
- dcpu = perf_evsel__intval(evsel, sample, "dest_cpu");
+ ocpu = evsel__intval(evsel, sample, "orig_cpu");
+ dcpu = evsel__intval(evsel, sample, "dest_cpu");
thread = machine__findnew_thread(machine, sample->pid, sample->tid);
if (thread == NULL)
@@ -2493,7 +2504,7 @@ static int timehist_migrate_task_event(struct perf_tool *tool,
struct thread *thread;
struct thread_runtime *tr = NULL;
/* want pid of migrated task not pid in sample */
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
thread = machine__findnew_thread(machine, 0, pid);
if (thread == NULL)
@@ -2524,8 +2535,7 @@ static int timehist_sched_change_event(struct perf_tool *tool,
struct thread_runtime *tr = NULL;
u64 tprev, t = sample->time;
int rc = 0;
- int state = perf_evsel__intval(evsel, sample, "prev_state");
-
+ int state = evsel__intval(evsel, sample, "prev_state");
if (machine__resolve(machine, &al, sample) < 0) {
pr_err("problem processing %d event. skipping it\n",
@@ -2549,7 +2559,7 @@ static int timehist_sched_change_event(struct perf_tool *tool,
goto out;
}
- tprev = perf_evsel__get_time(evsel, sample->cpu);
+ tprev = evsel__get_time(evsel, sample->cpu);
/*
* If start time given:
@@ -2577,7 +2587,8 @@ static int timehist_sched_change_event(struct perf_tool *tool,
}
if (!sched->idle_hist || thread->tid == 0) {
- timehist_update_runtime_stats(tr, t, tprev);
+ if (!cpu_list || test_bit(sample->cpu, cpu_bitmap))
+ timehist_update_runtime_stats(tr, t, tprev);
if (sched->idle_hist) {
struct idle_thread_runtime *itr = (void *)tr;
@@ -2632,7 +2643,7 @@ out:
tr->ready_to_run = 0;
}
- perf_evsel__save_time(evsel, sample->time, sample->cpu);
+ evsel__save_time(evsel, sample->time, sample->cpu);
return rc;
}
@@ -2850,6 +2861,9 @@ static void timehist_print_summary(struct perf_sched *sched,
printf("\nIdle stats:\n");
for (i = 0; i < idle_max_cpu; ++i) {
+ if (cpu_list && !test_bit(i, cpu_bitmap))
+ continue;
+
t = idle_threads[i];
if (!t)
continue;
@@ -2942,7 +2956,7 @@ static int timehist_check_attr(struct perf_sched *sched,
struct evsel_runtime *er;
list_for_each_entry(evsel, &evlist->core.entries, core.node) {
- er = perf_evsel__get_runtime(evsel);
+ er = evsel__get_runtime(evsel);
if (er == NULL) {
pr_err("Failed to allocate memory for evsel runtime data\n");
return -1;
@@ -2960,9 +2974,10 @@ static int timehist_check_attr(struct perf_sched *sched,
static int perf_sched__timehist(struct perf_sched *sched)
{
- const struct evsel_str_handler handlers[] = {
+ struct evsel_str_handler handlers[] = {
{ "sched:sched_switch", timehist_sched_switch_event, },
{ "sched:sched_wakeup", timehist_sched_wakeup_event, },
+ { "sched:sched_waking", timehist_sched_wakeup_event, },
{ "sched:sched_wakeup_new", timehist_sched_wakeup_event, },
};
const struct evsel_str_handler migrate_handlers[] = {
@@ -3020,6 +3035,11 @@ static int perf_sched__timehist(struct perf_sched *sched)
setup_pager();
+ /* prefer sched_waking if it is captured */
+ if (perf_evlist__find_tracepoint_by_name(session->evlist,
+ "sched:sched_waking"))
+ handlers[1].handler = timehist_sched_wakeup_ignore;
+
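
This override is why the earlier hunk drops const from the handlers table; the pattern in miniature (illustrative types, not perf code):

struct str_handler {
	const char *name;
	int (*handler)(int sample);
};

static int on_wakeup(int sample) { return sample; }
static int ignore(int sample)    { (void)sample; return 0; }

static struct str_handler handlers[] = {	/* writable on purpose */
	{ "sched:sched_wakeup", on_wakeup },
};

static void prefer_waking(int waking_present)
{
	if (waking_present)		/* wakeup events become redundant */
		handlers[0].handler = ignore;
}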
/* setup per-evsel handlers */
if (perf_session__set_tracepoints_handlers(session, handlers))
goto out;
@@ -3120,7 +3140,8 @@ static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *d
list_splice(&data->work_list, &this->work_list);
if (this->max_lat < data->max_lat) {
this->max_lat = data->max_lat;
- this->max_lat_at = data->max_lat_at;
+ this->max_lat_start = data->max_lat_start;
+ this->max_lat_end = data->max_lat_end;
}
zfree(&data);
return;
@@ -3159,9 +3180,9 @@ static int perf_sched__lat(struct perf_sched *sched)
perf_sched__merge_lat(sched);
perf_sched__sort_lat(sched);
- printf("\n -----------------------------------------------------------------------------------------------------------------\n");
- printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
- printf(" -----------------------------------------------------------------------------------------------------------------\n");
+ printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n");
+ printf(" Task | Runtime ms | Switches | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n");
+ printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n");
next = rb_first_cached(&sched->sorted_atom_root);
@@ -3332,12 +3353,16 @@ static int __cmd_record(int argc, const char **argv)
"-e", "sched:sched_stat_iowait",
"-e", "sched:sched_stat_runtime",
"-e", "sched:sched_process_fork",
- "-e", "sched:sched_wakeup",
"-e", "sched:sched_wakeup_new",
"-e", "sched:sched_migrate_task",
};
+ struct tep_event *waking_event;
- rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+ /*
+ * +2 for either "-e", "sched:sched_wakeup" or
+ * "-e", "sched:sched_waking"
+ */
+ rec_argc = ARRAY_SIZE(record_args) + 2 + argc - 1;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
if (rec_argv == NULL)
@@ -3346,6 +3371,13 @@ static int __cmd_record(int argc, const char **argv)
for (i = 0; i < ARRAY_SIZE(record_args); i++)
rec_argv[i] = strdup(record_args[i]);
+ rec_argv[i++] = "-e";
+ waking_event = trace_event__tp_format("sched", "sched_waking");
+ if (!IS_ERR(waking_event))
+ rec_argv[i++] = strdup("sched:sched_waking");
+ else
+ rec_argv[i++] = strdup("sched:sched_wakeup");
+
for (j = 1; j < (unsigned int)argc; j++, i++)
rec_argv[i] = argv[j];
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 1f57a7ecdf3d..48588ccf902e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -82,38 +82,64 @@ static bool native_arch;
unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
enum perf_output_field {
- PERF_OUTPUT_COMM = 1U << 0,
- PERF_OUTPUT_TID = 1U << 1,
- PERF_OUTPUT_PID = 1U << 2,
- PERF_OUTPUT_TIME = 1U << 3,
- PERF_OUTPUT_CPU = 1U << 4,
- PERF_OUTPUT_EVNAME = 1U << 5,
- PERF_OUTPUT_TRACE = 1U << 6,
- PERF_OUTPUT_IP = 1U << 7,
- PERF_OUTPUT_SYM = 1U << 8,
- PERF_OUTPUT_DSO = 1U << 9,
- PERF_OUTPUT_ADDR = 1U << 10,
- PERF_OUTPUT_SYMOFFSET = 1U << 11,
- PERF_OUTPUT_SRCLINE = 1U << 12,
- PERF_OUTPUT_PERIOD = 1U << 13,
- PERF_OUTPUT_IREGS = 1U << 14,
- PERF_OUTPUT_BRSTACK = 1U << 15,
- PERF_OUTPUT_BRSTACKSYM = 1U << 16,
- PERF_OUTPUT_DATA_SRC = 1U << 17,
- PERF_OUTPUT_WEIGHT = 1U << 18,
- PERF_OUTPUT_BPF_OUTPUT = 1U << 19,
- PERF_OUTPUT_CALLINDENT = 1U << 20,
- PERF_OUTPUT_INSN = 1U << 21,
- PERF_OUTPUT_INSNLEN = 1U << 22,
- PERF_OUTPUT_BRSTACKINSN = 1U << 23,
- PERF_OUTPUT_BRSTACKOFF = 1U << 24,
- PERF_OUTPUT_SYNTH = 1U << 25,
- PERF_OUTPUT_PHYS_ADDR = 1U << 26,
- PERF_OUTPUT_UREGS = 1U << 27,
- PERF_OUTPUT_METRIC = 1U << 28,
- PERF_OUTPUT_MISC = 1U << 29,
- PERF_OUTPUT_SRCCODE = 1U << 30,
- PERF_OUTPUT_IPC = 1U << 31,
+ PERF_OUTPUT_COMM = 1ULL << 0,
+ PERF_OUTPUT_TID = 1ULL << 1,
+ PERF_OUTPUT_PID = 1ULL << 2,
+ PERF_OUTPUT_TIME = 1ULL << 3,
+ PERF_OUTPUT_CPU = 1ULL << 4,
+ PERF_OUTPUT_EVNAME = 1ULL << 5,
+ PERF_OUTPUT_TRACE = 1ULL << 6,
+ PERF_OUTPUT_IP = 1ULL << 7,
+ PERF_OUTPUT_SYM = 1ULL << 8,
+ PERF_OUTPUT_DSO = 1ULL << 9,
+ PERF_OUTPUT_ADDR = 1ULL << 10,
+ PERF_OUTPUT_SYMOFFSET = 1ULL << 11,
+ PERF_OUTPUT_SRCLINE = 1ULL << 12,
+ PERF_OUTPUT_PERIOD = 1ULL << 13,
+ PERF_OUTPUT_IREGS = 1ULL << 14,
+ PERF_OUTPUT_BRSTACK = 1ULL << 15,
+ PERF_OUTPUT_BRSTACKSYM = 1ULL << 16,
+ PERF_OUTPUT_DATA_SRC = 1ULL << 17,
+ PERF_OUTPUT_WEIGHT = 1ULL << 18,
+ PERF_OUTPUT_BPF_OUTPUT = 1ULL << 19,
+ PERF_OUTPUT_CALLINDENT = 1ULL << 20,
+ PERF_OUTPUT_INSN = 1ULL << 21,
+ PERF_OUTPUT_INSNLEN = 1ULL << 22,
+ PERF_OUTPUT_BRSTACKINSN = 1ULL << 23,
+ PERF_OUTPUT_BRSTACKOFF = 1ULL << 24,
+ PERF_OUTPUT_SYNTH = 1ULL << 25,
+ PERF_OUTPUT_PHYS_ADDR = 1ULL << 26,
+ PERF_OUTPUT_UREGS = 1ULL << 27,
+ PERF_OUTPUT_METRIC = 1ULL << 28,
+ PERF_OUTPUT_MISC = 1ULL << 29,
+ PERF_OUTPUT_SRCCODE = 1ULL << 30,
+ PERF_OUTPUT_IPC = 1ULL << 31,
+ PERF_OUTPUT_TOD = 1ULL << 32,
+};
+
+struct perf_script {
+ struct perf_tool tool;
+ struct perf_session *session;
+ bool show_task_events;
+ bool show_mmap_events;
+ bool show_switch_events;
+ bool show_namespace_events;
+ bool show_lost_events;
+ bool show_round_events;
+ bool show_bpf_events;
+ bool show_cgroup_events;
+ bool show_text_poke_events;
+ bool allocated;
+ bool per_event_dump;
+ bool stitch_lbr;
+ struct evswitch evswitch;
+ struct perf_cpu_map *cpus;
+ struct perf_thread_map *threads;
+ int name_width;
+ const char *time_str;
+ struct perf_time_interval *ptime_range;
+ int range_size;
+ int range_num;
};
struct output_option {
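
The only semantic change in the enum above is the constant width: PERF_OUTPUT_TOD needs bit 32, and with a 32-bit constant `1U << 32` is undefined behavior, so every initializer moves to 1ULL and the masks holding these flags must be 64-bit. A standalone illustration (not from the patch):

#include <stdint.h>

#define OUT_IPC (1ULL << 31)	/* highest bit that still fits in 32 bits */
#define OUT_TOD (1ULL << 32)	/* needs a 64-bit constant and a u64 mask */

static inline int has_field(uint64_t fields, uint64_t bit)
{
	return (fields & bit) != 0;	/* fields must be u64, not unsigned int */
}

The struct perf_script definition is also hoisted above the output helpers here because tod_scnprintf(), added below, needs the session that hangs off it.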
@@ -152,6 +178,7 @@ struct output_option {
{.str = "misc", .field = PERF_OUTPUT_MISC},
{.str = "srccode", .field = PERF_OUTPUT_SRCCODE},
{.str = "ipc", .field = PERF_OUTPUT_IPC},
+ {.str = "tod", .field = PERF_OUTPUT_TOD},
};
enum {
@@ -167,6 +194,7 @@ static struct {
u64 fields;
u64 invalid_fields;
u64 user_set_fields;
+ u64 user_unset_fields;
} output[OUTPUT_TYPE_MAX] = {
[PERF_TYPE_HARDWARE] = {
@@ -273,7 +301,7 @@ static struct evsel_script *perf_evsel_script__new(struct evsel *evsel,
struct evsel_script *es = zalloc(sizeof(*es));
if (es != NULL) {
- if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0)
+ if (asprintf(&es->filename, "%s.%s.dump", data->file.path, evsel__name(evsel)) < 0)
goto out_free;
es->fp = fopen(es->filename, "w");
if (es->fp == NULL)
@@ -351,10 +379,8 @@ static const char *output_field2str(enum perf_output_field field)
#define PRINT_FIELD(x) (output[output_type(attr->type)].fields & PERF_OUTPUT_##x)
-static int perf_evsel__do_check_stype(struct evsel *evsel,
- u64 sample_type, const char *sample_msg,
- enum perf_output_field field,
- bool allow_user_set)
+static int evsel__do_check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg,
+ enum perf_output_field field, bool allow_user_set)
{
struct perf_event_attr *attr = &evsel->core.attr;
int type = output_type(attr->type);
@@ -366,7 +392,7 @@ static int perf_evsel__do_check_stype(struct evsel *evsel,
if (output[type].user_set_fields & field) {
if (allow_user_set)
return 0;
- evname = perf_evsel__name(evsel);
+ evname = evsel__name(evsel);
pr_err("Samples for '%s' event do not have %s attribute set. "
"Cannot print '%s' field.\n",
evname, sample_msg, output_field2str(field));
@@ -375,7 +401,7 @@ static int perf_evsel__do_check_stype(struct evsel *evsel,
/* user did not ask for it explicitly so remove from the default list */
output[type].fields &= ~field;
- evname = perf_evsel__name(evsel);
+ evname = evsel__name(evsel);
pr_debug("Samples for '%s' event do not have %s attribute set. "
"Skipping '%s' field.\n",
evname, sample_msg, output_field2str(field));
@@ -383,16 +409,13 @@ static int perf_evsel__do_check_stype(struct evsel *evsel,
return 0;
}
-static int perf_evsel__check_stype(struct evsel *evsel,
- u64 sample_type, const char *sample_msg,
- enum perf_output_field field)
+static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg,
+ enum perf_output_field field)
{
- return perf_evsel__do_check_stype(evsel, sample_type, sample_msg, field,
- false);
+ return evsel__do_check_stype(evsel, sample_type, sample_msg, field, false);
}
-static int perf_evsel__check_attr(struct evsel *evsel,
- struct perf_session *session)
+static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
{
struct perf_event_attr *attr = &evsel->core.attr;
bool allow_user_set;
@@ -404,32 +427,28 @@ static int perf_evsel__check_attr(struct evsel *evsel,
HEADER_AUXTRACE);
if (PRINT_FIELD(TRACE) &&
- !perf_session__has_traces(session, "record -R"))
+ !perf_session__has_traces(session, "record -R"))
return -EINVAL;
if (PRINT_FIELD(IP)) {
- if (perf_evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP",
- PERF_OUTPUT_IP))
+ if (evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP", PERF_OUTPUT_IP))
return -EINVAL;
}
if (PRINT_FIELD(ADDR) &&
- perf_evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
- PERF_OUTPUT_ADDR, allow_user_set))
+ evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR", PERF_OUTPUT_ADDR, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(DATA_SRC) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC",
- PERF_OUTPUT_DATA_SRC))
+ evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC))
return -EINVAL;
if (PRINT_FIELD(WEIGHT) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT",
- PERF_OUTPUT_WEIGHT))
+ evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT", PERF_OUTPUT_WEIGHT))
return -EINVAL;
if (PRINT_FIELD(SYM) &&
- !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
+ !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
pr_err("Display of symbols requested but neither sample IP nor "
"sample address\navailable. Hence, no addresses to convert "
"to symbols.\n");
@@ -441,7 +460,7 @@ static int perf_evsel__check_attr(struct evsel *evsel,
return -EINVAL;
}
if (PRINT_FIELD(DSO) &&
- !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
+ !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
pr_err("Display of DSO requested but no address to convert.\n");
return -EINVAL;
}
@@ -451,40 +470,33 @@ static int perf_evsel__check_attr(struct evsel *evsel,
return -EINVAL;
}
if (PRINT_FIELD(BRSTACKINSN) && !allow_user_set &&
- !(perf_evlist__combined_branch_type(session->evlist) &
- PERF_SAMPLE_BRANCH_ANY)) {
+ !(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) {
pr_err("Display of branch stack assembler requested, but non all-branch filter set\n"
"Hint: run 'perf record -b ...'\n");
return -EINVAL;
}
if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
- PERF_OUTPUT_TID|PERF_OUTPUT_PID))
+ evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", PERF_OUTPUT_TID|PERF_OUTPUT_PID))
return -EINVAL;
if (PRINT_FIELD(TIME) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME",
- PERF_OUTPUT_TIME))
+ evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME", PERF_OUTPUT_TIME))
return -EINVAL;
if (PRINT_FIELD(CPU) &&
- perf_evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
- PERF_OUTPUT_CPU, allow_user_set))
+ evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU", PERF_OUTPUT_CPU, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(IREGS) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS",
- PERF_OUTPUT_IREGS))
+ evsel__do_check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", PERF_OUTPUT_IREGS, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(UREGS) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS",
- PERF_OUTPUT_UREGS))
+ evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS", PERF_OUTPUT_UREGS))
return -EINVAL;
if (PRINT_FIELD(PHYS_ADDR) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR",
- PERF_OUTPUT_PHYS_ADDR))
+ evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR))
return -EINVAL;
return 0;
@@ -517,6 +529,7 @@ static void set_print_ip_opts(struct perf_event_attr *attr)
*/
static int perf_session__check_output_opt(struct perf_session *session)
{
+ bool tod = false;
unsigned int j;
struct evsel *evsel;
@@ -536,13 +549,14 @@ static int perf_session__check_output_opt(struct perf_session *session)
}
if (evsel && output[j].fields &&
- perf_evsel__check_attr(evsel, session))
+ evsel__check_attr(evsel, session))
return -1;
if (evsel == NULL)
continue;
set_print_ip_opts(&evsel->core.attr);
+ tod |= output[j].fields & PERF_OUTPUT_TOD;
}
if (!no_callchain) {
@@ -583,13 +597,17 @@ static int perf_session__check_output_opt(struct perf_session *session)
}
}
+ if (tod && !session->header.env.clock.enabled) {
+ pr_err("Can't provide 'tod' time, missing clock data. "
+ "Please record with -k/--clockid option.\n");
+ return -1;
+ }
out:
return 0;
}
static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
- FILE *fp
-)
+ FILE *fp)
{
unsigned i = 0, r;
int printed = 0;
@@ -604,11 +622,59 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val);
}
- fprintf(fp, "\n");
-
return printed;
}
+#define DEFAULT_TOD_FMT "%F %H:%M:%S"
+
+static char*
+tod_scnprintf(struct perf_script *script, char *buf, int buflen,
+ u64 timestamp)
+{
+ u64 tod_ns, clockid_ns;
+ struct perf_env *env;
+ unsigned long nsec;
+ struct tm ltime;
+ char date[64];
+ time_t sec;
+
+ buf[0] = '\0';
+ if (buflen < 64 || !script)
+ return buf;
+
+ env = &script->session->header.env;
+ if (!env->clock.enabled) {
+ scnprintf(buf, buflen, "disabled");
+ return buf;
+ }
+
+ clockid_ns = env->clock.clockid_ns;
+ tod_ns = env->clock.tod_ns;
+
+ if (timestamp > clockid_ns)
+ tod_ns += timestamp - clockid_ns;
+ else
+ tod_ns -= clockid_ns - timestamp;
+
+ sec = (time_t) (tod_ns / NSEC_PER_SEC);
+ nsec = tod_ns - sec * NSEC_PER_SEC;
+
+ if (localtime_r(&sec, &ltime) == NULL) {
+ scnprintf(buf, buflen, "failed");
+ } else {
+ strftime(date, sizeof(date), DEFAULT_TOD_FMT, &ltime);
+
+ if (symbol_conf.nanosecs) {
+ snprintf(buf, buflen, "%s.%09lu", date, nsec);
+ } else {
+ snprintf(buf, buflen, "%s.%06lu",
+ date, nsec / NSEC_PER_USEC);
+ }
+ }
+
+ return buf;
+}
+
static int perf_sample__fprintf_iregs(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
{
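
The conversion in tod_scnprintf() above leans on one fact: the header now carries a reference pair captured at the same instant, env->clock.clockid_ns (in the session's sampling clock) and env->clock.tod_ns (wall clock), so any sample timestamp can be shifted by the same offset. Just the arithmetic, as a self-contained sketch (names other than the two fields are illustrative):

#include <stdint.h>

/*
 * Map a sample timestamp (same clock as clockid_ns) onto wall-clock
 * nanoseconds. Unsigned-safe for samples before or after the reference.
 */
static uint64_t sample_to_tod_ns(uint64_t timestamp,
				 uint64_t clockid_ns, uint64_t tod_ns)
{
	if (timestamp > clockid_ns)
		return tod_ns + (timestamp - clockid_ns);
	return tod_ns - (clockid_ns - timestamp);
}

This is also why the check added to perf_session__check_output_opt() refuses 'tod' without -k/--clockid: with no reference pair recorded, there is no offset to apply.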
@@ -623,7 +689,8 @@ static int perf_sample__fprintf_uregs(struct perf_sample *sample,
attr->sample_regs_user, fp);
}
-static int perf_sample__fprintf_start(struct perf_sample *sample,
+static int perf_sample__fprintf_start(struct perf_script *script,
+ struct perf_sample *sample,
struct thread *thread,
struct evsel *evsel,
u32 type, FILE *fp)
@@ -632,14 +699,17 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
unsigned long secs;
unsigned long long nsecs;
int printed = 0;
+ char tstr[128];
if (PRINT_FIELD(COMM)) {
+ const char *comm = thread ? thread__comm_str(thread) : ":-1";
+
if (latency_format)
- printed += fprintf(fp, "%8.8s ", thread__comm_str(thread));
+ printed += fprintf(fp, "%8.8s ", comm);
else if (PRINT_FIELD(IP) && evsel__has_callchain(evsel) && symbol_conf.use_callchain)
- printed += fprintf(fp, "%s ", thread__comm_str(thread));
+ printed += fprintf(fp, "%s ", comm);
else
- printed += fprintf(fp, "%16s ", thread__comm_str(thread));
+ printed += fprintf(fp, "%16s ", comm);
}
if (PRINT_FIELD(PID) && PRINT_FIELD(TID))
@@ -700,6 +770,11 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
printed += ret;
}
+ if (PRINT_FIELD(TOD)) {
+ tod_scnprintf(script, tstr, sizeof(tstr), sample->time);
+ printed += fprintf(fp, "%s ", tstr);
+ }
+
if (PRINT_FIELD(TIME)) {
u64 t = sample->time;
if (reltime) {
@@ -1684,36 +1759,13 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample,
return 0;
}
-struct perf_script {
- struct perf_tool tool;
- struct perf_session *session;
- bool show_task_events;
- bool show_mmap_events;
- bool show_switch_events;
- bool show_namespace_events;
- bool show_lost_events;
- bool show_round_events;
- bool show_bpf_events;
- bool show_cgroup_events;
- bool allocated;
- bool per_event_dump;
- struct evswitch evswitch;
- struct perf_cpu_map *cpus;
- struct perf_thread_map *threads;
- int name_width;
- const char *time_str;
- struct perf_time_interval *ptime_range;
- int range_size;
- int range_num;
-};
-
-static int perf_evlist__max_name_len(struct evlist *evlist)
+static int evlist__max_name_len(struct evlist *evlist)
{
struct evsel *evsel;
int max = 0;
evlist__for_each_entry(evlist, evsel) {
- int len = strlen(perf_evsel__name(evsel));
+ int len = strlen(evsel__name(evsel));
max = MAX(len, max);
}
@@ -1754,7 +1806,7 @@ static void script_print_metric(struct perf_stat_config *config __maybe_unused,
if (!fmt)
return;
- perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+ perf_sample__fprintf_start(NULL, mctx->sample, mctx->thread, mctx->evsel,
PERF_RECORD_SAMPLE, mctx->fp);
fputs("\tmetric: ", mctx->fp);
if (color)
@@ -1769,7 +1821,7 @@ static void script_new_line(struct perf_stat_config *config __maybe_unused,
{
struct metric_ctx *mctx = ctx;
- perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+ perf_sample__fprintf_start(NULL, mctx->sample, mctx->thread, mctx->evsel,
PERF_RECORD_SAMPLE, mctx->fp);
fputs("\tmetric: ", mctx->fp);
}
@@ -1880,17 +1932,17 @@ static void process_event(struct perf_script *script,
++es->samples;
- perf_sample__fprintf_start(sample, thread, evsel,
+ perf_sample__fprintf_start(script, sample, thread, evsel,
PERF_RECORD_SAMPLE, fp);
if (PRINT_FIELD(PERIOD))
fprintf(fp, "%10" PRIu64 " ", sample->period);
if (PRINT_FIELD(EVNAME)) {
- const char *evname = perf_evsel__name(evsel);
+ const char *evname = evsel__name(evsel);
if (!script->name_width)
- script->name_width = perf_evlist__max_name_len(script->session->evlist);
+ script->name_width = evlist__max_name_len(script->session->evlist);
fprintf(fp, "%*s: ", script->name_width, evname ?: "[unknown]");
}
@@ -1923,6 +1975,9 @@ static void process_event(struct perf_script *script,
if (PRINT_FIELD(IP)) {
struct callchain_cursor *cursor = NULL;
+ if (script->stitch_lbr)
+ al->thread->lbr_stitch_enable = true;
+
if (symbol_conf.use_callchain && sample->callchain &&
thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
sample, NULL, NULL, scripting_max_stack) == 0)
@@ -1946,7 +2001,7 @@ static void process_event(struct perf_script *script,
else if (PRINT_FIELD(BRSTACKOFF))
perf_sample__fprintf_brstackoff(sample, thread, attr, fp);
- if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
+ if (evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
perf_sample__fprintf_bpf_output(sample, fp);
perf_sample__fprintf_insn(sample, attr, thread, machine, fp);
@@ -1975,7 +2030,7 @@ static struct scripting_ops *scripting_ops;
static void __process_stat(struct evsel *counter, u64 tstamp)
{
int nthreads = perf_thread_map__nr(counter->core.threads);
- int ncpus = perf_evsel__nr_cpus(counter);
+ int ncpus = evsel__nr_cpus(counter);
int cpu, thread;
static int header_printed;
@@ -2001,7 +2056,7 @@ static void __process_stat(struct evsel *counter, u64 tstamp)
counts->ena,
counts->run,
tstamp,
- perf_evsel__name(counter));
+ evsel__name(counter));
}
}
}
@@ -2040,7 +2095,7 @@ static int cleanup_scripting(void)
static bool filter_cpu(struct perf_sample *sample)
{
- if (cpu_list)
+ if (cpu_list && sample->cpu != (u32)-1)
return !test_bit(sample->cpu, cpu_bitmap);
return false;
}
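
The extra `sample->cpu != (u32)-1` test above matters once the side-band events below are routed through print_event(): an event recorded without sample_id_all carries no CPU, which perf encodes as (u32)-1, and handing that to test_bit() would index far beyond cpu_bitmap. The guard in isolation (bitmap size and helpers are illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <limits.h>

#define NR_CPU_WORDS 64	/* illustrative bitmap covering 64 longs' worth of CPUs */

static bool cpu_filtered(uint32_t cpu, const unsigned long *bitmap)
{
	if (cpu == (uint32_t)-1)	/* no CPU recorded: never filter */
		return false;
	return !(bitmap[cpu / (sizeof(long) * CHAR_BIT)] &
		 (1UL << (cpu % (sizeof(long) * CHAR_BIT))));
}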
@@ -2098,6 +2153,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
struct perf_script *scr = container_of(tool, struct perf_script, tool);
struct evlist *evlist;
struct evsel *evsel, *pos;
+ u64 sample_type;
int err;
static struct evsel_script *es;
@@ -2130,49 +2186,89 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
return 0;
}
- set_print_ip_opts(&evsel->core.attr);
+ if (evsel->core.attr.sample_type) {
+ err = evsel__check_attr(evsel, scr->session);
+ if (err)
+ return err;
+ }
- if (evsel->core.attr.sample_type)
- err = perf_evsel__check_attr(evsel, scr->session);
+ /*
+ * Check if we need to enable callchains based
+ * on events sample_type.
+ */
+ sample_type = evlist__combined_sample_type(evlist);
+ callchain_param_setup(sample_type);
- return err;
+ /* Enable fields for callchain entries */
+ if (symbol_conf.use_callchain &&
+ (sample_type & PERF_SAMPLE_CALLCHAIN ||
+ sample_type & PERF_SAMPLE_BRANCH_STACK ||
+ (sample_type & PERF_SAMPLE_REGS_USER &&
+ sample_type & PERF_SAMPLE_STACK_USER))) {
+ int type = output_type(evsel->core.attr.type);
+
+ if (!(output[type].user_unset_fields & PERF_OUTPUT_IP))
+ output[type].fields |= PERF_OUTPUT_IP;
+ if (!(output[type].user_unset_fields & PERF_OUTPUT_SYM))
+ output[type].fields |= PERF_OUTPUT_SYM;
+ }
+ set_print_ip_opts(&evsel->core.attr);
+ return 0;
}
-static int process_comm_event(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample,
- struct machine *machine)
+static int print_event_with_time(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine,
+ pid_t pid, pid_t tid, u64 timestamp)
{
- struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
struct perf_session *session = script->session;
struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
- int ret = -1;
+ struct thread *thread = NULL;
- thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid);
- if (thread == NULL) {
- pr_debug("problem processing COMM event, skipping it.\n");
- return -1;
+ if (evsel && !evsel->core.attr.sample_id_all) {
+ sample->cpu = 0;
+ sample->time = timestamp;
+ sample->pid = pid;
+ sample->tid = tid;
}
- if (perf_event__process_comm(tool, event, sample, machine) < 0)
- goto out;
+ if (filter_cpu(sample))
+ return 0;
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->comm.tid;
- sample->pid = event->comm.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_COMM, stdout);
- perf_event__fprintf(event, stdout);
+ if (tid != -1)
+ thread = machine__findnew_thread(machine, pid, tid);
+
+ if (evsel) {
+ perf_sample__fprintf_start(script, sample, thread, evsel,
+ event->header.type, stdout);
}
- ret = 0;
-out:
+
+ perf_event__fprintf(event, machine, stdout);
+
thread__put(thread);
- return ret;
+
+ return 0;
+}
+
+static int print_event(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine,
+ pid_t pid, pid_t tid)
+{
+ return print_event_with_time(tool, event, sample, machine, pid, tid, 0);
+}
+
+static int process_comm_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ if (perf_event__process_comm(tool, event, sample, machine) < 0)
+ return -1;
+
+ return print_event(tool, event, sample, machine, event->comm.pid,
+ event->comm.tid);
}
static int process_namespaces_event(struct perf_tool *tool,
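
print_event() and print_event_with_time() above fold eight near-identical per-event printers into one path: fake up the missing sample fields when the event lacks sample_id_all, apply the CPU filter, resolve the thread, print, drop the thread reference. Each handler then shrinks to "update machine state, then delegate", as the rewritten process_comm_event() shows. A hypothetical new side-band handler would follow the same two steps (perf_event__process_foo() is a stand-in, not a real API):

static int process_foo_event(struct perf_tool *tool, union perf_event *event,
			     struct perf_sample *sample, struct machine *machine)
{
	/* 1) let the generic machinery update thread/map state */
	if (perf_event__process_foo(tool, event, sample, machine) < 0)
		return -1;

	/* 2) delegate filtering and printing to the shared helper */
	return print_event(tool, event, sample, machine, sample->pid,
			   sample->tid);
}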
@@ -2180,37 +2276,11 @@ static int process_namespaces_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
- int ret = -1;
-
- thread = machine__findnew_thread(machine, event->namespaces.pid,
- event->namespaces.tid);
- if (thread == NULL) {
- pr_debug("problem processing NAMESPACES event, skipping it.\n");
- return -1;
- }
-
if (perf_event__process_namespaces(tool, event, sample, machine) < 0)
- goto out;
+ return -1;
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->namespaces.tid;
- sample->pid = event->namespaces.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_NAMESPACES, stdout);
- perf_event__fprintf(event, stdout);
- }
- ret = 0;
-out:
- thread__put(thread);
- return ret;
+ return print_event(tool, event, sample, machine, event->namespaces.pid,
+ event->namespaces.tid);
}
static int process_cgroup_event(struct perf_tool *tool,
@@ -2218,34 +2288,11 @@ static int process_cgroup_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
- int ret = -1;
-
- thread = machine__findnew_thread(machine, sample->pid, sample->tid);
- if (thread == NULL) {
- pr_debug("problem processing CGROUP event, skipping it.\n");
- return -1;
- }
-
if (perf_event__process_cgroup(tool, event, sample, machine) < 0)
- goto out;
+ return -1;
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_CGROUP, stdout);
- perf_event__fprintf(event, stdout);
- }
- ret = 0;
-out:
- thread__put(thread);
- return ret;
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static int process_fork_event(struct perf_tool *tool,
@@ -2253,69 +2300,24 @@ static int process_fork_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (perf_event__process_fork(tool, event, sample, machine) < 0)
return -1;
- thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
- if (thread == NULL) {
- pr_debug("problem processing FORK event, skipping it.\n");
- return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = event->fork.time;
- sample->tid = event->fork.tid;
- sample->pid = event->fork.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_FORK, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
-
- return 0;
+ return print_event_with_time(tool, event, sample, machine,
+ event->fork.pid, event->fork.tid,
+ event->fork.time);
}
static int process_exit_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
- int err = 0;
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
- thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
- if (thread == NULL) {
- pr_debug("problem processing EXIT event, skipping it.\n");
+ /* Print before 'exit' deletes anything */
+ if (print_event_with_time(tool, event, sample, machine, event->fork.pid,
+ event->fork.tid, event->fork.time))
return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->fork.tid;
- sample->pid = event->fork.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_EXIT, stdout);
- perf_event__fprintf(event, stdout);
- }
-
- if (perf_event__process_exit(tool, event, sample, machine) < 0)
- err = -1;
- thread__put(thread);
- return err;
+ return perf_event__process_exit(tool, event, sample, machine);
}
static int process_mmap_event(struct perf_tool *tool,
@@ -2323,33 +2325,11 @@ static int process_mmap_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (perf_event__process_mmap(tool, event, sample, machine) < 0)
return -1;
- thread = machine__findnew_thread(machine, event->mmap.pid, event->mmap.tid);
- if (thread == NULL) {
- pr_debug("problem processing MMAP event, skipping it.\n");
- return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->mmap.tid;
- sample->pid = event->mmap.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_MMAP, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, event->mmap.pid,
+ event->mmap.tid);
}
static int process_mmap2_event(struct perf_tool *tool,
@@ -2357,33 +2337,11 @@ static int process_mmap2_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (perf_event__process_mmap2(tool, event, sample, machine) < 0)
return -1;
- thread = machine__findnew_thread(machine, event->mmap2.pid, event->mmap2.tid);
- if (thread == NULL) {
- pr_debug("problem processing MMAP2 event, skipping it.\n");
- return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->mmap2.tid;
- sample->pid = event->mmap2.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_MMAP2, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, event->mmap2.pid,
+ event->mmap2.tid);
}
static int process_switch_event(struct perf_tool *tool,
@@ -2391,10 +2349,7 @@ static int process_switch_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
if (perf_event__process_switch(tool, event, sample, machine) < 0)
return -1;
@@ -2405,20 +2360,8 @@ static int process_switch_event(struct perf_tool *tool,
if (!script->show_switch_events)
return 0;
- thread = machine__findnew_thread(machine, sample->pid,
- sample->tid);
- if (thread == NULL) {
- pr_debug("problem processing SWITCH event, skipping it.\n");
- return -1;
- }
-
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_SWITCH, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static int
@@ -2427,23 +2370,8 @@ process_lost_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
- struct thread *thread;
-
- thread = machine__findnew_thread(machine, sample->pid,
- sample->tid);
- if (thread == NULL)
- return -1;
-
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_LOST, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static int
@@ -2452,7 +2380,7 @@ process_finished_round_event(struct perf_tool *tool __maybe_unused,
struct ordered_events *oe __maybe_unused)
{
- perf_event__fprintf(event, stdout);
+ perf_event__fprintf(event, NULL, stdout);
return 0;
}
@@ -2462,33 +2390,23 @@ process_bpf_events(struct perf_tool *tool __maybe_unused,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (machine__process_ksymbol(machine, event, sample) < 0)
return -1;
- if (!evsel->core.attr.sample_id_all) {
- perf_event__fprintf(event, stdout);
- return 0;
- }
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
+}
- thread = machine__findnew_thread(machine, sample->pid, sample->tid);
- if (thread == NULL) {
- pr_debug("problem processing MMAP event, skipping it.\n");
+static int process_text_poke_events(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ if (perf_event__process_text_poke(tool, event, sample, machine) < 0)
return -1;
- }
-
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- event->header.type, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static void sig_handler(int sig __maybe_unused)
@@ -2599,6 +2517,10 @@ static int __cmd_script(struct perf_script *script)
script->tool.ksymbol = process_bpf_events;
script->tool.bpf = process_bpf_events;
}
+ if (script->show_text_poke_events) {
+ script->tool.ksymbol = process_bpf_events;
+ script->tool.text_poke = process_text_poke_events;
+ }
if (perf_script__setup_per_event_dump(script)) {
pr_err("Couldn't create the per event dump files\n");
@@ -2619,7 +2541,7 @@ static int __cmd_script(struct perf_script *script)
struct script_spec {
struct list_head node;
struct scripting_ops *ops;
- char spec[0];
+ char spec[];
};
static LIST_HEAD(script_specs);
@@ -2857,9 +2779,11 @@ parse:
if (change == REMOVE) {
output[j].fields &= ~all_output_options[i].field;
output[j].user_set_fields &= ~all_output_options[i].field;
+ output[j].user_unset_fields |= all_output_options[i].field;
} else {
output[j].fields |= all_output_options[i].field;
output[j].user_set_fields |= all_output_options[i].field;
+ output[j].user_unset_fields &= ~all_output_options[i].field;
}
output[j].user_set = true;
output[j].wildcard_set = true;
@@ -3145,7 +3069,7 @@ static int check_ev_match(char *dir_name, char *scriptname,
match = 0;
evlist__for_each_entry(session->evlist, pos) {
- if (!strcmp(perf_evsel__name(pos), evname)) {
+ if (!strcmp(evsel__name(pos), evname)) {
match = 1;
break;
}
@@ -3330,7 +3254,7 @@ static int have_cmd(int argc, const char **argv)
static void script__setup_sample_type(struct perf_script *script)
{
struct perf_session *session = script->session;
- u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
+ u64 sample_type = evlist__combined_sample_type(session->evlist);
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
@@ -3342,6 +3266,12 @@ static void script__setup_sample_type(struct perf_script *script)
else
callchain_param.record_mode = CALLCHAIN_FP;
}
+
+ if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
+ pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
+ "Please apply --call-graph lbr when recording.\n");
+ script->stitch_lbr = false;
+ }
}
static int process_stat_round_event(struct perf_session *session,
@@ -3465,7 +3395,10 @@ static int parse_xed(const struct option *opt __maybe_unused,
const char *str __maybe_unused,
int unset __maybe_unused)
{
- force_pager("xed -F insn: -A -64 | less");
+ if (isatty(1))
+ force_pager("xed -F insn: -A -64 | less");
+ else
+ force_pager("xed -F insn: -A -64");
return 0;
}
@@ -3573,7 +3506,7 @@ int cmd_script(int argc, const char **argv)
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
"addr,symoff,srcline,period,iregs,uregs,brstack,"
"brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
- "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc",
+ "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod",
parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
@@ -3624,6 +3557,8 @@ int cmd_script(int argc, const char **argv)
"Show round events (if recorded)"),
OPT_BOOLEAN('\0', "show-bpf-events", &script.show_bpf_events,
"Show bpf related events (if recorded)"),
+ OPT_BOOLEAN('\0', "show-text-poke-events", &script.show_text_poke_events,
+ "Show text poke related events (if recorded)"),
OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump,
"Dump trace output to files named by the monitored events"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
@@ -3653,6 +3588,8 @@ int cmd_script(int argc, const char **argv)
"file", "file saving guest os /proc/kallsyms"),
OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
"file", "file saving guest os /proc/modules"),
+ OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
OPTS_EVSWITCH(&script.evswitch),
OPT_END()
};
@@ -3709,7 +3646,7 @@ int cmd_script(int argc, const char **argv)
return -1;
}
- if (itrace_synth_opts.callchain &&
+ if ((itrace_synth_opts.callchain || itrace_synth_opts.add_callchain) &&
itrace_synth_opts.callchain_sz > scripting_max_stack)
scripting_max_stack = itrace_synth_opts.callchain_sz;
@@ -3985,6 +3922,9 @@ int cmd_script(int argc, const char **argv)
if (err)
goto out_delete;
+ if (zstd_init(&(session->zstd_data), 0) < 0)
+ pr_warning("Decompression initialization failed. Reported data may be incomplete.\n");
+
err = __cmd_script(&script);
flush_scripting();
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ec053dc1e35c..b01af171d94f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -56,7 +56,7 @@
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/counts.h"
-#include "util/group.h"
+#include "util/topdown.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/string2.h"
@@ -66,6 +66,7 @@
#include "util/time-utils.h"
#include "util/top.h"
#include "util/affinity.h"
+#include "util/pfm.h"
#include "asm/bug.h"
#include <linux/time64.h>
@@ -127,6 +128,15 @@ static const char * topdown_attrs[] = {
NULL,
};
+static const char *topdown_metric_attrs[] = {
+ "slots",
+ "topdown-retiring",
+ "topdown-bad-spec",
+ "topdown-fe-bound",
+ "topdown-be-bound",
+ NULL,
+};
+
static const char *smi_cost_attrs = {
"{"
"msr/aperf/,"
@@ -187,8 +197,63 @@ static struct perf_stat_config stat_config = {
.metric_only_len = METRIC_ONLY_LEN,
.walltime_nsecs_stats = &walltime_nsecs_stats,
.big_num = true,
+ .ctl_fd = -1,
+ .ctl_fd_ack = -1
};
+static bool cpus_map_matched(struct evsel *a, struct evsel *b)
+{
+ if (!a->core.cpus && !b->core.cpus)
+ return true;
+
+ if (!a->core.cpus || !b->core.cpus)
+ return false;
+
+ if (a->core.cpus->nr != b->core.cpus->nr)
+ return false;
+
+ for (int i = 0; i < a->core.cpus->nr; i++) {
+ if (a->core.cpus->map[i] != b->core.cpus->map[i])
+ return false;
+ }
+
+ return true;
+}
+
+static void evlist__check_cpu_maps(struct evlist *evlist)
+{
+ struct evsel *evsel, *pos, *leader;
+ char buf[1024];
+
+ evlist__for_each_entry(evlist, evsel) {
+ leader = evsel->leader;
+
+ /* Check that leader matches cpus with each member. */
+ if (leader == evsel)
+ continue;
+ if (cpus_map_matched(leader, evsel))
+ continue;
+
+ /* If there's mismatch disable the group and warn user. */
+ WARN_ONCE(1, "WARNING: grouped events cpus do not match, disabling group:\n");
+ evsel__group_desc(leader, buf, sizeof(buf));
+ pr_warning(" %s\n", buf);
+
+ if (verbose) {
+ cpu_map__snprint(leader->core.cpus, buf, sizeof(buf));
+ pr_warning(" %s: %s\n", leader->name, buf);
+ cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf));
+ pr_warning(" %s: %s\n", evsel->name, buf);
+ }
+
+ for_each_group_evsel(pos, leader) {
+ pos->leader = pos;
+ pos->core.nr_members = 0;
+ }
+ evsel->leader->core.nr_members = 0;
+ }
+}
+
static inline void diff_timespec(struct timespec *r, struct timespec *a,
struct timespec *b)
{
@@ -238,9 +303,8 @@ static int write_stat_round_event(u64 tm, u64 type)
#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
-static int
-perf_evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
- struct perf_counts_values *count)
+static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
+ struct perf_counts_values *count)
{
struct perf_sample_id *sid = SID(counter, cpu, thread);
@@ -259,7 +323,7 @@ static int read_single_counter(struct evsel *counter, int cpu,
count->val = val;
return 0;
}
- return perf_evsel__read_counter(counter, cpu, thread);
+ return evsel__read_counter(counter, cpu, thread);
}
/*
@@ -284,7 +348,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
/*
* The leader's group read loads data into its group members
- * (via perf_evsel__read_counter()) and sets their count->loaded.
+ * (via evsel__read_counter()) and sets their count->loaded.
*/
if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
read_single_counter(counter, cpu, thread, rs)) {
@@ -297,7 +361,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
perf_counts__set_loaded(counter->counts, cpu, thread, false);
if (STAT_RECORD) {
- if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+ if (evsel__write_stat_event(counter, cpu, thread, count)) {
pr_err("failed to write stat event\n");
return -1;
}
@@ -306,7 +370,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
if (verbose > 1) {
fprintf(stat_config.output,
"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
- perf_evsel__name(counter),
+ evsel__name(counter),
cpu,
count->val, count->ena, count->run);
}
@@ -315,14 +379,14 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
return 0;
}
-static void read_counters(struct timespec *rs)
+static int read_affinity_counters(struct timespec *rs)
{
struct evsel *counter;
struct affinity affinity;
int i, ncpus, cpu;
if (affinity__setup(&affinity) < 0)
- return;
+ return -1;
ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
if (!target__has_cpu(&target) || target__has_per_thread(&target))
@@ -342,6 +406,15 @@ static void read_counters(struct timespec *rs)
}
}
affinity__cleanup(&affinity);
+ return 0;
+}
+
+static void read_counters(struct timespec *rs)
+{
+ struct evsel *counter;
+
+ if (!stat_config.stop_read_counter && (read_affinity_counters(rs) < 0))
+ return;
evlist__for_each_entry(evsel_list, counter) {
if (counter->err)
@@ -352,6 +425,46 @@ static void read_counters(struct timespec *rs)
}
}
+static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
+{
+ int i;
+
+ config->stats = calloc(nthreads, sizeof(struct runtime_stat));
+ if (!config->stats)
+ return -1;
+
+ config->stats_num = nthreads;
+
+ for (i = 0; i < nthreads; i++)
+ runtime_stat__init(&config->stats[i]);
+
+ return 0;
+}
+
+static void runtime_stat_delete(struct perf_stat_config *config)
+{
+ int i;
+
+ if (!config->stats)
+ return;
+
+ for (i = 0; i < config->stats_num; i++)
+ runtime_stat__exit(&config->stats[i]);
+
+ zfree(&config->stats);
+}
+
+static void runtime_stat_reset(struct perf_stat_config *config)
+{
+ int i;
+
+ if (!config->stats)
+ return;
+
+ for (i = 0; i < config->stats_num; i++)
+ perf_stat__reset_shadow_per_stat(&config->stats[i]);
+}
+
static void process_interval(void)
{
struct timespec ts, rs;
@@ -359,6 +472,8 @@ static void process_interval(void)
clock_gettime(CLOCK_MONOTONIC, &ts);
diff_timespec(&rs, &ts, &ref_time);
+ perf_stat__reset_shadow_per_stat(&rt_stat);
+ runtime_stat_reset(&stat_config);
read_counters(&rs);
if (STAT_RECORD) {
@@ -367,22 +482,42 @@ static void process_interval(void)
}
init_stats(&walltime_nsecs_stats);
- update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000);
+ update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000ULL);
print_counters(&rs, 0, NULL);
}
+static bool handle_interval(unsigned int interval, int *times)
+{
+ if (interval) {
+ process_interval();
+ if (interval_count && !(--(*times)))
+ return true;
+ }
+ return false;
+}
+
static void enable_counters(void)
{
- if (stat_config.initial_delay)
+ if (stat_config.initial_delay < 0) {
+ pr_info(EVLIST_DISABLED_MSG);
+ return;
+ }
+
+ if (stat_config.initial_delay > 0) {
+ pr_info(EVLIST_DISABLED_MSG);
usleep(stat_config.initial_delay * USEC_PER_MSEC);
+ }
/*
* We need to enable counters only if:
* - we don't have tracee (attaching to task or cpu)
* - we have initial delay configured
*/
- if (!target__none(&target) || stat_config.initial_delay)
+ if (!target__none(&target) || stat_config.initial_delay) {
evlist__enable(evsel_list);
+ if (stat_config.initial_delay > 0)
+ pr_info(EVLIST_ENABLED_MSG);
+ }
}
static void disable_counters(void)
@@ -409,7 +544,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
workload_exec_errno = info->si_value.sival_int;
}
-static bool perf_evsel__should_store_id(struct evsel *counter)
+static bool evsel__should_store_id(struct evsel *counter)
{
return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID;
}
@@ -436,6 +571,87 @@ static bool is_target_alive(struct target *_target,
return false;
}
+static void process_evlist(struct evlist *evlist, unsigned int interval)
+{
+ enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
+
+ if (evlist__ctlfd_process(evlist, &cmd) > 0) {
+ switch (cmd) {
+ case EVLIST_CTL_CMD_ENABLE:
+ pr_info(EVLIST_ENABLED_MSG);
+ if (interval)
+ process_interval();
+ break;
+ case EVLIST_CTL_CMD_DISABLE:
+ if (interval)
+ process_interval();
+ pr_info(EVLIST_DISABLED_MSG);
+ break;
+ case EVLIST_CTL_CMD_SNAPSHOT:
+ case EVLIST_CTL_CMD_ACK:
+ case EVLIST_CTL_CMD_UNSUPPORTED:
+ default:
+ break;
+ }
+ }
+}
+
+static void compute_tts(struct timespec *time_start, struct timespec *time_stop,
+ int *time_to_sleep)
+{
+ int tts = *time_to_sleep;
+ struct timespec time_diff;
+
+ diff_timespec(&time_diff, time_stop, time_start);
+
+ tts -= time_diff.tv_sec * MSEC_PER_SEC +
+ time_diff.tv_nsec / NSEC_PER_MSEC;
+
+ if (tts < 0)
+ tts = 0;
+
+ *time_to_sleep = tts;
+}
+
+static int dispatch_events(bool forks, int timeout, int interval, int *times)
+{
+ int child_exited = 0, status = 0;
+ int time_to_sleep, sleep_time;
+ struct timespec time_start, time_stop;
+
+ if (interval)
+ sleep_time = interval;
+ else if (timeout)
+ sleep_time = timeout;
+ else
+ sleep_time = 1000;
+
+ time_to_sleep = sleep_time;
+
+ while (!done) {
+ if (forks)
+ child_exited = waitpid(child_pid, &status, WNOHANG);
+ else
+ child_exited = !is_target_alive(&target, evsel_list->core.threads) ? 1 : 0;
+
+ if (child_exited)
+ break;
+
+ clock_gettime(CLOCK_MONOTONIC, &time_start);
+ if (!(evlist__poll(evsel_list, time_to_sleep) > 0)) { /* poll timeout or EINTR */
+ if (timeout || handle_interval(interval, times))
+ break;
+ time_to_sleep = sleep_time;
+ } else { /* fd revent */
+ process_evlist(evsel_list, interval);
+ clock_gettime(CLOCK_MONOTONIC, &time_stop);
+ compute_tts(&time_start, &time_stop, &time_to_sleep);
+ }
+ }
+
+ return status;
+}
+
enum counter_recovery {
COUNTER_SKIP,
COUNTER_RETRY,
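
dispatch_events() above replaces two hand-rolled nanosleep loops with one poll-driven loop: block on the evlist descriptors for up to time_to_sleep ms, treat a timeout as the interval tick, and after servicing a control command shrink the next sleep by the time already spent so the interval cadence does not drift. The budget arithmetic mirrors compute_tts(); a self-contained sketch:

#include <time.h>

/*
 * Remaining sleep budget in ms after the span start..stop has elapsed,
 * clamped at zero. tv_nsec differences may be negative; signed math
 * keeps the sum correct.
 */
static int remaining_ms(const struct timespec *start,
			const struct timespec *stop, int budget_ms)
{
	long elapsed = (stop->tv_sec - start->tv_sec) * 1000L +
		       (stop->tv_nsec - start->tv_nsec) / 1000000L;

	return elapsed >= budget_ms ? 0 : budget_ms - (int)elapsed;
}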
@@ -454,7 +670,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
errno == ENXIO) {
if (verbose > 0)
ui__warning("%s event is not supported by the kernel.\n",
- perf_evsel__name(counter));
+ evsel__name(counter));
counter->supported = false;
/*
* errored is a sticky flag that means one of the counter's
@@ -465,7 +681,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
if ((counter->leader != counter) ||
!(counter->leader->core.nr_members > 1))
return COUNTER_SKIP;
- } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
+ } else if (evsel__fallback(counter, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
return COUNTER_RETRY;
@@ -483,8 +699,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
}
}
- perf_evsel__open_strerror(counter, &target,
- errno, msg, sizeof(msg));
+ evsel__open_strerror(counter, &target, errno, msg, sizeof(msg));
ui__error("%s\n", msg);
if (child_pid != -1)
@@ -500,7 +715,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
char msg[BUFSIZ];
unsigned long long t0, t1;
struct evsel *counter;
- struct timespec ts;
size_t l;
int status = 0;
const bool forks = (argc > 0);
@@ -509,17 +723,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
int i, cpu;
bool second_pass = false;
- if (interval) {
- ts.tv_sec = interval / USEC_PER_MSEC;
- ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
- } else if (timeout) {
- ts.tv_sec = timeout / USEC_PER_MSEC;
- ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
- } else {
- ts.tv_sec = 1;
- ts.tv_nsec = 0;
- }
-
if (forks) {
if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
workload_exec_failed_signal) < 0) {
@@ -604,7 +807,7 @@ try_again:
if (!counter->reset_group)
continue;
try_again_reset:
- pr_debug2("reopening weak %s\n", perf_evsel__name(counter));
+ pr_debug2("reopening weak %s\n", evsel__name(counter));
if (create_perf_stat_counter(counter, &stat_config, &target,
counter->cpu_iter - 1) < 0) {
@@ -635,14 +838,14 @@ try_again_reset:
if (l > stat_config.unit_width)
stat_config.unit_width = l;
- if (perf_evsel__should_store_id(counter) &&
- perf_evsel__store_ids(counter, evsel_list))
+ if (evsel__should_store_id(counter) &&
+ evsel__store_ids(counter, evsel_list))
return -1;
}
if (perf_evlist__apply_filters(evsel_list, &counter)) {
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
- counter->filter, perf_evsel__name(counter), errno,
+ counter->filter, evsel__name(counter), errno,
str_error_r(errno, msg, sizeof(msg)));
return -1;
}
@@ -676,18 +879,13 @@ try_again_reset:
perf_evlist__start_workload(evsel_list);
enable_counters();
- if (interval || timeout) {
- while (!waitpid(child_pid, &status, WNOHANG)) {
- nanosleep(&ts, NULL);
- if (timeout)
- break;
- process_interval();
- if (interval_count && !(--times))
- break;
- }
- }
- if (child_pid != -1)
+ if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
+ status = dispatch_events(forks, timeout, interval, &times);
+ if (child_pid != -1) {
+ if (timeout)
+ kill(child_pid, SIGTERM);
wait4(child_pid, &status, 0, &stat_config.ru_data);
+ }
if (workload_exec_errno) {
const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
@@ -699,18 +897,7 @@ try_again_reset:
psignal(WTERMSIG(status), argv[0]);
} else {
enable_counters();
- while (!done) {
- nanosleep(&ts, NULL);
- if (!is_target_alive(&target, evsel_list->core.threads))
- break;
- if (timeout)
- break;
- if (interval) {
- process_interval();
- if (interval_count && !(--times))
- break;
- }
- }
+ status = dispatch_events(forks, timeout, interval, &times);
}
disable_counters();
@@ -720,7 +907,21 @@ try_again_reset:
if (stat_config.walltime_run_table)
stat_config.walltime_run[run_idx] = t1 - t0;
- update_stats(&walltime_nsecs_stats, t1 - t0);
+ if (interval && stat_config.summary) {
+ stat_config.interval = 0;
+ stat_config.stop_read_counter = true;
+ init_stats(&walltime_nsecs_stats);
+ update_stats(&walltime_nsecs_stats, t1 - t0);
+
+ if (stat_config.aggr_mode == AGGR_GLOBAL)
+ perf_evlist__save_aggr_prev_raw_counts(evsel_list);
+
+ perf_evlist__copy_prev_raw_counts(evsel_list);
+ perf_evlist__reset_prev_raw_counts(evsel_list);
+ runtime_stat_reset(&stat_config);
+ perf_stat__reset_shadow_per_stat(&rt_stat);
+ } else
+ update_stats(&walltime_nsecs_stats, t1 - t0);
/*
* Closing a group leader splits the group, and as we only disable
@@ -819,10 +1020,16 @@ static void sig_atexit(void)
kill(getpid(), signr);
}
+void perf_stat__set_big_num(int set)
+{
+ stat_config.big_num = (set != 0);
+}
+
static int stat__set_big_num(const struct option *opt __maybe_unused,
const char *s __maybe_unused, int unset)
{
big_num_opt = unset ? 0 : 1;
+ perf_stat__set_big_num(!unset);
return 0;
}
@@ -838,7 +1045,30 @@ static int parse_metric_groups(const struct option *opt,
const char *str,
int unset __maybe_unused)
{
- return metricgroup__parse_groups(opt, str, &stat_config.metric_events);
+ return metricgroup__parse_groups(opt, str,
+ stat_config.metric_no_group,
+ stat_config.metric_no_merge,
+ &stat_config.metric_events);
+}
+
+static int parse_control_option(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ struct perf_stat_config *config = opt->value;
+
+ return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack, &config->ctl_fd_close);
+}
+
+static int parse_stat_cgroups(const struct option *opt,
+ const char *str, int unset)
+{
+ if (stat_config.cgroup_list) {
+ pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
+ return -1;
+ }
+
+ return parse_cgroups(opt, str, unset);
}
static struct option stat_options[] = {
@@ -884,7 +1114,9 @@ static struct option stat_options[] = {
OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
"print counts with custom separator"),
OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
- "monitor event in cgroup name only", parse_cgroups),
+ "monitor event in cgroup name only", parse_stat_cgroups),
+ OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name",
+ "expand events for each cgroup"),
OPT_STRING('o', "output", &output_name, "file", "output file name"),
OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
OPT_INTEGER(0, "log-fd", &output_fd,
@@ -912,10 +1144,14 @@ static struct option stat_options[] = {
"aggregate counts per thread", AGGR_THREAD),
OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
"aggregate counts per numa node", AGGR_NODE),
- OPT_UINTEGER('D', "delay", &stat_config.initial_delay,
- "ms to wait before starting measurement after program start"),
+ OPT_INTEGER('D', "delay", &stat_config.initial_delay,
+ "ms to wait before starting measurement after program start (-1: start with events disabled)"),
OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
"Only print computed metrics. No raw values", enable_metric_only),
+ OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
+ "don't group metric events, impacts multiplexing"),
+ OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
+ "don't try to share events between metrics in a group"),
OPT_BOOLEAN(0, "topdown", &topdown_run,
"measure topdown level 1 statistics"),
OPT_BOOLEAN(0, "smi-cost", &smi_cost,
@@ -933,6 +1169,18 @@ static struct option stat_options[] = {
"Use with 'percore' event qualifier to show the event "
"counts of one hardware thread by sum up total hardware "
"threads of same physical core"),
+ OPT_BOOLEAN(0, "summary", &stat_config.summary,
+ "print summary for interval mode"),
+#ifdef HAVE_LIBPFM
+ OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
+ "libpfm4 event selector. use 'perf list' to list available events",
+ parse_libpfm_events_option),
+#endif
+ OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
+ "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
+ "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
+ "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
+ parse_control_option),
OPT_END()
};
@@ -1255,55 +1503,6 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
return 0;
}
-static int topdown_filter_events(const char **attr, char **str, bool use_group)
-{
- int off = 0;
- int i;
- int len = 0;
- char *s;
-
- for (i = 0; attr[i]; i++) {
- if (pmu_have_event("cpu", attr[i])) {
- len += strlen(attr[i]) + 1;
- attr[i - off] = attr[i];
- } else
- off++;
- }
- attr[i - off] = NULL;
-
- *str = malloc(len + 1 + 2);
- if (!*str)
- return -1;
- s = *str;
- if (i - off == 0) {
- *s = 0;
- return 0;
- }
- if (use_group)
- *s++ = '{';
- for (i = 0; attr[i]; i++) {
- strcpy(s, attr[i]);
- s += strlen(s);
- *s++ = ',';
- }
- if (use_group) {
- s[-1] = '}';
- *s = 0;
- } else
- s[-1] = 0;
- return 0;
-}
-
-__weak bool arch_topdown_check_group(bool *warn)
-{
- *warn = false;
- return false;
-}
-
-__weak void arch_topdown_group_warn(void)
-{
-}
-
/*
* Add default attributes, if there were no attributes specified or
* if -d/--detailed, -d -d or -d -d -d is used:
@@ -1440,6 +1639,8 @@ static int add_default_attributes(void)
struct option opt = { .value = &evsel_list };
return metricgroup__parse_groups(&opt, "transaction",
+ stat_config.metric_no_group,
+ stat_config.metric_no_merge,
&stat_config.metric_events);
}
@@ -1498,6 +1699,24 @@ static int add_default_attributes(void)
char *str = NULL;
bool warn = false;
+ if (!force_metric_only)
+ stat_config.metric_only = true;
+
+ if (topdown_filter_events(topdown_metric_attrs, &str, 1) < 0) {
+ pr_err("Out of memory\n");
+ return -1;
+ }
+ if (topdown_metric_attrs[0] && str) {
+ if (!stat_config.interval && !stat_config.metric_only) {
+ fprintf(stat_config.output,
+ "Topdown accuracy may decrease when measuring long periods.\n"
+ "Please print the result regularly, e.g. -I1000\n");
+ }
+ goto setup_metrics;
+ }
+
+ zfree(&str);
+
if (stat_config.aggr_mode != AGGR_GLOBAL &&
stat_config.aggr_mode != AGGR_CORE) {
pr_err("top down event configuration requires --per-core mode\n");
@@ -1509,8 +1728,6 @@ static int add_default_attributes(void)
return -1;
}
- if (!force_metric_only)
- stat_config.metric_only = true;
if (topdown_filter_events(topdown_attrs, &str,
arch_topdown_check_group(&warn)) < 0) {
pr_err("Out of memory\n");
@@ -1519,6 +1736,7 @@ static int add_default_attributes(void)
if (topdown_attrs[0] && str) {
if (warn)
arch_topdown_group_warn();
+setup_metrics:
err = parse_events(evsel_list, str, &errinfo);
if (err) {
fprintf(stderr,
@@ -1539,19 +1757,17 @@ static int add_default_attributes(void)
if (target__has_cpu(&target))
default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
- if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
+ if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
return -1;
if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
- if (perf_evlist__add_default_attrs(evsel_list,
- frontend_attrs) < 0)
+ if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0)
return -1;
}
if (pmu_have_event("cpu", "stalled-cycles-backend")) {
- if (perf_evlist__add_default_attrs(evsel_list,
- backend_attrs) < 0)
+ if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0)
return -1;
}
- if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
+ if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
return -1;
}
@@ -1561,21 +1777,21 @@ static int add_default_attributes(void)
return 0;
/* Append detailed run extra attributes: */
- if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
+ if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
return -1;
if (detailed_run < 2)
return 0;
/* Append very detailed run extra attributes: */
- if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
+ if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
return -1;
if (detailed_run < 3)
return 0;
/* Append very, very detailed run extra attributes: */
- return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
+ return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}
static const char * const stat_record_usage[] = {
@@ -1735,35 +1951,6 @@ int process_cpu_map_event(struct perf_session *session,
return set_maps(st);
}
-static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
-{
- int i;
-
- config->stats = calloc(nthreads, sizeof(struct runtime_stat));
- if (!config->stats)
- return -1;
-
- config->stats_num = nthreads;
-
- for (i = 0; i < nthreads; i++)
- runtime_stat__init(&config->stats[i]);
-
- return 0;
-}
-
-static void runtime_stat_delete(struct perf_stat_config *config)
-{
- int i;
-
- if (!config->stats)
- return;
-
- for (i = 0; i < config->stats_num; i++)
- runtime_stat__exit(&config->stats[i]);
-
- zfree(&config->stats);
-}
-
static const char * const stat_report_usage[] = {
"perf stat report [<options>]",
NULL,
@@ -1850,8 +2037,10 @@ static void setup_system_wide(int forks)
struct evsel *counter;
evlist__for_each_entry(evsel_list, counter) {
- if (!counter->core.system_wide)
+ if (!counter->core.system_wide &&
+ strcmp(counter->name, "duration_time")) {
return;
+ }
}
if (evsel_list->core.nr_entries)
@@ -2037,6 +2226,19 @@ int cmd_stat(int argc, const char **argv)
if (add_default_attributes())
goto out;
+ if (stat_config.cgroup_list) {
+ if (nr_cgroups > 0) {
+ pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
+ parse_options_usage(stat_usage, stat_options, "G", 1);
+ parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
+ goto out;
+ }
+
+ if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list,
+ &stat_config.metric_events, true) < 0)
+ goto out;
+ }
+
target__validate(&target);
if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
@@ -2055,6 +2257,8 @@ int cmd_stat(int argc, const char **argv)
goto out;
}
+ evlist__check_cpu_maps(evsel_list);
+
/*
* Initialize thread_map with comm names,
* so we could print it out on output.
@@ -2129,6 +2333,9 @@ int cmd_stat(int argc, const char **argv)
signal(SIGALRM, skip_signal);
signal(SIGABRT, skip_signal);
+ if (evlist__initialize_ctlfd(evsel_list, stat_config.ctl_fd, stat_config.ctl_fd_ack))
+ goto out;
+
status = 0;
for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
if (stat_config.run_count != 1 && verbose > 0)
@@ -2145,9 +2352,11 @@ int cmd_stat(int argc, const char **argv)
}
}
- if (!forever && status != -1 && !interval)
+ if (!forever && status != -1 && (!interval || stat_config.summary))
print_counters(NULL, argc, argv);
+ evlist__finalize_ctlfd(evsel_list);
+
if (STAT_RECORD) {
/*
* We synthesize the kernel mmap record just so that older tools
@@ -2194,7 +2403,9 @@ out:
evlist__delete(evsel_list);
+ metricgroup__rblist_exit(&stat_config.metric_events);
runtime_stat_delete(&stat_config);
+ evlist__close_control(stat_config.ctl_fd, stat_config.ctl_fd_ack, &stat_config.ctl_fd_close);
return status;
}
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 9e84fae9b096..4e380e7b5230 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -128,7 +128,7 @@ struct sample_wrapper {
struct sample_wrapper *next;
u64 timestamp;
- unsigned char data[0];
+ unsigned char data[];
};
#define TYPE_NONE 0
@@ -579,8 +579,8 @@ process_sample_cpu_idle(struct timechart *tchart __maybe_unused,
struct perf_sample *sample,
const char *backtrace __maybe_unused)
{
- u32 state = perf_evsel__intval(evsel, sample, "state");
- u32 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
+ u32 state = evsel__intval(evsel, sample, "state");
+ u32 cpu_id = evsel__intval(evsel, sample, "cpu_id");
if (state == (u32)PWR_EVENT_EXIT)
c_state_end(tchart, cpu_id, sample->time);
@@ -595,8 +595,8 @@ process_sample_cpu_frequency(struct timechart *tchart,
struct perf_sample *sample,
const char *backtrace __maybe_unused)
{
- u32 state = perf_evsel__intval(evsel, sample, "state");
- u32 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
+ u32 state = evsel__intval(evsel, sample, "state");
+ u32 cpu_id = evsel__intval(evsel, sample, "cpu_id");
p_state_change(tchart, cpu_id, sample->time, state);
return 0;
@@ -608,9 +608,9 @@ process_sample_sched_wakeup(struct timechart *tchart,
struct perf_sample *sample,
const char *backtrace)
{
- u8 flags = perf_evsel__intval(evsel, sample, "common_flags");
- int waker = perf_evsel__intval(evsel, sample, "common_pid");
- int wakee = perf_evsel__intval(evsel, sample, "pid");
+ u8 flags = evsel__intval(evsel, sample, "common_flags");
+ int waker = evsel__intval(evsel, sample, "common_pid");
+ int wakee = evsel__intval(evsel, sample, "pid");
sched_wakeup(tchart, sample->cpu, sample->time, waker, wakee, flags, backtrace);
return 0;
@@ -622,9 +622,9 @@ process_sample_sched_switch(struct timechart *tchart,
struct perf_sample *sample,
const char *backtrace)
{
- int prev_pid = perf_evsel__intval(evsel, sample, "prev_pid");
- int next_pid = perf_evsel__intval(evsel, sample, "next_pid");
- u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
+ int prev_pid = evsel__intval(evsel, sample, "prev_pid");
+ int next_pid = evsel__intval(evsel, sample, "next_pid");
+ u64 prev_state = evsel__intval(evsel, sample, "prev_state");
sched_switch(tchart, sample->cpu, sample->time, prev_pid, next_pid,
prev_state, backtrace);
@@ -638,8 +638,8 @@ process_sample_power_start(struct timechart *tchart __maybe_unused,
struct perf_sample *sample,
const char *backtrace __maybe_unused)
{
- u64 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
- u64 value = perf_evsel__intval(evsel, sample, "value");
+ u64 cpu_id = evsel__intval(evsel, sample, "cpu_id");
+ u64 value = evsel__intval(evsel, sample, "value");
c_state_start(cpu_id, sample->time, value);
return 0;
@@ -661,8 +661,8 @@ process_sample_power_frequency(struct timechart *tchart,
struct perf_sample *sample,
const char *backtrace __maybe_unused)
{
- u64 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
- u64 value = perf_evsel__intval(evsel, sample, "value");
+ u64 cpu_id = evsel__intval(evsel, sample, "cpu_id");
+ u64 value = evsel__intval(evsel, sample, "value");
p_state_change(tchart, cpu_id, sample->time, value);
return 0;
@@ -843,7 +843,7 @@ process_enter_read(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_READ,
sample->time, fd);
}
@@ -853,7 +853,7 @@ process_exit_read(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_READ,
sample->time, ret);
}
@@ -863,7 +863,7 @@ process_enter_write(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_WRITE,
sample->time, fd);
}
@@ -873,7 +873,7 @@ process_exit_write(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_WRITE,
sample->time, ret);
}
@@ -883,7 +883,7 @@ process_enter_sync(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_SYNC,
sample->time, fd);
}
@@ -893,7 +893,7 @@ process_exit_sync(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_SYNC,
sample->time, ret);
}
@@ -903,7 +903,7 @@ process_enter_tx(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_TX,
sample->time, fd);
}
@@ -913,7 +913,7 @@ process_exit_tx(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_TX,
sample->time, ret);
}
@@ -923,7 +923,7 @@ process_enter_rx(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_RX,
sample->time, fd);
}
@@ -933,7 +933,7 @@ process_exit_rx(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_RX,
sample->time, ret);
}
@@ -943,7 +943,7 @@ process_enter_poll(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_POLL,
sample->time, fd);
}
@@ -953,7 +953,7 @@ process_exit_poll(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_POLL,
sample->time, ret);
}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 289cf83e658a..7c64134472c7 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -33,6 +33,7 @@
#include "util/map.h"
#include "util/mmap.h"
#include "util/session.h"
+#include "util/thread.h"
#include "util/symbol.h"
#include "util/synthetic-events.h"
#include "util/top.h"
@@ -52,6 +53,7 @@
#include "util/debug.h"
#include "util/ordered-events.h"
+#include "util/pfm.h"
#include <assert.h>
#include <elf.h>
@@ -254,7 +256,7 @@ static void perf_top__show_details(struct perf_top *top)
if (notes->src == NULL)
goto out_unlock;
- printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
+ printf("Showing %s for %s\n", evsel__name(top->sym_evsel), symbol->name);
printf(" Events Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt);
more = symbol__annotate_printf(&he->ms, top->sym_evsel, &top->annotation_opts);
@@ -297,8 +299,7 @@ static void perf_top__resort_hists(struct perf_top *t)
hists__collapse_resort(hists, NULL);
/* Non-group events are considered as leader */
- if (symbol_conf.event_group &&
- !perf_evsel__is_group_leader(pos)) {
+ if (symbol_conf.event_group && !evsel__is_group_leader(pos)) {
struct hists *leader_hists = evsel__hists(pos->leader);
hists__match(leader_hists, hists);
@@ -307,7 +308,7 @@ static void perf_top__resort_hists(struct perf_top *t)
}
evlist__for_each_entry(evlist, pos) {
- perf_evsel__output_resort(pos, NULL);
+ evsel__output_resort(pos, NULL);
}
}
@@ -441,7 +442,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top)
fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top->print_entries);
if (top->evlist->core.nr_entries > 1)
- fprintf(stdout, "\t[E] active event counter. \t(%s)\n", perf_evsel__name(top->sym_evsel));
+ fprintf(stdout, "\t[E] active event counter. \t(%s)\n", evsel__name(top->sym_evsel));
fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter);
@@ -528,13 +529,13 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
fprintf(stderr, "\nAvailable events:");
evlist__for_each_entry(top->evlist, top->sym_evsel)
- fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));
+ fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, evsel__name(top->sym_evsel));
prompt_integer(&counter, "Enter details event counter");
if (counter >= top->evlist->core.nr_entries) {
top->sym_evsel = evlist__first(top->evlist);
- fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));
+ fprintf(stderr, "Sorry, no such event, using %s.\n", evsel__name(top->sym_evsel));
sleep(1);
break;
}
@@ -775,6 +776,9 @@ static void perf_event__process_sample(struct perf_tool *tool,
if (machine__resolve(machine, &al, sample) < 0)
return;
+ if (top->stitch_lbr)
+ al.thread->lbr_stitch_enable = true;
+
if (!machine->kptr_restrict_warned &&
symbol_conf.kptr_restrict &&
al.cpumode == PERF_RECORD_MISC_KERNEL) {
@@ -946,7 +950,7 @@ static int perf_top__overwrite_check(struct perf_top *top)
{
struct record_opts *opts = &top->record_opts;
struct evlist *evlist = top->evlist;
- struct perf_evsel_config_term *term;
+ struct evsel_config_term *term;
struct list_head *config_terms;
struct evsel *evsel;
int set, overwrite = -1;
@@ -955,7 +959,7 @@ static int perf_top__overwrite_check(struct perf_top *top)
set = -1;
config_terms = &evsel->config_terms;
list_for_each_entry(term, config_terms, list) {
- if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE)
+ if (term->type == EVSEL__CONFIG_TERM_OVERWRITE)
set = term->val.overwrite ? 1 : 0;
}
@@ -1042,14 +1046,13 @@ try_again:
perf_top_overwrite_fallback(top, counter))
goto try_again;
- if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
+ if (evsel__fallback(counter, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
goto try_again;
}
- perf_evsel__open_strerror(counter, &opts->target,
- errno, msg, sizeof(msg));
+ evsel__open_strerror(counter, &opts->target, errno, msg, sizeof(msg));
ui__error("%s\n", msg);
goto out_err;
}
@@ -1571,10 +1574,16 @@ int cmd_top(int argc, const char **argv)
"Sort the output by the event at the index n in group. "
"If n is invalid, sort by the first event. "
"WARNING: should be used on grouped events."),
+ OPT_BOOLEAN(0, "stitch-lbr", &top.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
+#ifdef HAVE_LIBPFM
+ OPT_CALLBACK(0, "pfm-events", &top.evlist, "event",
+ "libpfm4 event selector. use 'perf list' to list available events",
+ parse_libpfm_events_option),
+#endif
OPTS_EVSWITCH(&top.evswitch),
OPT_END()
};
- struct evlist *sb_evlist = NULL;
const char * const top_usage[] = {
"perf top [<options>]",
NULL
@@ -1618,7 +1627,7 @@ int cmd_top(int argc, const char **argv)
goto out_delete_evlist;
if (!top.evlist->core.nr_entries &&
- perf_evlist__add_default(top.evlist) < 0) {
+ evlist__add_default(top.evlist) < 0) {
pr_err("Not enough memory for event selector list\n");
goto out_delete_evlist;
}
@@ -1640,6 +1649,11 @@ int cmd_top(int argc, const char **argv)
}
}
+ if (top.stitch_lbr && !(callchain_param.record_mode == CALLCHAIN_LBR)) {
+ pr_err("Error: --stitch-lbr must be used with --call-graph lbr\n");
+ goto out_delete_evlist;
+ }
+
if (opts->branch_stack && callchain_param.enabled)
symbol_conf.show_branchflag_count = true;
@@ -1732,10 +1746,23 @@ int cmd_top(int argc, const char **argv)
goto out_delete_evlist;
}
- if (!top.record_opts.no_bpf_event)
- bpf_event__add_sb_event(&sb_evlist, &perf_env);
+#ifdef HAVE_LIBBPF_SUPPORT
+ if (!top.record_opts.no_bpf_event) {
+ top.sb_evlist = evlist__new();
+
+ if (top.sb_evlist == NULL) {
+ pr_err("Couldn't create side band evlist.\n.");
+ goto out_delete_evlist;
+ }
+
+ if (evlist__add_bpf_sb_event(top.sb_evlist, &perf_env)) {
+ pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
+ goto out_delete_evlist;
+ }
+ }
+#endif
- if (perf_evlist__start_sb_thread(sb_evlist, target)) {
+ if (perf_evlist__start_sb_thread(top.sb_evlist, target)) {
pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
opts->no_bpf_event = true;
}
@@ -1743,7 +1770,7 @@ int cmd_top(int argc, const char **argv)
status = __cmd_top(&top);
if (!opts->no_bpf_event)
- perf_evlist__stop_sb_thread(sb_evlist);
+ perf_evlist__stop_sb_thread(top.sb_evlist);
out_delete_evlist:
evlist__delete(top.evlist);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 01d542007c8b..44a75f234db1 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -366,11 +366,9 @@ out_delete:
return NULL;
}
-static int perf_evsel__init_tp_uint_field(struct evsel *evsel,
- struct tp_field *field,
- const char *name)
+static int evsel__init_tp_uint_field(struct evsel *evsel, struct tp_field *field, const char *name)
{
- struct tep_format_field *format_field = perf_evsel__field(evsel, name);
+ struct tep_format_field *format_field = evsel__field(evsel, name);
if (format_field == NULL)
return -1;
@@ -380,13 +378,11 @@ static int perf_evsel__init_tp_uint_field(struct evsel *evsel,
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
- perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
+ evsel__init_tp_uint_field(evsel, &sc->name, #name); })
-static int perf_evsel__init_tp_ptr_field(struct evsel *evsel,
- struct tp_field *field,
- const char *name)
+static int evsel__init_tp_ptr_field(struct evsel *evsel, struct tp_field *field, const char *name)
{
- struct tep_format_field *format_field = perf_evsel__field(evsel, name);
+ struct tep_format_field *format_field = evsel__field(evsel, name);
if (format_field == NULL)
return -1;
@@ -396,7 +392,7 @@ static int perf_evsel__init_tp_ptr_field(struct evsel *evsel,
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
- perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
+ evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
static void evsel__delete_priv(struct evsel *evsel)
{
@@ -404,13 +400,13 @@ static void evsel__delete_priv(struct evsel *evsel)
evsel__delete(evsel);
}
-static int perf_evsel__init_syscall_tp(struct evsel *evsel)
+static int evsel__init_syscall_tp(struct evsel *evsel)
{
struct syscall_tp *sc = evsel__syscall_tp(evsel);
if (sc != NULL) {
- if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
- perf_evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
+ if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
+ evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
return -ENOENT;
return 0;
}
@@ -418,14 +414,14 @@ static int perf_evsel__init_syscall_tp(struct evsel *evsel)
return -ENOMEM;
}
-static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
+static int evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
{
struct syscall_tp *sc = evsel__syscall_tp(evsel);
if (sc != NULL) {
- struct tep_format_field *syscall_id = perf_evsel__field(tp, "id");
+ struct tep_format_field *syscall_id = evsel__field(tp, "id");
if (syscall_id == NULL)
- syscall_id = perf_evsel__field(tp, "__syscall_nr");
+ syscall_id = evsel__field(tp, "__syscall_nr");
if (syscall_id == NULL ||
__tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
return -EINVAL;
@@ -436,21 +432,21 @@ static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evs
return -ENOMEM;
}
-static int perf_evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
+static int evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
{
struct syscall_tp *sc = __evsel__syscall_tp(evsel);
return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
}
-static int perf_evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
+static int evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
{
struct syscall_tp *sc = __evsel__syscall_tp(evsel);
return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
}
-static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
+static int evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
{
if (evsel__syscall_tp(evsel) != NULL) {
if (perf_evsel__init_sc_tp_uint_field(evsel, id))
@@ -465,16 +461,16 @@ static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
{
- struct evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
+ struct evsel *evsel = evsel__newtp("raw_syscalls", direction);
/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
if (IS_ERR(evsel))
- evsel = perf_evsel__newtp("syscalls", direction);
+ evsel = evsel__newtp("syscalls", direction);
if (IS_ERR(evsel))
return NULL;
- if (perf_evsel__init_raw_syscall_tp(evsel, handler))
+ if (evsel__init_raw_syscall_tp(evsel, handler))
goto out_delete;
return evsel;
@@ -1752,12 +1748,30 @@ static int trace__read_syscall_info(struct trace *trace, int id)
struct syscall *sc;
const char *name = syscalltbl__name(trace->sctbl, id);
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
if (trace->syscalls.table == NULL) {
trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
if (trace->syscalls.table == NULL)
return -ENOMEM;
}
+#else
+ if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) {
+ // When using libaudit we don't know beforehand what is the max syscall id
+ struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
+
+ if (table == NULL)
+ return -ENOMEM;
+
+ // Zero the whole table if it is brand new, otherwise only the newly added entries
+ if (trace->syscalls.table == NULL)
+ memset(table, 0, (id + 1) * sizeof(*sc));
+ else
+ memset(table + trace->sctbl->syscalls.max_id + 1, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc));
+ trace->syscalls.table = table;
+ trace->sctbl->syscalls.max_id = id;
+ }
+#endif
sc = trace->syscalls.table + id;
if (sc->nonexistent)
return 0;
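The libaudit fallback above has to grow the syscall table lazily because, unlike the generated syscall table, libaudit does not expose the maximum syscall id up front. A minimal standalone sketch of that grow-and-zero pattern, using hypothetical names (grow_table, struct entry, max_id) rather than the actual trace/sctbl structures:

#include <stdlib.h>
#include <string.h>

struct entry { const char *name; };

/*
 * Grow a lazily sized array so that index 'id' becomes valid, zeroing only
 * the slots that did not exist before. Returns the (possibly moved) array,
 * or NULL on allocation failure.
 */
static struct entry *grow_table(struct entry *table, int *max_id, int id)
{
	struct entry *t;

	if (table != NULL && id <= *max_id)
		return table;			/* already large enough */

	t = realloc(table, (id + 1) * sizeof(*t));
	if (t == NULL)
		return NULL;

	if (table == NULL)			/* brand new: zero everything */
		memset(t, 0, (id + 1) * sizeof(*t));
	else					/* zero only the newly added tail */
		memset(t + *max_id + 1, 0, (id - *max_id) * sizeof(*t));

	*max_id = id;
	return t;
}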
@@ -1801,7 +1815,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
return syscall__set_arg_fmts(sc);
}
-static int perf_evsel__init_tp_arg_scnprintf(struct evsel *evsel)
+static int evsel__init_tp_arg_scnprintf(struct evsel *evsel)
{
struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
@@ -2074,15 +2088,27 @@ static struct syscall *trace__syscall_info(struct trace *trace,
if (verbose > 1) {
static u64 n;
fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
- id, perf_evsel__name(evsel), ++n);
+ id, evsel__name(evsel), ++n);
}
return NULL;
}
err = -EINVAL;
- if (id > trace->sctbl->syscalls.max_id)
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
+ if (id > trace->sctbl->syscalls.max_id) {
+#else
+ if (id >= trace->sctbl->syscalls.max_id) {
+ /*
+ * With libaudit we don't know beforehand what the max_id is,
+ * so we let trace__read_syscall_info() figure that out as we
+ * go on reading syscalls.
+ */
+ err = trace__read_syscall_info(trace, id);
+ if (err)
+#endif
goto out_cant_read;
+ }
if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
(err = trace__read_syscall_info(trace, id)) != 0)
@@ -2206,7 +2232,7 @@ static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
double ts = (double)sample->time / NSEC_PER_MSEC;
printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
- perf_evsel__name(evsel), ts,
+ evsel__name(evsel), ts,
thread__comm_str(thread),
sample->pid, sample->tid, sample->cpu);
}
@@ -2382,7 +2408,7 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam
static const char *errno_to_name(struct evsel *evsel, int err)
{
- struct perf_env *env = perf_evsel__env(evsel);
+ struct perf_env *env = evsel__env(evsel);
const char *arch_name = perf_env__arch(env);
return arch_syscalls__strerrno(arch_name, err);
@@ -2513,7 +2539,7 @@ errno_print: {
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+ pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
out:
ttrace->entry_pending = false;
err = 0;
@@ -2531,7 +2557,7 @@ static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
size_t filename_len, entry_str_len, to_move;
ssize_t remaining_space;
char *pos;
- const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
+ const char *filename = evsel__rawptr(evsel, sample, "pathname");
if (!thread)
goto out;
@@ -2587,7 +2613,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
union perf_event *event __maybe_unused,
struct perf_sample *sample)
{
- u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
+ u64 runtime = evsel__intval(evsel, sample, "runtime");
double runtime_ms = (double)runtime / NSEC_PER_MSEC;
struct thread *thread = machine__findnew_thread(trace->host,
sample->pid,
@@ -2606,10 +2632,10 @@ out_put:
out_dump:
fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
evsel->name,
- perf_evsel__strval(evsel, sample, "comm"),
- (pid_t)perf_evsel__intval(evsel, sample, "pid"),
+ evsel__strval(evsel, sample, "comm"),
+ (pid_t)evsel__intval(evsel, sample, "pid"),
runtime,
- perf_evsel__intval(evsel, sample, "vruntime"));
+ evsel__intval(evsel, sample, "vruntime"));
goto out_put;
}
@@ -2774,7 +2800,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
fprintf(trace->output, "%s(", evsel->name);
- if (perf_evsel__is_bpf_output(evsel)) {
+ if (evsel__is_bpf_output(evsel)) {
bpf_output__fprintf(trace, sample);
} else if (evsel->tp_format) {
if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
@@ -2795,7 +2821,7 @@ newline:
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+ pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
++trace->nr_events_printed;
@@ -2890,7 +2916,7 @@ static int trace__pgfault(struct trace *trace,
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+ pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
++trace->nr_events_printed;
out:
@@ -3032,10 +3058,10 @@ static bool evlist__add_vfs_getname(struct evlist *evlist)
}
evlist__for_each_entry_safe(evlist, evsel, tmp) {
- if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname"))
+ if (!strstarts(evsel__name(evsel), "probe:vfs_getname"))
continue;
- if (perf_evsel__field(evsel, "pathname")) {
+ if (evsel__field(evsel, "pathname")) {
evsel->handler = trace__vfs_getname;
found = true;
continue;
@@ -3049,7 +3075,7 @@ static bool evlist__add_vfs_getname(struct evlist *evlist)
return found;
}
-static struct evsel *perf_evsel__new_pgfault(u64 config)
+static struct evsel *evsel__new_pgfault(u64 config)
{
struct evsel *evsel;
struct perf_event_attr attr = {
@@ -3093,7 +3119,7 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
sample->raw_data == NULL) {
fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
- perf_evsel__name(evsel), sample->tid,
+ evsel__name(evsel), sample->tid,
sample->cpu, sample->raw_size);
} else {
tracepoint_handler handler = evsel->handler;
@@ -3124,8 +3150,8 @@ static int trace__add_syscall_newtp(struct trace *trace)
if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
goto out_delete_sys_exit;
- perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
- perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
+ evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
+ evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
evlist__add(evlist, sys_enter);
evlist__add(evlist, sys_exit);
@@ -3164,10 +3190,9 @@ static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
if (filter == NULL)
goto out_enomem;
- if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
- filter)) {
+ if (!evsel__append_tp_filter(trace->syscalls.events.sys_enter, filter)) {
sys_exit = trace->syscalls.events.sys_exit;
- err = perf_evsel__append_tp_filter(sys_exit, filter);
+ err = evsel__append_tp_filter(sys_exit, filter);
}
free(filter);
@@ -3179,6 +3204,26 @@ out_enomem:
}
#ifdef HAVE_LIBBPF_SUPPORT
+static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
+{
+ if (trace->bpf_obj == NULL)
+ return NULL;
+
+ return bpf_object__find_map_by_name(trace->bpf_obj, name);
+}
+
+static void trace__set_bpf_map_filtered_pids(struct trace *trace)
+{
+ trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
+}
+
+static void trace__set_bpf_map_syscalls(struct trace *trace)
+{
+ trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
+ trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
+ trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
+}
+
static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
{
if (trace->bpf_obj == NULL)
@@ -3517,6 +3562,20 @@ static void trace__delete_augmented_syscalls(struct trace *trace)
trace->bpf_obj = NULL;
}
#else // HAVE_LIBBPF_SUPPORT
+static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused,
+ const char *name __maybe_unused)
+{
+ return NULL;
+}
+
+static void trace__set_bpf_map_filtered_pids(struct trace *trace __maybe_unused)
+{
+}
+
+static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused)
+{
+}
+
static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
{
return 0;
@@ -3695,7 +3754,7 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
return __trace__deliver_event(trace, event->event);
}
-static struct syscall_arg_fmt *perf_evsel__syscall_arg_fmt(struct evsel *evsel, char *arg)
+static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg)
{
struct tep_format_field *field;
struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel);
@@ -3750,7 +3809,7 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel
scnprintf(arg, sizeof(arg), "%.*s", left_size, left);
- fmt = perf_evsel__syscall_arg_fmt(evsel, arg);
+ fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg);
if (fmt == NULL) {
pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n",
arg, evsel->name, evsel->filter);
@@ -3801,7 +3860,7 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel
if (new_filter != evsel->filter) {
pr_debug("New filter for %s: %s\n", evsel->name, new_filter);
- perf_evsel__set_filter(evsel, new_filter);
+ evsel__set_filter(evsel, new_filter);
free(new_filter);
}
@@ -3846,24 +3905,23 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
}
if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
- pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
+ pgfault_maj = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
if (pgfault_maj == NULL)
goto out_error_mem;
- perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
+ evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
evlist__add(evlist, pgfault_maj);
}
if ((trace->trace_pgfaults & TRACE_PFMIN)) {
- pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
+ pgfault_min = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
if (pgfault_min == NULL)
goto out_error_mem;
- perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
+ evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
evlist__add(evlist, pgfault_min);
}
if (trace->sched &&
- perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
- trace__sched_stat_runtime))
+ evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
goto out_error_sched_stat_runtime;
/*
* If a global cgroup was set, apply it to all the events without an
@@ -4095,11 +4153,11 @@ out_error_raw_syscalls:
goto out_error;
out_error_mmap:
- perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
+ evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
goto out_error;
out_error_open:
- perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+ evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
out_error:
fprintf(trace->output, "%s\n", errbuf);
@@ -4108,7 +4166,7 @@ out_error:
out_error_apply_filters:
fprintf(trace->output,
"Failed to set filter \"%s\" on event %s with %d (%s)\n",
- evsel->filter, perf_evsel__name(evsel), errno,
+ evsel->filter, evsel__name(evsel), errno,
str_error_r(errno, errbuf, sizeof(errbuf)));
goto out_delete_evlist;
}
@@ -4179,7 +4237,7 @@ static int trace__replay(struct trace *trace)
"syscalls:sys_enter");
if (evsel &&
- (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
+ (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
pr_err("Error during initialize raw_syscalls:sys_enter event\n");
goto out;
@@ -4191,7 +4249,7 @@ static int trace__replay(struct trace *trace)
evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
"syscalls:sys_exit");
if (evsel &&
- (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
+ (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
pr_err("Error during initialize raw_syscalls:sys_exit event\n");
goto out;
@@ -4471,11 +4529,11 @@ static int evlist__set_syscall_tp_fields(struct evlist *evlist)
continue;
if (strcmp(evsel->tp_format->system, "syscalls")) {
- perf_evsel__init_tp_arg_scnprintf(evsel);
+ evsel__init_tp_arg_scnprintf(evsel);
continue;
}
- if (perf_evsel__init_syscall_tp(evsel))
+ if (evsel__init_syscall_tp(evsel))
return -1;
if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
@@ -4605,26 +4663,6 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u
return 0;
}
-static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
-{
- if (trace->bpf_obj == NULL)
- return NULL;
-
- return bpf_object__find_map_by_name(trace->bpf_obj, name);
-}
-
-static void trace__set_bpf_map_filtered_pids(struct trace *trace)
-{
- trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
-}
-
-static void trace__set_bpf_map_syscalls(struct trace *trace)
-{
- trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
- trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
- trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
-}
-
static int trace__config(const char *var, const char *value, void *arg)
{
struct trace *trace = arg;
@@ -4778,7 +4816,7 @@ int cmd_trace(int argc, const char **argv)
"per thread proc mmap processing timeout in ms"),
OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
trace__parse_cgroups),
- OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
+ OPT_INTEGER('D', "delay", &trace.opts.initial_delay,
"ms to wait before starting measurement after program "
"start"),
OPTS_EVSWITCH(&trace.evswitch),
@@ -4989,7 +5027,7 @@ int cmd_trace(int argc, const char **argv)
*/
if (trace.syscalls.events.augmented) {
evlist__for_each_entry(trace.evlist, evsel) {
- bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
+ bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
if (raw_syscalls_sys_exit) {
trace.raw_augmented_syscalls = true;
@@ -4997,10 +5035,10 @@ int cmd_trace(int argc, const char **argv)
}
if (trace.syscalls.events.augmented->priv == NULL &&
- strstr(perf_evsel__name(evsel), "syscalls:sys_enter")) {
+ strstr(evsel__name(evsel), "syscalls:sys_enter")) {
struct evsel *augmented = trace.syscalls.events.augmented;
- if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) ||
- perf_evsel__init_augmented_syscall_tp_args(augmented))
+ if (evsel__init_augmented_syscall_tp(augmented, evsel) ||
+ evsel__init_augmented_syscall_tp_args(augmented))
goto out;
/*
* Augmented is __augmented_syscalls__ BPF_OUTPUT event
@@ -5014,16 +5052,16 @@ int cmd_trace(int argc, const char **argv)
* as not to filter it, then we'll handle it just like we would
* for the BPF_OUTPUT one:
*/
- if (perf_evsel__init_augmented_syscall_tp(evsel, evsel) ||
- perf_evsel__init_augmented_syscall_tp_args(evsel))
+ if (evsel__init_augmented_syscall_tp(evsel, evsel) ||
+ evsel__init_augmented_syscall_tp_args(evsel))
goto out;
evsel->handler = trace__sys_enter;
}
- if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
+ if (strstarts(evsel__name(evsel), "syscalls:sys_exit_")) {
struct syscall_tp *sc;
init_augmented_syscall_tp:
- if (perf_evsel__init_augmented_syscall_tp(evsel, evsel))
+ if (evsel__init_augmented_syscall_tp(evsel, evsel))
goto out;
sc = __evsel__syscall_tp(evsel);
/*
@@ -5047,7 +5085,7 @@ init_augmented_syscall_tp:
*/
if (trace.raw_augmented_syscalls)
trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
- perf_evsel__init_augmented_syscall_tp_ret(evsel);
+ evsel__init_augmented_syscall_tp_ret(evsel);
evsel->handler = trace__sys_exit;
}
}
diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c
index 05cf2af9e2c2..d09ec2f03071 100644
--- a/tools/perf/builtin-version.c
+++ b/tools/perf/builtin-version.c
@@ -60,7 +60,6 @@ static void library_status(void)
STATUS(HAVE_DWARF_SUPPORT, dwarf);
STATUS(HAVE_DWARF_GETLOCATIONS_SUPPORT, dwarf_getlocations);
STATUS(HAVE_GLIBC_SUPPORT, glibc);
- STATUS(HAVE_GTK2_SUPPORT, gtk2);
#ifndef HAVE_SYSCALL_TABLE_SUPPORT
STATUS(HAVE_LIBAUDIT_SUPPORT, libaudit);
#endif
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index cf147db4e5ca..15ecb1803fb9 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -75,6 +75,15 @@ include/uapi/asm-generic/mman-common.h
include/uapi/asm-generic/unistd.h
'
+# These copies are under tools/perf/trace/beauty/ as they are not used in
+# building object files, only by scripts in tools/perf/trace/beauty/ to generate
+# tables that then get included in .c files for things like id->string syscall
+# tables (and the reverse lookup as well: string -> id)
+
+BEAUTY_FILES='
+include/linux/socket.h
+'
+
check_2 () {
file1=$1
file2=$2
@@ -100,6 +109,14 @@ check () {
check_2 tools/$file $file $*
}
+beauty_check () {
+ file=$1
+
+ shift
+
+ check_2 tools/perf/trace/beauty/$file $file $*
+}
+
# Check if we have the kernel headers (tools/perf/../../include), else
# we're probably on a detached tarball, so no point in trying to check
# differences.
@@ -128,4 +145,12 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in
# diff non-symmetric files
check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
+for i in $BEAUTY_FILES; do
+ beauty_check $i -B
+done
+
+# check duplicated library files
+check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h
+check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c
+
cd tools/perf
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index 0453ba26cdbd..a42fab308ff6 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -258,7 +258,8 @@ gets schedule to. Per task counters can be created by any user, for
their own tasks.
A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
-all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
+all events on CPU-x. Per CPU counters need CAP_PERFMON or CAP_SYS_ADMIN
+privilege.
The 'flags' parameter is currently unused and must be zero.
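As a concrete illustration of the paragraph above, here is a minimal sketch of opening a per-CPU counter with pid == -1 and flags == 0 (raw syscall, hardware cycles event, error handling omitted; the helper name is hypothetical):

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/*
 * Count hardware cycles for every task on one CPU: pid == -1, cpu == cpu,
 * no group leader (group_fd == -1), flags == 0. Needs CAP_PERFMON or
 * CAP_SYS_ADMIN as described above. Returns the event fd or -1 on error.
 */
static int open_per_cpu_cycles(int cpu)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;

	return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
}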
diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
index c441a34cb1c0..fcca275e5bf9 100644
--- a/tools/perf/jvmti/libjvmti.c
+++ b/tools/perf/jvmti/libjvmti.c
@@ -32,34 +32,41 @@ static void print_error(jvmtiEnv *jvmti, const char *msg, jvmtiError ret)
#ifdef HAVE_JVMTI_CMLR
static jvmtiError
-do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
- jvmti_line_info_t *tab, jint *nr)
+do_get_line_number(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
+ jvmti_line_info_t *tab)
{
- jint i, lines = 0;
- jint nr_lines = 0;
+ jint i, nr_lines = 0;
jvmtiLineNumberEntry *loc_tab = NULL;
jvmtiError ret;
+ jint src_line = -1;
ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab);
- if (ret != JVMTI_ERROR_NONE) {
+ if (ret == JVMTI_ERROR_ABSENT_INFORMATION || ret == JVMTI_ERROR_NATIVE_METHOD) {
+ /* No debug information for this method */
+ return ret;
+ } else if (ret != JVMTI_ERROR_NONE) {
print_error(jvmti, "GetLineNumberTable", ret);
return ret;
}
- for (i = 0; i < nr_lines; i++) {
- if (loc_tab[i].start_location < bci) {
- tab[lines].pc = (unsigned long)pc;
- tab[lines].line_number = loc_tab[i].line_number;
- tab[lines].discrim = 0; /* not yet used */
- tab[lines].methodID = m;
- lines++;
- } else {
- break;
- }
+ for (i = 0; i < nr_lines && loc_tab[i].start_location <= bci; i++) {
+ src_line = i;
+ }
+
+ if (src_line != -1) {
+ tab->pc = (unsigned long)pc;
+ tab->line_number = loc_tab[src_line].line_number;
+ tab->discrim = 0; /* not yet used */
+ tab->methodID = m;
+
+ ret = JVMTI_ERROR_NONE;
+ } else {
+ ret = JVMTI_ERROR_ABSENT_INFORMATION;
}
+
(*jvmti)->Deallocate(jvmti, (unsigned char *)loc_tab);
- *nr = lines;
- return JVMTI_ERROR_NONE;
+
+ return ret;
}
static jvmtiError
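do_get_line_number() above now simply remembers the last line-table entry whose start_location is at or below the reported bytecode index, i.e. the innermost source line covering that bci. The same lookup in isolation, with a hypothetical line_entry type standing in for jvmtiLineNumberEntry and the same ordering assumption the loop above relies on:

struct line_entry {
	long start_location;	/* first bytecode index covered by this entry */
	int  line_number;
};

/*
 * Return the source line of the last entry whose start_location is at or
 * below the given bytecode index, or -1 when no entry covers it. Assumes
 * the table is ordered by start_location.
 */
static int lookup_line(const struct line_entry *tab, int nr, long bci)
{
	int i, found = -1;

	for (i = 0; i < nr && tab[i].start_location <= bci; i++)
		found = i;

	return found >= 0 ? tab[found].line_number : -1;
}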
@@ -67,9 +74,8 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
{
const jvmtiCompiledMethodLoadRecordHeader *hdr;
jvmtiCompiledMethodLoadInlineRecord *rec;
- jvmtiLineNumberEntry *lne = NULL;
PCStackInfo *c;
- jint nr, ret;
+ jint ret;
int nr_total = 0;
int i, lines_total = 0;
@@ -82,21 +88,7 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
for (hdr = compile_info; hdr != NULL; hdr = hdr->next) {
if (hdr->kind == JVMTI_CMLR_INLINE_INFO) {
rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
- for (i = 0; i < rec->numpcs; i++) {
- c = rec->pcinfo + i;
- nr = 0;
- /*
- * unfortunately, need a tab to get the number of lines!
- */
- ret = (*jvmti)->GetLineNumberTable(jvmti, c->methods[0], &nr, &lne);
- if (ret == JVMTI_ERROR_NONE) {
- /* free what was allocated for nothing */
- (*jvmti)->Deallocate(jvmti, (unsigned char *)lne);
- nr_total += (int)nr;
- } else {
- print_error(jvmti, "GetLineNumberTable", ret);
- }
- }
+ nr_total += rec->numpcs;
}
}
@@ -115,14 +107,17 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
for (i = 0; i < rec->numpcs; i++) {
c = rec->pcinfo + i;
- nr = 0;
- ret = do_get_line_numbers(jvmti, c->pc,
- c->methods[0],
- c->bcis[0],
- *tab + lines_total,
- &nr);
+ /*
+ * c->methods is the stack of inlined method calls
+ * at c->pc. [0] is the leaf method. Caller frames
+ * are ignored at the moment.
+ */
+ ret = do_get_line_number(jvmti, c->pc,
+ c->methods[0],
+ c->bcis[0],
+ *tab + lines_total);
if (ret == JVMTI_ERROR_NONE)
- lines_total += nr;
+ lines_total++;
}
}
}
@@ -246,8 +241,6 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
char *class_sign = NULL;
char *func_name = NULL;
char *func_sign = NULL;
- char *file_name = NULL;
- char fn[PATH_MAX];
uint64_t addr = (uint64_t)(uintptr_t)code_addr;
jvmtiError ret;
int nr_lines = 0; /* in line_tab[] */
@@ -264,7 +257,9 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
if (has_line_numbers && map && map_length) {
ret = get_line_numbers(jvmti, compile_info, &line_tab, &nr_lines);
if (ret != JVMTI_ERROR_NONE) {
- warnx("jvmti: cannot get line table for method");
+ if (ret != JVMTI_ERROR_NOT_FOUND) {
+ warnx("jvmti: cannot get line table for method");
+ }
nr_lines = 0;
} else if (nr_lines > 0) {
line_file_names = malloc(sizeof(char*) * nr_lines);
@@ -282,12 +277,6 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
}
}
- ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
- if (ret != JVMTI_ERROR_NONE) {
- print_error(jvmti, "GetSourceFileName", ret);
- goto error;
- }
-
ret = (*jvmti)->GetClassSignature(jvmti, decl_class,
&class_sign, NULL);
if (ret != JVMTI_ERROR_NONE) {
@@ -302,8 +291,6 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
goto error;
}
- copy_class_filename(class_sign, file_name, fn, PATH_MAX);
-
/*
* write source line info record if we have it
*/
@@ -323,7 +310,6 @@ error:
(*jvmti)->Deallocate(jvmti, (unsigned char *)func_name);
(*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign);
(*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
- (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
free(line_tab);
while (line_file_names && (nr_lines > 0)) {
if (line_file_names[nr_lines - 1]) {
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 15e458e150bd..7a2264e1e4e1 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -9,31 +9,13 @@
struct perf_event_attr;
-extern bool test_attr__enabled;
-void test_attr__ready(void);
-void test_attr__init(void);
-void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
- int fd, int group_fd, unsigned long flags);
-
-#ifndef HAVE_ATTR_TEST
-#define HAVE_ATTR_TEST 1
-#endif
-
static inline int
sys_perf_event_open(struct perf_event_attr *attr,
pid_t pid, int cpu, int group_fd,
unsigned long flags)
{
- int fd;
-
- fd = syscall(__NR_perf_event_open, attr, pid, cpu,
- group_fd, flags);
-
-#if HAVE_ATTR_TEST
- if (unlikely(test_attr__enabled))
- test_attr__open(attr, pid, cpu, fd, group_fd, flags);
-#endif
- return fd;
+ return syscall(__NR_perf_event_open, attr, pid, cpu,
+ group_fd, flags);
}
#endif /* _PERF_SYS_H */
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/cache.json b/tools/perf/pmu-events/arch/powerpc/power8/cache.json
index 6b792b2c87e2..05a17084d939 100644
--- a/tools/perf/pmu-events/arch/powerpc/power8/cache.json
+++ b/tools/perf/pmu-events/arch/powerpc/power8/cache.json
@@ -32,8 +32,8 @@
{
"EventCode": "0x1c04e",
"EventName": "PM_DATA_FROM_L2MISS_MOD",
- "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to a demand load",
- "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
},
{
"EventCode": "0x3c040",
@@ -74,8 +74,8 @@
{
"EventCode": "0x4c04e",
"EventName": "PM_DATA_FROM_L3MISS_MOD",
- "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to a demand load",
- "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
},
{
"EventCode": "0x3c042",
@@ -134,7 +134,7 @@
{
"EventCode": "0x4e04e",
"EventName": "PM_DPTEG_FROM_L3MISS",
- "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L3 due to a data side request",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a data side request",
"PublicDescription": ""
},
{
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/frontend.json b/tools/perf/pmu-events/arch/powerpc/power8/frontend.json
index 1ddc30655d43..1c902a8263b6 100644
--- a/tools/perf/pmu-events/arch/powerpc/power8/frontend.json
+++ b/tools/perf/pmu-events/arch/powerpc/power8/frontend.json
@@ -116,8 +116,8 @@
{
"EventCode": "0x1404e",
"EventName": "PM_INST_FROM_L2MISS",
- "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to an instruction fetch (not prefetch)",
- "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
},
{
"EventCode": "0x34040",
@@ -158,8 +158,8 @@
{
"EventCode": "0x4404e",
"EventName": "PM_INST_FROM_L3MISS_MOD",
- "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to a instruction fetch",
- "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to a instruction fetch",
+ "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
},
{
"EventCode": "0x34042",
@@ -320,7 +320,7 @@
{
"EventCode": "0x1504e",
"EventName": "PM_IPTEG_FROM_L2MISS",
- "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L2 due to a instruction side request",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a instruction side request",
"PublicDescription": ""
},
{
@@ -344,7 +344,7 @@
{
"EventCode": "0x4504e",
"EventName": "PM_IPTEG_FROM_L3MISS",
- "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L3 due to a instruction side request",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a instruction side request",
"PublicDescription": ""
},
{
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/marked.json b/tools/perf/pmu-events/arch/powerpc/power8/marked.json
index 94dc58b83b7e..6de61a797bbd 100644
--- a/tools/perf/pmu-events/arch/powerpc/power8/marked.json
+++ b/tools/perf/pmu-events/arch/powerpc/power8/marked.json
@@ -92,7 +92,7 @@
{
"EventCode": "0x4c12e",
"EventName": "PM_MRK_DATA_FROM_L2MISS_CYC",
- "BriefDescription": "Duration in cycles to reload from a localtion other than the local core's L2 due to a marked load",
+ "BriefDescription": "Duration in cycles to reload from a location other than the local core's L2 due to a marked load",
"PublicDescription": ""
},
{
@@ -158,13 +158,13 @@
{
"EventCode": "0x201e4",
"EventName": "PM_MRK_DATA_FROM_L3MISS",
- "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to a marked load",
+ "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a marked load",
"PublicDescription": ""
},
{
"EventCode": "0x2d12e",
"EventName": "PM_MRK_DATA_FROM_L3MISS_CYC",
- "BriefDescription": "Duration in cycles to reload from a localtion other than the local core's L3 due to a marked load",
+ "BriefDescription": "Duration in cycles to reload from a location other than the local core's L3 due to a marked load",
"PublicDescription": ""
},
{
@@ -392,7 +392,7 @@
{
"EventCode": "0x1f14e",
"EventName": "PM_MRK_DPTEG_FROM_L2MISS",
- "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L2 due to a marked data side request",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a marked data side request",
"PublicDescription": ""
},
{
@@ -416,7 +416,7 @@
{
"EventCode": "0x4f14e",
"EventName": "PM_MRK_DPTEG_FROM_L3MISS",
- "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L3 due to a marked data side request",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a marked data side request",
"PublicDescription": ""
},
{
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json
index bffb2d4a6420..fc4aa6c2ddc9 100644
--- a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json
+++ b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json
@@ -169,7 +169,7 @@
},
{
"BriefDescription": "Cycles GCT empty where dispatch was held",
- "MetricExpr": "(PM_GCT_NOSLOT_DISP_HELD_MAP + PM_GCT_NOSLOT_DISP_HELD_SRQ + PM_GCT_NOSLOT_DISP_HELD_ISSQ + PM_GCT_NOSLOT_DISP_HELD_OTHER) / PM_RUN_INST_CMPL)",
+ "MetricExpr": "(PM_GCT_NOSLOT_DISP_HELD_MAP + PM_GCT_NOSLOT_DISP_HELD_SRQ + PM_GCT_NOSLOT_DISP_HELD_ISSQ + PM_GCT_NOSLOT_DISP_HELD_OTHER) / PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "gct_empty_disp_held_cpi"
},
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/other.json b/tools/perf/pmu-events/arch/powerpc/power8/other.json
index f4e760cab111..84a0cedf1fd9 100644
--- a/tools/perf/pmu-events/arch/powerpc/power8/other.json
+++ b/tools/perf/pmu-events/arch/powerpc/power8/other.json
@@ -410,8 +410,8 @@
{
"EventCode": "0x61c04e",
"EventName": "PM_DATA_ALL_FROM_L2MISS_MOD",
- "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to either demand loads or data prefetch",
- "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
},
{
"EventCode": "0x63c040",
@@ -470,8 +470,8 @@
{
"EventCode": "0x64c04e",
"EventName": "PM_DATA_ALL_FROM_L3MISS_MOD",
- "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to either demand loads or data prefetch",
- "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
},
{
"EventCode": "0x63c042",
@@ -1280,8 +1280,8 @@
{
"EventCode": "0x51404e",
"EventName": "PM_INST_ALL_FROM_L2MISS",
- "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to instruction fetches and prefetches",
- "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
},
{
"EventCode": "0x534040",
@@ -1340,8 +1340,8 @@
{
"EventCode": "0x54404e",
"EventName": "PM_INST_ALL_FROM_L3MISS_MOD",
- "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to a instruction fetch",
- "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to a instruction fetch",
+ "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
},
{
"EventCode": "0x534042",
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/translation.json b/tools/perf/pmu-events/arch/powerpc/power8/translation.json
index 623e7475b010..a1657f5fdc6b 100644
--- a/tools/perf/pmu-events/arch/powerpc/power8/translation.json
+++ b/tools/perf/pmu-events/arch/powerpc/power8/translation.json
@@ -44,7 +44,7 @@
{
"EventCode": "0x1e04e",
"EventName": "PM_DPTEG_FROM_L2MISS",
- "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L2 due to a data side request",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a data side request",
"PublicDescription": ""
},
{
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
index 811c2a8c1c9e..f8784c608479 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
@@ -60,7 +60,7 @@
},
{
"BriefDescription": "Stalls due to short latency decimal floating ops.",
- "MetricExpr": "(PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_DFLONG)/PM_RUN_INST_CMPL",
+ "MetricExpr": "dfu_stall_cpi - dflong_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "dfu_other_stall_cpi"
},
@@ -72,7 +72,7 @@
},
{
"BriefDescription": "Completion stall by Dcache miss which resolved off node memory/cache",
- "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM - PM_CMPLU_STALL_DMISS_REMOTE)/PM_RUN_INST_CMPL",
+ "MetricExpr": "dmiss_non_local_stall_cpi - dmiss_remote_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_distant_stall_cpi"
},
@@ -90,7 +90,7 @@
},
{
"BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 without conflict",
- "MetricExpr": "(PM_CMPLU_STALL_DMISS_L2L3 - PM_CMPLU_STALL_DMISS_L2L3_CONFLICT)/PM_RUN_INST_CMPL",
+ "MetricExpr": "dmiss_l2l3_stall_cpi - dmiss_l2l3_conflict_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_l2l3_noconflict_stall_cpi"
},
@@ -114,7 +114,7 @@
},
{
"BriefDescription": "Completion stall by Dcache miss which resolved outside of local memory",
- "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM)/PM_RUN_INST_CMPL",
+ "MetricExpr": "dmiss_l3miss_stall_cpi - dmiss_l21_l31_stall_cpi - dmiss_lmem_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_non_local_stall_cpi"
},
@@ -126,7 +126,7 @@
},
{
"BriefDescription": "Stalls due to short latency double precision ops.",
- "MetricExpr": "(PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DPLONG)/PM_RUN_INST_CMPL",
+ "MetricExpr": "dp_stall_cpi - dplong_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "dp_other_stall_cpi"
},
@@ -155,7 +155,7 @@
"MetricName": "emq_full_stall_cpi"
},
{
- "MetricExpr": "(PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL)/PM_RUN_INST_CMPL",
+ "MetricExpr": "erat_miss_stall_cpi + emq_full_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "emq_stall_cpi"
},
@@ -173,7 +173,7 @@
},
{
"BriefDescription": "Completion stall due to execution units for other reasons.",
- "MetricExpr": "(PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_PM - PM_CMPLU_STALL_CRYPTO - PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL",
+ "MetricExpr": "exec_unit_stall_cpi - scalar_stall_cpi - vector_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "exec_unit_other_stall_cpi"
},
@@ -197,7 +197,7 @@
},
{
"BriefDescription": "Stalls due to short latency integer ops",
- "MetricExpr": "(PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_FXLONG)/PM_RUN_INST_CMPL",
+ "MetricExpr": "fxu_stall_cpi - fxlong_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "fxu_other_stall_cpi"
},
@@ -208,7 +208,85 @@
"MetricName": "fxu_stall_cpi"
},
{
- "MetricExpr": "(PM_NTC_ISSUE_HELD_DARQ_FULL + PM_NTC_ISSUE_HELD_ARB + PM_NTC_ISSUE_HELD_OTHER)/PM_RUN_INST_CMPL",
+ "BriefDescription": "Instruction Completion Table empty for this thread due to branch mispred",
+ "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_br_mpred_cpi"
+ },
+ {
+ "BriefDescription": "Instruction Completion Table empty for this thread due to Icache Miss and branch mispred",
+ "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_br_mpred_icmiss_cpi"
+ },
+ {
+ "BriefDescription": "Instruction Completion Table other stalls",
+ "MetricExpr": "nothing_dispatched_cpi - ict_noslot_ic_miss_cpi - ict_noslot_br_mpred_icmiss_cpi - ict_noslot_br_mpred_cpi - ict_noslot_disp_held_cpi",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_cyc_other_cpi"
+ },
+ {
+ "BriefDescription": "Cycles in which the NTC instruciton is held at dispatch for any reason",
+ "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_disp_held_cpi"
+ },
+ {
+ "BriefDescription": "Instruction Completion Table empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF",
+ "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_disp_held_hb_full_cpi"
+ },
+ {
+ "BriefDescription": "Instruction Completion Table empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full",
+ "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_disp_held_issq_cpi"
+ },
+ {
+ "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI",
+ "MetricExpr": "ict_noslot_disp_held_cpi - ict_noslot_disp_held_hb_full_cpi - ict_noslot_disp_held_sync_cpi - ict_noslot_disp_held_tbegin_cpi - ict_noslot_disp_held_issq_cpi",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_disp_held_other_cpi"
+ },
+ {
+ "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch",
+ "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_disp_held_sync_cpi"
+ },
+ {
+ "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch",
+ "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_disp_held_tbegin_cpi"
+ },
+ {
+ "BriefDescription": "ICT_NOSLOT_IC_L2_CPI",
+ "MetricExpr": "ict_noslot_ic_miss_cpi - ict_noslot_ic_l3_cpi - ict_noslot_ic_l3miss_cpi",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_ic_l2_cpi"
+ },
+ {
+ "BriefDescription": "Instruction Completion Table empty for this thread due to icache misses that were sourced from the local L3",
+ "MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_ic_l3_cpi"
+ },
+ {
+ "BriefDescription": "Instruction Completion Table empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache",
+ "MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_ic_l3miss_cpi"
+ },
+ {
+ "BriefDescription": "Instruction Completion Table empty for this thread due to Icache Miss",
+ "MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_ic_miss_cpi"
+ },
+ {
+ "MetricExpr": "ntc_issue_held_darq_full_cpi + ntc_issue_held_arb_cpi + ntc_issue_held_other_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "issue_hold_cpi"
},
@@ -249,7 +327,7 @@
"MetricName": "lrq_other_stall_cpi"
},
{
- "MetricExpr": "(PM_CMPLU_STALL_LMQ_FULL + PM_CMPLU_STALL_ST_FWD + PM_CMPLU_STALL_LHS + PM_CMPLU_STALL_LSU_MFSPR + PM_CMPLU_STALL_LARX + PM_CMPLU_STALL_LRQ_OTHER)/PM_RUN_INST_CMPL",
+ "MetricExpr": "lmq_full_stall_cpi + st_fwd_stall_cpi + lhs_stall_cpi + lsu_mfspr_stall_cpi + larx_stall_cpi + lrq_other_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "lrq_stall_cpi"
},
@@ -260,7 +338,7 @@
"MetricName": "lsaq_arb_stall_cpi"
},
{
- "MetricExpr": "(PM_CMPLU_STALL_LRQ_FULL + PM_CMPLU_STALL_SRQ_FULL + PM_CMPLU_STALL_LSAQ_ARB)/PM_RUN_INST_CMPL",
+ "MetricExpr": "lrq_full_stall_cpi + srq_full_stall_cpi + lsaq_arb_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsaq_stall_cpi"
},
@@ -284,7 +362,7 @@
},
{
"BriefDescription": "Completion LSU stall for other reasons",
- "MetricExpr": "(PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_LSU_FIN - PM_CMPLU_STALL_STORE_FINISH - PM_CMPLU_STALL_STORE_DATA - PM_CMPLU_STALL_EIEIO - PM_CMPLU_STALL_STCX - PM_CMPLU_STALL_SLB - PM_CMPLU_STALL_TEND - PM_CMPLU_STALL_PASTE - PM_CMPLU_STALL_TLBIE - PM_CMPLU_STALL_STORE_PIPE_ARB - PM_CMPLU_STALL_STORE_FIN_ARB - PM_CMPLU_STALL_LOAD_FINISH + PM_CMPLU_STALL_DCACHE_MISS - PM_CMPLU_STALL_LMQ_FULL - PM_CMPLU_STALL_ST_FWD - PM_CMPLU_STALL_LHS - PM_CMPLU_STALL_LSU_MFSPR - PM_CMPLU_STALL_LARX - PM_CMPLU_STALL_LRQ_OTHER + PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL - PM_CMPLU_STALL_LRQ_FULL - PM_CMPLU_STALL_SRQ_FULL - PM_CMPLU_STALL_LSAQ_ARB) / PM_RUN_INST_CMPL",
+ "MetricExpr": "lsu_stall_cpi - lsu_fin_stall_cpi - store_finish_stall_cpi - srq_stall_cpi - load_finish_stall_cpi + lsu_stall_dcache_miss_cpi - lrq_stall_cpi + emq_stall_cpi - lsaq_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsu_other_stall_cpi"
},
@@ -313,7 +391,7 @@
"MetricName": "nested_tend_stall_cpi"
},
{
- "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread",
+ "BriefDescription": "Number of cycles the Instruction Completion Table has no itags assigned to this thread",
"MetricExpr": "PM_ICT_NOSLOT_CYC/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "nothing_dispatched_cpi"
@@ -356,13 +434,13 @@
},
{
"BriefDescription": "Cycles unaccounted for.",
- "MetricExpr": "(PM_RUN_CYC - PM_1PLUS_PPC_CMPL - PM_CMPLU_STALL_THRD - PM_CMPLU_STALL - PM_ICT_NOSLOT_CYC)/PM_RUN_INST_CMPL",
+ "MetricExpr": "run_cpi - completion_cpi - thread_block_stall_cpi - stall_cpi - nothing_dispatched_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "other_cpi"
},
{
"BriefDescription": "Completion stall for other reasons",
- "MetricExpr": "PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL",
+ "MetricExpr": "stall_cpi - ntc_disp_fin_stall_cpi - ntc_flush_stall_cpi - lsu_stall_cpi - exec_unit_stall_cpi - bru_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "other_stall_cpi"
},
@@ -391,7 +469,7 @@
"MetricName": "run_cyc_cpi"
},
{
- "MetricExpr": "(PM_CMPLU_STALL_FXU + PM_CMPLU_STALL_DP + PM_CMPLU_STALL_DFU + PM_CMPLU_STALL_PM + PM_CMPLU_STALL_CRYPTO)/PM_RUN_INST_CMPL",
+ "MetricExpr": "fxu_stall_cpi + dp_stall_cpi + dfu_stall_cpi + pm_stall_cpi + crypto_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "scalar_stall_cpi"
},
@@ -414,7 +492,7 @@
"MetricName": "srq_full_stall_cpi"
},
{
- "MetricExpr": "(PM_CMPLU_STALL_STORE_DATA + PM_CMPLU_STALL_EIEIO + PM_CMPLU_STALL_STCX + PM_CMPLU_STALL_SLB + PM_CMPLU_STALL_TEND + PM_CMPLU_STALL_PASTE + PM_CMPLU_STALL_TLBIE + PM_CMPLU_STALL_STORE_PIPE_ARB + PM_CMPLU_STALL_STORE_FIN_ARB)/PM_RUN_INST_CMPL",
+ "MetricExpr": "store_data_stall_cpi + eieio_stall_cpi + stcx_stall_cpi + slb_stall_cpi + tend_stall_cpi + paste_stall_cpi + tlbie_stall_cpi + store_pipe_arb_stall_cpi + store_fin_arb_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "srq_stall_cpi"
},
@@ -425,7 +503,7 @@
"MetricName": "st_fwd_stall_cpi"
},
{
- "BriefDescription": "Nothing completed and ICT not empty",
+ "BriefDescription": "Nothing completed and Instruction Completion Table not empty",
"MetricExpr": "PM_CMPLU_STALL/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "stall_cpi"
@@ -480,7 +558,7 @@
},
{
"BriefDescription": "Vector stalls due to small latency double precision ops",
- "MetricExpr": "(PM_CMPLU_STALL_VDP - PM_CMPLU_STALL_VDPLONG)/PM_RUN_INST_CMPL",
+ "MetricExpr": "vdp_stall_cpi - vdplong_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "vdp_other_stall_cpi"
},
@@ -497,7 +575,7 @@
"MetricName": "vdplong_stall_cpi"
},
{
- "MetricExpr": "(PM_CMPLU_STALL_VFXU + PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL",
+ "MetricExpr": "vfxu_stall_cpi + vdp_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "vector_stall_cpi"
},
@@ -509,7 +587,7 @@
},
{
"BriefDescription": "Vector stalls due to small latency integer ops",
- "MetricExpr": "(PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VFXLONG)/PM_RUN_INST_CMPL",
+ "MetricExpr": "vfxu_stall_cpi - vfxlong_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "vfxu_other_stall_cpi"
},
@@ -1766,7 +1844,7 @@
},
{
"BriefDescription": "% of DL1 reloads from Private L3, other core per Inst",
- "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL",
+ "MetricExpr": "dl1_reload_from_l31_mod_rate_percent + dl1_reload_from_l31_shr_rate_percent",
"MetricName": "dl1_reload_from_l31_rate_percent"
},
{
@@ -1820,71 +1898,6 @@
"MetricName": "fxu_all_idle"
},
{
- "BriefDescription": "Ict empty for this thread due to branch mispred",
- "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_br_mpred_cpi"
- },
- {
- "BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred",
- "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_br_mpred_icmiss_cpi"
- },
- {
- "BriefDescription": "ICT other stalls",
- "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_cyc_other_cpi"
- },
- {
- "BriefDescription": "Cycles in which the NTC instruciton is held at dispatch for any reason",
- "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_disp_held_cpi"
- },
- {
- "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF",
- "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_disp_held_hb_full_cpi"
- },
- {
- "BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full",
- "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_disp_held_issq_cpi"
- },
- {
- "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI",
- "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_disp_held_other_cpi"
- },
- {
- "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch",
- "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_disp_held_sync_cpi"
- },
- {
- "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch",
- "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_disp_held_tbegin_cpi"
- },
- {
- "BriefDescription": "ICT_NOSLOT_IC_L2_CPI",
- "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_ic_l2_cpi"
- },
- {
- "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3",
- "MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_ic_l3_cpi"
- },
- {
- "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache",
- "MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_ic_l3miss_cpi"
- },
- {
- "BriefDescription": "Ict empty for this thread due to Icache Miss",
- "MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_ic_miss_cpi"
- },
- {
"BriefDescription": "Rate of IERAT reloads from L2",
"MetricExpr": "PM_IPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL",
"MetricName": "ipteg_from_l2_rate_percent"
@@ -1966,7 +1979,7 @@
},
{
"BriefDescription": "Completion stall because a different thread was using the completion pipe",
- "MetricExpr": "(PM_CMPLU_STALL_THRD - PM_CMPLU_STALL_EXCEPTION - PM_CMPLU_STALL_ANY_SYNC - PM_CMPLU_STALL_SYNC_PMU_INT - PM_CMPLU_STALL_SPEC_FINISH - PM_CMPLU_STALL_FLUSH_ANY_THREAD - PM_CMPLU_STALL_LSU_FLUSH_NEXT - PM_CMPLU_STALL_NESTED_TBEGIN - PM_CMPLU_STALL_NESTED_TEND - PM_CMPLU_STALL_MTFPSCR)/PM_RUN_INST_CMPL",
+ "MetricExpr": "thread_block_stall_cpi - exception_stall_cpi - any_sync_stall_cpi - sync_pmu_int_stall_cpi - spec_finish_stall_cpi - flush_any_thread_stall_cpi - lsu_flush_next_stall_cpi - nested_tbegin_stall_cpi - nested_tend_stall_cpi - mtfpscr_stall_cpi",
"MetricName": "other_thread_cmpl_stall"
},
{
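Note on the power9 metrics.json hunks above: the patch replaces raw PM_* event arithmetic with references to other metric names, which perf's metric expression resolver expands in place, so the refactored definitions compute the same values as before. A minimal sketch of that equivalence for dfu_other_stall_cpi, using made-up counter values (illustrative only, not measured data):

# Illustrative check that the refactored metric equals the original expression.
# The three counter values below are assumptions for the example.
PM_CMPLU_STALL_DFU = 12000
PM_CMPLU_STALL_DFLONG = 3000
PM_RUN_INST_CMPL = 50000

# Original form: raw events combined directly in one expression.
old_dfu_other_stall_cpi = (PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_DFLONG) / PM_RUN_INST_CMPL

# Refactored form: each sub-metric is defined once, then reused by name.
dfu_stall_cpi = PM_CMPLU_STALL_DFU / PM_RUN_INST_CMPL
dflong_stall_cpi = PM_CMPLU_STALL_DFLONG / PM_RUN_INST_CMPL
new_dfu_other_stall_cpi = dfu_stall_cpi - dflong_stall_cpi

assert abs(old_dfu_other_stall_cpi - new_dfu_other_stall_cpi) < 1e-12
print(new_dfu_other_stall_cpi)  # 0.18 with the values above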
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
new file mode 100644
index 000000000000..7a5d1bf543f8
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
@@ -0,0 +1,63 @@
+[
+ {
+ "MetricExpr": "(hv_24x7@PM_MCS01_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_RD_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT23\\,chip\\=?@)",
+ "MetricName": "Memory_RD_BW_Chip",
+ "MetricGroup": "Memory_BW",
+ "ScaleUnit": "1.6e-2MB",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricExpr": "(hv_24x7@PM_MCS01_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_WR_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT23\\,chip\\=?@ )",
+ "MetricName": "Memory_WR_BW_Chip",
+ "MetricGroup": "Memory_BW",
+ "ScaleUnit": "1.6e-2MB",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricExpr": "(hv_24x7@PM_PB_CYC\\,chip\\=?@ )",
+ "MetricName": "PowerBUS_Frequency",
+ "ScaleUnit": "2.5e-7GHz",
+ "AggregationMode": "PerChip"
+ },
+ {
+ "MetricExpr": "(hv_24x7@CPM_CS_32MHZ_CYC\\,domain\\=3\\,core\\=?@ )",
+ "MetricName": "CPM_CS_32MHZ_CYC",
+ "ScaleUnit": "1MHz",
+ "AggregationMode": "PerCore"
+ },
+ {
+ "MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@",
+ "MetricName" : "mcs01-read",
+ "MetricGroup" : "memory-bandwidth",
+ "ScaleUnit": "6.1e-5MB"
+ },
+ {
+ "MetricExpr" : "nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@",
+ "MetricName" : "mcs23-read",
+ "MetricGroup" : "memory-bandwidth",
+ "ScaleUnit": "6.1e-5MB"
+ },
+ {
+ "MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@",
+ "MetricName" : "mcs01-write",
+ "MetricGroup" : "memory-bandwidth",
+ "ScaleUnit": "6.1e-5MB"
+ },
+ {
+ "MetricExpr" : "nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT23@",
+ "MetricName" : "mcs23-write",
+ "MetricGroup" : "memory-bandwidth",
+ "ScaleUnit": "6.1e-5MB"
+ },
+ {
+ "MetricExpr" : "nest_powerbus0_imc@PM_PB_CYC@",
+ "MetricName" : "powerbus_freq",
+ "ScaleUnit": "1e-9GHz"
+ },
+ {
+ "MetricExpr" : "(nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT23@)",
+ "MetricName" : "Memory-bandwidth-MCS",
+ "MetricGroup" : "memory-bandwidth",
+ "ScaleUnit": "6.1e-5MB"
+ }
+]
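The new nest_metrics.json entries sum per-chip memory controller dispatch counters and rely on ScaleUnit to scale the computed value and label its unit. A rough sketch of how Memory_RD_BW_Chip would be derived; the counter readings and the direct multiplication by the ScaleUnit factor are assumptions for illustration, not taken from perf's implementation:

# Sketch: derive the Memory_RD_BW_Chip metric from its four counters.
# All counter values below are made up for the example.
counters = {
    "PM_MCS01_128B_RD_DISP_PORT01": 1_000_000,
    "PM_MCS01_128B_RD_DISP_PORT23": 900_000,
    "PM_MCS23_128B_RD_DISP_PORT01": 1_100_000,
    "PM_MCS23_128B_RD_DISP_PORT23": 950_000,
}

# MetricExpr sums the read-dispatch counters; ScaleUnit then scales the
# result by 1.6e-2 and reports it in MB.
raw = sum(counters.values())
memory_rd_bw_chip_mb = raw * 1.6e-2
print(f"Memory_RD_BW_Chip ~ {memory_rd_bw_chip_mb:.1f} MB")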
diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json
index 2df2e231e9ee..24c4ba2a9ae5 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json
@@ -380,7 +380,7 @@
{
"Unit": "CPU-M-CF",
"EventCode": "265",
- "EventName": "DFLT_CCERROR",
+ "EventName": "DFLT_CCFINISH",
"BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2",
"PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2"
},
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json
index a9943eeb8d6b..4ceb67a0db21 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json
@@ -19,5 +19,10 @@
"EventName": "bp_de_redirect",
"EventCode": "0x91",
"BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)."
+ },
+ {
+ "EventName": "bp_l1_tlb_fetch_hit",
+ "EventCode": "0x94",
+ "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB."
}
]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json
index 404d4c569c01..4ea7ec4f496e 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json
@@ -118,6 +118,11 @@
"UMask": "0x1"
},
{
+ "EventName": "l2_request_g1.all_no_prefetch",
+ "EventCode": "0x60",
+ "UMask": "0xf9"
+ },
+ {
"EventName": "l2_request_g2.group1",
"EventCode": "0x61",
"BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).",
@@ -244,12 +249,48 @@
"UMask": "0x1"
},
{
+ "EventName": "l2_cache_req_stat.ic_access_in_l2",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.",
+ "UMask": "0x7"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).",
+ "UMask": "0x9"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request hit in L2 and Data cache request hit in L2 (all types).",
+ "UMask": "0xf6"
+ },
+ {
"EventName": "l2_fill_pending.l2_fill_busy",
"EventCode": "0x6d",
"BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.",
"UMask": "0x1"
},
{
+ "EventName": "l2_pf_hit_l2",
+ "EventCode": "0x70",
+ "BriefDescription": "L2 prefetch hit in L2.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "l2_pf_miss_l2_hit_l3",
+ "EventCode": "0x71",
+ "BriefDescription": "L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "l2_pf_miss_l2_l3",
+ "EventCode": "0x72",
+ "BriefDescription": "L2 prefetcher misses in L3. All L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches.",
+ "UMask": "0xff"
+ },
+ {
"EventName": "l3_request_g1.caching_l3_cache_accesses",
"EventCode": "0x01",
"BriefDescription": "Caching: L3 cache accesses",
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/core.json b/tools/perf/pmu-events/arch/x86/amdzen1/core.json
index 7e1aa8273935..653b11b23399 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen1/core.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/core.json
@@ -61,7 +61,7 @@
{
"EventName": "ex_ret_brn_ind_misp",
"EventCode": "0xca",
- "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.",
+ "BriefDescription": "Retired Indirect Branch Instructions Mispredicted."
},
{
"EventName": "ex_ret_mmx_fp_instr.sse_instr",
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/data-fabric.json b/tools/perf/pmu-events/arch/x86/amdzen1/data-fabric.json
new file mode 100644
index 000000000000..40271df40015
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/data-fabric.json
@@ -0,0 +1,98 @@
+[
+ {
+ "EventName": "remote_outbound_data_controller_0",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 0",
+ "EventCode": "0x7c7",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "remote_outbound_data_controller_1",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 1",
+ "EventCode": "0x807",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "remote_outbound_data_controller_2",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 2",
+ "EventCode": "0x847",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "remote_outbound_data_controller_3",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 3",
+ "EventCode": "0x887",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_0",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x07",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_1",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x47",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_2",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x87",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_3",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0xc7",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_4",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x107",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_5",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x147",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_6",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x187",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_7",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x1c7",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json
new file mode 100644
index 000000000000..2cfe2d2f3bfd
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json
@@ -0,0 +1,178 @@
+[
+ {
+ "MetricName": "branch_misprediction_ratio",
+ "BriefDescription": "Execution-Time Branch Misprediction Ratio (Non-Speculative)",
+ "MetricExpr": "d_ratio(ex_ret_brn_misp, ex_ret_brn)",
+ "MetricGroup": "branch_prediction",
+ "ScaleUnit": "100%"
+ },
+ {
+ "EventName": "all_dc_accesses",
+ "EventCode": "0x29",
+ "BriefDescription": "All L1 Data Cache Accesses",
+ "UMask": "0x7"
+ },
+ {
+ "MetricName": "all_l2_cache_accesses",
+ "BriefDescription": "All L2 Cache Accesses",
+ "MetricExpr": "l2_request_g1.all_no_prefetch + l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l2_cache_accesses_from_ic_misses",
+ "EventCode": "0x60",
+ "BriefDescription": "L2 Cache Accesses from L1 Instruction Cache Misses (including prefetch)",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_cache_accesses_from_dc_misses",
+ "EventCode": "0x60",
+ "BriefDescription": "L2 Cache Accesses from L1 Data Cache Misses (including prefetch)",
+ "UMask": "0xc8"
+ },
+ {
+ "MetricName": "l2_cache_accesses_from_l2_hwpf",
+ "BriefDescription": "L2 Cache Accesses from L2 HWPF",
+ "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "MetricName": "all_l2_cache_misses",
+ "BriefDescription": "All L2 Cache Misses",
+ "MetricExpr": "l2_cache_req_stat.ic_dc_miss_in_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l2_cache_misses_from_ic_miss",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Misses from L1 Instruction Cache Misses",
+ "UMask": "0x01"
+ },
+ {
+ "EventName": "l2_cache_misses_from_dc_misses",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Misses from L1 Data Cache Misses",
+ "UMask": "0x08"
+ },
+ {
+ "MetricName": "l2_cache_misses_from_l2_hwpf",
+ "BriefDescription": "L2 Cache Misses from L2 HWPF",
+ "MetricExpr": "l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "MetricName": "all_l2_cache_hits",
+ "BriefDescription": "All L2 Cache Hits",
+ "MetricExpr": "l2_cache_req_stat.ic_dc_hit_in_l2 + l2_pf_hit_l2",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l2_cache_hits_from_ic_misses",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Hits from L1 Instruction Cache Misses",
+ "UMask": "0x06"
+ },
+ {
+ "EventName": "l2_cache_hits_from_dc_misses",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Hits from L1 Data Cache Misses",
+ "UMask": "0x70"
+ },
+ {
+ "MetricName": "l2_cache_hits_from_l2_hwpf",
+ "BriefDescription": "L2 Cache Hits from L2 HWPF",
+ "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l3_accesses",
+ "EventCode": "0x04",
+ "BriefDescription": "L3 Accesses",
+ "UMask": "0xff",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_misses",
+ "EventCode": "0x04",
+ "BriefDescription": "L3 Misses (includes Chg2X)",
+ "UMask": "0x01",
+ "Unit": "L3PMC"
+ },
+ {
+ "MetricName": "l3_read_miss_latency",
+ "BriefDescription": "Average L3 Read Miss Latency (in core clocks)",
+ "MetricExpr": "(xi_sys_fill_latency * 16) / xi_ccx_sdp_req1.all_l3_miss_req_typs",
+ "MetricGroup": "l3_cache",
+ "ScaleUnit": "1core clocks"
+ },
+ {
+ "MetricName": "ic_fetch_miss_ratio",
+ "BriefDescription": "L1 Instruction Cache (32B) Fetch Miss Ratio",
+ "MetricExpr": "d_ratio(l2_cache_req_stat.ic_access_in_l2, bp_l1_tlb_fetch_hit + bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_miss)",
+ "MetricGroup": "l2_cache",
+ "ScaleUnit": "100%"
+ },
+ {
+ "MetricName": "l1_itlb_misses",
+ "BriefDescription": "L1 ITLB Misses",
+ "MetricExpr": "bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_miss",
+ "MetricGroup": "tlb"
+ },
+ {
+ "EventName": "l2_itlb_misses",
+ "EventCode": "0x85",
+ "BriefDescription": "L2 ITLB Misses & Instruction page walks",
+ "UMask": "0x07"
+ },
+ {
+ "EventName": "l1_dtlb_misses",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Misses",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "l2_dtlb_misses",
+ "EventCode": "0x45",
+ "BriefDescription": "L2 DTLB Misses & Data page walks",
+ "UMask": "0xf0"
+ },
+ {
+ "EventName": "all_tlbs_flushed",
+ "EventCode": "0x78",
+ "BriefDescription": "All TLBs Flushed",
+ "UMask": "0xdf"
+ },
+ {
+ "EventName": "uops_dispatched",
+ "EventCode": "0xaa",
+ "BriefDescription": "Micro-ops Dispatched",
+ "UMask": "0x03"
+ },
+ {
+ "EventName": "sse_avx_stalls",
+ "EventCode": "0x0e",
+ "BriefDescription": "Mixed SSE/AVX Stalls",
+ "UMask": "0x0e"
+ },
+ {
+ "EventName": "uops_retired",
+ "EventCode": "0xc1",
+ "BriefDescription": "Micro-ops Retired"
+ },
+ {
+ "MetricName": "all_remote_links_outbound",
+ "BriefDescription": "Approximate: Outbound data bytes for all Remote Links for a node (die)",
+ "MetricExpr": "remote_outbound_data_controller_0 + remote_outbound_data_controller_1 + remote_outbound_data_controller_2 + remote_outbound_data_controller_3",
+ "MetricGroup": "data_fabric",
+ "PerPkg": "1",
+ "ScaleUnit": "3e-5MiB"
+ },
+ {
+ "MetricName": "nps1_die_to_dram",
+ "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)",
+ "MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7",
+ "MetricGroup": "data_fabric",
+ "PerPkg": "1",
+ "ScaleUnit": "6.1e-5MiB"
+ }
+]
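Several of the recommended amdzen1 metrics above (branch_misprediction_ratio, ic_fetch_miss_ratio) are built on the d_ratio() helper from perf's metric expression language, which divides two counts and falls back to 0 when the denominator is 0. A minimal sketch with assumed counter values:

# Minimal model of d_ratio(): plain division, but 0 on a zero denominator,
# so the metric stays well-defined when the counters are idle.
def d_ratio(num, den):
    return num / den if den else 0.0

ex_ret_brn_misp = 4_200   # assumed retired mispredicted branches
ex_ret_brn = 310_000      # assumed retired branches

# branch_misprediction_ratio, reported as a percentage via ScaleUnit "100%".
ratio = d_ratio(ex_ret_brn_misp, ex_ret_brn)
print(f"branch_misprediction_ratio ~ {ratio * 100:.2f}%")  # ~1.35%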
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json
index 1c60bfa0f00b..f61b982f83ca 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json
@@ -48,6 +48,11 @@
"UMask": "0x1"
},
{
+ "EventName": "l2_request_g1.all_no_prefetch",
+ "EventCode": "0x60",
+ "UMask": "0xf9"
+ },
+ {
"EventName": "l2_request_g2.group1",
"EventCode": "0x61",
"BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).",
@@ -174,6 +179,24 @@
"UMask": "0x1"
},
{
+ "EventName": "l2_cache_req_stat.ic_access_in_l2",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.",
+ "UMask": "0x7"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).",
+ "UMask": "0x9"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request hit in L2 and Data cache request hit in L2 (all types).",
+ "UMask": "0xf6"
+ },
+ {
"EventName": "l2_fill_pending.l2_fill_busy",
"EventCode": "0x6d",
"BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.",
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/core.json b/tools/perf/pmu-events/arch/x86/amdzen2/core.json
index de89e5a44ff1..4b75183da94a 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen2/core.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/core.json
@@ -125,6 +125,6 @@
{
"EventName": "ex_ret_fus_brnch_inst",
"EventCode": "0x1d0",
- "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8.",
+ "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8."
}
]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/data-fabric.json b/tools/perf/pmu-events/arch/x86/amdzen2/data-fabric.json
new file mode 100644
index 000000000000..40271df40015
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/data-fabric.json
@@ -0,0 +1,98 @@
+[
+ {
+ "EventName": "remote_outbound_data_controller_0",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 0",
+ "EventCode": "0x7c7",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "remote_outbound_data_controller_1",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 1",
+ "EventCode": "0x807",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "remote_outbound_data_controller_2",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 2",
+ "EventCode": "0x847",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "remote_outbound_data_controller_3",
+ "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 3",
+ "EventCode": "0x887",
+ "UMask": "0x02",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_0",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x07",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_1",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x47",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_2",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x87",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_3",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0xc7",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_4",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x107",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_5",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x147",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_6",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x187",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ },
+ {
+ "EventName": "dram_channel_data_controller_7",
+ "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0",
+ "EventCode": "0x1c7",
+ "UMask": "0x38",
+ "PerPkg": "1",
+ "Unit": "DFPMC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json
new file mode 100644
index 000000000000..2ef91e25e661
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json
@@ -0,0 +1,178 @@
+[
+ {
+ "MetricName": "branch_misprediction_ratio",
+ "BriefDescription": "Execution-Time Branch Misprediction Ratio (Non-Speculative)",
+ "MetricExpr": "d_ratio(ex_ret_brn_misp, ex_ret_brn)",
+ "MetricGroup": "branch_prediction",
+ "ScaleUnit": "100%"
+ },
+ {
+ "EventName": "all_dc_accesses",
+ "EventCode": "0x29",
+ "BriefDescription": "All L1 Data Cache Accesses",
+ "UMask": "0x7"
+ },
+ {
+ "MetricName": "all_l2_cache_accesses",
+ "BriefDescription": "All L2 Cache Accesses",
+ "MetricExpr": "l2_request_g1.all_no_prefetch + l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l2_cache_accesses_from_ic_misses",
+ "EventCode": "0x60",
+ "BriefDescription": "L2 Cache Accesses from L1 Instruction Cache Misses (including prefetch)",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_cache_accesses_from_dc_misses",
+ "EventCode": "0x60",
+ "BriefDescription": "L2 Cache Accesses from L1 Data Cache Misses (including prefetch)",
+ "UMask": "0xc8"
+ },
+ {
+ "MetricName": "l2_cache_accesses_from_l2_hwpf",
+ "BriefDescription": "L2 Cache Accesses from L2 HWPF",
+ "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "MetricName": "all_l2_cache_misses",
+ "BriefDescription": "All L2 Cache Misses",
+ "MetricExpr": "l2_cache_req_stat.ic_dc_miss_in_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l2_cache_misses_from_ic_miss",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Misses from L1 Instruction Cache Misses",
+ "UMask": "0x01"
+ },
+ {
+ "EventName": "l2_cache_misses_from_dc_misses",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Misses from L1 Data Cache Misses",
+ "UMask": "0x08"
+ },
+ {
+ "MetricName": "l2_cache_misses_from_l2_hwpf",
+ "BriefDescription": "L2 Cache Misses from L2 HWPF",
+ "MetricExpr": "l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "MetricName": "all_l2_cache_hits",
+ "BriefDescription": "All L2 Cache Hits",
+ "MetricExpr": "l2_cache_req_stat.ic_dc_hit_in_l2 + l2_pf_hit_l2",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l2_cache_hits_from_ic_misses",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Hits from L1 Instruction Cache Misses",
+ "UMask": "0x06"
+ },
+ {
+ "EventName": "l2_cache_hits_from_dc_misses",
+ "EventCode": "0x64",
+ "BriefDescription": "L2 Cache Hits from L1 Data Cache Misses",
+ "UMask": "0x70"
+ },
+ {
+ "MetricName": "l2_cache_hits_from_l2_hwpf",
+ "BriefDescription": "L2 Cache Hits from L2 HWPF",
+ "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
+ "MetricGroup": "l2_cache"
+ },
+ {
+ "EventName": "l3_accesses",
+ "EventCode": "0x04",
+ "BriefDescription": "L3 Accesses",
+ "UMask": "0xff",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_misses",
+ "EventCode": "0x04",
+ "BriefDescription": "L3 Misses (includes Chg2X)",
+ "UMask": "0x01",
+ "Unit": "L3PMC"
+ },
+ {
+ "MetricName": "l3_read_miss_latency",
+ "BriefDescription": "Average L3 Read Miss Latency (in core clocks)",
+ "MetricExpr": "(xi_sys_fill_latency * 16) / xi_ccx_sdp_req1.all_l3_miss_req_typs",
+ "MetricGroup": "l3_cache",
+ "ScaleUnit": "1core clocks"
+ },
+ {
+ "MetricName": "ic_fetch_miss_ratio",
+ "BriefDescription": "L1 Instruction Cache (32B) Fetch Miss Ratio",
+ "MetricExpr": "d_ratio(l2_cache_req_stat.ic_access_in_l2, bp_l1_tlb_fetch_hit + bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_tlb_miss)",
+ "MetricGroup": "l2_cache",
+ "ScaleUnit": "100%"
+ },
+ {
+ "MetricName": "l1_itlb_misses",
+ "BriefDescription": "L1 ITLB Misses",
+ "MetricExpr": "bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_tlb_miss",
+ "MetricGroup": "tlb"
+ },
+ {
+ "EventName": "l2_itlb_misses",
+ "EventCode": "0x85",
+ "BriefDescription": "L2 ITLB Misses & Instruction page walks",
+ "UMask": "0x07"
+ },
+ {
+ "EventName": "l1_dtlb_misses",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Misses",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "l2_dtlb_misses",
+ "EventCode": "0x45",
+ "BriefDescription": "L2 DTLB Misses & Data page walks",
+ "UMask": "0xf0"
+ },
+ {
+ "EventName": "all_tlbs_flushed",
+ "EventCode": "0x78",
+ "BriefDescription": "All TLBs Flushed",
+ "UMask": "0xdf"
+ },
+ {
+ "EventName": "uops_dispatched",
+ "EventCode": "0xaa",
+ "BriefDescription": "Micro-ops Dispatched",
+ "UMask": "0x03"
+ },
+ {
+ "EventName": "sse_avx_stalls",
+ "EventCode": "0x0e",
+ "BriefDescription": "Mixed SSE/AVX Stalls",
+ "UMask": "0x0e"
+ },
+ {
+ "EventName": "uops_retired",
+ "EventCode": "0xc1",
+ "BriefDescription": "Micro-ops Retired"
+ },
+ {
+ "MetricName": "all_remote_links_outbound",
+ "BriefDescription": "Approximate: Outbound data bytes for all Remote Links for a node (die)",
+ "MetricExpr": "remote_outbound_data_controller_0 + remote_outbound_data_controller_1 + remote_outbound_data_controller_2 + remote_outbound_data_controller_3",
+ "MetricGroup": "data_fabric",
+ "PerPkg": "1",
+ "ScaleUnit": "3e-5MiB"
+ },
+ {
+ "MetricName": "nps1_die_to_dram",
+ "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)",
+ "MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7",
+ "MetricGroup": "data_fabric",
+ "PerPkg": "1",
+ "ScaleUnit": "6.1e-5MiB"
+ }
+]
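The data-fabric metrics in both recommended.json files carry ScaleUnit factors that appear to come straight from the payload sizes named in the event descriptions: 32 bytes per remote-link event and 64 bytes per DRAM-channel request, converted to MiB. A small worked check; the per-channel count is an assumption:

# ScaleUnit factors versus the payload sizes in the event descriptions.
print(32 / 2**20)   # 3.0517578125e-05 -> rounded to "3e-5MiB"
print(64 / 2**20)   # 6.103515625e-05  -> rounded to "6.1e-5MiB"

# Worked example with an assumed count: 10 million 64B requests on each of
# the 8 DRAM channel controllers of an NPS1 die.
dram_counts = [10_000_000] * 8
nps1_die_to_dram_mib = sum(dram_counts) * 6.1e-5
print(f"nps1_die_to_dram ~ {nps1_die_to_dram_mib:.0f} MiB")  # ~4880 MiB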
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json b/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json
index 3fba310a5012..3c0f5837480f 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json
@@ -8064,6 +8064,20 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "Deprecated": "1",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1000020004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
@@ -9256,20 +9270,6 @@
"UMask": "0x1"
},
{
- "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
- "Counter": "0,1,2,3",
- "CounterHTOff": "0,1,2,3",
- "Deprecated": "1",
- "EventCode": "0xB7, 0xBB",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x1000020004",
- "Offcore": "1",
- "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
- "SampleAfterValue": "100003",
- "UMask": "0x1"
- },
- {
"BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index 7fde0d2943cd..de3193552277 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -4,14 +4,14 @@
"MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)",
"MetricGroup": "TopdownL1",
"MetricName": "Frontend_Bound",
- "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Frontend_Bound_SMT",
- "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
@@ -22,13 +22,14 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Bad_Speculation_SMT",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU."
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
+ "MetricConstraint": "NO_NMI_WATCHDOG",
"MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )",
"MetricGroup": "TopdownL1",
"MetricName": "Backend_Bound",
@@ -36,7 +37,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )",
+ "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Backend_Bound_SMT",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
@@ -50,7 +51,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Retiring_SMT",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU."
@@ -58,7 +59,7 @@
{
"BriefDescription": "Instructions Per Cycle (per Logical Processor)",
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "TopDownL1",
+ "MetricGroup": "Summary",
"MetricName": "IPC"
},
{
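
IPC here is per logical processor (thread clocks in the denominator); the CPI entry that follows is simply its reciprocal, and the CoreIPC variants further down swap in per-core clocks. With illustrative counts:

    # IPC/CPI from the two counters named in the MetricExpr strings.
    inst_retired = 4_000_000_000   # INST_RETIRED.ANY (illustrative)
    thread_clks  = 2_000_000_000   # CPU_CLK_UNHALTED.THREAD (illustrative)
    ipc = inst_retired / thread_clks   # 2.0
    cpi = 1 / ipc                      # 0.5, the CPI metric below
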
@@ -74,24 +75,6 @@
"MetricName": "IpTB"
},
{
- "BriefDescription": "Branch instructions per taken branch. ",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;PGO",
- "MetricName": "BpTB"
- },
- {
- "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions",
- "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )",
- "MetricGroup": "PGO;IcMiss",
- "MetricName": "IFetch_Line_Utilization"
- },
- {
- "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
- "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
- "MetricGroup": "DSB;Fetch_BW",
- "MetricName": "DSB_Coverage"
- },
- {
"BriefDescription": "Cycles Per Instruction (per Logical Processor)",
"MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
"MetricGroup": "Pipeline;Summary",
@@ -104,86 +87,110 @@
"MetricName": "CLKS"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core)",
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
"MetricExpr": "4 * cycles",
"MetricGroup": "TopDownL1",
"MetricName": "SLOTS"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core)",
- "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
"MetricGroup": "TopDownL1_SMT",
"MetricName": "SLOTS_SMT"
},
{
- "BriefDescription": "Instructions per Load (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
"MetricGroup": "Instruction_Type",
- "MetricName": "IpL"
+ "MetricName": "IpLoad"
},
{
- "BriefDescription": "Instructions per Store (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
"MetricGroup": "Instruction_Type",
- "MetricName": "IpS"
+ "MetricName": "IpStore"
},
{
- "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Instruction_Type",
- "MetricName": "IpB"
+ "MetricName": "IpBranch"
},
{
- "BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches",
"MetricName": "IpCall"
},
{
+ "BriefDescription": "Branch instructions per taken branch. ",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;PGO",
+ "MetricName": "BpTkBranch"
+ },
+ {
+ "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
+ "MetricGroup": "FLOPS;FP_Arith;Instruction_Type",
+ "MetricName": "IpFLOP"
+ },
+ {
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
- "MetricGroup": "Summary",
+ "MetricGroup": "Summary;TopDownL1",
"MetricName": "Instructions"
},
{
+ "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
+ "MetricExpr": "LSD.UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
+ "MetricGroup": "LSD",
+ "MetricName": "LSD_Coverage"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
+ "MetricGroup": "DSB;Fetch_BW",
+ "MetricName": "DSB_Coverage"
+ },
+ {
"BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / cycles",
- "MetricGroup": "SMT",
+ "MetricGroup": "SMT;TopDownL1",
"MetricName": "CoreIPC"
},
{
"BriefDescription": "Instructions Per Cycle (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
- "MetricGroup": "SMT",
+ "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+ "MetricGroup": "SMT;TopDownL1",
"MetricName": "CoreIPC_SMT"
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / cycles",
+ "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / cycles",
"MetricGroup": "FLOPS",
"MetricName": "FLOPc"
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
"MetricGroup": "FLOPS_SMT",
"MetricName": "FLOPc_SMT"
},
{
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
- "MetricGroup": "Pipeline",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )",
+ "MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP"
},
{
"BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
- "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "BrMispredicts",
"MetricName": "Branch_Misprediction_Cost"
},
{
"BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
- "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "BrMispredicts_SMT",
"MetricName": "Branch_Misprediction_Cost_SMT"
},
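
The LSD_Coverage and DSB_Coverage entries added above share one denominator, the total uops delivered by all four front-end sources, so the DSB, LSD, MITE and microcode-sequencer shares always sum to one. A small sketch with assumed counts:

    # Uop-delivery shares over the four IDQ sources named above.
    idq = {"IDQ.DSB_UOPS": 3_000_000_000, "LSD.UOPS": 1_000_000_000,
           "IDQ.MITE_UOPS": 900_000_000, "IDQ.MS_UOPS": 100_000_000}  # illustrative
    total = sum(idq.values())
    dsb_coverage = idq["IDQ.DSB_UOPS"] / total  # 0.6, decoded-icache share
    lsd_coverage = idq["LSD.UOPS"] / total      # 0.2, loop-stream-detector share
    assert abs(sum(v / total for v in idq.values()) - 1.0) < 1e-9
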
@@ -213,14 +220,14 @@
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricConstraint": "NO_NMI_WATCHDOG",
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
"MetricGroup": "TLB",
- "MetricName": "Page_Walks_Utilization",
- "MetricConstraint": "NO_NMI_WATCHDOG"
+ "MetricName": "Page_Walks_Utilization"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )",
+ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
"MetricGroup": "TLB_SMT",
"MetricName": "Page_Walks_Utilization_SMT"
},
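
Page_Walks_Utilization sums the outstanding-walk occupancy of the four walk sources and normalizes by 2 * clocks, i.e. the expression treats two concurrent walks per cycle as full utilization. The NO_NMI_WATCHDOG constraint (reordered to the top of the entry above) tells perf not to schedule the events as one group when the NMI watchdog is holding a counter, which would otherwise make the group fail to count. A sketch with assumed values:

    # Walker occupancy normalized to two concurrent walks per cycle.
    walk_pending = {"ITLB_MISSES.WALK_PENDING": 40_000_000,       # illustrative
                    "DTLB_LOAD_MISSES.WALK_PENDING": 120_000_000,
                    "DTLB_STORE_MISSES.WALK_PENDING": 30_000_000,
                    "EPT.WALK_PENDING": 10_000_000}
    cycles = 2_000_000_000
    page_walks_utilization = sum(walk_pending.values()) / (2 * cycles)  # 0.05
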
@@ -245,7 +252,7 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
- "MetricGroup": "Memory_BW",
+ "MetricGroup": "Memory_BW;Offcore",
"MetricName": "L3_Cache_Access_BW"
},
{
@@ -263,7 +270,7 @@
{
"BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY",
- "MetricGroup": "Cache_Misses",
+ "MetricGroup": "Cache_Misses;Offcore",
"MetricName": "L2MPKI_All"
},
{
@@ -298,7 +305,7 @@
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / 1000000000 ) / duration_time",
+ "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
"MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs"
},
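
The FP metrics (the new IpFLOP above, FLOPc, and GFLOPs here) all weight each FP_ARITH_INST_RETIRED umask by the operations a single instruction performs: scalar counts once, and each packed flavor counts its element width, up to 16 for 512B packed single. A worked sketch with assumed counts:

    # FLOPs = per-instruction element counts times retired-instruction counts.
    fp = {"SCALAR_SINGLE": 1e9, "SCALAR_DOUBLE": 1e9,            # illustrative
          "128B_PACKED_DOUBLE": 2e8, "128B_PACKED_SINGLE": 2e8,
          "256B_PACKED_DOUBLE": 1e8, "256B_PACKED_SINGLE": 1e8,
          "512B_PACKED_DOUBLE": 5e7, "512B_PACKED_SINGLE": 5e7}
    flops = (1 * (fp["SCALAR_SINGLE"] + fp["SCALAR_DOUBLE"])
             + 2 * fp["128B_PACKED_DOUBLE"]
             + 4 * (fp["128B_PACKED_SINGLE"] + fp["256B_PACKED_DOUBLE"])
             + 8 * (fp["256B_PACKED_SINGLE"] + fp["512B_PACKED_DOUBLE"])
             + 16 * fp["512B_PACKED_SINGLE"])                    # 5.6e9 ops
    duration_time = 1.0                                          # seconds
    gflops = flops / 1_000_000_000 / duration_time               # 5.6
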
@@ -310,62 +317,74 @@
},
{
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
- "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 )",
"MetricGroup": "SMT;Summary",
"MetricName": "SMT_2T_Utilization"
},
{
- "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Summary",
+ "MetricGroup": "OS",
"MetricName": "Kernel_Utilization"
},
{
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
"MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
- "MetricGroup": "Memory_BW",
+ "MetricGroup": "Memory_BW;SoC",
"MetricName": "DRAM_BW_Use"
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
- "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )",
- "MetricGroup": "Memory_Lat",
- "MetricName": "DRAM_Read_Latency"
+ "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )",
+ "MetricGroup": "Memory_Lat;SoC",
+ "MetricName": "MEM_Read_Latency"
},
{
"BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
- "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@",
- "MetricGroup": "Memory_BW",
- "MetricName": "DRAM_Parallel_Reads"
+ "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@",
+ "MetricGroup": "Memory_BW;SoC",
+ "MetricName": "MEM_Parallel_Reads"
},
{
"BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches",
- "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\\\\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ ) if 1 if 0 == 1 else 0 else 0",
- "MetricGroup": "Memory_Lat",
+ "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ )",
+ "MetricGroup": "Memory_Lat;SoC;Server",
"MetricName": "MEM_PMM_Read_Latency"
},
{
"BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
- "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0",
- "MetricGroup": "Memory_BW",
+ "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time )",
+ "MetricGroup": "Memory_BW;SoC;Server",
"MetricName": "PMM_Read_BW"
},
{
"BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
- "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0",
- "MetricGroup": "Memory_BW",
+ "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time )",
+ "MetricGroup": "Memory_BW;SoC;Server",
"MetricName": "PMM_Write_BW"
},
{
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
+ "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time",
+ "MetricGroup": "IO_BW;SoC;Server",
+ "MetricName": "IO_Write_BW"
+ },
+ {
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
+ "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time",
+ "MetricGroup": "IO_BW;SoC;Server",
+ "MetricName": "IO_Read_BW"
+ },
+ {
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "cha_0@event\\=0x0@",
- "MetricGroup": "",
+ "MetricGroup": "SoC",
"MetricName": "Socket_CLKS"
},
{
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions. )",
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / ( BR_INST_RETIRED.FAR_BRANCH / 2 )",
- "MetricGroup": "",
+ "MetricGroup": "Branches;OS",
"MetricName": "IpFarBranch"
},
{
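
Every MetricExpr in this file is a plain arithmetic string over event names; perf parses it and substitutes the measured counts when a metric is requested by MetricName (perf stat -M IPC, for instance). A rough stand-in evaluator, assuming a counts table that covers every event in the expression; the real parser additionally handles the @...@ PMU syntax, duration_time and the other builtins seen above:

    import re

    def eval_metric(expr, counts):
        # Replace each dotted EVENT.NAME token with its measured value,
        # then evaluate the remaining arithmetic. Illustrative only.
        out = re.sub(r"[A-Za-z_][A-Za-z0-9_.]*",
                     lambda m: repr(counts[m.group(0)]), expr)
        return eval(out)

    counts = {"INST_RETIRED.ANY": 4e9, "CPU_CLK_UNHALTED.THREAD": 2e9}
    print(eval_metric("INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", counts))  # 2.0
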
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json
index 3553472ad266..0716b2e3ff75 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json
@@ -247,6 +247,30 @@
"UMask": "0x10"
},
{
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xAB",
+ "EventName": "DSB2MITE_SWITCHES.COUNT",
+ "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x400106",
+ "PEBS": "2",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
@@ -360,6 +384,16 @@
"UMask": "0x24"
},
{
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xE6",
+ "EventName": "BACLEARS.ANY",
+ "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json
index cc66a51c6a7b..0c07cb4fbf58 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json
@@ -1,6 +1,6 @@
[
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -13,7 +13,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -26,7 +26,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -81,7 +81,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -177,7 +177,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -218,7 +218,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -245,7 +245,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -286,7 +286,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -313,7 +313,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -350,7 +350,7 @@
"UMask": "0x8"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -405,7 +405,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -432,7 +432,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -473,7 +473,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -513,7 +513,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -540,7 +540,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -678,7 +678,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -705,7 +705,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -779,7 +779,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -792,7 +792,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -847,7 +847,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -874,7 +874,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -950,7 +950,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -977,7 +977,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1045,7 +1045,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1058,7 +1058,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1071,7 +1071,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1084,7 +1084,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1097,7 +1097,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1258,7 +1258,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1285,7 +1285,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1298,7 +1298,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1339,7 +1339,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1380,7 +1380,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1393,7 +1393,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1406,7 +1406,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1419,7 +1419,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1432,7 +1432,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1445,7 +1445,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1513,7 +1513,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1540,7 +1540,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1619,7 +1619,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1632,7 +1632,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1645,7 +1645,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1672,7 +1672,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1699,7 +1699,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1712,7 +1712,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1725,7 +1725,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1738,7 +1738,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1765,7 +1765,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.SNOOP_MISS OCR.ALL_READS.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1806,7 +1806,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1833,7 +1833,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1874,7 +1874,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1887,7 +1887,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1942,7 +1942,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1969,7 +1969,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1982,7 +1982,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1995,7 +1995,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2008,7 +2008,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2021,7 +2021,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2048,7 +2048,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2089,7 +2089,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2144,7 +2144,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2185,7 +2185,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2212,7 +2212,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2225,7 +2225,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2241,7 +2241,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
"MSRIndex": "0x3F6",
"MSRValue": "0x80",
@@ -2290,7 +2291,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2345,7 +2346,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2386,7 +2387,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2399,7 +2400,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2426,7 +2427,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2439,7 +2440,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2452,7 +2453,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2493,7 +2494,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2520,7 +2521,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2547,7 +2548,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2560,7 +2561,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2586,7 +2587,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2599,7 +2600,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2612,7 +2613,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2653,7 +2654,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2694,7 +2695,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2707,7 +2708,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2730,7 +2731,7 @@
"UMask": "0x40"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2743,7 +2744,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2756,7 +2757,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2769,7 +2770,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2782,7 +2783,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2809,7 +2810,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2822,7 +2823,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2835,7 +2836,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2848,7 +2849,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2875,7 +2876,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2902,7 +2903,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2915,7 +2916,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2928,7 +2929,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2941,7 +2942,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2954,7 +2955,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2981,7 +2982,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3050,7 +3051,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3104,7 +3105,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3117,7 +3118,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3195,7 +3196,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3222,7 +3223,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3235,7 +3236,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3290,7 +3291,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3373,7 +3374,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3413,7 +3414,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3426,7 +3427,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3481,7 +3482,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3522,7 +3523,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3535,7 +3536,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3548,7 +3549,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3561,7 +3562,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3588,7 +3589,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3601,7 +3602,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3614,7 +3615,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3778,7 +3779,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3791,7 +3792,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3804,7 +3805,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3859,7 +3860,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3886,7 +3887,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3899,7 +3900,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3925,7 +3926,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3938,7 +3939,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3979,7 +3980,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4044,7 +4045,7 @@
"UMask": "0x20"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4071,7 +4072,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4111,7 +4112,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4138,7 +4139,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4151,7 +4152,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4164,7 +4165,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4205,7 +4206,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4243,7 +4244,7 @@
"UMask": "0x4"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4284,7 +4285,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4297,7 +4298,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4310,7 +4311,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4323,7 +4324,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4336,7 +4337,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4363,7 +4364,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4390,7 +4391,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4503,7 +4504,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4529,7 +4530,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4556,7 +4557,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4569,7 +4570,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4582,7 +4583,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4623,7 +4624,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4636,7 +4637,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4649,7 +4650,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4662,7 +4663,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4685,7 +4686,7 @@
"UMask": "0x80"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4698,7 +4699,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4711,7 +4712,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4724,7 +4725,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4765,7 +4766,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4802,7 +4803,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4828,7 +4829,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4841,7 +4842,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4909,7 +4910,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4950,7 +4951,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4977,7 +4978,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4990,21 +4991,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
- "Counter": "0,1,2,3",
- "CounterHTOff": "0,1,2,3",
- "Deprecated": "1",
- "EventCode": "0xB7, 0xBB",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0110000001",
- "Offcore": "1",
- "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
- "SampleAfterValue": "100003",
- "UMask": "0x1"
- },
- {
- "BriefDescription": "ALL_RFO & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5017,7 +5004,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5030,7 +5017,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5043,7 +5030,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5070,7 +5057,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5097,7 +5084,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5152,7 +5139,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5188,7 +5175,7 @@
"UMask": "0x10"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5201,7 +5188,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5214,7 +5201,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5227,7 +5214,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5240,7 +5227,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5253,7 +5240,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5266,7 +5253,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5335,7 +5322,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5390,7 +5377,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5403,7 +5390,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5419,7 +5406,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
"MSRIndex": "0x3F6",
"MSRValue": "0x20",
@@ -5444,7 +5432,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5457,7 +5445,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5470,7 +5458,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5510,7 +5498,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5523,7 +5511,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5536,7 +5524,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5549,7 +5537,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5585,7 +5573,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5598,7 +5586,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5667,7 +5655,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5708,7 +5696,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5747,7 +5735,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5760,7 +5748,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5829,7 +5817,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5870,7 +5858,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5925,7 +5913,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5938,7 +5926,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5951,7 +5939,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5964,7 +5952,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5977,7 +5965,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6004,7 +5992,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6017,7 +6005,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6030,7 +6018,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6099,7 +6087,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6181,7 +6169,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6222,7 +6210,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6235,7 +6223,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6262,7 +6250,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6359,7 +6347,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6400,7 +6388,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6413,7 +6401,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6426,7 +6414,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6439,7 +6427,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6452,7 +6440,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6496,7 +6484,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
"MSRIndex": "0x3F6",
"MSRValue": "0x100",
@@ -6592,7 +6581,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
"MSRIndex": "0x3F6",
"MSRValue": "0x10",
@@ -6640,7 +6630,7 @@
"UMask": "0x20"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6694,7 +6684,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6707,7 +6697,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6763,7 +6753,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6776,7 +6766,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6789,7 +6779,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6816,7 +6806,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6843,7 +6833,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6873,7 +6863,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
"MSRIndex": "0x3F6",
"MSRValue": "0x200",
@@ -6893,7 +6884,7 @@
"UMask": "0x20"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6962,7 +6953,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6975,7 +6966,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7016,7 +7007,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7029,7 +7020,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7070,7 +7061,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7083,7 +7074,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7096,7 +7087,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7123,7 +7114,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7136,7 +7127,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7149,7 +7140,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7204,7 +7195,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7217,7 +7208,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7243,7 +7234,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
"MSRIndex": "0x3F6",
"MSRValue": "0x40",
@@ -7254,7 +7246,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7295,7 +7287,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7336,7 +7328,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7405,7 +7397,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7418,7 +7410,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7431,7 +7423,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7444,7 +7436,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7457,7 +7449,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7470,7 +7462,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7483,7 +7475,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7524,7 +7516,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7551,7 +7543,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7564,7 +7556,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7577,7 +7569,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7604,7 +7596,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7617,7 +7609,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7658,7 +7650,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7681,7 +7673,7 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7708,7 +7700,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7735,7 +7727,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7762,7 +7754,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7789,7 +7781,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7802,7 +7794,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7815,7 +7807,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7828,7 +7820,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7841,7 +7833,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7854,7 +7846,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7895,7 +7887,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7922,7 +7914,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7949,7 +7941,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7976,19 +7968,6 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
- "Counter": "0,1,2,3",
- "CounterHTOff": "0,1,2,3",
- "EventCode": "0xB7, 0xBB",
- "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0810000001",
- "Offcore": "1",
- "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
- "SampleAfterValue": "100003",
- "UMask": "0x1"
- },
- {
"BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
@@ -8003,7 +7982,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8026,7 +8005,7 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8039,7 +8018,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8070,7 +8049,7 @@
"UMask": "0x10"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8083,7 +8062,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8236,7 +8215,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8249,7 +8228,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8262,7 +8241,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8289,7 +8268,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8344,7 +8323,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8357,7 +8336,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8426,7 +8405,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8439,7 +8418,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8452,7 +8431,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8520,7 +8499,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8630,7 +8609,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8713,7 +8692,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8726,7 +8705,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8766,7 +8745,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8803,7 +8782,7 @@
"UMask": "0x10"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8816,7 +8795,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8857,7 +8836,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8968,7 +8947,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8981,7 +8960,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8994,7 +8973,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9021,7 +9000,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9047,7 +9026,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9060,7 +9039,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9073,7 +9052,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9169,6 +9148,19 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0810000001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
@@ -9261,7 +9253,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9316,7 +9308,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9329,7 +9321,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9356,7 +9348,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9383,7 +9375,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9424,7 +9416,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9465,7 +9457,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9506,7 +9498,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9533,7 +9525,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9546,7 +9538,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9601,7 +9593,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9614,7 +9606,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9641,7 +9633,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9678,7 +9670,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9705,7 +9697,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9718,7 +9710,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9745,7 +9737,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9772,7 +9764,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9785,7 +9777,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9812,7 +9804,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_MISS & REMOTE_HITM",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9839,7 +9831,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9855,7 +9847,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
"MSRIndex": "0x3F6",
"MSRValue": "0x4",
@@ -9883,7 +9876,8 @@
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
- "EventCode": "0xCD",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
"MSRIndex": "0x3F6",
"MSRValue": "0x8",
@@ -9894,7 +9888,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_MISS & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -9905,5 +9899,19 @@
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
"UMask": "0x1"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "Deprecated": "1",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0110000001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
}
]
\ No newline at end of file
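The OCR.* names on the added lines above are the aliases that perf builds from these pmu-events tables; once the tables are compiled in, an event can be requested by name instead of by raw event code and offcore-response MSR value. A minimal usage sketch, assuming a Cascade Lake X machine whose PMU these tables describe (the alias below is taken from the file above):

    # Count demand data reads that missed L3 and were filled from remote DRAM
    # with the snoop missing or not forwarding; system-wide, for one second.
    perf stat -a -e OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD -- sleep 1

perf expands the alias to the listed EventCode (0xB7/0xBB) and programs the listed MSRValue into the offcore-response MSR (0x1a6/0x1a7), i.e. exactly the EventCode/MSRIndex/MSRValue fields each JSON entry records.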
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/other.json b/tools/perf/pmu-events/arch/x86/cascadelakex/other.json
index 05d13d53c374..f77d78e90954 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/other.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/other.json
@@ -1,6 +1,6 @@
[
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -13,7 +13,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -26,7 +26,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -39,7 +39,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -52,7 +52,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -65,7 +65,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -78,7 +78,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -104,7 +104,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -117,7 +117,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -130,7 +130,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -143,7 +143,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -156,7 +156,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -169,7 +169,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -182,7 +182,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -195,7 +195,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -208,7 +208,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -221,7 +221,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -234,7 +234,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -247,7 +247,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -260,7 +260,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -273,7 +273,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -299,7 +299,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -312,7 +312,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -325,7 +325,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -338,7 +338,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -351,7 +351,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -364,7 +364,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -377,7 +377,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -390,7 +390,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -403,7 +403,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -416,7 +416,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -429,7 +429,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -442,7 +442,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -478,7 +478,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -491,7 +491,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -517,7 +517,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -530,7 +530,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -543,7 +543,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -556,7 +556,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -582,7 +582,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -595,7 +595,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_F & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_F.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -621,7 +621,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -634,7 +634,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -660,7 +660,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -673,7 +673,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -699,7 +699,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -712,7 +712,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -738,7 +738,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -751,7 +751,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_MISS OCR.ALL_RFO.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -764,7 +764,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -777,7 +777,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -790,7 +790,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -803,7 +803,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -816,7 +816,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -829,7 +829,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -842,7 +842,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -855,7 +855,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -868,7 +868,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_M & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -881,7 +881,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -894,7 +894,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -946,7 +946,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -985,7 +985,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -998,7 +998,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1011,7 +1011,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1024,7 +1024,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1050,7 +1050,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1063,7 +1063,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1076,7 +1076,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1089,7 +1089,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1102,7 +1102,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1115,7 +1115,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1128,7 +1128,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1141,7 +1141,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1167,7 +1167,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1180,7 +1180,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1193,7 +1193,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1206,7 +1206,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1219,7 +1219,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1232,7 +1232,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1245,7 +1245,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1258,7 +1258,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1297,7 +1297,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1310,7 +1310,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & ANY_RESPONSE have any response type.",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.ANY_RESPONSE have any response type.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1323,7 +1323,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1336,7 +1336,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_MISS OCR.ALL_PF_RFO.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1349,7 +1349,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1362,7 +1362,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1388,7 +1388,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_S & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1401,7 +1401,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1414,7 +1414,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1427,7 +1427,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1440,7 +1440,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_E.ANY_SNOOP OCR.ALL_READS.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1453,7 +1453,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1466,7 +1466,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1479,7 +1479,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1492,7 +1492,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1505,7 +1505,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1518,7 +1518,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1531,7 +1531,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1544,7 +1544,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1557,7 +1557,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1570,7 +1570,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1583,7 +1583,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1596,7 +1596,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1609,7 +1609,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1622,7 +1622,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1635,7 +1635,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1661,7 +1661,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & ANY_RESPONSE have any response type.",
+ "BriefDescription": "OCR.ALL_RFO.ANY_RESPONSE have any response type.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1726,7 +1726,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1739,7 +1739,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1752,7 +1752,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1765,7 +1765,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1778,7 +1778,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1791,7 +1791,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1804,7 +1804,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1817,7 +1817,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_MISS OCR.ALL_READS.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1830,7 +1830,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1843,7 +1843,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1856,7 +1856,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1908,7 +1908,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1921,7 +1921,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1934,7 +1934,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1947,7 +1947,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1960,7 +1960,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1973,7 +1973,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1986,7 +1986,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -1999,7 +1999,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2012,7 +2012,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2025,7 +2025,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2038,7 +2038,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & SUPPLIER_NONE & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2051,7 +2051,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2077,7 +2077,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2090,7 +2090,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2103,7 +2103,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_E & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_E.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2116,7 +2116,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2129,7 +2129,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_M & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2142,7 +2142,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2155,7 +2155,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2168,7 +2168,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2194,7 +2194,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2207,7 +2207,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2220,7 +2220,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2233,7 +2233,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2246,7 +2246,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2259,7 +2259,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2272,7 +2272,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2285,7 +2285,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2337,7 +2337,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2350,7 +2350,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2363,7 +2363,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2376,7 +2376,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2389,7 +2389,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2402,7 +2402,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2428,7 +2428,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2441,7 +2441,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2454,7 +2454,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2490,7 +2490,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2503,7 +2503,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2516,7 +2516,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2529,7 +2529,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2542,7 +2542,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2555,7 +2555,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2568,7 +2568,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2581,7 +2581,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2594,7 +2594,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_F & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2607,7 +2607,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2620,7 +2620,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & ANY_RESPONSE have any response type.",
+ "BriefDescription": "OCR.ALL_PF_RFO.ANY_RESPONSE have any response type.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2633,7 +2633,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2646,7 +2646,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2659,7 +2659,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2672,7 +2672,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2685,7 +2685,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_M & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_M.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2698,7 +2698,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2711,7 +2711,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2724,7 +2724,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2747,7 +2747,7 @@
"UMask": "0x40"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2773,7 +2773,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2799,7 +2799,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2812,7 +2812,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_M & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2825,7 +2825,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2838,7 +2838,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2851,7 +2851,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2877,7 +2877,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2890,7 +2890,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2916,7 +2916,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2929,7 +2929,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2942,7 +2942,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2955,7 +2955,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2968,7 +2968,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -2994,7 +2994,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3020,7 +3020,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3046,7 +3046,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3059,7 +3059,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3072,7 +3072,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3085,7 +3085,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3098,7 +3098,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3111,7 +3111,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_E & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3124,7 +3124,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3137,7 +3137,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & ANY_RESPONSE have any response type.",
+ "BriefDescription": "OCR.ALL_DATA_RD.ANY_RESPONSE have any response type.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3150,7 +3150,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3163,7 +3163,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_S & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3189,7 +3189,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3202,7 +3202,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3215,7 +3215,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3241,7 +3241,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_F & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3254,7 +3254,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3267,7 +3267,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3280,7 +3280,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3306,7 +3306,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3319,7 +3319,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3332,7 +3332,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3345,7 +3345,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3358,7 +3358,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3371,7 +3371,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3384,7 +3384,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3397,7 +3397,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3423,7 +3423,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3436,7 +3436,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3449,7 +3449,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3462,7 +3462,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3475,7 +3475,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3488,7 +3488,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3501,7 +3501,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3514,7 +3514,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3527,7 +3527,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3540,7 +3540,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3553,7 +3553,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3566,7 +3566,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3579,7 +3579,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3592,7 +3592,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3605,7 +3605,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3618,7 +3618,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3631,7 +3631,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3644,7 +3644,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3657,7 +3657,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3670,7 +3670,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3696,7 +3696,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3709,7 +3709,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3735,7 +3735,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3748,7 +3748,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3771,7 +3771,7 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3797,7 +3797,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3810,7 +3810,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3823,7 +3823,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3836,7 +3836,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3849,7 +3849,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_F & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3875,7 +3875,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3888,7 +3888,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3914,7 +3914,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3927,7 +3927,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3940,7 +3940,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3953,7 +3953,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3966,7 +3966,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3979,7 +3979,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -3992,7 +3992,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4005,7 +4005,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4018,7 +4018,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4031,7 +4031,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4044,7 +4044,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4057,7 +4057,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4070,7 +4070,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4092,7 +4092,7 @@
"UMask": "0x4"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4118,7 +4118,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4131,7 +4131,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4144,7 +4144,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4157,7 +4157,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4170,7 +4170,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_F & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4196,7 +4196,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4209,7 +4209,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4235,7 +4235,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4248,7 +4248,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4261,7 +4261,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4274,7 +4274,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4287,7 +4287,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4300,7 +4300,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4313,7 +4313,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4326,7 +4326,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4339,7 +4339,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4352,7 +4352,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4365,7 +4365,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4378,7 +4378,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4391,7 +4391,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4404,7 +4404,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4417,7 +4417,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4430,7 +4430,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4443,7 +4443,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4469,7 +4469,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4495,7 +4495,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4521,7 +4521,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4534,7 +4534,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4560,7 +4560,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4573,7 +4573,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4586,7 +4586,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.ANY_SNOOP OCR.ALL_READS.L3_HIT.ANY_SNOOP OCR.ALL_READS.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4599,7 +4599,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4612,7 +4612,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4625,7 +4625,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4638,7 +4638,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4651,7 +4651,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4677,7 +4677,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4690,7 +4690,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4716,7 +4716,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_NONE OCR.ALL_READS.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4729,7 +4729,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4742,7 +4742,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4755,7 +4755,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4768,7 +4768,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4781,7 +4781,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4794,7 +4794,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4807,7 +4807,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4820,7 +4820,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4833,7 +4833,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_M.ANY_SNOOP OCR.ALL_READS.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4846,7 +4846,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4859,7 +4859,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & ANY_RESPONSE have any response type.",
+ "BriefDescription": "OCR.ALL_READS.ANY_RESPONSE have any response type.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4872,7 +4872,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4885,7 +4885,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4898,7 +4898,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4911,7 +4911,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4924,7 +4924,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4937,7 +4937,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4950,7 +4950,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4963,7 +4963,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -4976,7 +4976,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5015,7 +5015,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5041,7 +5041,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5054,7 +5054,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5067,7 +5067,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5080,7 +5080,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_E & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5093,7 +5093,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5106,7 +5106,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5119,7 +5119,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_F.ANY_SNOOP OCR.ALL_READS.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5132,7 +5132,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5145,7 +5145,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_S & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_S.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5158,7 +5158,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5171,7 +5171,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5184,7 +5184,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5197,7 +5197,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5210,7 +5210,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5236,7 +5236,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_S & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5249,7 +5249,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5262,7 +5262,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5275,7 +5275,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_NONE OCR.ALL_RFO.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5288,7 +5288,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5301,7 +5301,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5314,7 +5314,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5327,7 +5327,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5340,7 +5340,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_E & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_E.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5353,7 +5353,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5366,7 +5366,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & SUPPLIER_NONE & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5379,7 +5379,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5401,7 +5401,7 @@
"UMask": "0x2"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5414,7 +5414,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5427,7 +5427,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5440,7 +5440,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_M & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_M.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5476,7 +5476,7 @@
"UMask": "0x20"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5489,7 +5489,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5502,7 +5502,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5515,7 +5515,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5528,7 +5528,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5541,7 +5541,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5554,7 +5554,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5567,7 +5567,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5580,7 +5580,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_F & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5593,7 +5593,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5606,7 +5606,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5619,7 +5619,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5645,7 +5645,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5658,7 +5658,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5671,7 +5671,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5684,7 +5684,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_F & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_F.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5697,7 +5697,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5710,7 +5710,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5723,7 +5723,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5736,7 +5736,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5749,7 +5749,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5762,7 +5762,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5775,7 +5775,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5788,7 +5788,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5801,7 +5801,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5814,7 +5814,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5827,7 +5827,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5840,7 +5840,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5853,7 +5853,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5866,7 +5866,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_S & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5879,7 +5879,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5905,7 +5905,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5918,7 +5918,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5931,7 +5931,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_M & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5957,7 +5957,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5970,7 +5970,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5983,7 +5983,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -5996,7 +5996,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_E & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6009,7 +6009,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6022,7 +6022,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6035,7 +6035,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6048,7 +6048,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6074,7 +6074,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6100,7 +6100,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6113,7 +6113,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6139,7 +6139,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6152,7 +6152,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6165,7 +6165,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6191,7 +6191,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6204,7 +6204,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6217,7 +6217,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6230,7 +6230,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6243,7 +6243,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6256,7 +6256,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6269,7 +6269,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.ANY_SNOOP OCR.OTHER.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6282,7 +6282,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6295,7 +6295,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_S & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6308,7 +6308,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6321,7 +6321,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6347,7 +6347,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6360,7 +6360,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6373,7 +6373,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6386,7 +6386,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6412,7 +6412,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6438,7 +6438,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6451,7 +6451,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6464,7 +6464,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6477,7 +6477,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6490,7 +6490,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6529,7 +6529,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6542,7 +6542,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6555,7 +6555,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6568,7 +6568,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6620,7 +6620,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6633,7 +6633,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6659,7 +6659,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6672,7 +6672,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6685,7 +6685,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6698,7 +6698,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6711,7 +6711,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6763,7 +6763,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6776,7 +6776,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6789,7 +6789,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6815,7 +6815,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT.ANY_SNOOP OCR.ALL_RFO.L3_HIT.ANY_SNOOP OCR.ALL_RFO.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6828,7 +6828,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6841,7 +6841,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6867,7 +6867,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6880,7 +6880,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6893,7 +6893,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_M & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6906,7 +6906,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6919,7 +6919,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6958,7 +6958,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6971,7 +6971,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6984,7 +6984,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -6997,7 +6997,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7019,7 +7019,7 @@
"UMask": "0x8"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7032,7 +7032,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7045,7 +7045,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7058,7 +7058,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7071,7 +7071,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_M & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7084,7 +7084,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7097,7 +7097,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7110,7 +7110,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7123,7 +7123,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7136,7 +7136,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_E & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7149,7 +7149,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7162,7 +7162,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7175,7 +7175,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7188,7 +7188,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7201,7 +7201,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7214,7 +7214,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7227,7 +7227,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7240,7 +7240,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7253,7 +7253,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7276,7 +7276,7 @@
"UMask": "0x18"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7289,7 +7289,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7315,7 +7315,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7328,7 +7328,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7341,7 +7341,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7354,7 +7354,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7367,7 +7367,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7380,7 +7380,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7393,7 +7393,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_E & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7406,7 +7406,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7419,7 +7419,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7432,7 +7432,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7445,7 +7445,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7458,7 +7458,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7520,7 +7520,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7533,7 +7533,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7546,7 +7546,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7572,7 +7572,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7585,7 +7585,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7598,7 +7598,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7611,7 +7611,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7637,7 +7637,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7650,7 +7650,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7676,7 +7676,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7689,7 +7689,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7702,7 +7702,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7728,7 +7728,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_E & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7741,7 +7741,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7754,7 +7754,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7767,7 +7767,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7780,7 +7780,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7793,7 +7793,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7806,7 +7806,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7832,7 +7832,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7845,7 +7845,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7858,7 +7858,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7884,7 +7884,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7897,7 +7897,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7910,7 +7910,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & HITM_OTHER_CORE",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7949,7 +7949,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7962,7 +7962,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -7975,7 +7975,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8001,7 +8001,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8014,7 +8014,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8027,7 +8027,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_E & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8053,7 +8053,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8092,7 +8092,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED",
+ "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8105,7 +8105,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8118,7 +8118,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_F & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8131,7 +8131,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8144,7 +8144,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8157,7 +8157,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8170,7 +8170,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8183,7 +8183,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT & HITM_OTHER_CORE",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HITM_OTHER_CORE OCR.OTHER.L3_HIT.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8196,7 +8196,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8222,7 +8222,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8235,7 +8235,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & HITM_OTHER_CORE",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8257,7 +8257,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8270,7 +8270,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8283,7 +8283,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & L3_HIT_S & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8309,7 +8309,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8322,7 +8322,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8348,7 +8348,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_E & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8361,7 +8361,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8374,7 +8374,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8387,7 +8387,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & L3_HIT_F & SNOOP_NONE",
+ "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.SNOOP_NONE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8400,7 +8400,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8413,7 +8413,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8426,7 +8426,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8439,7 +8439,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE",
+ "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8452,7 +8452,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8465,7 +8465,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & NO_SNOOP_NEEDED",
+ "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.NO_SNOOP_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8478,7 +8478,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_S & ANY_SNOOP",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_S.ANY_SNOOP OCR.ALL_READS.L3_HIT_S.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8504,7 +8504,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8517,7 +8517,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8543,7 +8543,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8556,7 +8556,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8569,7 +8569,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8582,7 +8582,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.ANY_SNOOP",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8595,7 +8595,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_PF_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8608,7 +8608,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8621,7 +8621,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & HIT_OTHER_CORE_FWD",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8634,7 +8634,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
@@ -8647,7 +8647,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "ALL_READS & L3_HIT_S & SNOOP_MISS",
+ "BriefDescription": "OCR.ALL_READS.L3_HIT_S.SNOOP_MISS",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json
index 5ec668f46ac1..023f31c72a42 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json
@@ -827,16 +827,6 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
- "Counter": "0,1,2,3",
- "CounterHTOff": "0,1,2,3,4,5,6,7",
- "EventCode": "0xE6",
- "EventName": "BACLEARS.ANY",
- "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
- "SampleAfterValue": "100003",
- "UMask": "0x1"
- },
- {
"BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
index 3fb5cdce842f..4ba9e6d9f25e 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
@@ -90,32 +90,32 @@
"Unit": "iMC"
},
{
- "BriefDescription": "Intel Optane DC persistent memory bandwidth read (MB/sec). Derived from unc_m_pmm_rpq_inserts",
+ "BriefDescription": "Intel Optane DC persistent memory bandwidth read (MB). Derived from unc_m_pmm_rpq_inserts",
"Counter": "0,1,2,3",
"EventCode": "0xE3",
"EventName": "UNC_M_PMM_BANDWIDTH.READ",
"PerPkg": "1",
- "ScaleUnit": "6.103515625E-5MB/sec",
+ "ScaleUnit": "6.103515625E-5MB",
"Unit": "iMC"
},
{
- "BriefDescription": "Intel Optane DC persistent memory bandwidth write (MB/sec). Derived from unc_m_pmm_wpq_inserts",
+ "BriefDescription": "Intel Optane DC persistent memory bandwidth write (MB). Derived from unc_m_pmm_wpq_inserts",
"Counter": "0,1,2,3",
"EventCode": "0xE7",
"EventName": "UNC_M_PMM_BANDWIDTH.WRITE",
"PerPkg": "1",
- "ScaleUnit": "6.103515625E-5MB/sec",
+ "ScaleUnit": "6.103515625E-5MB",
"Unit": "iMC"
},
{
- "BriefDescription": "Intel Optane DC persistent memory bandwidth total (MB/sec). Derived from unc_m_pmm_rpq_inserts",
+ "BriefDescription": "Intel Optane DC persistent memory bandwidth total (MB). Derived from unc_m_pmm_rpq_inserts",
"Counter": "0,1,2,3",
"EventCode": "0xE3",
"EventName": "UNC_M_PMM_BANDWIDTH.TOTAL",
"MetricExpr": "UNC_M_PMM_RPQ_INSERTS + UNC_M_PMM_WPQ_INSERTS",
"MetricName": "UNC_M_PMM_BANDWIDTH.TOTAL",
"PerPkg": "1",
- "ScaleUnit": "6.103515625E-5MB/sec",
+ "ScaleUnit": "6.103515625E-5MB",
"Unit": "iMC"
},
{
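The ScaleUnit edits above only drop the "/sec" suffix; the numeric factor is unchanged and is exactly one 64-byte cache line expressed in megabytes (64 / 2^20 = 6.103515625E-5), which is presumably why each queue insert is scaled by it. A quick check of that arithmetic, with hypothetical raw counts:

# Sanity-check the ScaleUnit factor used by the PMM bandwidth events above.
factor_mb = 6.103515625e-5
assert factor_mb == 64 / (1 << 20)   # one 64-byte line, in MB

# With "/sec" removed, the scaled value is a data volume in MB; a rate still
# has to be derived by dividing by the measurement interval.
# Raw counts below are hypothetical, for illustration only.
rpq_inserts = 1_000_000   # UNC_M_PMM_RPQ_INSERTS
wpq_inserts = 250_000     # UNC_M_PMM_WPQ_INSERTS
interval_s = 1.0          # assumed wall-clock measurement window

read_mb = rpq_inserts * factor_mb
write_mb = wpq_inserts * factor_mb
total_mb = read_mb + write_mb   # mirrors MetricExpr: RPQ_INSERTS + WPQ_INSERTS
print(f"read {read_mb:.1f} MB, write {write_mb:.1f} MB, "
      f"total {total_mb:.1f} MB ({total_mb / interval_s:.1f} MB/sec)")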
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json
index df355ba7acc8..0cd083839e75 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json
@@ -538,6 +538,27 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Inserts : DRds issued by iA Cores that Missed the LLC",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD",
+ "Filter": "config1=0x40433",
+ "PerPkg": "1",
+ "PublicDescription": "TOR Inserts : DRds issued by iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0x21",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : DRds issued by iA Cores that Missed the LLC",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD",
+ "Filter": "config1=0x40433",
+ "PerPkg": "1",
+ "PublicDescription": "TOR Occupancy : DRds issued by iA Cores that Missed the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0x21",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "Clockticks of the IIO Traffic Controller",
"Counter": "0,1,2,3",
"EventCode": "0x1",
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 25b06cf98747..2f2a209e87e1 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -38,3 +38,4 @@ GenuineIntel-6-7E,v1,icelake,core
GenuineIntel-6-86,v1,tremontx,core
AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core
AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core
+AuthenticAMD-25-[[:xdigit:]]+,v1,amdzen2,core
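The new mapfile.csv row reuses the existing amdzen2 event directory for family 0x19 (decimal 25) parts; as with the other rows, the first column is a regular expression matched against the vendor-family-model identifier perf derives from CPUID. A minimal sketch of that match, with a made-up model identifier:

import re

# Row added above: regex, version, event directory, event type.
row = "AuthenticAMD-25-[[:xdigit:]]+,v1,amdzen2,core"
regex, version, directory, etype = row.split(",")

# POSIX [[:xdigit:]] classes are not understood by Python's re module,
# so translate them for this sketch.
pattern = re.compile(regex.replace("[[:xdigit:]]", "[0-9a-fA-F]") + "$")

# Hypothetical family-25 identifier in the same vendor-family-model shape
# as the rows above, for illustration only.
cpuid = "AuthenticAMD-25-01"
print(bool(pattern.match(cpuid)), directory)   # -> True amdzen2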
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
index 24df183693fa..e750a21976f1 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
@@ -1,1663 +1,1675 @@
[
{
- "EventCode": "0x24",
- "UMask": "0x21",
- "BriefDescription": "Demand Data Read miss L2, no rejects",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
- "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x24",
- "UMask": "0x22",
- "BriefDescription": "RFO requests that miss L2 cache",
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.RFO_MISS",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
},
{
- "EventCode": "0x24",
- "UMask": "0x24",
- "BriefDescription": "L2 cache misses when fetching instructions",
+ "BriefDescription": "Counts all demand code reads that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.CODE_RD_MISS",
- "PublicDescription": "Counts L2 cache misses when fetching instructions.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0x24",
- "UMask": "0x27",
+ "EventName": "L2_RQSTS.PF_MISS",
+ "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x38"
+ },
+ {
"BriefDescription": "Demand requests that miss L2 cache",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_MISS",
"PublicDescription": "Demand requests that miss L2 cache.",
"SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x27"
},
{
- "EventCode": "0x24",
- "UMask": "0x38",
- "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
+ "BriefDescription": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.PF_MISS",
- "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x24",
- "UMask": "0x3f",
- "BriefDescription": "All requests that miss L2 cache",
+ "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.MISS",
- "PublicDescription": "All requests that miss L2 cache.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD1",
+ "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x4"
},
{
- "EventCode": "0x24",
- "UMask": "0xc1",
- "BriefDescription": "Demand Data Read requests that hit L2 cache",
+ "BriefDescription": "L2 writebacks that access L2 cache",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
- "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xF0",
+ "EventName": "L2_TRANS.L2_WB",
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
"SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x40"
},
{
- "EventCode": "0x24",
- "UMask": "0xc2",
- "BriefDescription": "RFO requests that hit L2 cache",
+ "BriefDescription": "L2 cache lines filling L2",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.RFO_HIT",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xF1",
+ "EventName": "L2_LINES_IN.ALL",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1f"
},
{
- "EventCode": "0x24",
- "UMask": "0xc4",
- "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.CODE_RD_HIT",
- "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x24",
- "UMask": "0xd8",
- "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.PF_HIT",
- "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x24",
- "UMask": "0xe1",
- "BriefDescription": "Demand Data Read requests",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
- "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x24",
- "UMask": "0xe2",
- "BriefDescription": "RFO requests to L2 cache",
+ "BriefDescription": "Demand Data Read requests sent to uncore",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.ALL_RFO",
- "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x24",
- "UMask": "0xe4",
- "BriefDescription": "L2 code requests",
+ "BriefDescription": "Retired load instructions missed L3 cache as data sources",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.ALL_CODE_RD",
- "PublicDescription": "Counts the total number of L2 code requests.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD1",
+ "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x20"
},
{
- "EventCode": "0x24",
- "UMask": "0xe7",
- "BriefDescription": "Demand requests to L2 cache",
+ "BriefDescription": "All retired store instructions.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
- "PublicDescription": "Demand requests to L2 cache.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.ALL_STORES",
+ "L1_Hit_Indication": "1",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x82"
},
{
- "EventCode": "0x24",
- "UMask": "0xf8",
- "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
+ "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.ALL_PF",
- "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xF2",
+ "EventName": "L2_LINES_OUT.SILENT",
"SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x24",
- "UMask": "0xff",
- "BriefDescription": "All L2 requests",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "L2_RQSTS.REFERENCES",
- "PublicDescription": "All L2 requests.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x2E",
- "UMask": "0x41",
- "BriefDescription": "Core-originated cacheable demand requests missed L3",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "LONGEST_LAT_CACHE.MISS",
- "Errata": "SKL057",
- "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x2E",
- "UMask": "0x4f",
- "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+ "BriefDescription": "Core-originated cacheable demand requests missed L3",
"Counter": "0,1,2,3",
- "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"Errata": "SKL057",
- "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.",
+ "EventCode": "0x2E",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x41"
},
{
- "EventCode": "0x48",
- "UMask": "0x1",
- "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
"Counter": "0,1,2,3",
- "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
- "CounterMask": "1",
- "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_PF",
+ "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xf8"
},
{
- "EventCode": "0x48",
- "UMask": "0x1",
- "BriefDescription": "L1D miss outstandings duration in cycles",
+ "BriefDescription": "Retired load instructions whose data sources was remote HITM",
"Counter": "0,1,2,3",
- "EventName": "L1D_PEND_MISS.PENDING",
- "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions whose data sources was remote HITM.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x4"
},
{
- "EventCode": "0x48",
- "UMask": "0x1",
- "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+ "BriefDescription": "Counts all prefetch data reads that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
- "AnyThread": "1",
- "CounterMask": "1",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x48",
- "UMask": "0x2",
- "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.",
+ "BriefDescription": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "L1D_PEND_MISS.FB_FULL",
- "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x51",
- "UMask": "0x1",
- "BriefDescription": "L1D data line replacements",
+ "BriefDescription": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "L1D.REPLACEMENT",
- "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x60",
- "UMask": "0x1",
- "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+ "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
- "CounterMask": "1",
- "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0x60",
- "UMask": "0x1",
- "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
- "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x60",
- "UMask": "0x1",
- "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+ "BriefDescription": "RFO requests that miss L2 cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
- "CounterMask": "6",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x22"
},
{
- "EventCode": "0x60",
- "UMask": "0x2",
- "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
+ "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xF2",
+ "EventName": "L2_LINES_OUT.NON_SILENT",
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2"
},
{
- "EventCode": "0x60",
- "UMask": "0x2",
- "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
+ "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
- "CounterMask": "1",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xF2",
+ "EventName": "L2_LINES_OUT.USELESS_HWPF",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4"
},
{
- "EventCode": "0x60",
- "UMask": "0x4",
- "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
- "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
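The OFFCORE_RESPONSE.* entries in this file share EventCode 0xB7, 0xBB and UMask 0x1; what actually selects the transaction being counted is the per-entry MSRValue, which is written to the auxiliary MSR named by MSRIndex (0x1a6 or 0x1a7, depending on which of the two event codes is chosen). Below is a minimal sketch of counting the OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE entry above via perf_event_open(), assuming the MSR value travels in the raw event's config1 field (the offcore_rsp format the Intel core PMU exposes); the workload section is a placeholder.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x01b7;		/* UMask 0x1 << 8 | EventCode 0xB7 */
	attr.config1 = 0x0000010002ULL;	/* MSRValue for DEMAND_RFO.ANY_RESPONSE */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... run the workload being measured ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("demand RFO responses: %llu\n", (unsigned long long)count);
	return 0;
}

The same numbers can usually be passed on the perf command line as cpu/event=0xb7,umask=0x1,offcore_rsp=0x0000010002/.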
{
- "EventCode": "0x60",
- "UMask": "0x4",
- "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
+ "BriefDescription": "All requests that miss L2 cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
- "CounterMask": "1",
- "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.MISS",
+ "PublicDescription": "All requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x3f"
},
{
- "EventCode": "0x60",
- "UMask": "0x8",
- "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "BriefDescription": "L2 code requests",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "CounterMask": "1",
- "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "PublicDescription": "Counts the total number of L2 code requests.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe4"
},
{
- "EventCode": "0x60",
- "UMask": "0x8",
- "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+ "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
- "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions whose data sources was forwarded from a remote cache.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8"
},
{
- "EventCode": "0xB0",
- "UMask": "0x1",
- "BriefDescription": "Demand Data Read requests sent to uncore",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xB0",
- "UMask": "0x2",
- "BriefDescription": "Cacheable and noncachaeble code read requests",
+ "BriefDescription": "Counts all demand & prefetch data reads that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
- "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
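Reading the MSRValue fields of these entries side by side suggests how the encodings compose: the low request-type bits simply OR together, so the ALL_DATA_RD value 0x0000010491 combines the single-source values that appear elsewhere in this file, 0x0001 (DEMAND_DATA_RD), 0x0010 (PF_L2_DATA_RD), 0x0080 (PF_L3_DATA_RD) and 0x0400 (PF_L1D_AND_SW), i.e. 0x491 = 0x400 | 0x080 | 0x010 | 0x001. The same pattern holds for ALL_RFO (0x0122 = 0x0002 | 0x0020 | 0x0100) and ALL_PF_DATA_RD (0x0490). The *.ANY_RESPONSE values set bit 16 (0x10000), while the L3_HIT variants set the 0x3C0000 supplier bits plus an upper-byte snoop selector that matches the event suffix (0x01.. NO_SNOOP_NEEDED, 0x04.. HIT_OTHER_CORE_NO_FWD, 0x08.. SNOOP_HIT_WITH_FWD, 0x10.. HITM_OTHER_CORE, 0x3F8.. ANY_SNOOP).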
{
- "EventCode": "0xB0",
- "UMask": "0x4",
- "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+ "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
- "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x2"
},
{
- "EventCode": "0xB0",
- "UMask": "0x8",
- "BriefDescription": "Demand and prefetch data reads",
+ "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
- "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM",
+ "PEBS": "1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x2"
},
{
- "EventCode": "0xB0",
- "UMask": "0x80",
- "BriefDescription": "Any memory transaction that reached the SQ.",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
- "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xB2",
- "UMask": "0x1",
- "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+ "BriefDescription": "Retired load instructions missed L1 cache as data sources",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
- "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD1",
+ "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
},
{
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0020",
+ "Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD0",
- "UMask": "0x11",
- "BriefDescription": "Retired load instructions that miss the STLB. (Precise Event)",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
- "PublicDescription": "Retired load instructions that miss the STLB.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD0",
- "UMask": "0x12",
- "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "L2 cache misses when fetching instructions",
"Counter": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
- "PublicDescription": "Retired store instructions that miss the STLB.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x24"
+ },
+ {
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "L1_Hit_Indication": "1",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD0",
- "UMask": "0x21",
- "BriefDescription": "Retired load instructions with locked access. (Precise Event)",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
"Counter": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
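Because OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD accumulates the super queue occupancy on every cycle, while the matching OFFCORE_REQUESTS event counts individual requests, their ratio estimates how long a demand data read stays outstanding on average, which is the pairing the OFFCORE_REQUESTS.DEMAND_DATA_RD description points at:

	avg_outstanding_cycles = OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD
	                         / OFFCORE_REQUESTS.DEMAND_DATA_RD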
{
- "EventCode": "0xD0",
- "UMask": "0x41",
- "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts demand data reads that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD0",
- "UMask": "0x42",
- "BriefDescription": "Retired store instructions that split across a cacheline boundary. (Precise Event)",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "L1_Hit_Indication": "1",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD0",
- "UMask": "0x81",
- "BriefDescription": "All retired load instructions. (Precise Event)",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.ALL_LOADS",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD0",
- "UMask": "0x82",
- "BriefDescription": "All retired store instructions. (Precise Event)",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
"Counter": "0,1,2,3",
- "EventName": "MEM_INST_RETIRED.ALL_STORES",
- "PublicDescription": "All retired store instructions.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "2000003",
- "L1_Hit_Indication": "1",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x2"
},
{
- "EventCode": "0xD1",
- "UMask": "0x1",
- "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Demand requests to L2 cache",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L1_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+ "PublicDescription": "Demand requests to L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe7"
},
{
- "EventCode": "0xD1",
- "UMask": "0x2",
- "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L2_HIT",
- "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD1",
- "UMask": "0x4",
- "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L3_HIT",
- "PublicDescription": "Retired load instructions with L3 cache hits as data sources.",
- "SampleAfterValue": "50021",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD1",
- "UMask": "0x8",
- "BriefDescription": "Retired load instructions missed L1 cache as data sources",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L1_MISS",
- "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD1",
- "UMask": "0x10",
- "BriefDescription": "Retired load instructions missed L2 cache as data sources",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L2_MISS",
- "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
- "SampleAfterValue": "50021",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD1",
- "UMask": "0x20",
- "BriefDescription": "Retired load instructions missed L3 cache as data sources",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.L3_MISS",
- "PublicDescription": "Retired load instructions missed L3 cache as data sources.",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD1",
- "UMask": "0x40",
- "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_RETIRED.FB_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD2",
- "UMask": "0x1",
- "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
- "Data_LA": "1",
- "PEBS": "1",
+ "AnyThread": "1",
+ "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
- "SampleAfterValue": "20011",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD2",
- "UMask": "0x2",
- "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
- "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
- "SampleAfterValue": "20011",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD2",
- "UMask": "0x4",
- "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
- "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
- "SampleAfterValue": "20011",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL057",
+ "EventCode": "0x2E",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4f"
},
{
- "EventCode": "0xD2",
- "UMask": "0x8",
- "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
- "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xD3",
- "UMask": "0x1",
- "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD3",
- "UMask": "0x2",
- "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD3",
- "UMask": "0x4",
- "BriefDescription": "Retired load instructions whose data sources was remote HITM",
+ "BriefDescription": "Retired load instructions that miss the STLB.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
"Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
"PEBS": "1",
- "Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "SampleAfterValue": "100003",
+ "UMask": "0x11"
},
{
- "EventCode": "0xD3",
- "UMask": "0x8",
- "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts demand data reads that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xD4",
- "UMask": "0x4",
- "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
- "Data_LA": "1",
- "PEBS": "1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "MEM_LOAD_MISC_RETIRED.UC",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xF0",
- "UMask": "0x40",
- "BriefDescription": "L2 writebacks that access L2 cache",
+ "BriefDescription": "L1D data line replacements",
"Counter": "0,1,2,3",
- "EventName": "L2_TRANS.L2_WB",
- "PublicDescription": "Counts L2 writebacks that access L2 cache.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x51",
+ "EventName": "L1D.REPLACEMENT",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xF1",
- "UMask": "0x1f",
- "BriefDescription": "L2 cache lines filling L2",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "L2_LINES_IN.ALL",
- "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xF2",
- "UMask": "0x1",
- "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.",
+ "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
"Counter": "0,1,2,3",
- "EventName": "L2_LINES_OUT.SILENT",
- "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD4",
+ "EventName": "MEM_LOAD_MISC_RETIRED.UC",
+ "PEBS": "1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x4"
},
{
- "EventCode": "0xF2",
- "UMask": "0x2",
- "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped",
+ "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
"Counter": "0,1,2,3",
- "EventName": "L2_LINES_OUT.NON_SILENT",
- "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD1",
+ "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40"
},
{
- "EventCode": "0xF2",
- "UMask": "0x4",
"BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF",
- "Deprecated": "1",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Deprecated": "1",
+ "EventCode": "0xF2",
"EventName": "L2_LINES_OUT.USELESS_PREF",
- "PublicDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF",
"SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0xF2",
- "UMask": "0x4",
- "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
+ "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
"Counter": "0,1,2,3",
- "EventName": "L2_LINES_OUT.USELESS_HWPF",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.PF_HIT",
+ "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
"SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0xd8"
},
{
- "EventCode": "0xF4",
- "UMask": "0x10",
- "BriefDescription": "Number of cache line split locks sent to uncore.",
+ "BriefDescription": "Counts all demand & prefetch RFOs that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "SQ_MISC.SPLIT_LOCK",
- "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "Offcore": "1",
+ "CounterHTOff": "0,1,2,3",
"EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads have any response type.",
- "MSRValue": "0x0000010001",
- "Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads have any response type.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
- },
- {
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010122",
"Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD TBD",
- "MSRValue": "0x01003C0001",
- "Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD TBD",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD TBD",
- "MSRValue": "0x04003C0001",
+ "BriefDescription": "Demand Data Read miss L2, no rejects",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+ "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x21"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD TBD",
- "MSRValue": "0x10003C0001",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD TBD",
- "MSRValue": "0x3F803C0001",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) have any response type.",
- "MSRValue": "0x0000010002",
+ "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x8"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD",
- "MSRValue": "0x01003C0002",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD",
- "MSRValue": "0x04003C0002",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD",
- "MSRValue": "0x10003C0002",
+ "BriefDescription": "All retired load instructions.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.ALL_LOADS",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x81"
},
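Entries such as MEM_INST_RETIRED.ALL_LOADS above carry "PEBS": "1" and "Data_LA": "1", meaning the event supports precise (PEBS) sampling and can report the linear address of the sampled load. A minimal sketch of a perf_event_attr configured for that kind of sampling, reusing the event/umask and SampleAfterValue from the entry; requesting the data address and data source through sample_type is an assumption about how a consumer would typically exploit Data_LA:

#include <linux/perf_event.h>
#include <string.h>

static struct perf_event_attr mem_load_sampling_attr(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x81d0;		/* UMask 0x81 << 8 | EventCode 0xD0 */
	attr.sample_period = 2000003;	/* SampleAfterValue from the JSON entry */
	attr.precise_ip = 2;		/* ask for PEBS-level precision */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC;
	attr.exclude_kernel = 1;
	return attr;
}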
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD",
- "MSRValue": "0x3F803C0002",
+ "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+ "PEBS": "1",
+ "SampleAfterValue": "20011",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.",
- "MSRValue": "0x0000010004",
+ "BriefDescription": "Demand Data Read requests",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+ "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "MSRValue": "0x01003C0004",
+ "BriefDescription": "All L2 requests",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "PublicDescription": "All L2 requests.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xff"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "MSRValue": "0x04003C0004",
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "MSRValue": "0x10003C0004",
+ "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
},
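The CYCLES_WITH_* variants are the occupancy events qualified with CounterMask: with "CounterMask": "1" the counter increments once per cycle in which at least one matching transaction is outstanding, rather than by the number outstanding. In perf's raw encoding the counter mask typically occupies the cmask bits of the event-select register, so a hedged encoding of the CYCLES_WITH_DEMAND_RFO entry above (event 0x60, umask 0x4, cmask 1) would be:

	/* cmask assumed to sit in bits 24-31 of the x86 raw event encoding;
	 * worth checking against the PMU's sysfs "cmask" format string */
	unsigned long long config = (1ULL << 24) | (0x4ULL << 8) | 0x60ULL;	/* 0x1000460 */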
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "MSRValue": "0x3F803C0004",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads have any response type.",
- "MSRValue": "0x0000010010",
+ "BriefDescription": "Number of cache line split locks sent to uncore.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads have any response type.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xF4",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x10"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
- "MSRValue": "0x01003C0010",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
- "MSRValue": "0x04003C0010",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
- "MSRValue": "0x10003C0010",
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xc4"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
- "MSRValue": "0x3F803C0010",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.",
- "MSRValue": "0x0000010020",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
- "MSRValue": "0x01003C0020",
+ "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD1",
+ "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
- "MSRValue": "0x04003C0020",
+ "BriefDescription": "RFO requests that hit L2 cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xc2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
- "MSRValue": "0x10003C0020",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
- "MSRValue": "0x3F803C0020",
+ "BriefDescription": "L1D miss outstandings duration in cycles",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.",
- "MSRValue": "0x0000010080",
+ "BriefDescription": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
- "MSRValue": "0x01003C0080",
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache",
+ "SampleAfterValue": "200003",
+ "UMask": "0xc1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
- "MSRValue": "0x04003C0080",
+ "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
+ "SampleAfterValue": "20011",
+ "UMask": "0x4"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
- "MSRValue": "0x10003C0080",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
- "MSRValue": "0x3F803C0080",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.",
- "MSRValue": "0x0000010100",
+ "BriefDescription": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
- "MSRValue": "0x01003C0100",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
- "MSRValue": "0x04003C0100",
+ "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+ "L1_Hit_Indication": "1",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x42"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
- "MSRValue": "0x10003C0100",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
- "MSRValue": "0x3F803C0100",
+ "BriefDescription": "Any memory transaction that reached the SQ.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+ "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x80"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests have any response type.",
- "MSRValue": "0x0000010400",
+ "BriefDescription": "Cacheable and noncachaeble code read requests",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests have any response type.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+ "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
- "MSRValue": "0x01003C0400",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
- "MSRValue": "0x04003C0400",
+ "BriefDescription": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
- "MSRValue": "0x10003C0400",
+ "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+ "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
- "MSRValue": "0x3F803C0400",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD have any response type.",
- "MSRValue": "0x0000010490",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x01003C0490",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x04003C0490",
+ "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x48",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x10003C0490",
+ "BriefDescription": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3F803C0490",
+ "BriefDescription": "Counts all demand code reads that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD have any response type.",
- "MSRValue": "0x0000010120",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x01003C0120",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x04003C0120",
+ "BriefDescription": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x10003C0120",
+ "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD1",
+ "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3F803C0120",
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD have any response type.",
- "MSRValue": "0x0000010491",
+ "BriefDescription": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x01003C0491",
+ "BriefDescription": "Retired load instructions with locked access.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+ "PEBS": "1",
+ "SampleAfterValue": "100007",
+ "UMask": "0x21"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x04003C0491",
+ "BriefDescription": "Demand and prefetch data reads",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x8"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x10003C0491",
+ "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3F803C0491",
+ "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD have any response type.",
- "MSRValue": "0x0000010122",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD have any response type.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x01003C0122",
+ "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x41"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x04003C0122",
+ "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB2",
+ "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+ "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x10003C0122",
+ "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "6",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3F803C0122",
+ "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads",
- "MSRValue": "0x08007C0001",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts demand data reads",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs)",
- "MSRValue": "0x08007C0002",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts all demand data writes (RFOs)",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
- "MSRValue": "0x08007C0004",
+ "BriefDescription": "Counts prefetch RFOs that have any response type.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0000010120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
- "MSRValue": "0x08007C0010",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
- "MSRValue": "0x08007C0020",
+ "BriefDescription": "Retired store instructions that miss the STLB.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+ "L1_Hit_Indication": "1",
+ "PEBS": "1",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x12"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
- "MSRValue": "0x08007C0080",
+ "BriefDescription": "RFO requests to L2 cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
- "MSRValue": "0x08007C0100",
+ "BriefDescription": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x08003C0010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
- "MSRValue": "0x08007C0400",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD",
- "MSRValue": "0x08007C0490",
+ "BriefDescription": "Retired load instructions missed L2 cache as data sources",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD1",
+ "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
+ "SampleAfterValue": "50021",
+ "UMask": "0x10"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD",
- "MSRValue": "0x08007C0120",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x01003C0001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD",
- "MSRValue": "0x08007C0491",
+ "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+ "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD",
- "MSRValue": "0x08007C0122",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
- "PublicDescription": "TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x04003C0122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
}
] \ No newline at end of file
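
As a rough illustration of how the MSRValue encodings in the entries above compose, the short Python sketch below splits an offcore response value into its request-type, supplier and snoop fields. The bit positions are inferred purely from the entries in this file (for example 0x0000010001 for DEMAND_DATA_RD.ANY_RESPONSE and 0x3F803C0002 for DEMAND_RFO.L3_HIT.ANY_SNOOP); they are not taken from an authoritative register description, and the decode() helper is a hypothetical name used only here.

    # Decode an OFFCORE_RESPONSE MSRValue into named request/supplier/snoop bits.
    # Bit assignments are inferred from the JSON entries above; treat them as
    # illustrative, not as a definitive description of MSR 0x1a6/0x1a7.
    REQUEST_BITS = {
        0: "DEMAND_DATA_RD", 1: "DEMAND_RFO", 2: "DEMAND_CODE_RD",
        4: "PF_L2_DATA_RD", 5: "PF_L2_RFO",
        7: "PF_L3_DATA_RD", 8: "PF_L3_RFO", 10: "PF_L1D_AND_SW",
    }
    SUPPLIER_BITS = {
        16: "ANY_RESPONSE",
        18: "L3_HIT_M", 19: "L3_HIT_E", 20: "L3_HIT_S", 21: "L3_HIT_F",
    }
    SNOOP_BITS = {
        31: "SNOOP_NONE", 32: "NO_SNOOP_NEEDED", 33: "SNOOP_MISS",
        34: "HIT_OTHER_CORE_NO_FWD", 35: "SNOOP_HIT_WITH_FWD",
        36: "HITM_OTHER_CORE", 37: "SNOOP_NON_DRAM",
    }

    def decode(msr_value):
        """Return the request/supplier/snoop names whose bits are set in msr_value."""
        pick = lambda table: [name for bit, name in table.items() if (msr_value >> bit) & 1]
        return {"request": pick(REQUEST_BITS),
                "supplier": pick(SUPPLIER_BITS),
                "snoop": pick(SNOOP_BITS)}

    # Example: the OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP entry above.
    print(decode(0x3F803C0002))

In practice such a value would typically be handed to perf through the PMU's offcore_rsp format attribute, e.g. 'cpu/event=0xb7,umask=0x1,offcore_rsp=0x3F803C0002/', assuming the running kernel exposes that attribute for this CPU.
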
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
index c5d0babe89fc..e197cde15047 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
@@ -1,85 +1,85 @@
[
{
- "EventCode": "0xC7",
- "UMask": "0x1",
- "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT14 RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
- "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0xC7",
- "UMask": "0x2",
- "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.",
"Counter": "0,1,2,3",
- "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x40"
},
{
- "EventCode": "0xC7",
- "UMask": "0x4",
- "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
- "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xC7",
- "UMask": "0x8",
- "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
- "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0xC7",
- "UMask": "0x10",
- "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "BriefDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 16 calculations per element.",
"Counter": "0,1,2,3",
- "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x80"
},
{
- "EventCode": "0xC7",
- "UMask": "0x20",
- "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
},
{
- "EventCode": "0xC7",
- "UMask": "0x40",
- "BriefDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8)",
+ "BriefDescription": "Cycles with any input/output SSE or FP assist",
"Counter": "0,1,2,3",
- "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xCA",
+ "EventName": "FP_ASSIST.ANY",
+ "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1e"
},
{
- "EventCode": "0xC7",
- "UMask": "0x80",
- "BriefDescription": "Number of Packed Single-Precision FP arithmetic instructions (Use operation multiplier of 16)",
+ "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
- "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xCA",
- "UMask": "0x1e",
- "BriefDescription": "Cycles with any input/output SSE or FP assist",
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
- "EventName": "FP_ASSIST.ANY",
- "CounterMask": "1",
- "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC7",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
}
] \ No newline at end of file
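The FP_ARITH_INST_RETIRED.* descriptions above each state a fixed operation multiplier (1 for scalar, 2/4/8/16 for 128/256/512-bit packed, with FMA-class instructions already counted twice), which is how these counters are usually combined into a FLOP estimate. A minimal sketch of that arithmetic, illustrative only and not part of this patch; it assumes the raw per-event counts have already been collected (for example with perf stat) and passed in as a dict:

# Hypothetical helper, not part of this patch: combine raw
# FP_ARITH_INST_RETIRED counts into a FLOP estimate using the per-event
# element multipliers stated in the event descriptions above.
FLOP_MULTIPLIER = {
    "FP_ARITH_INST_RETIRED.SCALAR_SINGLE": 1,
    "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE": 1,
    "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE": 2,
    "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE": 4,
    "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE": 4,
    "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE": 8,
    "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE": 8,
    "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE": 16,
}

def estimate_flops(counts):
    """counts: dict mapping event name -> raw count read from perf."""
    return sum(FLOP_MULTIPLIER[name] * value
               for name, value in counts.items()
               if name in FLOP_MULTIPLIER)

No extra factor is needed for FMA, because the descriptions note that FM(N)ADD/SUB instructions already increment these counters twice; events missing from the input dict simply contribute nothing.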
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
index 4dc583cfb545..cdf95bd2a73d 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
@@ -1,482 +1,516 @@
[
{
- "EventCode": "0x79",
- "UMask": "0x4",
- "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
"Counter": "0,1,2,3",
- "EventName": "IDQ.MITE_CYCLES",
- "CounterMask": "1",
- "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x80",
+ "EventName": "ICACHE_16B.IFDATA_STALL",
+ "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0x79",
- "UMask": "0x4",
- "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "BriefDescription": "Retired Instructions who experienced iTLB true miss.",
"Counter": "0,1,2,3",
- "EventName": "IDQ.MITE_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x14",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x408006",
+ "PEBS": "1",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x9C",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+ "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x79",
- "UMask": "0x8",
"BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
"Counter": "0,1,2,3",
- "EventName": "IDQ.DSB_CYCLES",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_CYCLES",
"PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0x79",
- "UMask": "0x8",
- "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
"Counter": "0,1,2,3",
- "EventName": "IDQ.DSB_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "3",
+ "EventCode": "0x9C",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+ "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x79",
- "UMask": "0x10",
- "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
"Counter": "0,1,2,3",
- "EventName": "IDQ.MS_DSB_CYCLES",
- "CounterMask": "1",
- "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xE6",
+ "EventName": "BACLEARS.ANY",
+ "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x79",
- "UMask": "0x18",
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.",
"Counter": "0,1,2,3",
- "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
- "CounterMask": "1",
- "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.DSB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x11",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0x79",
- "UMask": "0x18",
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+ "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
"Counter": "0,1,2,3",
- "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"CounterMask": "4",
- "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+ "EventCode": "0x9C",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x79",
- "UMask": "0x20",
- "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
- "EventName": "IDQ.MS_MITE_UOPS",
- "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x401006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0x79",
- "UMask": "0x24",
- "BriefDescription": "Cycles MITE is delivering any Uop",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
"Counter": "0,1,2,3",
- "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
- "CounterMask": "1",
- "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_UOPS",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0x79",
- "UMask": "0x24",
- "BriefDescription": "Cycles MITE is delivering 4 Uops",
+ "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
"Counter": "0,1,2,3",
- "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
- "CounterMask": "4",
- "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "2",
+ "EventCode": "0x9C",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+ "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x79",
- "UMask": "0x30",
"BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
"Counter": "0,1,2,3",
- "EventName": "IDQ.MS_CYCLES",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_CYCLES",
"PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x30"
},
{
- "EventCode": "0x79",
- "UMask": "0x30",
- "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "BriefDescription": "Cycles MITE is delivering any Uop",
"Counter": "0,1,2,3",
- "EventName": "IDQ.MS_UOPS",
- "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x24"
+ },
+ {
+ "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_64B.IFTAG_HIT",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1"
},
{
- "EdgeDetect": "1",
- "EventCode": "0x79",
- "UMask": "0x30",
"BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
"Counter": "0,1,2,3",
- "EventName": "IDQ.MS_SWITCHES",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_SWITCHES",
"PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x30"
},
{
- "EventCode": "0x80",
- "UMask": "0x4",
- "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
+ "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
"Counter": "0,1,2,3",
- "EventName": "ICACHE_16B.IFDATA_STALL",
- "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.L2_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x13",
+ "PEBS": "1",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0x83",
- "UMask": "0x1",
- "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
"Counter": "0,1,2,3",
- "EventName": "ICACHE_64B.IFTAG_HIT",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MITE_CYCLES",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
},
{
- "EventCode": "0x83",
- "UMask": "0x2",
- "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
- "EventName": "ICACHE_64B.IFTAG_MISS",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x404006",
+ "PEBS": "1",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0x83",
- "UMask": "0x4",
- "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
"Counter": "0,1,2,3",
- "EventName": "ICACHE_64B.IFTAG_STALL",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "Invert": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0x9C",
- "UMask": "0x1",
- "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
- "Counter": "0,1,2,3",
- "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
- "CounterMask": "1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x9C",
- "UMask": "0x1",
- "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+ "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
"Counter": "0,1,2,3",
- "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
- "CounterMask": "1",
- "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_MITE_UOPS",
+ "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
},
{
- "EventCode": "0x9C",
- "UMask": "0x1",
- "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"Counter": "0,1,2,3",
- "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
- "CounterMask": "2",
- "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_64B.IFTAG_STALL",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4"
},
{
- "EventCode": "0x9C",
- "UMask": "0x1",
- "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
"Counter": "0,1,2,3",
- "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
- "CounterMask": "3",
- "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xAB",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0x9C",
- "UMask": "0x1",
- "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
"Counter": "0,1,2,3",
- "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
- "CounterMask": "4",
- "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x18"
},
{
- "EventCode": "0x9C",
- "UMask": "0x1",
- "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+ "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
"Counter": "0,1,2,3",
- "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
- "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.STLB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x15",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xAB",
- "UMask": "0x2",
- "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
"Counter": "0,1,2,3",
- "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
- "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x79",
+ "EventName": "IDQ.DSB_UOPS",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x400406",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
"MSRIndex": "0x3F7",
- "TakenAlone": "1",
+ "MSRValue": "0x420006",
+ "PEBS": "1",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x200206",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
"MSRIndex": "0x3F7",
+ "MSRValue": "0x400806",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
+ "SampleAfterValue": "100007",
"TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x400106",
+ "PEBS": "2",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x400206",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
"MSRIndex": "0x3F7",
- "TakenAlone": "1",
+ "MSRValue": "0x400206",
+ "PEBS": "1",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x15",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.STLB_MISS",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
"MSRIndex": "0x3F7",
- "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
- "TakenAlone": "1",
+ "MSRValue": "0x400406",
+ "PEBS": "1",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x14",
+ "BriefDescription": "Cycles MITE is delivering 4 Uops",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.ITLB_MISS",
- "MSRIndex": "0x3F7",
- "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0x79",
+ "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+ "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x24"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x13",
+ "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.L2_MISS",
- "MSRIndex": "0x3F7",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_DSB_CYCLES",
+ "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x12",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.L1I_MISS",
- "MSRIndex": "0x3F7",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x79",
+ "EventName": "IDQ.MS_UOPS",
+ "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x30"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x11",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.DSB_MISS",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
"MSRIndex": "0x3F7",
- "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
- "TakenAlone": "1",
+ "MSRValue": "0x410006",
+ "PEBS": "1",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
"EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x200206",
"PEBS": "1",
- "MSRValue": "0x300206",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3",
"MSRIndex": "0x3F7",
- "TakenAlone": "1",
+ "MSRValue": "0x300206",
+ "PEBS": "1",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x100206",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
"MSRIndex": "0x3F7",
+ "MSRValue": "0x100206",
+ "PEBS": "1",
"PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
- "TakenAlone": "1",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
- },
- {
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x420006",
- "Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
- "MSRIndex": "0x3F7",
"TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x410006",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
- "MSRIndex": "0x3F7",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0x79",
+ "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+ "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x18"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x408006",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
- "MSRIndex": "0x3F7",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xAB",
+ "EventName": "DSB2MITE_SWITCHES.COUNT",
+ "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x404006",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
"MSRIndex": "0x3F7",
- "TakenAlone": "1",
+ "MSRValue": "0x402006",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x402006",
+ "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
- "MSRIndex": "0x3F7",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x9C",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "Invert": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.",
- "PEBS": "1",
- "MSRValue": "0x401006",
+ "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
- "MSRIndex": "0x3F7",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x83",
+ "EventName": "ICACHE_64B.IFTAG_MISS",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xC6",
- "UMask": "0x1",
- "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
- "PEBS": "1",
- "MSRValue": "0x400806",
+ "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
"Counter": "0,1,2,3",
- "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.L1I_MISS",
"MSRIndex": "0x3F7",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
- "TakenAlone": "1",
+ "MSRValue": "0x12",
+ "PEBS": "1",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
}
] \ No newline at end of file
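The FRONTEND_RETIRED.* entries above all share EventCode 0xC6 and differ only in the value programmed through MSRIndex 0x3F7. Comparing the MSRValue fields (0x400106 for LATENCY_GE_1 through 0x420006 for LATENCY_GE_512, and 0x100206/0x200206/0x300206 for the BUBBLES variants) suggests a low byte selecting the sub-event, a 12-bit latency threshold, and a small bubble-width field. A minimal decoder sketch under that assumed layout, illustrative only and not part of this patch:

# Hypothetical decoder, not part of this patch: split a FRONTEND_RETIRED
# MSRValue (programmed via MSR 0x3F7) into the fields implied by the
# entries above.
def decode_frontend_msr(msr_value):
    event_select = msr_value & 0xFF           # 0x06 = latency events, 0x11-0x15 = miss events
    bubble_length = (msr_value >> 8) & 0xFFF  # latency threshold in cycles (1, 2, 4, ..., 512)
    bubble_width = (msr_value >> 20) & 0x7    # minimum number of bubble slots per cycle
    return event_select, bubble_length, bubble_width

# Example: FRONTEND_RETIRED.LATENCY_GE_128 uses MSRValue 0x408006.
assert decode_frontend_msr(0x408006) == (0x06, 128, 4)

Read this way, LATENCY_GE_N means "at least N cycles in which all four issue slots were bubbles", matching the BriefDescription wording of those entries.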
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
index 48a9cdf81307..6c3fd89d204d 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
@@ -1,1396 +1,1403 @@
[
{
- "EventCode": "0x54",
- "UMask": "0x1",
- "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_CONFLICT",
- "PublicDescription": "Number of times a TSX line had a cache conflict.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x54",
- "UMask": "0x2",
- "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_CAPACITY",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x54",
- "UMask": "0x4",
- "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
+ "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.",
"Counter": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
- "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "6",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0x54",
- "UMask": "0x8",
- "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
- "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x54",
- "UMask": "0x10",
- "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
- "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x54",
- "UMask": "0x20",
- "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
- "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x54",
- "UMask": "0x40",
- "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
+ "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
"Counter": "0,1,2,3",
- "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
- "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.ABORTED_TIMER",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0x5d",
- "UMask": "0x1",
- "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "TX_EXEC.MISC1",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x5d",
- "UMask": "0x2",
- "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
"Counter": "0,1,2,3",
- "EventName": "TX_EXEC.MISC2",
- "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.ABORTED_MEM",
+ "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0x5d",
- "UMask": "0x4",
- "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "TX_EXEC.MISC3",
- "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x5d",
- "UMask": "0x8",
- "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
+ "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
"Counter": "0,1,2,3",
- "EventName": "TX_EXEC.MISC4",
- "PublicDescription": "RTM region detected inside HLE.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
+ "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x40"
},
{
- "EventCode": "0x5d",
- "UMask": "0x10",
- "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "TX_EXEC.MISC5",
- "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x60",
- "UMask": "0x10",
- "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x60",
- "UMask": "0x10",
- "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.",
+ "BriefDescription": "Counts all prefetch data reads that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
- "CounterMask": "6",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x60",
- "UMask": "0x10",
- "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
- "CounterMask": "1",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xA3",
- "UMask": "0x2",
- "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
- "CounterMask": "2",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xA3",
- "UMask": "0x6",
- "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+ "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
- "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "2",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xB0",
- "UMask": "0x10",
- "BriefDescription": "Demand Data Read requests who miss L3 cache",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "Demand Data Read requests who miss L3 cache.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xC3",
- "UMask": "0x2",
- "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
- "Errata": "SKL089",
- "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xC8",
- "UMask": "0x1",
- "BriefDescription": "Number of times an HLE execution started.",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.START",
- "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC8",
- "UMask": "0x2",
- "BriefDescription": "Number of times an HLE execution successfully committed",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.COMMIT",
- "PublicDescription": "Number of times HLE commit succeeded.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x20",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC8",
- "UMask": "0x4",
- "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
- "PEBS": "1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.ABORTED",
- "PublicDescription": "Number of times HLE abort was triggered. (PEBS)",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC8",
- "UMask": "0x8",
- "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.ABORTED_MEM",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC8",
- "UMask": "0x10",
- "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.ABORTED_TIMER",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC8",
- "UMask": "0x20",
- "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
+ "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
- "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0xC8",
- "UMask": "0x40",
- "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
+ "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
- "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_CONFLICT",
+ "PublicDescription": "Number of times a TSX line had a cache conflict.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xC8",
- "UMask": "0x80",
- "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "HLE_RETIRED.ABORTED_EVENTS",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC9",
- "UMask": "0x1",
- "BriefDescription": "Number of times an RTM execution started.",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.START",
- "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x40",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "2003",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC9",
- "UMask": "0x2",
- "BriefDescription": "Number of times an RTM execution successfully committed",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.COMMIT",
- "PublicDescription": "Number of times RTM commit succeeded.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC9",
- "UMask": "0x4",
- "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
- "PEBS": "1",
+ "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.ABORTED",
- "PublicDescription": "Number of times RTM abort was triggered. (PEBS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.ABORTED_MEM",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0xC9",
- "UMask": "0x8",
- "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.ABORTED_MEM",
- "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0xC9",
- "UMask": "0x10",
- "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.ABORTED_TIMER",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC9",
- "UMask": "0x20",
- "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+ "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
- "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC5",
+ "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0xC9",
- "UMask": "0x40",
- "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+ "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
- "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC4",
+ "PublicDescription": "RTM region detected inside HLE.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0xC9",
- "UMask": "0x80",
- "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+ "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
"Counter": "0,1,2,3",
- "EventName": "RTM_RETIRED.ABORTED_EVENTS",
- "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC3",
+ "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
- "PEBS": "2",
- "MSRValue": "0x200",
+ "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
- "SampleAfterValue": "101",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC2",
+ "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
- "PEBS": "2",
- "MSRValue": "0x100",
+ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
- "SampleAfterValue": "503",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x5d",
+ "EventName": "TX_EXEC.MISC1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
- "PEBS": "2",
- "MSRValue": "0x80",
+ "BriefDescription": "Number of times an RTM execution successfully committed",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
- "SampleAfterValue": "1009",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.COMMIT",
+ "PublicDescription": "Number of times RTM commit succeeded.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
- "PEBS": "2",
- "MSRValue": "0x40",
+ "BriefDescription": "Counts prefetch RFOs that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
- "SampleAfterValue": "2003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
- "PEBS": "2",
- "MSRValue": "0x20",
+ "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
- "PEBS": "2",
- "MSRValue": "0x10",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
- "SampleAfterValue": "20011",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
- "PEBS": "2",
- "MSRValue": "0x8",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
- "SampleAfterValue": "50021",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xCD",
- "UMask": "0x1",
- "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
- "PEBS": "2",
- "MSRValue": "0x4",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
- "MSRIndex": "0x3F6",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
- "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD TBD",
- "MSRValue": "0x3FBC000001",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD",
- "MSRValue": "0x083FC00001",
+ "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.ABORTED_TIMER",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD",
- "MSRValue": "0x103FC00001",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD",
- "MSRValue": "0x063FC00001",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD",
- "MSRValue": "0x063B800001",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00490",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand data reads TBD",
- "MSRValue": "0x0604000001",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD",
- "MSRValue": "0x3FBC000002",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD",
- "MSRValue": "0x083FC00002",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD",
- "MSRValue": "0x103FC00002",
+ "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD",
- "MSRValue": "0x063FC00002",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD",
- "MSRValue": "0x063B800002",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all demand data writes (RFOs) TBD",
- "MSRValue": "0x0604000002",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all demand data writes (RFOs) TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
- "MSRValue": "0x3FBC000004",
+ "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
- "MSRValue": "0x083FC00004",
+ "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
- "MSRValue": "0x103FC00004",
+ "BriefDescription": "Number of times an HLE execution successfully committed",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.COMMIT",
+ "PublicDescription": "Number of times HLE commit succeeded.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
- "MSRValue": "0x063FC00004",
+ "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+ "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x40"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
- "MSRValue": "0x063B800004",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
- "MSRValue": "0x0604000004",
+ "BriefDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
- "MSRValue": "0x3FBC000010",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD",
- "MSRValue": "0x083FC00010",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD",
- "MSRValue": "0x103FC00010",
+ "BriefDescription": "Demand Data Read requests who miss L3 cache",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB0",
+ "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+ "PublicDescription": "Demand Data Read requests who miss L3 cache.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x10"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD",
- "MSRValue": "0x063FC00010",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD",
- "MSRValue": "0x063B800010",
+ "BriefDescription": "Counts demand data reads that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD",
- "MSRValue": "0x0604000010",
+ "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
+ "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
- "MSRValue": "0x3FBC000020",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
- "MSRValue": "0x083FC00020",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
- "MSRValue": "0x103FC00020",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
- "MSRValue": "0x063FC00020",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
- "MSRValue": "0x063B800020",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x100",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "503",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
- "MSRValue": "0x0604000020",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
- "MSRValue": "0x3FBC000080",
+ "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
- "MSRValue": "0x083FC00080",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00020",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
- "MSRValue": "0x103FC00080",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
- "MSRValue": "0x063FC00080",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
- "MSRValue": "0x063B800080",
+ "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.ABORTED",
+ "PEBS": "1",
+ "PublicDescription": "Number of times RTM abort was triggered.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
- "MSRValue": "0x0604000080",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
- "MSRValue": "0x3FBC000100",
+ "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.ABORTED",
+ "PEBS": "1",
+ "PublicDescription": "Number of times HLE abort was triggered.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "MSRValue": "0x083FC00100",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x10",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "20011",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "MSRValue": "0x103FC00100",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x20"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "MSRValue": "0x063FC00100",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "MSRValue": "0x063B800100",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "MSRValue": "0x0604000100",
+ "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x60",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
- "MSRValue": "0x3FBC000400",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000002",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
- "MSRValue": "0x083FC00400",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x200",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "101",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
- "MSRValue": "0x103FC00400",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
- "MSRValue": "0x063FC00400",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000010",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
- "MSRValue": "0x063B800400",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
- "MSRValue": "0x0604000400",
+ "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_CAPACITY",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3FBC000490",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063B800120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x083FC00490",
+ "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
+ "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x40"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x103FC00490",
+ "BriefDescription": "Number of times an RTM execution started.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.START",
+ "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063FC00490",
+ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL089",
+ "EventCode": "0xC3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x2"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063B800490",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x54",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x0604000490",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3FBC000120",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x083FC00120",
+ "BriefDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00001",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x103FC00120",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063FC00120",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063B800120",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000080",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x0604000120",
+ "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC9",
+ "EventName": "RTM_RETIRED.ABORTED_EVENTS",
+ "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x80"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3FBC000491",
+ "BriefDescription": "Number of times an HLE execution started.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.START",
+ "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x083FC00491",
+ "BriefDescription": "Counts all demand code reads that miss in the L3.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBC000004",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x103FC00491",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x80",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "1009",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063FC00491",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x083FC00491",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063B800491",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x103FC00120",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x0604000491",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x063FC00100",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD TBD",
- "MSRValue": "0x3FBC000122",
+ "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "6",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x6"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x083FC00122",
+ "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC8",
+ "EventName": "HLE_RETIRED.ABORTED_EVENTS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x80"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x103FC00122",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000122",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063FC00122",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x4",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x063B800122",
+ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xcd",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x8",
+ "PEBS": "2",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
+ "SampleAfterValue": "50021",
+ "TakenAlone": "1",
+ "UMask": "0x1"
},
{
- "Offcore": "1",
- "EventCode": "0xB7, 0xBB",
- "UMask": "0x1",
- "BriefDescription": "TBD TBD",
- "MSRValue": "0x0604000122",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram.",
"Counter": "0,1,2,3",
- "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
- "MSRIndex": "0x1a6, 0x1a7",
- "PublicDescription": "TBD TBD",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x0604000400",
+ "Offcore": "1",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
}
] \ No newline at end of file
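
The OFFCORE_RESPONSE entries above all share one encoding scheme: the event select (0xB7 or 0xBB) and UMask go into an ordinary event-select register, while the JSON "MSRValue" is loaded into the paired MSR (0x1a6 or 0x1a7) to choose the request-type, response, and snoop bits. As a minimal sketch of how one such entry could be counted, the following assumes the Linux perf_event_open(2) raw-event convention on Intel cores, where the offcore response mask is passed through attr.config1 (the perf tool spells the same thing cpu/event=0xb7,umask=0x1,offcore_rsp=0x063FC00001/); the mask value is taken from the OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD entry above.

/* Hedged sketch: count one offcore-response event on the calling thread.
 * Assumes the x86 Intel core PMU and the perf raw-event ABI, where
 * attr.config holds (umask << 8) | event and attr.config1 carries the
 * offcore response mask that the kernel programs into MSR 0x1a6/0x1a7. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x01B7;           /* event 0xB7, umask 0x1 */
	attr.config1 = 0x063FC00001ULL; /* "MSRValue" from the JSON entry */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... run the workload of interest ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	read(fd, &count, sizeof(count));
	printf("demand data reads missing L3, no forward: %lld\n", count);
	close(fd);
	return 0;
}

In practice the named events in these JSON files exist precisely so that perf can resolve OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD to such a raw encoding automatically; the sketch only makes the mapping explicit.
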
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/other.json b/tools/perf/pmu-events/arch/x86/skylakex/other.json
index 778a541463eb..f6b147ba8ef6 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/other.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/other.json
@@ -1,164 +1,116 @@
[
{
- "EventCode": "0x28",
- "UMask": "0x7",
- "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.",
+ "BriefDescription": "Core cycles the core was throttled due to a pending power level request.",
"Counter": "0,1,2,3",
- "EventName": "CORE_POWER.LVL0_TURBO_LICENSE",
- "PublicDescription": "Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0x28",
- "UMask": "0x18",
- "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.",
- "Counter": "0,1,2,3",
- "EventName": "CORE_POWER.LVL1_TURBO_LICENSE",
- "PublicDescription": "Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.",
+ "EventName": "CORE_POWER.THROTTLE",
+ "PublicDescription": "Core cycles the out-of-order engine was throttled due to a pending power level request.",
"SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x40"
},
{
- "EventCode": "0x28",
- "UMask": "0x20",
- "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
+ "BriefDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly",
"Counter": "0,1,2,3",
- "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
- "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture). This includes high current AVX 512-bit instructions.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xFE",
+ "EventName": "IDI_MISC.WB_DOWNGRADE",
+ "PublicDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
},
{
- "EventCode": "0x28",
- "UMask": "0x40",
- "BriefDescription": "Core cycles the core was throttled due to a pending power level request.",
+ "BriefDescription": "Number of PREFETCHW instructions executed.",
"Counter": "0,1,2,3",
- "EventName": "CORE_POWER.THROTTLE",
- "PublicDescription": "Core cycles the out-of-order engine was throttled due to a pending power level request.",
- "SampleAfterValue": "200003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0x32",
- "UMask": "0x1",
- "BriefDescription": "Number of PREFETCHNTA instructions executed.",
- "Counter": "0,1,2,3",
- "EventName": "SW_PREFETCH_ACCESS.NTA",
+ "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0x32",
- "UMask": "0x2",
- "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.",
"Counter": "0,1,2,3",
- "EventName": "SW_PREFETCH_ACCESS.T0",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x28",
+ "EventName": "CORE_POWER.LVL0_TURBO_LICENSE",
+ "PublicDescription": "Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7"
},
{
- "EventCode": "0x32",
- "UMask": "0x4",
- "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.",
"Counter": "0,1,2,3",
- "EventName": "SW_PREFETCH_ACCESS.T1_T2",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x28",
+ "EventName": "CORE_POWER.LVL1_TURBO_LICENSE",
+ "PublicDescription": "Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x18"
},
{
- "EventCode": "0x32",
- "UMask": "0x8",
- "BriefDescription": "Number of PREFETCHW instructions executed.",
+ "BriefDescription": "Number of PREFETCHT0 instructions executed.",
"Counter": "0,1,2,3",
- "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.T0",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xCB",
- "UMask": "0x1",
"BriefDescription": "Number of hardware interrupts received by the processor.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xCB",
"EventName": "HW_INTERRUPTS.RECEIVED",
"PublicDescription": "Counts the number of hardware interruptions received by the processor.",
"SampleAfterValue": "203",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0xEF",
- "UMask": "0x1",
- "Counter": "0,1,2,3",
- "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITI",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0xEF",
- "UMask": "0x2",
- "Counter": "0,1,2,3",
- "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITFSE",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0xEF",
- "UMask": "0x4",
- "Counter": "0,1,2,3",
- "EventName": "CORE_SNOOP_RESPONSE.RSP_SHITFSE",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xEF",
- "UMask": "0x8",
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
"Counter": "0,1,2,3",
- "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDM",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x28",
+ "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
+ "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture). This includes high current AVX 512-bit instructions.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x20"
},
{
- "EventCode": "0xEF",
- "UMask": "0x10",
+ "BriefDescription": "Number of PREFETCHNTA instructions executed.",
"Counter": "0,1,2,3",
- "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDM",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.NTA",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xEF",
- "UMask": "0x20",
+ "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
"Counter": "0,1,2,3",
- "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDFE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "SW_PREFETCH_ACCESS.T1_T2",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0xEF",
- "UMask": "0x40",
"Counter": "0,1,2,3",
- "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDFE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x09",
+ "EventName": "MEMORY_DISAMBIGUATION.HISTORY_RESET",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xFE",
- "UMask": "0x2",
"BriefDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xFE",
"EventName": "IDI_MISC.WB_UPGRADE",
"PublicDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0xFE",
- "UMask": "0x4",
- "BriefDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly",
- "Counter": "0,1,2,3",
- "EventName": "IDI_MISC.WB_DOWNGRADE",
- "PublicDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
}
] \ No newline at end of file
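
The MEM_TRANS_RETIRED.LOAD_LATENCY_GT_* entries in the memory.json hunk above pair event 0xCD/umask 0x1 with a latency threshold loaded from MSR 0x3F6, and their "PEBS": "2" marking means they are intended as precise sampling events rather than plain counters. A minimal sketch follows, under the assumption that Linux takes the threshold via attr.config1 (what the perf tool spells ldlat, e.g. cpu/event=0xcd,umask=0x1,ldlat=128/pp) and that PEBS is requested through precise_ip; parsing the resulting sample ring buffer is omitted.

/* Hedged sketch: configure latency-threshold load sampling as encoded by
 * MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128 (event 0xCD, umask 0x1,
 * threshold in MSR 0x3F6). Assumes the Linux convention of passing the
 * threshold ("ldlat") in attr.config1 and selecting PEBS via precise_ip. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x01CD;          /* event 0xCD, umask 0x1 */
	attr.config1 = 128;            /* latency threshold, as in ..._GT_128 */
	attr.sample_period = 1009;     /* "SampleAfterValue" suggested above */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR |
			   PERF_SAMPLE_WEIGHT; /* Data_LA address + latency */
	attr.precise_ip = 2;           /* matches "PEBS": "2" */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/* mmap the fd and consume PERF_RECORD_SAMPLE records here (not shown). */
	close(fd);
	return 0;
}

The "TakenAlone": "1" flag on these entries is also worth noting: the threshold MSR is shared, so two load-latency events with different thresholds cannot be active at the same time.
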
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
index 369f56c1d1b5..3bfc6943ddf9 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
@@ -1,967 +1,959 @@
[
{
- "UMask": "0x1",
- "BriefDescription": "Instructions retired from execution.",
- "Counter": "Fixed counter 0",
- "EventName": "INST_RETIRED.ANY",
- "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "Fixed counter 0"
- },
- {
- "UMask": "0x2",
- "BriefDescription": "Core cycles when the thread is not in halt state",
- "Counter": "Fixed counter 1",
- "EventName": "CPU_CLK_UNHALTED.THREAD",
- "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "Fixed counter 1"
- },
- {
- "UMask": "0x2",
- "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
- "Counter": "Fixed counter 1",
- "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
- "AnyThread": "1",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "Fixed counter 1"
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091, SKL044",
+ "EventCode": "0xC0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+ "SampleAfterValue": "2000003"
},
{
- "UMask": "0x3",
- "BriefDescription": "Reference cycles when the core is not in halt state.",
- "Counter": "Fixed counter 2",
- "EventName": "CPU_CLK_UNHALTED.REF_TSC",
- "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+ "Invert": "1",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "Fixed counter 2"
+ "UMask": "0x1"
},
{
- "EventCode": "0x03",
- "UMask": "0x2",
- "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded .",
+ "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
"Counter": "0,1,2,3",
- "EventName": "LD_BLOCKS.STORE_FORWARD",
- "PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:a. preceding store conflicts with the load (incomplete overlap),b. store forwarding is impossible due to u-arch limitations,c. preceding lock RMW operations are not forwarded,d. store has the no-forward bit set (uncacheable/page-split/masked stores),e. all-blocking stores are used (mostly, fences and port I/O), and others.The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA6",
+ "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "EventCode": "0x03",
- "UMask": "0x8",
- "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
+ "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
"Counter": "0,1,2,3",
- "EventName": "LD_BLOCKS.NO_SR",
- "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x14",
+ "EventName": "ARITH.DIVIDER_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x07",
- "UMask": "0x1",
"BriefDescription": "False dependencies in MOB due to partial compare on address.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x07",
"EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
"PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x0D",
- "UMask": "0x1",
- "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+ "BriefDescription": "Far branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "INT_MISC.RECOVERY_CYCLES",
- "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "PEBS": "1",
+ "PublicDescription": "This event counts far branch instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40"
},
{
- "EventCode": "0x0D",
- "UMask": "0x1",
- "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+ "BriefDescription": "Counts the number of x87 uops dispatched.",
"Counter": "0,1,2,3",
- "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
- "AnyThread": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.X87",
+ "PublicDescription": "Counts the number of x87 uops executed.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0x0D",
- "UMask": "0x80",
- "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
+ "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
"Counter": "0,1,2,3",
- "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x4C",
+ "EventName": "LOAD_HIT_PRE.SW_PF",
+ "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "Invert": "1",
- "EventCode": "0x0E",
- "UMask": "0x1",
- "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+ "BriefDescription": "Mispredicted direct and indirect near call instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_ISSUED.STALL_CYCLES",
- "CounterMask": "1",
- "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC5",
+ "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+ "PEBS": "1",
+ "PublicDescription": "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x2"
},
{
- "EventCode": "0x0E",
- "UMask": "0x1",
- "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+ "BriefDescription": "Total execution stalls.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_ISSUED.ANY",
- "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0x0E",
- "UMask": "0x2",
- "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
+ "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
- "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to \u201cMixing Intel AVX and Intel SSE Code\u201d section of the Optimization Guide.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "5",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x5"
},
{
- "EventCode": "0x0E",
- "UMask": "0x20",
"BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0E",
"EventName": "UOPS_ISSUED.SLOW_LEA",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
},
{
- "EventCode": "0x14",
- "UMask": "0x1",
- "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
"Counter": "0,1,2,3",
- "EventName": "ARITH.DIVIDER_ACTIVE",
- "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "10",
+ "EventCode": "0xC2",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "Invert": "1",
+ "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0x3C",
- "UMask": "0x0",
"BriefDescription": "Thread cycles when thread is not in halt state",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
"PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "SampleAfterValue": "2000003"
},
{
- "EventCode": "0x3C",
- "UMask": "0x0",
- "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
"Counter": "0,1,2,3",
- "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
- "AnyThread": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "2",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EdgeDetect": "1",
- "EventCode": "0x3C",
- "UMask": "0x0",
- "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
- "Counter": "0,1,2,3",
- "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
- "CounterMask": "1",
- "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x3C",
- "UMask": "0x1",
"BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
"EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
- "SampleAfterValue": "2503",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "SampleAfterValue": "25003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x3C",
- "UMask": "0x1",
- "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
"Counter": "0,1,2,3",
- "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
- "AnyThread": "1",
- "SampleAfterValue": "2503",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xC3",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x3C",
- "UMask": "0x1",
- "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
- "Counter": "0,1,2,3",
- "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
"AnyThread": "1",
- "SampleAfterValue": "2503",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "Counter": "Fixed counter 1",
+ "CounterHTOff": "Fixed counter 1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0x3C",
- "UMask": "0x1",
- "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
"Counter": "0,1,2,3",
- "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
- "SampleAfterValue": "2503",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.THREAD",
+ "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x3C",
- "UMask": "0x2",
- "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
"Counter": "0,1,2,3",
- "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "3",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x3C",
- "UMask": "0x2",
- "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+ "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
"Counter": "0,1,2,3",
- "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
- "SampleAfterValue": "2503",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+ "Invert": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0x4C",
- "UMask": "0x1",
- "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
+ "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
"Counter": "0,1,2,3",
- "EventName": "LOAD_HIT_PRE.SW_PF",
- "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA6",
+ "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x40"
},
{
- "EventCode": "0x59",
- "UMask": "0x1",
- "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.",
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
- "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD",
- "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "8",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EdgeDetect": "1",
- "Invert": "1",
- "EventCode": "0x5E",
- "UMask": "0x1",
- "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
"Counter": "0,1,2,3",
- "EventName": "RS_EVENTS.EMPTY_END",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
- "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
+ "EventCode": "0xA8",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x5E",
- "UMask": "0x1",
- "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
"Counter": "0,1,2,3",
- "EventName": "RS_EVENTS.EMPTY_CYCLES",
- "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0D",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x87",
- "UMask": "0x1",
- "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
"Counter": "0,1,2,3",
- "EventName": "ILD_STALL.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "25003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xA1",
- "UMask": "0x1",
"BriefDescription": "Cycles per thread when uops are executed in port 0",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_0",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA1",
- "UMask": "0x2",
"BriefDescription": "Cycles per thread when uops are executed in port 1",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_1",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xA1",
- "UMask": "0x4",
"BriefDescription": "Cycles per thread when uops are executed in port 2",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_2",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x4"
},
{
- "EventCode": "0xA1",
- "UMask": "0x8",
"BriefDescription": "Cycles per thread when uops are executed in port 3",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_3",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0xA1",
- "UMask": "0x10",
"BriefDescription": "Cycles per thread when uops are executed in port 4",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_4",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0xA1",
- "UMask": "0x20",
"BriefDescription": "Cycles per thread when uops are executed in port 5",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_5",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
},
{
- "EventCode": "0xA1",
- "UMask": "0x40",
"BriefDescription": "Cycles per thread when uops are executed in port 6",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_6",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x40"
},
{
- "EventCode": "0xA1",
- "UMask": "0x80",
"BriefDescription": "Cycles per thread when uops are executed in port 7",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA1",
"EventName": "UOPS_DISPATCHED_PORT.PORT_7",
"PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x80"
},
{
- "EventCode": "0xa2",
- "UMask": "0x1",
- "BriefDescription": "Resource-related stall cycles",
+ "AnyThread": "1",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
"Counter": "0,1,2,3",
- "EventName": "RESOURCE_STALLS.ANY",
- "PublicDescription": "Counts resource-related stall cycles.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0D",
+ "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA2",
- "UMask": "0x8",
- "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
- "Counter": "0,1,2,3",
- "EventName": "RESOURCE_STALLS.SB",
- "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+ "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+ "Counter": "1",
+ "CounterHTOff": "1",
+ "Errata": "SKL091, SKL044",
+ "EventCode": "0xC0",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "PEBS": "2",
+ "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA3",
- "UMask": "0x1",
- "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
- "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0xA8",
+ "EventName": "LSD.CYCLES_4_UOPS",
+ "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA3",
- "UMask": "0x4",
- "BriefDescription": "Total execution stalls.",
+ "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
- "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA6",
+ "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0xA3",
- "UMask": "0x5",
- "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+ "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
- "CounterMask": "5",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xA3",
- "UMask": "0x8",
- "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
- "CounterMask": "8",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x5E",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "Invert": "1",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA3",
- "UMask": "0xc",
- "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "AnyThread": "1",
+ "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
- "CounterMask": "12",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "25003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xA3",
- "UMask": "0x10",
- "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
- "CounterMask": "16",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x59",
+ "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD",
+ "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA3",
- "UMask": "0x14",
- "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
"Counter": "0,1,2,3",
- "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
- "CounterMask": "20",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x0E",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "Invert": "1",
+ "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA6",
- "UMask": "0x1",
- "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
+ "BriefDescription": "Not taken branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
- "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+ "PublicDescription": "This event counts not taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x10"
},
{
- "EventCode": "0xA6",
- "UMask": "0x2",
- "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
+ "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
"Counter": "0,1,2,3",
- "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "3",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xA6",
- "UMask": "0x4",
- "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
"Counter": "0,1,2,3",
- "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xA6",
- "UMask": "0x8",
- "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
"Counter": "0,1,2,3",
- "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xA6",
- "UMask": "0x10",
- "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
- "Counter": "0,1,2,3",
- "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
- "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "Counter": "Fixed counter 2",
+ "CounterHTOff": "Fixed counter 2",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x3"
},
{
- "EventCode": "0xA6",
- "UMask": "0x40",
- "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
+ "BriefDescription": "All mispredicted macro branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC5",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "SampleAfterValue": "400009"
+ },
+ {
+ "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC1",
+ "EventName": "OTHER_ASSISTS.ANY",
+ "SampleAfterValue": "100003",
+ "UMask": "0x3f"
+ },
+ {
+ "BriefDescription": "Cycles without actually retired uops.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xC2",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "Invert": "1",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xA8",
- "UMask": "0x1",
"BriefDescription": "Number of Uops delivered by the LSD.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA8",
"EventName": "LSD.UOPS",
"PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xA8",
- "UMask": "0x1",
- "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
"Counter": "0,1,2,3",
- "EventName": "LSD.CYCLES_4_UOPS",
- "CounterMask": "4",
- "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "25003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xA8",
- "UMask": "0x1",
- "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
"Counter": "0,1,2,3",
- "EventName": "LSD.CYCLES_ACTIVE",
- "CounterMask": "1",
- "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x87",
+ "EventName": "ILD_STALL.LCP",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xB1",
- "UMask": "0x1",
- "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
- "CounterMask": "4",
- "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "16",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0xB1",
- "UMask": "0x1",
- "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+ "BriefDescription": "Taken branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
- "CounterMask": "3",
- "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "PEBS": "1",
+ "PublicDescription": "This event counts taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x20"
},
{
- "EventCode": "0xB1",
- "UMask": "0x1",
- "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+ "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
- "CounterMask": "2",
- "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
},
{
- "EventCode": "0xB1",
- "UMask": "0x1",
- "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
- "CounterMask": "1",
- "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0E",
+ "EventName": "UOPS_ISSUED.ANY",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "Invert": "1",
- "EventCode": "0xB1",
- "UMask": "0x1",
- "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
- "Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.STALL_CYCLES",
- "CounterMask": "1",
- "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+ "BriefDescription": "Core cycles when the thread is not in halt state",
+ "Counter": "Fixed counter 1",
+ "CounterHTOff": "Fixed counter 1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xB1",
- "UMask": "0x1",
- "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+ "AnyThread": "1",
+ "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.THREAD",
- "PublicDescription": "Number of uops to be executed per-thread each cycle.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "25003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xB1",
- "UMask": "0x2",
- "BriefDescription": "Number of uops executed on the core.",
+ "BriefDescription": "Direct and indirect near call instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CORE",
- "PublicDescription": "Number of uops executed from any thread.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "PEBS": "1",
+ "PublicDescription": "This event counts both direct and indirect near call instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x2"
},
{
- "Invert": "1",
- "EventCode": "0xB1",
- "UMask": "0x2",
- "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+ "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
- "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xCC",
+ "EventName": "ROB_MISC_EVENTS.PAUSE_INST",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x40"
},
{
- "EventCode": "0xB1",
- "UMask": "0x2",
- "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+ "BriefDescription": "Resource-related stall cycles",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
- "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa2",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "PublicDescription": "Counts resource-related stall cycles.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xB1",
- "UMask": "0x2",
- "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+ "BriefDescription": "Self-modifying code (SMC) detected.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
- "CounterMask": "3",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC3",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
},
{
- "EventCode": "0xB1",
- "UMask": "0x2",
- "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+ "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
- "CounterMask": "2",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "25003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xB1",
- "UMask": "0x2",
- "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
- "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "4",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xB1",
- "UMask": "0x10",
- "BriefDescription": "Counts the number of x87 uops dispatched.",
+ "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_EXECUTED.X87",
- "PublicDescription": "Counts the number of x87 uops executed.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC5",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "PEBS": "1",
+ "SampleAfterValue": "400009",
+ "UMask": "0x20"
},
{
- "EventCode": "0xC0",
- "UMask": "0x0",
- "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
"Counter": "0,1,2,3",
- "EventName": "INST_RETIRED.ANY_P",
- "Errata": "SKL091, SKL044",
- "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+ "CounterHTOff": "0,1,2,3",
+ "CounterMask": "20",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x14"
},
{
- "EventCode": "0xC0",
- "UMask": "0x1",
- "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
- "PEBS": "2",
- "Counter": "1",
- "EventName": "INST_RETIRED.PREC_DIST",
- "Errata": "SKL091, SKL044",
- "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.",
+ "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA6",
+ "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+ "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "1"
+ "UMask": "0x1"
},
{
- "Invert": "1",
- "EventCode": "0xC0",
- "UMask": "0x1",
"BriefDescription": "Number of cycles using always true condition applied to PEBS instructions retired event.",
- "PEBS": "2",
"Counter": "0,2,3",
- "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+ "CounterHTOff": "0,2,3",
"CounterMask": "10",
"Errata": "SKL091, SKL044",
+ "EventCode": "0xC0",
+ "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+ "Invert": "1",
+ "PEBS": "2",
"PublicDescription": "Number of cycles using an always true condition applied to PEBS instructions retired event. (inst_ret< 16)",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,2,3"
+ "UMask": "0x1"
},
{
- "EventCode": "0xC1",
- "UMask": "0x3f",
- "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
+ "BriefDescription": "Retirement slots used.",
"Counter": "0,1,2,3",
- "EventName": "OTHER_ASSISTS.ANY",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC2",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "PublicDescription": "Counts the retirement slots used.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "Invert": "1",
- "EventCode": "0xC2",
- "UMask": "0x2",
- "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "AnyThread": "1",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
- "CounterMask": "10",
- "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+ "SampleAfterValue": "2000003"
},
{
- "Invert": "1",
- "EventCode": "0xC2",
- "UMask": "0x2",
- "BriefDescription": "Cycles without actually retired uops.",
+ "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_RETIRED.STALL_CYCLES",
- "CounterMask": "1",
- "PublicDescription": "This event counts cycles without actually retired uops.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0E",
+ "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
+ "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0xC2",
- "UMask": "0x2",
- "BriefDescription": "Retirement slots used.",
+ "BriefDescription": "Increments whenever there is an update to the LBR array.",
"Counter": "0,1,2,3",
- "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
- "PublicDescription": "Counts the retirement slots used.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xCC",
+ "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+ "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
},
{
- "EdgeDetect": "1",
- "EventCode": "0xC3",
- "UMask": "0x1",
- "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
"Counter": "0,1,2,3",
- "EventName": "MACHINE_CLEARS.COUNT",
- "CounterMask": "1",
- "PublicDescription": "Number of machine clears (nukes) of any type.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x5E",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
},
{
- "EventCode": "0xC3",
- "UMask": "0x4",
- "BriefDescription": "Self-modifying code (SMC) detected.",
+ "BriefDescription": "Return instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "MACHINE_CLEARS.SMC",
- "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "PEBS": "1",
+ "PublicDescription": "This event counts return instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8"
},
{
- "EventCode": "0xC4",
- "UMask": "0x0",
- "BriefDescription": "All (macro) branch instructions retired.",
+ "BriefDescription": "Instructions retired from execution.",
+ "Counter": "Fixed counter 0",
+ "CounterHTOff": "Fixed counter 0",
+ "EventName": "INST_RETIRED.ANY",
+ "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
- "Errata": "SKL091",
- "PublicDescription": "Counts all (macro) branch instructions retired.",
- "SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "2",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xC4",
- "UMask": "0x1",
- "BriefDescription": "Conditional branch instructions retired.",
- "PEBS": "1",
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.CONDITIONAL",
- "Errata": "SKL091",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.",
- "SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA2",
+ "EventName": "RESOURCE_STALLS.SB",
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
},
{
- "EventCode": "0xC4",
- "UMask": "0x2",
- "BriefDescription": "Direct and indirect near call instructions retired.",
- "PEBS": "1",
+ "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.NEAR_CALL",
- "Errata": "SKL091",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0x3C",
+ "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
+ "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
+ "SampleAfterValue": "100007"
},
{
- "EventCode": "0xC4",
- "UMask": "0x4",
"BriefDescription": "All (macro) branch instructions retired.",
- "PEBS": "2",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+ "CounterHTOff": "0,1,2,3",
"Errata": "SKL091",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+ "PEBS": "2",
"PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
"SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3"
+ "UMask": "0x4"
},
{
- "EventCode": "0xC4",
- "UMask": "0x8",
- "BriefDescription": "Return instructions retired.",
- "PEBS": "1",
+ "BriefDescription": "Mispredicted macro branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.NEAR_RETURN",
- "Errata": "SKL091",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC5",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+ "PEBS": "2",
+ "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x4"
},
{
- "EventCode": "0xC4",
- "UMask": "0x10",
- "BriefDescription": "Counts all not taken macro branch instructions retired.",
- "PEBS": "1",
+ "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.NOT_TAKEN",
- "Errata": "SKL091",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts not taken branch instructions retired.",
- "SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA6",
+ "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xC4",
- "UMask": "0x20",
- "BriefDescription": "Taken branch instructions retired.",
- "PEBS": "1",
+ "BriefDescription": "Not taken branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"Errata": "SKL091",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+ "PublicDescription": "This event counts not taken branch instructions retired.",
"SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0xC4",
- "UMask": "0x40",
- "BriefDescription": "Counts the number of far branch instructions retired.",
- "PEBS": "1",
+ "BriefDescription": "Conditional branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"Errata": "SKL091",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0xC5",
- "UMask": "0x0",
- "BriefDescription": "All mispredicted macro branch instructions retired.",
- "Counter": "0,1,2,3",
- "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
- "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "PEBS": "1",
+ "PublicDescription": "This event counts conditional branch instructions retired.",
"SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xC5",
- "UMask": "0x1",
"BriefDescription": "Mispredicted conditional branch instructions retired.",
- "PEBS": "1",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xC5",
"EventName": "BR_MISP_RETIRED.CONDITIONAL",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.",
+ "PEBS": "1",
+ "PublicDescription": "This event counts mispredicted conditional branch instructions retired.",
"SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xC5",
- "UMask": "0x2",
- "BriefDescription": "Mispredicted direct and indirect near call instructions retired.",
- "PEBS": "1",
+ "BriefDescription": "Number of uops executed on the core.",
"Counter": "0,1,2,3",
- "EventName": "BR_MISP_RETIRED.NEAR_CALL",
- "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.",
- "SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CORE",
+ "PublicDescription": "Number of uops executed from any thread.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0xC5",
- "UMask": "0x4",
- "BriefDescription": "Mispredicted macro branch instructions retired.",
- "PEBS": "2",
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
- "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
- "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
- "SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "12",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0xc"
},
{
- "EventCode": "0xC5",
- "UMask": "0x20",
- "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
- "PEBS": "1",
+ "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
"Counter": "0,1,2,3",
- "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
- "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.",
- "SampleAfterValue": "400009",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xA6",
+ "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
},
{
- "EventCode": "0xCC",
- "UMask": "0x20",
- "BriefDescription": "Increments whenever there is an update to the LBR array.",
+ "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
"Counter": "0,1,2,3",
- "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
- "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x0D",
+ "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x80"
},
{
- "EventCode": "0xCC",
- "UMask": "0x40",
- "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.",
+ "BriefDescription": "All (macro) branch instructions retired.",
"Counter": "0,1,2,3",
- "EventName": "ROB_MISC_EVENTS.PAUSE_INST",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091",
+ "EventCode": "0xC4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "PublicDescription": "Counts all (macro) branch instructions retired.",
+ "SampleAfterValue": "400009"
+ },
+ {
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xB1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xE6",
- "UMask": "0x1",
- "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
"Counter": "0,1,2,3",
- "EventName": "BACLEARS.ANY",
- "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xA3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
}
] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index b4f91137f40c..f31794d3b926 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -4,14 +4,14 @@
"MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)",
"MetricGroup": "TopdownL1",
"MetricName": "Frontend_Bound",
- "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Frontend_Bound_SMT",
- "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
@@ -22,13 +22,14 @@
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Bad_Speculation_SMT",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU."
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
+ "MetricConstraint": "NO_NMI_WATCHDOG",
"MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )",
"MetricGroup": "TopdownL1",
"MetricName": "Backend_Bound",
@@ -36,7 +37,7 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )",
+ "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Backend_Bound_SMT",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
@@ -50,7 +51,7 @@
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"MetricGroup": "TopdownL1_SMT",
"MetricName": "Retiring_SMT",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU."
@@ -58,7 +59,7 @@
{
"BriefDescription": "Instructions Per Cycle (per Logical Processor)",
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "TopDownL1",
+ "MetricGroup": "Summary",
"MetricName": "IPC"
},
{
@@ -74,24 +75,6 @@
"MetricName": "IpTB"
},
{
- "BriefDescription": "Branch instructions per taken branch. ",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;PGO",
- "MetricName": "BpTB"
- },
- {
- "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions",
- "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )",
- "MetricGroup": "PGO;IcMiss",
- "MetricName": "IFetch_Line_Utilization"
- },
- {
- "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
- "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
- "MetricGroup": "DSB;Fetch_BW",
- "MetricName": "DSB_Coverage"
- },
- {
"BriefDescription": "Cycles Per Instruction (per Logical Processor)",
"MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
"MetricGroup": "Pipeline;Summary",
@@ -104,86 +87,104 @@
"MetricName": "CLKS"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core)",
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
"MetricExpr": "4 * cycles",
"MetricGroup": "TopDownL1",
"MetricName": "SLOTS"
},
{
- "BriefDescription": "Total issue-pipeline slots (per-Physical Core)",
- "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
"MetricGroup": "TopDownL1_SMT",
"MetricName": "SLOTS_SMT"
},
{
- "BriefDescription": "Instructions per Load (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
"MetricGroup": "Instruction_Type",
- "MetricName": "IpL"
+ "MetricName": "IpLoad"
},
{
- "BriefDescription": "Instructions per Store (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
"MetricGroup": "Instruction_Type",
- "MetricName": "IpS"
+ "MetricName": "IpStore"
},
{
- "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Instruction_Type",
- "MetricName": "IpB"
+ "MetricName": "IpBranch"
},
{
- "BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)",
+ "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches",
"MetricName": "IpCall"
},
{
+ "BriefDescription": "Branch instructions per taken branch. ",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;PGO",
+ "MetricName": "BpTkBranch"
+ },
+ {
+ "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
+ "MetricGroup": "FLOPS;FP_Arith;Instruction_Type",
+ "MetricName": "IpFLOP"
+ },
+ {
"BriefDescription": "Total number of retired Instructions",
"MetricExpr": "INST_RETIRED.ANY",
- "MetricGroup": "Summary",
+ "MetricGroup": "Summary;TopDownL1",
"MetricName": "Instructions"
},
{
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
+ "MetricGroup": "DSB;Fetch_BW",
+ "MetricName": "DSB_Coverage"
+ },
+ {
"BriefDescription": "Instructions Per Cycle (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / cycles",
- "MetricGroup": "SMT",
+ "MetricGroup": "SMT;TopDownL1",
"MetricName": "CoreIPC"
},
{
"BriefDescription": "Instructions Per Cycle (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
- "MetricGroup": "SMT",
+ "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+ "MetricGroup": "SMT;TopDownL1",
"MetricName": "CoreIPC_SMT"
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / cycles",
+ "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / cycles",
"MetricGroup": "FLOPS",
"MetricName": "FLOPc"
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
"MetricGroup": "FLOPS_SMT",
"MetricName": "FLOPc_SMT"
},
{
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
- "MetricGroup": "Pipeline",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )",
+ "MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP"
},
{
"BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
- "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "BrMispredicts",
"MetricName": "Branch_Misprediction_Cost"
},
{
"BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
- "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "BrMispredicts_SMT",
"MetricName": "Branch_Misprediction_Cost_SMT"
},
@@ -213,14 +214,14 @@
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricConstraint": "NO_NMI_WATCHDOG",
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
"MetricGroup": "TLB",
- "MetricName": "Page_Walks_Utilization",
- "MetricConstraint": "NO_NMI_WATCHDOG"
+ "MetricName": "Page_Walks_Utilization"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )",
+ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
"MetricGroup": "TLB_SMT",
"MetricName": "Page_Walks_Utilization_SMT"
},
@@ -245,7 +246,7 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
- "MetricGroup": "Memory_BW",
+ "MetricGroup": "Memory_BW;Offcore",
"MetricName": "L3_Cache_Access_BW"
},
{
@@ -263,7 +264,7 @@
{
"BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY",
- "MetricGroup": "Cache_Misses",
+ "MetricGroup": "Cache_Misses;Offcore",
"MetricName": "L2MPKI_All"
},
{
@@ -298,7 +299,7 @@
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / 1000000000 ) / duration_time",
+ "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
"MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs"
},
@@ -310,44 +311,56 @@
},
{
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
- "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 )",
"MetricGroup": "SMT;Summary",
"MetricName": "SMT_2T_Utilization"
},
{
- "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Summary",
+ "MetricGroup": "OS",
"MetricName": "Kernel_Utilization"
},
{
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
"MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
- "MetricGroup": "Memory_BW",
+ "MetricGroup": "Memory_BW;SoC",
"MetricName": "DRAM_BW_Use"
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
- "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )",
- "MetricGroup": "Memory_Lat",
- "MetricName": "DRAM_Read_Latency"
+ "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )",
+ "MetricGroup": "Memory_Lat;SoC",
+ "MetricName": "MEM_Read_Latency"
},
{
"BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
- "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@",
- "MetricGroup": "Memory_BW",
- "MetricName": "DRAM_Parallel_Reads"
+ "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@",
+ "MetricGroup": "Memory_BW;SoC",
+ "MetricName": "MEM_Parallel_Reads"
+ },
+ {
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
+ "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time",
+ "MetricGroup": "IO_BW;SoC;Server",
+ "MetricName": "IO_Write_BW"
+ },
+ {
+ "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
+ "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time",
+ "MetricGroup": "IO_BW;SoC;Server",
+ "MetricName": "IO_Read_BW"
},
{
"BriefDescription": "Socket actual clocks when any core is active on that socket",
"MetricExpr": "cha_0@event\\=0x0@",
- "MetricGroup": "",
+ "MetricGroup": "SoC",
"MetricName": "Socket_CLKS"
},
{
- "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions. )",
+ "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / ( BR_INST_RETIRED.FAR_BRANCH / 2 )",
- "MetricGroup": "",
+ "MetricGroup": "Branches;OS",
"MetricName": "IpFarBranch"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json
index 9c7e5f8beee2..b80b5d66385d 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json
@@ -94,17 +94,7 @@
"Unit": "iMC"
},
{
- "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
- "Counter": "0,1,2,3",
- "EventCode": "0x4",
- "EventName": "LLC_MISSES.MEM_READ",
- "PerPkg": "1",
- "ScaleUnit": "64Bytes",
- "UMask": "0x3",
- "Unit": "iMC"
- },
- {
- "BriefDescription": "All DRAM Read CAS Commands issued (does not include underfills) ",
+ "BriefDescription": "All DRAM Read CAS Commands issued (does not include underfills)",
"Counter": "0,1,2,3",
"EventCode": "0x4",
"EventName": "UNC_M_CAS_COUNT.RD_REG",
@@ -119,18 +109,18 @@
"EventCode": "0x4",
"EventName": "UNC_M_CAS_COUNT.RD_UNDERFILL",
"PerPkg": "1",
- "PublicDescription": "Counts CAS (Column Access Select) underfill read commands issued to DRAM due to a partial write, on a per channel basis. CAS commands are issued to specify the address to read or write on DRAM, and this command counts underfill reads. Partial writes must be completed by first reading in the underfill from DRAM and then merging in the partial write data before writing the full line back to DRAM. This event will generally count about the same as the number of partial writes, but may be slightly less because of partials hitting in the WPQ (due to a previous write request). ",
+ "PublicDescription": "Counts CAS (Column Access Select) underfill read commands issued to DRAM due to a partial write, on a per channel basis. CAS commands are issued to specify the address to read or write on DRAM, and this command counts underfill reads. Partial writes must be completed by first reading in the underfill from DRAM and then merging in the partial write data before writing the full line back to DRAM. This event will generally count about the same as the number of partial writes, but may be slightly less because of partials hitting in the WPQ (due to a previous write request).",
"UMask": "0x2",
"Unit": "iMC"
},
{
- "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
+ "BriefDescription": "DRAM CAS (Column Address Strobe) Commands.; DRAM WR_CAS (w/ and w/out auto-pre) in Write Major Mode",
"Counter": "0,1,2,3",
"EventCode": "0x4",
- "EventName": "LLC_MISSES.MEM_WRITE",
+ "EventName": "UNC_M_CAS_COUNT.WR_WMM",
"PerPkg": "1",
- "ScaleUnit": "64Bytes",
- "UMask": "0xC",
+ "PublicDescription": "Counts the total number or DRAM Write CAS commands issued on this channel while in Write-Major-Mode.",
+ "UMask": "0x4",
"Unit": "iMC"
},
{
@@ -139,7 +129,7 @@
"EventCode": "0x10",
"EventName": "UNC_M_RPQ_INSERTS",
"PerPkg": "1",
- "PublicDescription": "Counts the number of read requests allocated into the Read Pending Queue (RPQ). This queue is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC. The requests deallocate after the read CAS command has been issued to DRAM. This event counts both Isochronous and non-Isochronous requests which were issued to the RPQ. ",
+ "PublicDescription": "Counts the number of read requests allocated into the Read Pending Queue (RPQ). This queue is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC. The requests deallocate after the read CAS command has been issued to DRAM. This event counts both Isochronous and non-Isochronous requests which were issued to the RPQ.",
"Unit": "iMC"
},
{
@@ -166,7 +156,7 @@
"EventCode": "0x81",
"EventName": "UNC_M_WPQ_OCCUPANCY",
"PerPkg": "1",
- "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests.",
+ "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller). They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA. The 'posted' filter, on the other hand, provides information about how much queueing is actually happenning in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts. Is there a filter of sorts???",
"Unit": "iMC"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
index adb42c72f5c8..d7a0270de983 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
@@ -119,18 +119,15 @@
"EventName": "UPI_DATA_BANDWIDTH_TX",
"PerPkg": "1",
"ScaleUnit": "7.11E-06Bytes",
- "UMask": "0x0F",
+ "UMask": "0xf",
"Unit": "UPI LL"
},
{
- "BriefDescription": "PCI Express bandwidth reading at IIO. Derived from unc_iio_data_req_of_cpu.mem_read.part0",
+ "BriefDescription": "PCI Express bandwidth reading at IIO, part 0",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "LLC_MISSES.PCIE_READ",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0",
"FCMask": "0x07",
- "Filter": "ch_mask=0x1f",
- "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
- "MetricName": "LLC_MISSES.PCIE_READ",
"PerPkg": "1",
"PortMask": "0x01",
"ScaleUnit": "4Bytes",
@@ -138,118 +135,117 @@
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth writing at IIO. Derived from unc_iio_data_req_of_cpu.mem_write.part0",
+ "BriefDescription": "PCI Express bandwidth reading at IIO, part 1",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "LLC_MISSES.PCIE_WRITE",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1",
"FCMask": "0x07",
- "Filter": "ch_mask=0x1f",
- "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
- "MetricName": "LLC_MISSES.PCIE_WRITE",
"PerPkg": "1",
- "PortMask": "0x01",
+ "PortMask": "0x02",
"ScaleUnit": "4Bytes",
- "UMask": "0x01",
+ "UMask": "0x04",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth writing at IIO, part 0",
+ "BriefDescription": "PCI Express bandwidth reading at IIO, part 2",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2",
"FCMask": "0x07",
- "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
- "MetricName": "LLC_MISSES.PCIE_WRITE",
"PerPkg": "1",
- "PortMask": "0x01",
+ "PortMask": "0x04",
"ScaleUnit": "4Bytes",
- "UMask": "0x01",
+ "UMask": "0x04",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth writing at IIO, part 1",
+ "BriefDescription": "PCI Express bandwidth reading at IIO, part 3",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
"FCMask": "0x07",
"PerPkg": "1",
- "PortMask": "0x02",
+ "PortMask": "0x08",
"ScaleUnit": "4Bytes",
- "UMask": "0x01",
+ "UMask": "0x04",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth writing at IIO, part 2",
+ "BriefDescription": "PCI Express bandwidth reading at IIO. Derived from unc_iio_data_req_of_cpu.mem_read.part0",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2",
+ "EventName": "LLC_MISSES.PCIE_READ",
"FCMask": "0x07",
+ "Filter": "ch_mask=0x1f",
+ "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
+ "MetricName": "LLC_MISSES.PCIE_READ",
"PerPkg": "1",
- "PortMask": "0x04",
+ "PortMask": "0x01",
"ScaleUnit": "4Bytes",
- "UMask": "0x01",
+ "UMask": "0x04",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth writing at IIO, part 3",
+ "BriefDescription": "PCI Express bandwidth writing at IIO, part 0",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0",
"FCMask": "0x07",
"PerPkg": "1",
- "PortMask": "0x08",
+ "PortMask": "0x01",
"ScaleUnit": "4Bytes",
"UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth reading at IIO, part 0",
+ "BriefDescription": "PCI Express bandwidth writing at IIO, part 1",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1",
"FCMask": "0x07",
- "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
- "MetricName": "LLC_MISSES.PCIE_READ",
"PerPkg": "1",
- "PortMask": "0x01",
+ "PortMask": "0x02",
"ScaleUnit": "4Bytes",
- "UMask": "0x04",
+ "UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth reading at IIO, part 1",
+ "BriefDescription": "PCI Express bandwidth writing at IIO, part 2",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2",
"FCMask": "0x07",
"PerPkg": "1",
- "PortMask": "0x02",
+ "PortMask": "0x04",
"ScaleUnit": "4Bytes",
- "UMask": "0x04",
+ "UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth reading at IIO, part 2",
+ "BriefDescription": "PCI Express bandwidth writing at IIO, part 3",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
"FCMask": "0x07",
"PerPkg": "1",
- "PortMask": "0x04",
+ "PortMask": "0x08",
"ScaleUnit": "4Bytes",
- "UMask": "0x04",
+ "UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "PCI Express bandwidth reading at IIO, part 3",
+ "BriefDescription": "PCI Express bandwidth writing at IIO. Derived from unc_iio_data_req_of_cpu.mem_write.part0",
"Counter": "0,1",
"EventCode": "0x83",
- "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
+ "EventName": "LLC_MISSES.PCIE_WRITE",
"FCMask": "0x07",
+ "Filter": "ch_mask=0x1f",
+ "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
+ "MetricName": "LLC_MISSES.PCIE_WRITE",
"PerPkg": "1",
- "PortMask": "0x08",
+ "PortMask": "0x01",
"ScaleUnit": "4Bytes",
- "UMask": "0x04",
+ "UMask": "0x01",
"Unit": "IIO"
},
{
@@ -313,6 +309,16 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "FaST wire asserted; Horizontal",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xA5",
+ "EventName": "UNC_CHA_FAST_ASSERTED.HORZ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles either the local or incoming distress signals are asserted. Incoming distress includes up, dn and across.",
+ "UMask": "0x02",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "Read request from a remote socket which hit in the HitMe Cache to a line In the E state",
"Counter": "0,1,2,3",
"EventCode": "0x5F",
@@ -343,6 +349,46 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "Lines Victimized; Lines in E state",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_E",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.",
+ "UMask": "0x02",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Lines Victimized; Lines in F State",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_F",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.",
+ "UMask": "0x08",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Lines Victimized; Lines in M state",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_M",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Lines Victimized; Lines in S State",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_S",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.",
+ "UMask": "0x04",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "Number of times that an RFO hit in S state.",
"Counter": "0,1,2,3",
"EventCode": "0x39",
@@ -373,6 +419,65 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "Ingress (from CMS) Allocations; IRQ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x13",
+ "EventName": "UNC_CHA_RxC_INSERTS.IRQ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts number of allocations per cycle into the specified Ingress queue.",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Ingress (from CMS) Request Queue Rejects; PhyAddr Match",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x19",
+ "EventName": "UNC_CHA_RxC_IRQ1_REJECT.PA_MATCH",
+ "PerPkg": "1",
+ "PublicDescription": "Ingress (from CMS) Request Queue Rejects; PhyAddr Match",
+ "UMask": "0x80",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Ingress (from CMS) Occupancy; IRQ",
+ "EventCode": "0x11",
+ "EventName": "UNC_CHA_RxC_OCCUPANCY.IRQ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts number of entries in the specified Ingress queue in each cycle.",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Snoop filter capacity evictions for E-state entries.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x3D",
+ "EventName": "UNC_CHA_SF_EVICTION.E_STATE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts snoop filter capacity evictions for entries tracking exclusive lines in the cores cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a cores cache replaces a tracked cacheline with a new cacheline.",
+ "UMask": "0x02",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Snoop filter capacity evictions for M-state entries.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x3D",
+ "EventName": "UNC_CHA_SF_EVICTION.M_STATE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts snoop filter capacity evictions for entries tracking modified lines in the cores cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a cores cache replaces a tracked cacheline with a new cacheline.",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Snoop filter capacity evictions for S-state entries.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x3D",
+ "EventName": "UNC_CHA_SF_EVICTION.S_STATE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts snoop filter capacity evictions for entries tracking shared lines in the cores cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a cores cache replaces a tracked cacheline with a new cacheline.",
+ "UMask": "0x04",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "RspCnflct* Snoop Responses Received",
"Counter": "0,1,2,3",
"EventCode": "0x5C",
@@ -449,7 +554,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x01",
- "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part0. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part0. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -461,7 +566,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x02",
- "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -473,7 +578,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x04",
- "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -485,7 +590,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x08",
- "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -497,7 +602,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x01",
- "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
@@ -509,35 +614,227 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x02",
- "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "Write request of 4 bytes made to IIO Part2 by the CPU ",
+ "BriefDescription": "Write request of 4 bytes made to IIO Part2 by the CPU",
"Counter": "2,3",
"EventCode": "0xC0",
"EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART2",
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x04",
- "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "Write request of 4 bytes made to IIO Part3 by the CPU ",
+ "BriefDescription": "Write request of 4 bytes made to IIO Part3 by the CPU",
"Counter": "2,3",
"EventCode": "0xC0",
"EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART3",
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x08",
- "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
{
+ "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part0",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part0. Does not include requests made by the same IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part1",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part1. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part2",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part2. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part3",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part3. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part0 by a different IIO unit",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part0 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part1 by a different IIO unit",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part1 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part2 by a different IIO unit",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part2 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part3 by a different IIO unit",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part3 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part0 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part1 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part1 to the MMIO space of an IIO target. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part2 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part3 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part1 to the MMIO space of an IIO target. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
"BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part0",
"Counter": "0,1,2,3",
"EventCode": "0xC1",
@@ -545,7 +842,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x01",
- "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part0. In the general case, part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part0. In the general case, part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -557,7 +854,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x02",
- "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -569,7 +866,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x04",
- "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -581,7 +878,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x08",
- "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
"UMask": "0x04",
"Unit": "IIO"
},
@@ -593,7 +890,7 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x01",
- "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
@@ -605,35 +902,131 @@
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x02",
- "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part2 by the CPU ",
+ "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part2 by the CPU",
"Counter": "0,1,2,3",
"EventCode": "0xC1",
"EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART2",
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x04",
- "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
{
- "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part3 by the CPU ",
+ "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part3 by the CPU",
"Counter": "0,1,2,3",
"EventCode": "0xC1",
"EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART3",
"FCMask": "0x07",
"PerPkg": "1",
"PortMask": "0x08",
- "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
"UMask": "0x01",
"Unit": "IIO"
},
{
+ "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part0",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part0. Does not include requests made by the same IIO unit. In the general case, part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part1",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part1. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part2. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part3",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part3. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part0 by a different IIO unit",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part0 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part1 by a different IIO unit",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part1 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part2 by a different IIO unit",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part2 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part3 by a different IIO unit",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part3 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
"BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part0 to Memory",
"Counter": "0,1,2,3",
"EventCode": "0x84",
@@ -730,6 +1123,102 @@
"Unit": "IIO"
},
{
+ "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part0 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part1 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part1 to the MMIO space of an IIO target. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part2 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part3 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x08",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part0 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part1 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part1 to the MMIO space of an IIO target.In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part2 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part3 to an IIO target",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x02",
+ "Unit": "IIO"
+ },
+ {
"BriefDescription": "Traffic in which the M2M to iMC Bypass was not taken",
"Counter": "0,1,2,3",
"EventCode": "0x22",
@@ -813,7 +1302,7 @@
"Unit": "M2M"
},
{
- "BriefDescription": "Multi-socket cacheline Directory lookups (cacheline found in A state) ",
+ "BriefDescription": "Multi-socket cacheline Directory lookups (cacheline found in A state)",
"Counter": "0,1,2,3",
"EventCode": "0x2D",
"EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_A",
@@ -823,7 +1312,7 @@
"Unit": "M2M"
},
{
- "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in I state) ",
+ "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in I state)",
"Counter": "0,1,2,3",
"EventCode": "0x2D",
"EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_I",
@@ -833,7 +1322,7 @@
"Unit": "M2M"
},
{
- "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in S state) ",
+ "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in S state)",
"Counter": "0,1,2,3",
"EventCode": "0x2D",
"EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_S",
@@ -863,7 +1352,7 @@
"Unit": "M2M"
},
{
- "BriefDescription": "Multi-socket cacheline Directory update from/to Any state ",
+ "BriefDescription": "Multi-socket cacheline Directory update from/to Any state",
"Counter": "0,1,2,3",
"EventCode": "0x2E",
"EventName": "UNC_M2M_DIRECTORY_UPDATE.ANY",
@@ -918,7 +1407,7 @@
"EventCode": "0x37",
"EventName": "UNC_M2M_IMC_READS.ALL",
"PerPkg": "1",
- "PublicDescription": "Counts when the M2M (Mesh to Memory) issues reads to the iMC (Memory Controller). ",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) issues reads to the iMC (Memory Controller).",
"UMask": "0x4",
"Unit": "M2M"
},
@@ -943,6 +1432,16 @@
"Unit": "M2M"
},
{
+ "BriefDescription": "M2M Writes Issued to iMC; All, regardless of priority.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x38",
+ "EventName": "UNC_M2M_IMC_WRITES.NI",
+ "PerPkg": "1",
+ "PublicDescription": "M2M Writes Issued to iMC; All, regardless of priority.",
+ "UMask": "0x80",
+ "Unit": "M2M"
+ },
+ {
"BriefDescription": "Partial Non-Isochronous writes to the iMC",
"Counter": "0,1,2,3",
"EventCode": "0x38",
@@ -976,12 +1475,77 @@
"EventCode": "0x1",
"EventName": "UNC_M2M_RxC_AD_INSERTS",
"PerPkg": "1",
- "PublicDescription": "Counts when the a new entry is Received(RxC) and then added to the AD (Address Ring) Ingress Queue from the CMS (Common Mesh Stop). This is generally used for reads, and ",
+ "PublicDescription": "Counts when the a new entry is Received(RxC) and then added to the AD (Address Ring) Ingress Queue from the CMS (Common Mesh Stop). This is generally used for reads, and",
"Unit": "M2M"
},
{
- "BriefDescription": "Prefetches generated by the flow control queue of the M3UPI unit.",
+ "BriefDescription": "AD Ingress (from CMS) Occupancy",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M2M_RxC_AD_OCCUPANCY",
+ "PerPkg": "1",
+ "PublicDescription": "AD Ingress (from CMS) Occupancy",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "BL Ingress (from CMS) Allocations",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5",
+ "EventName": "UNC_M2M_RxC_BL_INSERTS",
+ "PerPkg": "1",
+ "PublicDescription": "BL Ingress (from CMS) Allocations",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "BL Ingress (from CMS) Occupancy",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x6",
+ "EventName": "UNC_M2M_RxC_BL_OCCUPANCY",
+ "PerPkg": "1",
+ "PublicDescription": "BL Ingress (from CMS) Occupancy",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "AD Egress (to CMS) Allocations",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x9",
+ "EventName": "UNC_M2M_TxC_AD_INSERTS",
+ "PerPkg": "1",
+ "PublicDescription": "AD Egress (to CMS) Allocations",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "AD Egress (to CMS) Occupancy",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xA",
+ "EventName": "UNC_M2M_TxC_AD_OCCUPANCY",
+ "PerPkg": "1",
+ "PublicDescription": "AD Egress (to CMS) Occupancy",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "BL Egress (to CMS) Allocations; All",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x15",
+ "EventName": "UNC_M2M_TxC_BL_INSERTS.ALL",
+ "PerPkg": "1",
+ "PublicDescription": "BL Egress (to CMS) Allocations; All",
+ "UMask": "0x03",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "BL Egress (to CMS) Occupancy; All",
"Counter": "0,1,2,3",
+ "EventCode": "0x16",
+ "EventName": "UNC_M2M_TxC_BL_OCCUPANCY.ALL",
+ "PerPkg": "1",
+ "PublicDescription": "BL Egress (to CMS) Occupancy; All",
+ "UMask": "0x03",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Prefetches generated by the flow control queue of the M3UPI unit.",
+ "Counter": "0,1,2",
"EventCode": "0x29",
"EventName": "UNC_M3UPI_UPI_PREFETCH_SPAWN",
"PerPkg": "1",
@@ -1114,23 +1678,23 @@
"Unit": "UPI LL"
},
{
- "BriefDescription": "UPI interconnect send bandwidth for payload. Derived from unc_upi_txl_flits.all_data",
+ "BriefDescription": "Null FLITs transmitted from any slot",
"Counter": "0,1,2,3",
"EventCode": "0x2",
- "EventName": "UPI_DATA_BANDWIDTH_TX",
+ "EventName": "UNC_UPI_TxL_FLITS.ALL_NULL",
"PerPkg": "1",
- "ScaleUnit": "7.11E-06Bytes",
- "UMask": "0x0F",
+ "PublicDescription": "Counts null FLITs (80 bit FLow control unITs) transmitted via any of the 3 Intel Ulra Path Interconnect (UPI) slots on this UPI unit.",
+ "UMask": "0x27",
"Unit": "UPI LL"
},
{
- "BriefDescription": "Null FLITs transmitted from any slot",
+ "BriefDescription": "Valid Flits Sent; Data",
"Counter": "0,1,2,3",
"EventCode": "0x2",
- "EventName": "UNC_UPI_TxL_FLITS.ALL_NULL",
+ "EventName": "UNC_UPI_TxL_FLITS.DATA",
"PerPkg": "1",
- "PublicDescription": "Counts null FLITs (80 bit FLow control unITs) transmitted via any of the 3 Intel Ulra Path Interconnect (UPI) slots on this UPI unit.",
- "UMask": "0x27",
+ "PublicDescription": "Shows legal flit time (hides impact of L0p and L0c).; Count Data Flits (which consume all slots), but how much to count is based on Slot0-2 mask, so count can be 0-3 depending on which slots are enabled for counting..",
+ "UMask": "0x8",
"Unit": "UPI LL"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json
index 7f466c97e485..bbeee1058096 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json
@@ -1,284 +1,284 @@
[
{
- "EventCode": "0x08",
- "UMask": "0x1",
- "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+ "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
"Counter": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
- "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.STLB_HIT",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
},
{
- "EventCode": "0x08",
- "UMask": "0x2",
- "BriefDescription": "Page walk completed due to a demand data load to a 4K page",
+ "BriefDescription": "Store misses in all DTLB levels that cause page walks",
"Counter": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+ "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
},
{
- "EventCode": "0x08",
- "UMask": "0x4",
- "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
+ "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
"Counter": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
},
{
- "EventCode": "0x08",
- "UMask": "0x8",
- "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
"Counter": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
},
{
- "EventCode": "0x08",
- "UMask": "0xe",
- "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
"Counter": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0x08",
- "UMask": "0x10",
- "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+ "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
"Counter": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
- "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xAE",
+ "EventName": "ITLB.ITLB_FLUSH",
+ "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
},
{
- "EventCode": "0x08",
- "UMask": "0x10",
- "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
"Counter": "0,1,2,3",
- "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_ACTIVE",
+ "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0x08",
- "UMask": "0x20",
"BriefDescription": "Loads that miss the DTLB and hit the STLB.",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
"EventName": "DTLB_LOAD_MISSES.STLB_HIT",
"PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0x49",
- "UMask": "0x1",
- "BriefDescription": "Store misses in all DTLB levels that cause page walks",
- "Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
- "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
},
{
- "EventCode": "0x49",
- "UMask": "0x2",
- "BriefDescription": "Page walk completed due to a demand data store to a 4K page",
- "Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0x49",
- "UMask": "0x4",
- "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
"Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0x49",
- "UMask": "0x8",
- "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
- "Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "EventCode": "0x49",
- "UMask": "0xe",
- "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries",
"Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xBD",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
+ "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x1"
},
{
- "EventCode": "0x49",
- "UMask": "0x10",
- "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
"Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
- "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.",
"SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0x49",
- "UMask": "0x10",
"BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
"Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
"PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x10"
},
{
- "EventCode": "0x49",
- "UMask": "0x20",
- "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
+ "BriefDescription": "Misses at all ITLB levels that cause page walks",
"Counter": "0,1,2,3",
- "EventName": "DTLB_STORE_MISSES.STLB_HIT",
- "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+ "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0x4F",
- "UMask": "0x10",
- "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
+ "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
"Counter": "0,1,2,3",
- "EventName": "EPT.WALK_PENDING",
- "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
- "SampleAfterValue": "2000003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
+ "SampleAfterValue": "100003",
+ "UMask": "0x20"
},
{
- "EventCode": "0x85",
- "UMask": "0x1",
- "BriefDescription": "Misses at all ITLB levels that cause page walks",
+ "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
"Counter": "0,1,2,3",
- "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
- "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0xe"
},
{
- "EventCode": "0x85",
- "UMask": "0x2",
- "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
"Counter": "0,1,2,3",
- "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0xe"
},
{
- "EventCode": "0x85",
- "UMask": "0x4",
- "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+ "BriefDescription": "Page walk completed due to a demand data store to a 4K page",
"Counter": "0,1,2,3",
- "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x2"
},
{
- "EventCode": "0x85",
- "UMask": "0x8",
"BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
"EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
"PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x8"
},
{
- "EventCode": "0x85",
- "UMask": "0xe",
"BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
"EventName": "ITLB_MISSES.WALK_COMPLETED",
"PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0xe"
},
{
- "EventCode": "0x85",
- "UMask": "0x10",
- "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
+ "BriefDescription": "Page walk completed due to a demand data load to a 4K page",
"Counter": "0,1,2,3",
- "EventName": "ITLB_MISSES.WALK_PENDING",
- "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
},
{
- "EventCode": "0x85",
- "UMask": "0x10",
- "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
+ "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
"Counter": "0,1,2,3",
- "EventName": "ITLB_MISSES.WALK_ACTIVE",
- "CounterMask": "1",
- "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
- "SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
},
{
- "EventCode": "0x85",
- "UMask": "0x20",
- "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
+ "BriefDescription": "Load misses in all DTLB levels that cause page walks",
"Counter": "0,1,2,3",
- "EventName": "ITLB_MISSES.STLB_HIT",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+ "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
"SampleAfterValue": "100003",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
- },
- {
- "EventCode": "0xAE",
- "UMask": "0x1",
- "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
- "Counter": "0,1,2,3",
- "EventName": "ITLB.ITLB_FLUSH",
- "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x1"
},
{
- "EventCode": "0xBD",
- "UMask": "0x1",
- "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
"Counter": "0,1,2,3",
- "EventName": "TLB_FLUSH.DTLB_THREAD",
- "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
- "SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x4F",
+ "EventName": "EPT.WALK_PENDING",
+ "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
},
{
- "EventCode": "0xBD",
- "UMask": "0x20",
"BriefDescription": "STLB flush attempts",
"Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xBD",
"EventName": "TLB_FLUSH.STLB_ANY",
"PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
"SampleAfterValue": "100007",
- "CounterHTOff": "0,1,2,3,4,5,6,7"
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+    "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+ "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x8"
}
]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index fa86c5f997cc..e47644cab3fa 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -48,11 +48,40 @@
#include <linux/list.h>
#include "jsmn.h"
#include "json.h"
-#include "jevents.h"
+#include "pmu-events.h"
int verbose;
char *prog;
+struct json_event {
+ char *name;
+ char *event;
+ char *desc;
+ char *long_desc;
+ char *pmu;
+ char *unit;
+ char *perpkg;
+ char *aggr_mode;
+ char *metric_expr;
+ char *metric_name;
+ char *metric_group;
+ char *deprecated;
+ char *metric_constraint;
+};
+
+enum aggr_mode_class convert(const char *aggr_mode)
+{
+ if (!strcmp(aggr_mode, "PerCore"))
+ return PerCore;
+ else if (!strcmp(aggr_mode, "PerChip"))
+ return PerChip;
+
+ pr_err("%s: Wrong AggregationMode value '%s'\n", prog, aggr_mode);
+ return -1;
+}
+
+typedef int (*func)(void *data, struct json_event *je);
+
int eprintf(int level, int var, const char *fmt, ...)
{
@@ -71,11 +100,6 @@ int eprintf(int level, int var, const char *fmt, ...)
return ret;
}
-__attribute__((weak)) char *get_cpu_str(void)
-{
- return NULL;
-}
-
static void addfield(char *map, char **dst, const char *sep,
const char *a, jsmntok_t *bt)
{
@@ -137,7 +161,7 @@ static char *fixregex(char *s)
return s;
/* allocate space for a new string */
- fixed = (char *) malloc(len + 1);
+ fixed = (char *) malloc(len + esc_count + 1);
if (!fixed)
return NULL;
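The hunk above fixes an undersized allocation: fixregex() inserts one extra backslash for every character it escapes, so the output needs len + esc_count + 1 bytes, not len + 1. A minimal standalone sketch of that sizing pattern (the escape set below is illustrative only; the real one lives in the unchanged part of fixregex()):

#include <stdlib.h>
#include <string.h>

/* Escape every ',' with a backslash.  The output grows by one byte per
 * escaped character, hence the len + esc_count + 1 allocation. */
static char *escape_commas(const char *s)
{
	size_t len = strlen(s), esc_count = 0;
	const char *p;
	char *fixed, *d;

	for (p = s; *p; p++)
		if (*p == ',')
			esc_count++;

	fixed = malloc(len + esc_count + 1);
	if (!fixed)
		return NULL;

	for (d = fixed, p = s; *p; p++) {
		if (*p == ',')
			*d++ = '\\';
		*d++ = *p;
	}
	*d = '\0';
	return fixed;
}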
@@ -240,6 +264,7 @@ static struct map {
{ "hisi_sccl,hha", "hisi_sccl,hha" },
{ "hisi_sccl,l3c", "hisi_sccl,l3c" },
{ "L3PMC", "amd_l3" },
+ { "DFPMC", "amd_df" },
{}
};
@@ -318,12 +343,7 @@ static void print_events_table_prefix(FILE *fp, const char *tblname)
close_table = 1;
}
-static int print_events_table_entry(void *data, char *name, char *event,
- char *desc, char *long_desc,
- char *pmu, char *unit, char *perpkg,
- char *metric_expr,
- char *metric_name, char *metric_group,
- char *deprecated, char *metric_constraint)
+static int print_events_table_entry(void *data, struct json_event *je)
{
struct perf_entry_data *pd = data;
FILE *outfp = pd->outfp;
@@ -335,30 +355,32 @@ static int print_events_table_entry(void *data, char *name, char *event,
*/
fprintf(outfp, "{\n");
- if (name)
- fprintf(outfp, "\t.name = \"%s\",\n", name);
- if (event)
- fprintf(outfp, "\t.event = \"%s\",\n", event);
- fprintf(outfp, "\t.desc = \"%s\",\n", desc);
+ if (je->name)
+ fprintf(outfp, "\t.name = \"%s\",\n", je->name);
+ if (je->event)
+ fprintf(outfp, "\t.event = \"%s\",\n", je->event);
+ fprintf(outfp, "\t.desc = \"%s\",\n", je->desc);
fprintf(outfp, "\t.topic = \"%s\",\n", topic);
- if (long_desc && long_desc[0])
- fprintf(outfp, "\t.long_desc = \"%s\",\n", long_desc);
- if (pmu)
- fprintf(outfp, "\t.pmu = \"%s\",\n", pmu);
- if (unit)
- fprintf(outfp, "\t.unit = \"%s\",\n", unit);
- if (perpkg)
- fprintf(outfp, "\t.perpkg = \"%s\",\n", perpkg);
- if (metric_expr)
- fprintf(outfp, "\t.metric_expr = \"%s\",\n", metric_expr);
- if (metric_name)
- fprintf(outfp, "\t.metric_name = \"%s\",\n", metric_name);
- if (metric_group)
- fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group);
- if (deprecated)
- fprintf(outfp, "\t.deprecated = \"%s\",\n", deprecated);
- if (metric_constraint)
- fprintf(outfp, "\t.metric_constraint = \"%s\",\n", metric_constraint);
+ if (je->long_desc && je->long_desc[0])
+ fprintf(outfp, "\t.long_desc = \"%s\",\n", je->long_desc);
+ if (je->pmu)
+ fprintf(outfp, "\t.pmu = \"%s\",\n", je->pmu);
+ if (je->unit)
+ fprintf(outfp, "\t.unit = \"%s\",\n", je->unit);
+ if (je->perpkg)
+ fprintf(outfp, "\t.perpkg = \"%s\",\n", je->perpkg);
+ if (je->aggr_mode)
+ fprintf(outfp, "\t.aggr_mode = \"%d\",\n", convert(je->aggr_mode));
+ if (je->metric_expr)
+ fprintf(outfp, "\t.metric_expr = \"%s\",\n", je->metric_expr);
+ if (je->metric_name)
+ fprintf(outfp, "\t.metric_name = \"%s\",\n", je->metric_name);
+ if (je->metric_group)
+ fprintf(outfp, "\t.metric_group = \"%s\",\n", je->metric_group);
+ if (je->deprecated)
+ fprintf(outfp, "\t.deprecated = \"%s\",\n", je->deprecated);
+ if (je->metric_constraint)
+ fprintf(outfp, "\t.metric_constraint = \"%s\",\n", je->metric_constraint);
fprintf(outfp, "},\n");
return 0;
@@ -373,6 +395,7 @@ struct event_struct {
char *pmu;
char *unit;
char *perpkg;
+ char *aggr_mode;
char *metric_expr;
char *metric_name;
char *metric_group;
@@ -380,17 +403,17 @@ struct event_struct {
char *metric_constraint;
};
-#define ADD_EVENT_FIELD(field) do { if (field) { \
- es->field = strdup(field); \
+#define ADD_EVENT_FIELD(field) do { if (je->field) { \
+ es->field = strdup(je->field); \
if (!es->field) \
goto out_free; \
} } while (0)
#define FREE_EVENT_FIELD(field) free(es->field)
-#define TRY_FIXUP_FIELD(field) do { if (es->field && !*field) {\
- *field = strdup(es->field); \
- if (!*field) \
+#define TRY_FIXUP_FIELD(field) do { if (es->field && !je->field) {\
+ je->field = strdup(es->field); \
+ if (!je->field) \
return -ENOMEM; \
} } while (0)
@@ -402,6 +425,7 @@ struct event_struct {
op(pmu); \
op(unit); \
op(perpkg); \
+ op(aggr_mode); \
op(metric_expr); \
op(metric_name); \
op(metric_group); \
@@ -421,11 +445,7 @@ static void free_arch_std_events(void)
}
}
-static int save_arch_std_events(void *data, char *name, char *event,
- char *desc, char *long_desc, char *pmu,
- char *unit, char *perpkg, char *metric_expr,
- char *metric_name, char *metric_group,
- char *deprecated, char *metric_constraint)
+static int save_arch_std_events(void *data, struct json_event *je)
{
struct event_struct *es;
@@ -485,23 +505,15 @@ static char *real_event(const char *name, char *event)
}
static int
-try_fixup(const char *fn, char *arch_std, char **event, char **desc,
- char **name, char **long_desc, char **pmu, char **filter,
- char **perpkg, char **unit, char **metric_expr, char **metric_name,
- char **metric_group, unsigned long long eventcode,
- char **deprecated, char **metric_constraint)
+try_fixup(const char *fn, char *arch_std, struct json_event *je, char **event)
{
/* try to find matching event from arch standard values */
struct event_struct *es;
list_for_each_entry(es, &arch_std_events, list) {
if (!strcmp(arch_std, es->name)) {
- if (!eventcode && es->event) {
- /* allow EventCode to be overridden */
- free(*event);
- *event = NULL;
- }
FOR_ALL_EVENT_STRUCT_FIELDS(TRY_FIXUP_FIELD);
+ *event = je->event;
return 0;
}
}
@@ -512,14 +524,9 @@ try_fixup(const char *fn, char *arch_std, char **event, char **desc,
}
/* Call func with each event in the json file */
-int json_events(const char *fn,
- int (*func)(void *data, char *name, char *event, char *desc,
- char *long_desc,
- char *pmu, char *unit, char *perpkg,
- char *metric_expr,
- char *metric_name, char *metric_group,
- char *deprecated, char *metric_constraint),
- void *data)
+static int json_events(const char *fn,
+ int (*func)(void *data, struct json_event *je),
+ void *data)
{
int err;
size_t size;
@@ -537,18 +544,10 @@ int json_events(const char *fn,
EXPECT(tokens->type == JSMN_ARRAY, tokens, "expected top level array");
tok = tokens + 1;
for (i = 0; i < tokens->size; i++) {
- char *event = NULL, *desc = NULL, *name = NULL;
- char *long_desc = NULL;
+ char *event = NULL;
char *extra_desc = NULL;
- char *pmu = NULL;
char *filter = NULL;
- char *perpkg = NULL;
- char *unit = NULL;
- char *metric_expr = NULL;
- char *metric_name = NULL;
- char *metric_group = NULL;
- char *deprecated = NULL;
- char *metric_constraint = NULL;
+ struct json_event je = {};
char *arch_std = NULL;
unsigned long long eventcode = 0;
struct msrmap *msr = NULL;
@@ -583,14 +582,14 @@ int json_events(const char *fn,
eventcode |= strtoul(code, NULL, 0) << 21;
free(code);
} else if (json_streq(map, field, "EventName")) {
- addfield(map, &name, "", "", val);
+ addfield(map, &je.name, "", "", val);
} else if (json_streq(map, field, "BriefDescription")) {
- addfield(map, &desc, "", "", val);
- fixdesc(desc);
+ addfield(map, &je.desc, "", "", val);
+ fixdesc(je.desc);
} else if (json_streq(map, field,
"PublicDescription")) {
- addfield(map, &long_desc, "", "", val);
- fixdesc(long_desc);
+ addfield(map, &je.long_desc, "", "", val);
+ fixdesc(je.long_desc);
} else if (json_streq(map, field, "PEBS") && nz) {
precise = val;
} else if (json_streq(map, field, "MSRIndex") && nz) {
@@ -610,34 +609,36 @@ int json_events(const char *fn,
ppmu = field_to_perf(unit_to_pmu, map, val);
if (ppmu) {
- pmu = strdup(ppmu);
+ je.pmu = strdup(ppmu);
} else {
- if (!pmu)
- pmu = strdup("uncore_");
- addfield(map, &pmu, "", "", val);
- for (s = pmu; *s; s++)
+ if (!je.pmu)
+ je.pmu = strdup("uncore_");
+ addfield(map, &je.pmu, "", "", val);
+ for (s = je.pmu; *s; s++)
*s = tolower(*s);
}
- addfield(map, &desc, ". ", "Unit: ", NULL);
- addfield(map, &desc, "", pmu, NULL);
- addfield(map, &desc, "", " ", NULL);
+ addfield(map, &je.desc, ". ", "Unit: ", NULL);
+ addfield(map, &je.desc, "", je.pmu, NULL);
+ addfield(map, &je.desc, "", " ", NULL);
} else if (json_streq(map, field, "Filter")) {
addfield(map, &filter, "", "", val);
} else if (json_streq(map, field, "ScaleUnit")) {
- addfield(map, &unit, "", "", val);
+ addfield(map, &je.unit, "", "", val);
} else if (json_streq(map, field, "PerPkg")) {
- addfield(map, &perpkg, "", "", val);
+ addfield(map, &je.perpkg, "", "", val);
+ } else if (json_streq(map, field, "AggregationMode")) {
+ addfield(map, &je.aggr_mode, "", "", val);
} else if (json_streq(map, field, "Deprecated")) {
- addfield(map, &deprecated, "", "", val);
+ addfield(map, &je.deprecated, "", "", val);
} else if (json_streq(map, field, "MetricName")) {
- addfield(map, &metric_name, "", "", val);
+ addfield(map, &je.metric_name, "", "", val);
} else if (json_streq(map, field, "MetricGroup")) {
- addfield(map, &metric_group, "", "", val);
+ addfield(map, &je.metric_group, "", "", val);
} else if (json_streq(map, field, "MetricConstraint")) {
- addfield(map, &metric_constraint, "", "", val);
+ addfield(map, &je.metric_constraint, "", "", val);
} else if (json_streq(map, field, "MetricExpr")) {
- addfield(map, &metric_expr, "", "", val);
- for (s = metric_expr; *s; s++)
+ addfield(map, &je.metric_expr, "", "", val);
+ for (s = je.metric_expr; *s; s++)
*s = tolower(*s);
} else if (json_streq(map, field, "ArchStdEvent")) {
addfield(map, &arch_std, "", "", val);
@@ -646,7 +647,7 @@ int json_events(const char *fn,
}
/* ignore unknown fields */
}
- if (precise && desc && !strstr(desc, "(Precise Event)")) {
+ if (precise && je.desc && !strstr(je.desc, "(Precise Event)")) {
if (json_streq(map, precise, "2"))
addfield(map, &extra_desc, " ",
"(Must be precise)", NULL);
@@ -656,48 +657,44 @@ int json_events(const char *fn,
}
snprintf(buf, sizeof buf, "event=%#llx", eventcode);
addfield(map, &event, ",", buf, NULL);
- if (desc && extra_desc)
- addfield(map, &desc, " ", extra_desc, NULL);
- if (long_desc && extra_desc)
- addfield(map, &long_desc, " ", extra_desc, NULL);
+ if (je.desc && extra_desc)
+ addfield(map, &je.desc, " ", extra_desc, NULL);
+ if (je.long_desc && extra_desc)
+ addfield(map, &je.long_desc, " ", extra_desc, NULL);
if (filter)
addfield(map, &event, ",", filter, NULL);
if (msr != NULL)
addfield(map, &event, ",", msr->pname, msrval);
- if (name)
- fixname(name);
+ if (je.name)
+ fixname(je.name);
if (arch_std) {
/*
* An arch standard event is referenced, so try to
* fixup any unassigned values.
*/
- err = try_fixup(fn, arch_std, &event, &desc, &name,
- &long_desc, &pmu, &filter, &perpkg,
- &unit, &metric_expr, &metric_name,
- &metric_group, eventcode,
- &deprecated, &metric_constraint);
+ err = try_fixup(fn, arch_std, &je, &event);
if (err)
goto free_strings;
}
- err = func(data, name, real_event(name, event), desc, long_desc,
- pmu, unit, perpkg, metric_expr, metric_name,
- metric_group, deprecated, metric_constraint);
+ je.event = real_event(je.name, event);
+ err = func(data, &je);
free_strings:
free(event);
- free(desc);
- free(name);
- free(long_desc);
+ free(je.desc);
+ free(je.name);
+ free(je.long_desc);
free(extra_desc);
- free(pmu);
+ free(je.pmu);
free(filter);
- free(perpkg);
- free(deprecated);
- free(unit);
- free(metric_expr);
- free(metric_name);
- free(metric_group);
- free(metric_constraint);
+ free(je.perpkg);
+ free(je.aggr_mode);
+ free(je.deprecated);
+ free(je.unit);
+ free(je.metric_expr);
+ free(je.metric_name);
+ free(je.metric_group);
+ free(je.metric_constraint);
free(arch_std);
if (err)
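For context, the jevents.c rework above collapses the long positional callback signature into a single struct json_event argument; json_events() now fills one struct per JSON entry and calls func(data, &je). A minimal sketch of a callback written against the new interface (the struct fields are copied from the hunk above; print_deprecated is a hypothetical consumer, not part of the patch):

#include <stdio.h>
#include <string.h>

/* Field-for-field copy of the struct json_event introduced above. */
struct json_event {
	char *name;
	char *event;
	char *desc;
	char *long_desc;
	char *pmu;
	char *unit;
	char *perpkg;
	char *aggr_mode;
	char *metric_expr;
	char *metric_name;
	char *metric_group;
	char *deprecated;
	char *metric_constraint;
};

/* Hypothetical consumer in the new style: json_events() now invokes
 * func(data, &je) once per JSON entry instead of passing a dozen
 * char * arguments positionally. */
static int print_deprecated(void *data, struct json_event *je)
{
	FILE *out = data;

	if (je->deprecated && !strcmp(je->deprecated, "1"))
		fprintf(out, "%s is deprecated\n", je->name);
	return 0;
}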
diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h
deleted file mode 100644
index 2afc8304529e..000000000000
--- a/tools/perf/pmu-events/jevents.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef JEVENTS_H
-#define JEVENTS_H 1
-
-int json_events(const char *fn,
- int (*func)(void *data, char *name, char *event, char *desc,
- char *long_desc,
- char *pmu,
- char *unit, char *perpkg, char *metric_expr,
- char *metric_name, char *metric_group,
- char *deprecated, char *metric_constraint),
- void *data);
-char *get_cpu_str(void);
-
-#ifndef min
-#define min(x, y) ({ \
- typeof(x) _min1 = (x); \
- typeof(y) _min2 = (y); \
- (void) (&_min1 == &_min2); \
- _min1 < _min2 ? _min1 : _min2; })
-#endif
-
-#endif
diff --git a/tools/perf/pmu-events/jsmn.h b/tools/perf/pmu-events/jsmn.h
index c7b0f6ea2a31..1bdfd55fff30 100644
--- a/tools/perf/pmu-events/jsmn.h
+++ b/tools/perf/pmu-events/jsmn.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: MIT */
#ifndef __JSMN_H_
#define __JSMN_H_
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index 53e76d5d5b37..7da1a3743b77 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -2,6 +2,11 @@
#ifndef PMU_EVENTS_H
#define PMU_EVENTS_H
+enum aggr_mode_class {
+ PerChip = 1,
+ PerCore
+};
+
/*
* Describe each PMU event. Each CPU has a table of PMU events.
*/
@@ -14,6 +19,7 @@ struct pmu_event {
const char *pmu;
const char *unit;
const char *perpkg;
+ const char *aggr_mode;
const char *metric_expr;
const char *metric_name;
const char *metric_group;
@@ -26,7 +32,7 @@ struct pmu_event {
* Map a CPU to its table of PMU events. The CPU is identified by the
* cpuid field, which is an arch-specific identifier for the CPU.
* The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile
- * must match the get_cpustr() in tools/perf/arch/xxx/util/header.c)
+ * must match the get_cpuid_str() in tools/perf/arch/xxx/util/header.c)
*
* The cpuid can contain any character other than the comma.
*/
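Tying the two files together: a JSON entry carrying "AggregationMode": "PerChip" is parsed into je.aggr_mode, mapped through convert() to the enum above, and emitted by print_events_table_entry() as a numeric string. A hedged sketch of what such a generated initializer could look like (event name and values are invented; only a subset of the struct pmu_event fields is repeated here):

/* Reduced copy of struct pmu_event; the full definition (with long_desc,
 * pmu, unit, perpkg and metric_* fields) is in pmu-events.h above. */
struct pmu_event {
	const char *name;
	const char *event;
	const char *desc;
	const char *topic;
	const char *aggr_mode;
};

/* Hypothetical output of print_events_table_entry() for an entry with
 * "AggregationMode": "PerChip"; convert("PerChip") == 1 per the enum. */
static const struct pmu_event example_pmu_events[] = {
{
	.name = "example_chip_event",
	.event = "event=0x1e0",
	.desc = "Invented per-chip counter",
	.topic = "other",
	.aggr_mode = "1",
},
};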
diff --git a/tools/perf/scripts/python/bin/flamegraph-record b/tools/perf/scripts/python/bin/flamegraph-record
new file mode 100755
index 000000000000..7df5a19c0163
--- /dev/null
+++ b/tools/perf/scripts/python/bin/flamegraph-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -g "$@"
diff --git a/tools/perf/scripts/python/bin/flamegraph-report b/tools/perf/scripts/python/bin/flamegraph-report
new file mode 100755
index 000000000000..53c5dc90c87e
--- /dev/null
+++ b/tools/perf/scripts/python/bin/flamegraph-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: create flame graphs
+perf script -s "$PERF_EXEC_PATH"/scripts/python/flamegraph.py -- "$@"
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index 7bd73a904b4e..d187e46c2683 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -1055,7 +1055,7 @@ def cbr(id, raw_buf):
cbr = data[0]
MHz = (data[4] + 500) / 1000
percent = ((cbr * 1000 / data[2]) + 5) / 10
- value = struct.pack("!hiqiiiiii", 4, 8, id, 4, cbr, 4, MHz, 4, percent)
+ value = struct.pack("!hiqiiiiii", 4, 8, id, 4, cbr, 4, int(MHz), 4, int(percent))
cbr_file.write(value)
def mwait(id, raw_buf):
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 26d7be785288..7daa8bb70a5a 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -768,7 +768,8 @@ class CallGraphModel(CallGraphModelBase):
" FROM calls"
" INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
" INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
- " WHERE symbols.name" + match +
+ " WHERE calls.id <> 0"
+ " AND symbols.name" + match +
" GROUP BY comm_id, thread_id, call_path_id"
" ORDER BY comm_id, thread_id, call_path_id")
@@ -963,7 +964,8 @@ class CallTreeModel(CallGraphModelBase):
" FROM calls"
" INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
" INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
- " WHERE symbols.name" + match +
+ " WHERE calls.id <> 0"
+ " AND symbols.name" + match +
" ORDER BY comm_id, thread_id, call_time, calls.id")
def FindPath(self, query):
@@ -1050,6 +1052,7 @@ class TreeWindowBase(QMdiSubWindow):
child = self.model.index(row, 0, parent)
if child.internalPointer().dbid == dbid:
found = True
+ self.view.setExpanded(parent, True)
self.view.setCurrentIndex(child)
parent = child
break
@@ -1127,6 +1130,7 @@ class CallTreeWindow(TreeWindowBase):
child = self.model.index(row, 0, parent)
if child.internalPointer().dbid == dbid:
found = True
+ self.view.setExpanded(parent, True)
self.view.setCurrentIndex(child)
parent = child
break
@@ -1139,6 +1143,7 @@ class CallTreeWindow(TreeWindowBase):
return
last_child = None
for row in xrange(n):
+ self.view.setExpanded(parent, True)
child = self.model.index(row, 0, parent)
child_call_time = child.internalPointer().call_time
if child_call_time < time:
@@ -1151,9 +1156,11 @@ class CallTreeWindow(TreeWindowBase):
if not last_child:
if not found:
child = self.model.index(0, 0, parent)
+ self.view.setExpanded(parent, True)
self.view.setCurrentIndex(child)
return
found = True
+ self.view.setExpanded(parent, True)
self.view.setCurrentIndex(last_child)
parent = last_child
diff --git a/tools/perf/scripts/python/flamegraph.py b/tools/perf/scripts/python/flamegraph.py
new file mode 100755
index 000000000000..65780013f745
--- /dev/null
+++ b/tools/perf/scripts/python/flamegraph.py
@@ -0,0 +1,126 @@
+# flamegraph.py - create flame graphs from perf samples
+# SPDX-License-Identifier: GPL-2.0
+#
+# Usage:
+#
+# perf record -a -g -F 99 sleep 60
+# perf script report flamegraph
+#
+# Combined:
+#
+# perf script flamegraph -a -F 99 sleep 60
+#
+# Written by Andreas Gerstmayr <agerstmayr@redhat.com>
+# Flame Graphs invented by Brendan Gregg <bgregg@netflix.com>
+# Works in tandem with d3-flame-graph by Martin Spier <mspier@netflix.com>
+
+from __future__ import print_function
+import sys
+import os
+import io
+import argparse
+import json
+
+
+class Node:
+ def __init__(self, name, libtype=""):
+ self.name = name
+ self.libtype = libtype
+ self.value = 0
+ self.children = []
+
+ def toJSON(self):
+ return {
+ "n": self.name,
+ "l": self.libtype,
+ "v": self.value,
+ "c": self.children
+ }
+
+
+class FlameGraphCLI:
+ def __init__(self, args):
+ self.args = args
+ self.stack = Node("root")
+
+ if self.args.format == "html" and \
+ not os.path.isfile(self.args.template):
+ print("Flame Graph template {} does not exist. Please install "
+ "the js-d3-flame-graph (RPM) or libjs-d3-flame-graph (deb) "
+ "package, specify an existing flame graph template "
+ "(--template PATH) or another output format "
+ "(--format FORMAT).".format(self.args.template),
+ file=sys.stderr)
+ sys.exit(1)
+
+ def find_or_create_node(self, node, name, dso):
+ libtype = "kernel" if dso == "[kernel.kallsyms]" else ""
+ if name is None:
+ name = "[unknown]"
+
+ for child in node.children:
+ if child.name == name and child.libtype == libtype:
+ return child
+
+ child = Node(name, libtype)
+ node.children.append(child)
+ return child
+
+ def process_event(self, event):
+ node = self.find_or_create_node(self.stack, event["comm"], None)
+ if "callchain" in event:
+ for entry in reversed(event['callchain']):
+ node = self.find_or_create_node(
+ node, entry.get("sym", {}).get("name"), event.get("dso"))
+ else:
+ node = self.find_or_create_node(
+                node, event.get("symbol"), event.get("dso"))
+ node.value += 1
+
+ def trace_end(self):
+ json_str = json.dumps(self.stack, default=lambda x: x.toJSON())
+
+ if self.args.format == "html":
+ try:
+ with io.open(self.args.template, encoding="utf-8") as f:
+ output_str = f.read().replace("/** @flamegraph_json **/",
+ json_str)
+ except IOError as e:
+ print("Error reading template file: {}".format(e), file=sys.stderr)
+ sys.exit(1)
+ output_fn = self.args.output or "flamegraph.html"
+ else:
+ output_str = json_str
+ output_fn = self.args.output or "stacks.json"
+
+ if output_fn == "-":
+ with io.open(sys.stdout.fileno(), "w", encoding="utf-8", closefd=False) as out:
+ out.write(output_str)
+ else:
+ print("dumping data to {}".format(output_fn))
+ try:
+ with io.open(output_fn, "w", encoding="utf-8") as out:
+ out.write(output_str)
+ except IOError as e:
+ print("Error writing output file: {}".format(e), file=sys.stderr)
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Create flame graphs.")
+ parser.add_argument("-f", "--format",
+ default="html", choices=["json", "html"],
+ help="output file format")
+ parser.add_argument("-o", "--output",
+ help="output file name")
+ parser.add_argument("--template",
+ default="/usr/share/d3-flame-graph/d3-flamegraph-base.html",
+ help="path to flamegraph HTML template")
+ parser.add_argument("-i", "--input",
+ help=argparse.SUPPRESS)
+
+ args = parser.parse_args()
+ cli = FlameGraphCLI(args)
+
+ process_event = cli.process_event
+ trace_end = cli.trace_end
diff --git a/tools/perf/scripts/python/futex-contention.py b/tools/perf/scripts/python/futex-contention.py
index 0c4841acf75d..7e884d46f920 100644
--- a/tools/perf/scripts/python/futex-contention.py
+++ b/tools/perf/scripts/python/futex-contention.py
@@ -12,41 +12,46 @@
from __future__ import print_function
-import os, sys
-sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+import os
+import sys
+sys.path.append(os.environ['PERF_EXEC_PATH'] +
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
from Util import *
process_names = {}
thread_thislock = {}
thread_blocktime = {}
-lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time
-process_names = {} # long-lived pid-to-execname mapping
+lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time
+process_names = {} # long-lived pid-to-execname mapping
+
def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm, callchain,
- nr, uaddr, op, val, utime, uaddr2, val3):
- cmd = op & FUTEX_CMD_MASK
- if cmd != FUTEX_WAIT:
- return # we don't care about originators of WAKE events
+ nr, uaddr, op, val, utime, uaddr2, val3):
+ cmd = op & FUTEX_CMD_MASK
+ if cmd != FUTEX_WAIT:
+ return # we don't care about originators of WAKE events
+
+ process_names[tid] = comm
+ thread_thislock[tid] = uaddr
+ thread_blocktime[tid] = nsecs(s, ns)
- process_names[tid] = comm
- thread_thislock[tid] = uaddr
- thread_blocktime[tid] = nsecs(s, ns)
def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm, callchain,
- nr, ret):
- if tid in thread_blocktime:
- elapsed = nsecs(s, ns) - thread_blocktime[tid]
- add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed)
- del thread_blocktime[tid]
- del thread_thislock[tid]
+ nr, ret):
+ if tid in thread_blocktime:
+ elapsed = nsecs(s, ns) - thread_blocktime[tid]
+ add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed)
+ del thread_blocktime[tid]
+ del thread_thislock[tid]
+
def trace_begin():
- print("Press control+C to stop and show the summary")
+ print("Press control+C to stop and show the summary")
-def trace_end():
- for (tid, lock) in lock_waits:
- min, max, avg, count = lock_waits[tid, lock]
- print("%s[%d] lock %x contended %d times, %d avg ns" %
- (process_names[tid], tid, lock, count, avg))
+def trace_end():
+ for (tid, lock) in lock_waits:
+ min, max, avg, count = lock_waits[tid, lock]
+ print("%s[%d] lock %x contended %d times, %d avg ns [max: %d ns, min %d ns]" %
+ (process_names[tid], tid, lock, count, avg, max, min))
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index b3d1bf13ca07..4d15bf6041fb 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -56,6 +56,12 @@ perf-y += mem2node.o
perf-y += maps.o
perf-y += time-utils-test.o
perf-y += genelf.o
+perf-y += api-io.o
+perf-y += demangle-java-test.o
+perf-y += pfm.o
+perf-y += parse-metric.o
+perf-y += pe-file-parsing.o
+perf-y += expand-cgroup.o
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)
diff --git a/tools/perf/tests/api-io.c b/tools/perf/tests/api-io.c
new file mode 100644
index 000000000000..2ada86ad6084
--- /dev/null
+++ b/tools/perf/tests/api-io.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "debug.h"
+#include "tests.h"
+#include <api/io.h>
+#include <linux/kernel.h>
+
+#define TEMPL "/tmp/perf-test-XXXXXX"
+
+#define EXPECT_EQUAL(val, expected) \
+do { \
+ if (val != expected) { \
+ pr_debug("%s:%d: %d != %d\n", \
+ __FILE__, __LINE__, val, expected); \
+ ret = -1; \
+ } \
+} while (0)
+
+#define EXPECT_EQUAL64(val, expected) \
+do { \
+ if (val != expected) { \
+ pr_debug("%s:%d: %lld != %lld\n", \
+ __FILE__, __LINE__, val, expected); \
+ ret = -1; \
+ } \
+} while (0)
+
+static int make_test_file(char path[PATH_MAX], const char *contents)
+{
+ ssize_t contents_len = strlen(contents);
+ int fd;
+
+ strcpy(path, TEMPL);
+ fd = mkstemp(path);
+ if (fd < 0) {
+ pr_debug("mkstemp failed");
+ return -1;
+ }
+ if (write(fd, contents, contents_len) < contents_len) {
+ pr_debug("short write");
+ close(fd);
+ unlink(path);
+ return -1;
+ }
+ close(fd);
+ return 0;
+}
+
+static int setup_test(char path[PATH_MAX], const char *contents,
+ size_t buf_size, struct io *io)
+{
+ if (make_test_file(path, contents))
+ return -1;
+
+ io->fd = open(path, O_RDONLY);
+ if (io->fd < 0) {
+ pr_debug("Failed to open '%s'\n", path);
+ unlink(path);
+ return -1;
+ }
+ io->buf = malloc(buf_size);
+ if (io->buf == NULL) {
+ pr_debug("Failed to allocate memory");
+ close(io->fd);
+ unlink(path);
+ return -1;
+ }
+ io__init(io, io->fd, io->buf, buf_size);
+ return 0;
+}
+
+static void cleanup_test(char path[PATH_MAX], struct io *io)
+{
+ free(io->buf);
+ close(io->fd);
+ unlink(path);
+}
+
+static int do_test_get_char(const char *test_string, size_t buf_size)
+{
+ char path[PATH_MAX];
+ struct io io;
+ int ch, ret = 0;
+ size_t i;
+
+ if (setup_test(path, test_string, buf_size, &io))
+ return -1;
+
+ for (i = 0; i < strlen(test_string); i++) {
+ ch = io__get_char(&io);
+
+ EXPECT_EQUAL(ch, test_string[i]);
+ EXPECT_EQUAL(io.eof, false);
+ }
+ ch = io__get_char(&io);
+ EXPECT_EQUAL(ch, -1);
+ EXPECT_EQUAL(io.eof, true);
+
+ cleanup_test(path, &io);
+ return ret;
+}
+
+static int test_get_char(void)
+{
+ int i, ret = 0;
+ size_t j;
+
+ static const char *const test_strings[] = {
+ "12345678abcdef90",
+ "a\nb\nc\nd\n",
+ "\a\b\t\v\f\r",
+ };
+ for (i = 0; i <= 10; i++) {
+ for (j = 0; j < ARRAY_SIZE(test_strings); j++) {
+ if (do_test_get_char(test_strings[j], 1 << i))
+ ret = -1;
+ }
+ }
+ return ret;
+}
+
+static int do_test_get_hex(const char *test_string,
+ __u64 val1, int ch1,
+ __u64 val2, int ch2,
+ __u64 val3, int ch3,
+ bool end_eof)
+{
+ char path[PATH_MAX];
+ struct io io;
+ int ch, ret = 0;
+ __u64 hex;
+
+ if (setup_test(path, test_string, 4, &io))
+ return -1;
+
+ ch = io__get_hex(&io, &hex);
+ EXPECT_EQUAL64(hex, val1);
+ EXPECT_EQUAL(ch, ch1);
+
+ ch = io__get_hex(&io, &hex);
+ EXPECT_EQUAL64(hex, val2);
+ EXPECT_EQUAL(ch, ch2);
+
+ ch = io__get_hex(&io, &hex);
+ EXPECT_EQUAL64(hex, val3);
+ EXPECT_EQUAL(ch, ch3);
+
+ EXPECT_EQUAL(io.eof, end_eof);
+
+ cleanup_test(path, &io);
+ return ret;
+}
+
+static int test_get_hex(void)
+{
+ int ret = 0;
+
+ if (do_test_get_hex("12345678abcdef90",
+ 0x12345678abcdef90, -1,
+ 0, -1,
+ 0, -1,
+ true))
+ ret = -1;
+
+ if (do_test_get_hex("1\n2\n3\n",
+ 1, '\n',
+ 2, '\n',
+ 3, '\n',
+ false))
+ ret = -1;
+
+ if (do_test_get_hex("12345678ABCDEF90;a;b",
+ 0x12345678abcdef90, ';',
+ 0xa, ';',
+ 0xb, -1,
+ true))
+ ret = -1;
+
+ if (do_test_get_hex("0x1x2x",
+ 0, 'x',
+ 1, 'x',
+ 2, 'x',
+ false))
+ ret = -1;
+
+ if (do_test_get_hex("x1x",
+ 0, -2,
+ 1, 'x',
+ 0, -1,
+ true))
+ ret = -1;
+
+ if (do_test_get_hex("10000000000000000000000000000abcdefgh99i",
+ 0xabcdef, 'g',
+ 0, -2,
+ 0x99, 'i',
+ false))
+ ret = -1;
+
+ return ret;
+}
+
+static int do_test_get_dec(const char *test_string,
+ __u64 val1, int ch1,
+ __u64 val2, int ch2,
+ __u64 val3, int ch3,
+ bool end_eof)
+{
+ char path[PATH_MAX];
+ struct io io;
+ int ch, ret = 0;
+ __u64 dec;
+
+ if (setup_test(path, test_string, 4, &io))
+ return -1;
+
+ ch = io__get_dec(&io, &dec);
+ EXPECT_EQUAL64(dec, val1);
+ EXPECT_EQUAL(ch, ch1);
+
+ ch = io__get_dec(&io, &dec);
+ EXPECT_EQUAL64(dec, val2);
+ EXPECT_EQUAL(ch, ch2);
+
+ ch = io__get_dec(&io, &dec);
+ EXPECT_EQUAL64(dec, val3);
+ EXPECT_EQUAL(ch, ch3);
+
+ EXPECT_EQUAL(io.eof, end_eof);
+
+ cleanup_test(path, &io);
+ return ret;
+}
+
+static int test_get_dec(void)
+{
+ int ret = 0;
+
+ if (do_test_get_dec("12345678abcdef90",
+ 12345678, 'a',
+ 0, -2,
+ 0, -2,
+ false))
+ ret = -1;
+
+ if (do_test_get_dec("1\n2\n3\n",
+ 1, '\n',
+ 2, '\n',
+ 3, '\n',
+ false))
+ ret = -1;
+
+ if (do_test_get_dec("12345678;1;2",
+ 12345678, ';',
+ 1, ';',
+ 2, -1,
+ true))
+ ret = -1;
+
+ if (do_test_get_dec("0x1x2x",
+ 0, 'x',
+ 1, 'x',
+ 2, 'x',
+ false))
+ ret = -1;
+
+ if (do_test_get_dec("x1x",
+ 0, -2,
+ 1, 'x',
+ 0, -1,
+ true))
+ ret = -1;
+
+ if (do_test_get_dec("10000000000000000000000000000000000000000000000000000000000123456789ab99c",
+ 123456789, 'a',
+ 0, -2,
+ 99, 'c',
+ false))
+ ret = -1;
+
+ return ret;
+}
+
+int test__api_io(struct test *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ int ret = 0;
+
+ if (test_get_char())
+ ret = TEST_FAIL;
+ if (test_get_hex())
+ ret = TEST_FAIL;
+ if (test_get_dec())
+ ret = TEST_FAIL;
+ return ret;
+}
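The test above exercises the buffered reader helpers declared in api/io.h: io__init() attaches a file descriptor and buffer, and io__get_char()/io__get_hex()/io__get_dec() return the parsed value plus the terminating character, with -1 signalling end of file and -2 a parse error (as the EXPECT_EQUAL checks assume). A condensed usage sketch under those same assumptions:

#include <fcntl.h>
#include <unistd.h>
#include <linux/types.h>
#include <api/io.h>

/* Read the first decimal number from a file with the io helpers used by
 * the test above.  Returns 0 on success, -1 on open or parse failure. */
static int read_first_dec(const char *path, __u64 *val)
{
	struct io io;
	char buf[128];
	int fd, ch;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;

	io__init(&io, fd, buf, sizeof(buf));
	ch = io__get_dec(&io, val);	/* terminating char, -1 on EOF, -2 on error */
	close(fd);

	return ch == -2 ? -1 : 0;
}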
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index a9599ab8c471..ec972e0892ab 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -30,9 +30,9 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
-#include "../perf-sys.h"
#include <subcmd/exec-cmd.h>
#include "event.h"
+#include "util.h"
#include "tests.h"
#define ENV "PERF_TEST_ATTR"
diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README
index 430024f618f1..a36f49fb4dbe 100644
--- a/tools/perf/tests/attr/README
+++ b/tools/perf/tests/attr/README
@@ -49,10 +49,12 @@ Following tests are defined (with perf commands):
perf record --call-graph fp kill (test-record-graph-fp)
perf record --group -e cycles,instructions kill (test-record-group)
perf record -e '{cycles,instructions}' kill (test-record-group1)
+ perf record -e '{cycles/period=1/,instructions/period=2/}:S' kill (test-record-group2)
perf record -D kill (test-record-no-delay)
perf record -i kill (test-record-no-inherit)
perf record -n kill (test-record-no-samples)
perf record -c 100 -P kill (test-record-period)
+ perf record -c 1 --pfm-events=cycles:period=2 (test-record-pfm-period)
perf record -R kill (test-record-raw)
perf stat -e cycles kill (test-stat-basic)
perf stat kill (test-stat-default)
diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/attr/system-wide-dummy
new file mode 100644
index 000000000000..eba723cc0d38
--- /dev/null
+++ b/tools/perf/tests/attr/system-wide-dummy
@@ -0,0 +1,50 @@
+# Event added by system-wide or CPU perf-record to handle the race of
+# processes starting while /proc is processed.
+[event]
+fd=1
+group_fd=-1
+cpu=*
+pid=-1
+flags=8
+type=1
+size=120
+config=9
+sample_period=4000
+sample_type=455
+read_format=4
+# Event will be enabled right away.
+disabled=0
+inherit=1
+pinned=0
+exclusive=0
+exclude_user=0
+exclude_kernel=0
+exclude_hv=0
+exclude_idle=0
+mmap=1
+comm=1
+freq=1
+inherit_stat=0
+enable_on_exec=0
+task=1
+watermark=0
+precise_ip=0
+mmap_data=0
+sample_id_all=1
+exclude_host=0
+exclude_guest=0
+exclude_callchain_kernel=0
+exclude_callchain_user=0
+mmap2=1
+comm_exec=1
+context_switch=0
+write_backward=0
+namespaces=0
+use_clockid=0
+wakeup_events=0
+bp_type=0
+config1=0
+config2=0
+branch_sample_type=0
+sample_regs_user=0
+sample_stack_user=0
diff --git a/tools/perf/tests/attr/test-record-C0 b/tools/perf/tests/attr/test-record-C0
index 93818054ae20..317730b906dd 100644
--- a/tools/perf/tests/attr/test-record-C0
+++ b/tools/perf/tests/attr/test-record-C0
@@ -9,6 +9,14 @@ cpu=0
# no enable on exec for CPU attached
enable_on_exec=0
-# PERF_SAMPLE_IP | PERF_SAMPLE_TID PERF_SAMPLE_TIME | # PERF_SAMPLE_PERIOD
+# PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME |
+# PERF_SAMPLE_ID | PERF_SAMPLE_PERIOD
# + PERF_SAMPLE_CPU added by -C 0
-sample_type=391
+sample_type=455
+
+# Dummy event handles mmaps, comm and task.
+mmap=0
+comm=0
+task=0
+
+[event:system-wide-dummy]
diff --git a/tools/perf/tests/attr/test-record-group2 b/tools/perf/tests/attr/test-record-group2
new file mode 100644
index 000000000000..6b9f8d182ce1
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-group2
@@ -0,0 +1,29 @@
+[config]
+command = record
+args = --no-bpf-event -e '{cycles/period=1234000/,instructions/period=6789000/}:S' kill >/dev/null 2>&1
+ret = 1
+
+[event-1:base-record]
+fd=1
+group_fd=-1
+config=0|1
+sample_period=1234000
+sample_type=87
+read_format=12
+inherit=0
+freq=0
+
+[event-2:base-record]
+fd=2
+group_fd=1
+config=0|1
+sample_period=6789000
+sample_type=87
+read_format=12
+disabled=0
+inherit=0
+mmap=0
+comm=0
+freq=0
+enable_on_exec=0
+task=0
diff --git a/tools/perf/tests/attr/test-record-pfm-period b/tools/perf/tests/attr/test-record-pfm-period
new file mode 100644
index 000000000000..368f5b814094
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-pfm-period
@@ -0,0 +1,9 @@
+[config]
+command = record
+args = --no-bpf-event -c 10000 --pfm-events=cycles:period=77777 kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_period=77777
+sample_type=7
+freq=0
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index da8ec1e8e064..cc9fbcedb364 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -45,10 +45,13 @@ volatile long the_var;
#if defined (__x86_64__)
extern void __test_function(volatile long *ptr);
asm (
+ ".pushsection .text;"
".globl __test_function\n"
+ ".type __test_function, @function;"
"__test_function:\n"
"incq (%rdi)\n"
- "ret\n");
+ "ret\n"
+ ".popsection\n");
#else
static void __test_function(volatile long *ptr)
{
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 5d20bf8397f0..cd77e334e577 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -197,7 +197,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
perf_mmap__read_done(&md->core);
}
- if (count != expect) {
+ if (count != expect * evlist->core.nr_entries) {
pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count);
goto out_delete_evlist;
}
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index b6322eb0f423..132bdb3e6c31 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -75,6 +75,13 @@ static struct test generic_tests[] = {
{
.desc = "PMU events",
.func = test__pmu_events,
+ .subtest = {
+ .skip_if_fail = false,
+ .get_nr = test__pmu_events_subtest_get_nr,
+ .get_desc = test__pmu_events_subtest_get_desc,
+ .skip_reason = test__pmu_events_subtest_skip_reason,
+ },
+
},
{
.desc = "DSO data read",
@@ -310,10 +317,39 @@ static struct test generic_tests[] = {
.func = test__jit_write_elf,
},
{
+ .desc = "Test libpfm4 support",
+ .func = test__pfm,
+ .subtest = {
+ .skip_if_fail = true,
+ .get_nr = test__pfm_subtest_get_nr,
+ .get_desc = test__pfm_subtest_get_desc,
+ }
+ },
+ {
+ .desc = "Test api io",
+ .func = test__api_io,
+ },
+ {
.desc = "maps__merge_in",
.func = test__maps__merge_in,
},
{
+ .desc = "Demangle Java",
+ .func = test__demangle_java,
+ },
+ {
+ .desc = "Parse and process metrics",
+ .func = test__parse_metric,
+ },
+ {
+ .desc = "PE file support",
+ .func = test__pe_file_parsing,
+ },
+ {
+ .desc = "Event expansion for cgroups",
+ .func = test__expand_cgroup_events,
+ },
+ {
.func = NULL,
},
};
@@ -323,7 +359,7 @@ static struct test *tests[] = {
arch_tests,
};
-static bool perf_test__matches(struct test *test, int curr, int argc, const char *argv[])
+static bool perf_test__matches(const char *desc, int curr, int argc, const char *argv[])
{
int i;
@@ -340,7 +376,7 @@ static bool perf_test__matches(struct test *test, int curr, int argc, const char
continue;
}
- if (strcasestr(test->desc, argv[i]))
+ if (strcasestr(desc, argv[i]))
return true;
}
@@ -425,8 +461,15 @@ static int test_and_print(struct test *t, bool force_skip, int subtest)
case TEST_OK:
pr_info(" Ok\n");
break;
- case TEST_SKIP:
- color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
+ case TEST_SKIP: {
+ const char *skip_reason = NULL;
+ if (t->subtest.skip_reason)
+ skip_reason = t->subtest.skip_reason(subtest);
+ if (skip_reason)
+ color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (%s)\n", skip_reason);
+ else
+ color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
+ }
break;
case TEST_FAIL:
default:
@@ -562,7 +605,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width)
.priv = &st,
};
- if (!perf_test__matches(&test, curr, argc, argv))
+ if (!perf_test__matches(test.desc, curr, argc, argv))
continue;
st.file = ent->d_name;
@@ -590,9 +633,25 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
for_each_test(j, t) {
int curr = i++, err;
+ int subi;
- if (!perf_test__matches(t, curr, argc, argv))
- continue;
+ if (!perf_test__matches(t->desc, curr, argc, argv)) {
+ bool skip = true;
+ int subn;
+
+ if (!t->subtest.get_nr)
+ continue;
+
+ subn = t->subtest.get_nr();
+
+ for (subi = 0; subi < subn; subi++) {
+ if (perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv))
+ skip = false;
+ }
+
+ if (skip)
+ continue;
+ }
if (t->is_supported && !t->is_supported()) {
pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc);
@@ -620,7 +679,6 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
*/
int subw = width > 2 ? width - 2 : width;
bool skip = false;
- int subi;
if (subn <= 0) {
color_fprintf(stderr, PERF_COLOR_YELLOW,
@@ -637,6 +695,9 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
}
for (subi = 0; subi < subn; subi++) {
+ if (!perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv))
+ continue;
+
pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
t->subtest.get_desc(subi));
err = test_and_print(t, skip, subi);
@@ -670,7 +731,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i)
.desc = shell_test__description(bf, sizeof(bf), path, ent->d_name),
};
- if (!perf_test__matches(&t, curr, argc, argv))
+ if (!perf_test__matches(t.desc, curr, argc, argv))
continue;
pr_info("%2d: %s\n", i, t.desc);
@@ -689,7 +750,7 @@ static int perf_test__list(int argc, const char **argv)
for_each_test(j, t) {
int curr = i++;
- if (!perf_test__matches(t, curr, argc, argv) ||
+ if (!perf_test__matches(t->desc, curr, argc, argv) ||
(t->is_supported && !t->is_supported()))
continue;
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 6fe221d31f07..035c9123549a 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -678,7 +678,7 @@ static int do_test_code_reading(bool try_kcore)
if (verbose > 0) {
char errbuf[512];
- perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+ evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
pr_debug("perf_evlist__open() failed!\n%s\n", errbuf);
}
diff --git a/tools/perf/tests/demangle-java-test.c b/tools/perf/tests/demangle-java-test.c
new file mode 100644
index 000000000000..8f3b90832fb0
--- /dev/null
+++ b/tools/perf/tests/demangle-java-test.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "tests.h"
+#include "session.h"
+#include "debug.h"
+#include "demangle-java.h"
+
+int test__demangle_java(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int ret = TEST_OK;
+ char *buf = NULL;
+ size_t i;
+
+ struct {
+ const char *mangled, *demangled;
+ } test_cases[] = {
+ { "Ljava/lang/StringLatin1;equals([B[B)Z",
+ "boolean java.lang.StringLatin1.equals(byte[], byte[])" },
+ { "Ljava/util/zip/ZipUtils;CENSIZ([BI)J",
+ "long java.util.zip.ZipUtils.CENSIZ(byte[], int)" },
+ { "Ljava/util/regex/Pattern$BmpCharProperty;match(Ljava/util/regex/Matcher;ILjava/lang/CharSequence;)Z",
+ "boolean java.util.regex.Pattern$BmpCharProperty.match(java.util.regex.Matcher, int, java.lang.CharSequence)" },
+ { "Ljava/lang/AbstractStringBuilder;appendChars(Ljava/lang/String;II)V",
+ "void java.lang.AbstractStringBuilder.appendChars(java.lang.String, int, int)" },
+ { "Ljava/lang/Object;<init>()V",
+ "void java.lang.Object<init>()" },
+ };
+
+ for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) {
+ buf = java_demangle_sym(test_cases[i].mangled, 0);
+ if (strcmp(buf, test_cases[i].demangled)) {
+ pr_debug("FAILED: %s: %s != %s\n", test_cases[i].mangled,
+ buf, test_cases[i].demangled);
+ ret = TEST_FAIL;
+ }
+ free(buf);
+ }
+
+ return ret;
+}
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 779ce280a0e9..2491d167bf76 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -37,6 +37,7 @@ static int init_live_machine(struct machine *machine)
union perf_event event;
pid_t pid = getpid();
+ memset(&event, 0, sizeof(event));
return perf_event__synthesize_mmap_events(NULL, &event, pid, pid,
mmap_handler, machine, true);
}
@@ -94,7 +95,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
return strcmp((const char *) symbol, funcs[idx]);
}
-noinline int test_dwarf_unwind__thread(struct thread *thread)
+__no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread)
{
struct perf_sample sample;
unsigned long cnt = 0;
@@ -125,7 +126,7 @@ noinline int test_dwarf_unwind__thread(struct thread *thread)
static int global_unwind_retval = -INT_MAX;
-noinline int test_dwarf_unwind__compare(void *p1, void *p2)
+__no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2)
{
/* Any possible value should be 'thread' */
struct thread *thread = *(struct thread **)p1;
@@ -144,7 +145,7 @@ noinline int test_dwarf_unwind__compare(void *p1, void *p2)
return p1 - p2;
}
-noinline int test_dwarf_unwind__krava_3(struct thread *thread)
+__no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread)
{
struct thread *array[2] = {thread, thread};
void *fp = &bsearch;
@@ -163,12 +164,12 @@ noinline int test_dwarf_unwind__krava_3(struct thread *thread)
return global_unwind_retval;
}
-noinline int test_dwarf_unwind__krava_2(struct thread *thread)
+__no_tail_call noinline int test_dwarf_unwind__krava_2(struct thread *thread)
{
return test_dwarf_unwind__krava_3(thread);
}
-noinline int test_dwarf_unwind__krava_1(struct thread *thread)
+__no_tail_call noinline int test_dwarf_unwind__krava_1(struct thread *thread)
{
return test_dwarf_unwind__krava_2(thread);
}
diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c
index 1e8a9f5c356d..db68894a6f40 100644
--- a/tools/perf/tests/event-times.c
+++ b/tools/perf/tests/event-times.c
@@ -72,7 +72,7 @@ static int attach__current_disabled(struct evlist *evlist)
evsel->core.attr.disabled = 1;
- err = perf_evsel__open_per_thread(evsel, threads);
+ err = evsel__open_per_thread(evsel, threads);
if (err) {
pr_debug("Failed to open event cpu-clock:u\n");
return err;
@@ -96,7 +96,7 @@ static int attach__current_enabled(struct evlist *evlist)
return -1;
}
- err = perf_evsel__open_per_thread(evsel, threads);
+ err = evsel__open_per_thread(evsel, threads);
perf_thread_map__put(threads);
return err == 0 ? TEST_OK : TEST_FAIL;
@@ -125,7 +125,7 @@ static int attach__cpu_disabled(struct evlist *evlist)
evsel->core.attr.disabled = 1;
- err = perf_evsel__open_per_cpu(evsel, cpus, -1);
+ err = evsel__open_per_cpu(evsel, cpus, -1);
if (err) {
if (err == -EACCES)
return TEST_SKIP;
@@ -152,7 +152,7 @@ static int attach__cpu_enabled(struct evlist *evlist)
return -1;
}
- err = perf_evsel__open_per_cpu(evsel, cpus, -1);
+ err = evsel__open_per_cpu(evsel, cpus, -1);
if (err == -EACCES)
return TEST_SKIP;
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index c727379cf20e..bdcf032f8516 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -109,7 +109,7 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu
TEST_ASSERT_VAL("failed to synthesize attr update scale",
!perf_event__synthesize_event_update_scale(NULL, evsel, process_event_scale));
- tmp.name = perf_evsel__name(evsel);
+ tmp.name = evsel__name(evsel);
TEST_ASSERT_VAL("failed to synthesize attr update name",
!perf_event__synthesize_event_update_name(&tmp.tool, evsel, process_event_name));
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index 956205bf9326..f7f3e5b4c180 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -20,12 +20,11 @@ static int perf_evsel__roundtrip_cache_name_test(void)
for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
/* skip invalid cache type */
- if (!perf_evsel__is_cache_op_valid(type, op))
+ if (!evsel__is_cache_op_valid(type, op))
continue;
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
- __perf_evsel__hw_cache_type_op_res_name(type, op, i,
- name, sizeof(name));
+ __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
err = parse_events(evlist, name, NULL);
if (err)
ret = err;
@@ -39,23 +38,22 @@ static int perf_evsel__roundtrip_cache_name_test(void)
for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
/* skip invalid cache type */
- if (!perf_evsel__is_cache_op_valid(type, op))
+ if (!evsel__is_cache_op_valid(type, op))
continue;
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
- __perf_evsel__hw_cache_type_op_res_name(type, op, i,
- name, sizeof(name));
+ __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
if (evsel->idx != idx)
continue;
++idx;
- if (strcmp(perf_evsel__name(evsel), name)) {
- pr_debug("%s != %s\n", perf_evsel__name(evsel), name);
+ if (strcmp(evsel__name(evsel), name)) {
+ pr_debug("%s != %s\n", evsel__name(evsel), name);
ret = -1;
}
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
}
}
}
@@ -84,9 +82,9 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names)
err = 0;
evlist__for_each_entry(evlist, evsel) {
- if (strcmp(perf_evsel__name(evsel), names[evsel->idx])) {
+ if (strcmp(evsel__name(evsel), names[evsel->idx])) {
--err;
- pr_debug("%s != %s\n", perf_evsel__name(evsel), names[evsel->idx]);
+ pr_debug("%s != %s\n", evsel__name(evsel), names[evsel->idx]);
}
}
@@ -102,12 +100,11 @@ int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int
{
int err = 0, ret = 0;
- err = perf_evsel__name_array_test(perf_evsel__hw_names);
+ err = perf_evsel__name_array_test(evsel__hw_names);
if (err)
ret = err;
- err = __perf_evsel__name_array_test(perf_evsel__sw_names,
- PERF_COUNT_SW_DUMMY + 1);
+ err = __perf_evsel__name_array_test(evsel__sw_names, PERF_COUNT_SW_DUMMY + 1);
if (err)
ret = err;
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index 261e6eaaee99..0e224a0a55d9 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -8,7 +8,7 @@
static int perf_evsel__test_field(struct evsel *evsel, const char *name,
int size, bool should_be_signed)
{
- struct tep_format_field *field = perf_evsel__field(evsel, name);
+ struct tep_format_field *field = evsel__field(evsel, name);
int is_signed;
int ret = 0;
@@ -35,11 +35,11 @@ static int perf_evsel__test_field(struct evsel *evsel, const char *name,
int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtest __maybe_unused)
{
- struct evsel *evsel = perf_evsel__newtp("sched", "sched_switch");
+ struct evsel *evsel = evsel__newtp("sched", "sched_switch");
int ret = 0;
if (IS_ERR(evsel)) {
- pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel));
+ pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel));
return -1;
}
@@ -66,10 +66,10 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
evsel__delete(evsel);
- evsel = perf_evsel__newtp("sched", "sched_wakeup");
+ evsel = evsel__newtp("sched", "sched_wakeup");
if (IS_ERR(evsel)) {
- pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel));
+ pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel));
return -1;
}
diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c
new file mode 100644
index 000000000000..d5771e4d094f
--- /dev/null
+++ b/tools/perf/tests/expand-cgroup.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "tests.h"
+#include "debug.h"
+#include "evlist.h"
+#include "cgroup.h"
+#include "rblist.h"
+#include "metricgroup.h"
+#include "parse-events.h"
+#include "pmu-events/pmu-events.h"
+#include "pfm.h"
+#include <subcmd/parse-options.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static int test_expand_events(struct evlist *evlist,
+ struct rblist *metric_events)
+{
+ int i, ret = TEST_FAIL;
+ int nr_events;
+ bool was_group_event;
+ int nr_members; /* for the first evsel only */
+ const char cgrp_str[] = "A,B,C";
+ const char *cgrp_name[] = { "A", "B", "C" };
+ int nr_cgrps = ARRAY_SIZE(cgrp_name);
+ char **ev_name;
+ struct evsel *evsel;
+
+ TEST_ASSERT_VAL("evlist is empty", !perf_evlist__empty(evlist));
+
+ nr_events = evlist->core.nr_entries;
+ ev_name = calloc(nr_events, sizeof(*ev_name));
+ if (ev_name == NULL) {
+ pr_debug("memory allocation failure\n");
+ return TEST_FAIL;
+ }
+ i = 0;
+ evlist__for_each_entry(evlist, evsel) {
+ ev_name[i] = strdup(evsel->name);
+ if (ev_name[i] == NULL) {
+ pr_debug("memory allocation failure\n");
+ goto out;
+ }
+ i++;
+ }
+ /* remember grouping info */
+ was_group_event = evsel__is_group_event(evlist__first(evlist));
+ nr_members = evlist__first(evlist)->core.nr_members;
+
+ ret = evlist__expand_cgroup(evlist, cgrp_str, metric_events, false);
+ if (ret < 0) {
+ pr_debug("failed to expand events for cgroups\n");
+ goto out;
+ }
+
+ ret = TEST_FAIL;
+ if (evlist->core.nr_entries != nr_events * nr_cgrps) {
+ pr_debug("event count doesn't match\n");
+ goto out;
+ }
+
+ i = 0;
+ evlist__for_each_entry(evlist, evsel) {
+ if (strcmp(evsel->name, ev_name[i % nr_events])) {
+ pr_debug("event name doesn't match:\n");
+ pr_debug(" evsel[%d]: %s\n expected: %s\n",
+ i, evsel->name, ev_name[i % nr_events]);
+ goto out;
+ }
+ if (strcmp(evsel->cgrp->name, cgrp_name[i / nr_events])) {
+ pr_debug("cgroup name doesn't match:\n");
+ pr_debug(" evsel[%d]: %s\n expected: %s\n",
+ i, evsel->cgrp->name, cgrp_name[i / nr_events]);
+ goto out;
+ }
+
+ if ((i % nr_events) == 0) {
+ if (evsel__is_group_event(evsel) != was_group_event) {
+ pr_debug("event group doesn't match: got %s, expect %s\n",
+ evsel__is_group_event(evsel) ? "true" : "false",
+ was_group_event ? "true" : "false");
+ goto out;
+ }
+ if (evsel->core.nr_members != nr_members) {
+ pr_debug("event group member doesn't match: %d vs %d\n",
+ evsel->core.nr_members, nr_members);
+ goto out;
+ }
+ }
+ i++;
+ }
+ ret = TEST_OK;
+
+out: for (i = 0; i < nr_events; i++)
+ free(ev_name[i]);
+ free(ev_name);
+ return ret;
+}
+
+static int expand_default_events(void)
+{
+ int ret;
+ struct evlist *evlist;
+ struct rblist metric_events;
+
+ evlist = perf_evlist__new_default();
+ TEST_ASSERT_VAL("failed to get evlist", evlist);
+
+ rblist__init(&metric_events);
+ ret = test_expand_events(evlist, &metric_events);
+ evlist__delete(evlist);
+ return ret;
+}
+
+static int expand_group_events(void)
+{
+ int ret;
+ struct evlist *evlist;
+ struct rblist metric_events;
+ struct parse_events_error err;
+ const char event_str[] = "{cycles,instructions}";
+
+ symbol_conf.event_group = true;
+
+ evlist = evlist__new();
+ TEST_ASSERT_VAL("failed to get evlist", evlist);
+
+ ret = parse_events(evlist, event_str, &err);
+ if (ret < 0) {
+ pr_debug("failed to parse event '%s', err %d, str '%s'\n",
+ event_str, ret, err.str);
+ parse_events_print_error(&err, event_str);
+ goto out;
+ }
+
+ rblist__init(&metric_events);
+ ret = test_expand_events(evlist, &metric_events);
+out:
+ evlist__delete(evlist);
+ return ret;
+}
+
+static int expand_libpfm_events(void)
+{
+ int ret;
+ struct evlist *evlist;
+ struct rblist metric_events;
+ const char event_str[] = "UNHALTED_CORE_CYCLES";
+ struct option opt = {
+ .value = &evlist,
+ };
+
+ symbol_conf.event_group = true;
+
+ evlist = evlist__new();
+ TEST_ASSERT_VAL("failed to get evlist", evlist);
+
+ ret = parse_libpfm_events_option(&opt, event_str, 0);
+ if (ret < 0) {
+ pr_debug("failed to parse libpfm event '%s', err %d\n",
+ event_str, ret);
+ goto out;
+ }
+ if (perf_evlist__empty(evlist)) {
+ pr_debug("libpfm was not enabled\n");
+ goto out;
+ }
+
+ rblist__init(&metric_events);
+ ret = test_expand_events(evlist, &metric_events);
+out:
+ evlist__delete(evlist);
+ return ret;
+}
+
+static int expand_metric_events(void)
+{
+ int ret;
+ struct evlist *evlist;
+ struct rblist metric_events;
+ const char metric_str[] = "CPI";
+
+ struct pmu_event pme_test[] = {
+ {
+ .metric_expr = "instructions / cycles",
+ .metric_name = "IPC",
+ },
+ {
+ .metric_expr = "1 / IPC",
+ .metric_name = "CPI",
+ },
+ {
+ .metric_expr = NULL,
+ .metric_name = NULL,
+ },
+ };
+ struct pmu_events_map ev_map = {
+ .cpuid = "test",
+ .version = "1",
+ .type = "core",
+ .table = pme_test,
+ };
+
+ evlist = evlist__new();
+ TEST_ASSERT_VAL("failed to get evlist", evlist);
+
+ rblist__init(&metric_events);
+ ret = metricgroup__parse_groups_test(evlist, &ev_map, metric_str,
+ false, false, &metric_events);
+ if (ret < 0) {
+ pr_debug("failed to parse '%s' metric\n", metric_str);
+ goto out;
+ }
+
+ ret = test_expand_events(evlist, &metric_events);
+
+out:
+ metricgroup__rblist_exit(&metric_events);
+ evlist__delete(evlist);
+ return ret;
+}
+
+int test__expand_cgroup_events(struct test *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ int ret;
+
+ ret = expand_default_events();
+ TEST_ASSERT_EQUAL("failed to expand default events", ret, 0);
+
+ ret = expand_group_events();
+ TEST_ASSERT_EQUAL("failed to expand event group", ret, 0);
+
+ ret = expand_libpfm_events();
+ TEST_ASSERT_EQUAL("failed to expand libpfm events", ret, 0);
+
+ ret = expand_metric_events();
+ TEST_ASSERT_EQUAL("failed to expand metric events", ret, 0);
+
+ return ret;
+}
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 28313e59d6f6..4d01051951cd 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -6,11 +6,11 @@
#include <string.h>
#include <linux/zalloc.h>
-static int test(struct parse_ctx *ctx, const char *e, double val2)
+static int test(struct expr_parse_ctx *ctx, const char *e, double val2)
{
double val;
- if (expr__parse(&val, ctx, e))
+ if (expr__parse(&val, ctx, e, 1))
TEST_ASSERT_VAL("parse test failed", 0);
TEST_ASSERT_VAL("unexpected value", val == val2);
return 0;
@@ -18,16 +18,15 @@ static int test(struct parse_ctx *ctx, const char *e, double val2)
int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
{
+ struct expr_id_data *val_ptr;
const char *p;
- const char **other;
double val;
- int i, ret;
- struct parse_ctx ctx;
- int num_other;
+ int ret;
+ struct expr_parse_ctx ctx;
expr__ctx_init(&ctx);
- expr__add_id(&ctx, "FOO", 1);
- expr__add_id(&ctx, "BAR", 2);
+ expr__add_id_val(&ctx, strdup("FOO"), 1);
+ expr__add_id_val(&ctx, strdup("BAR"), 2);
ret = test(&ctx, "1+1", 2);
ret |= test(&ctx, "FOO+BAR", 3);
@@ -39,29 +38,51 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
ret |= test(&ctx, "min(1,2) + 1", 2);
ret |= test(&ctx, "max(1,2) + 1", 3);
ret |= test(&ctx, "1+1 if 3*4 else 0", 2);
+ ret |= test(&ctx, "1.1 + 2.1", 3.2);
+ ret |= test(&ctx, ".1 + 2.", 2.1);
+ ret |= test(&ctx, "d_ratio(1, 2)", 0.5);
+ ret |= test(&ctx, "d_ratio(2.5, 0)", 0);
+ ret |= test(&ctx, "1.1 < 2.2", 1);
+ ret |= test(&ctx, "2.2 > 1.1", 1);
+ ret |= test(&ctx, "1.1 < 1.1", 0);
+ ret |= test(&ctx, "2.2 > 2.2", 0);
+ ret |= test(&ctx, "2.2 < 1.1", 0);
+ ret |= test(&ctx, "1.1 > 2.2", 0);
if (ret)
return ret;
p = "FOO/0";
- ret = expr__parse(&val, &ctx, p);
+ ret = expr__parse(&val, &ctx, p, 1);
TEST_ASSERT_VAL("division by zero", ret == -1);
p = "BAR/";
- ret = expr__parse(&val, &ctx, p);
+ ret = expr__parse(&val, &ctx, p, 1);
TEST_ASSERT_VAL("missing operand", ret == -1);
+ expr__ctx_clear(&ctx);
TEST_ASSERT_VAL("find other",
- expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0);
- TEST_ASSERT_VAL("find other", num_other == 3);
- TEST_ASSERT_VAL("find other", !strcmp(other[0], "BAR"));
- TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ"));
- TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO"));
- TEST_ASSERT_VAL("find other", other[3] == NULL);
+ expr__find_other("FOO + BAR + BAZ + BOZO", "FOO",
+ &ctx, 1) == 0);
+ TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 3);
+ TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAR",
+ (void **)&val_ptr));
+ TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAZ",
+ (void **)&val_ptr));
+ TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BOZO",
+ (void **)&val_ptr));
- for (i = 0; i < num_other; i++)
- zfree(&other[i]);
- free((void *)other);
+ expr__ctx_clear(&ctx);
+ TEST_ASSERT_VAL("find other",
+ expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@",
+ NULL, &ctx, 3) == 0);
+ TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 2);
+ TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT1,param=3/",
+ (void **)&val_ptr));
+ TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT2,param=3/",
+ (void **)&val_ptr));
+
+ expr__ctx_clear(&ctx);
return 0;
}
diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c
index c7c81c4a5b2b..d9eca8e86a6b 100644
--- a/tools/perf/tests/fdarray.c
+++ b/tools/perf/tests/fdarray.c
@@ -12,6 +12,7 @@ static void fdarray__init_revents(struct fdarray *fda, short revents)
for (fd = 0; fd < fda->nr; ++fd) {
fda->entries[fd].fd = fda->nr - fd;
+ fda->entries[fd].events = revents;
fda->entries[fd].revents = revents;
}
}
@@ -29,7 +30,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE
int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_unused)
{
- int nr_fds, expected_fd[2], fd, err = TEST_FAIL;
+ int nr_fds, err = TEST_FAIL;
struct fdarray *fda = fdarray__new(5, 5);
if (fda == NULL) {
@@ -55,7 +56,6 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_
fdarray__init_revents(fda, POLLHUP);
fda->entries[2].revents = POLLIN;
- expected_fd[0] = fda->entries[2].fd;
pr_debug("\nfiltering all but fda->entries[2]:");
fdarray__fprintf_prefix(fda, "before", stderr);
@@ -66,17 +66,9 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_
goto out_delete;
}
- if (fda->entries[0].fd != expected_fd[0]) {
- pr_debug("\nfda->entries[0].fd=%d != %d\n",
- fda->entries[0].fd, expected_fd[0]);
- goto out_delete;
- }
-
fdarray__init_revents(fda, POLLHUP);
fda->entries[0].revents = POLLIN;
- expected_fd[0] = fda->entries[0].fd;
fda->entries[3].revents = POLLIN;
- expected_fd[1] = fda->entries[3].fd;
pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):");
fdarray__fprintf_prefix(fda, "before", stderr);
@@ -88,14 +80,6 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_
goto out_delete;
}
- for (fd = 0; fd < 2; ++fd) {
- if (fda->entries[fd].fd != expected_fd[fd]) {
- pr_debug("\nfda->entries[%d].fd=%d != %d\n", fd,
- fda->entries[fd].fd, expected_fd[fd]);
- goto out_delete;
- }
- }
-
pr_debug("\n");
err = 0;
@@ -128,7 +112,7 @@ int test__fdarray__add(struct test *test __maybe_unused, int subtest __maybe_unu
}
#define FDA_ADD(_idx, _fd, _revents, _nr) \
- if (fdarray__add(fda, _fd, _revents) < 0) { \
+ if (fdarray__add(fda, _fd, _revents, fdarray_flag__default) < 0) { \
pr_debug("\n%d: fdarray__add(fda, %d, %d) failed!", \
__LINE__,_fd, _revents); \
goto out_delete; \
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 6367c8f6ca22..3f2e1a581247 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -190,7 +190,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec
* function since TEST_ASSERT_VAL() returns in case of failure.
*/
hists__collapse_resort(hists, NULL);
- perf_evsel__output_resort(hists_to_evsel(hists), NULL);
+ evsel__output_resort(hists_to_evsel(hists), NULL);
if (verbose > 2) {
pr_info("use callchain: %d, cumulate callchain: %d\n",
@@ -280,7 +280,7 @@ static int test1(struct evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = false;
symbol_conf.cumulate_callchain = false;
- perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+ evsel__reset_sample_bit(evsel, CALLCHAIN);
setup_sorting(NULL);
callchain_register_param(&callchain_param);
@@ -427,7 +427,7 @@ static int test2(struct evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = true;
symbol_conf.cumulate_callchain = false;
- perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+ evsel__set_sample_bit(evsel, CALLCHAIN);
setup_sorting(NULL);
callchain_register_param(&callchain_param);
@@ -485,7 +485,7 @@ static int test3(struct evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = false;
symbol_conf.cumulate_callchain = true;
- perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+ evsel__reset_sample_bit(evsel, CALLCHAIN);
setup_sorting(NULL);
callchain_register_param(&callchain_param);
@@ -669,7 +669,7 @@ static int test4(struct evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = true;
symbol_conf.cumulate_callchain = true;
- perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+ evsel__set_sample_bit(evsel, CALLCHAIN);
setup_sorting(NULL);
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 618b51ffcc01..123e07d35b55 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -142,7 +142,7 @@ int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unu
struct hists *hists = evsel__hists(evsel);
hists__collapse_resort(hists, NULL);
- perf_evsel__output_resort(evsel, NULL);
+ evsel__output_resort(evsel, NULL);
if (verbose > 2) {
pr_info("Normal histogram\n");
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index 38f804ff6452..8973f35df604 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -155,7 +155,7 @@ static int test1(struct evsel *evsel, struct machine *machine)
goto out;
hists__collapse_resort(hists, NULL);
- perf_evsel__output_resort(evsel, NULL);
+ evsel__output_resort(evsel, NULL);
if (verbose > 2) {
pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -255,7 +255,7 @@ static int test2(struct evsel *evsel, struct machine *machine)
goto out;
hists__collapse_resort(hists, NULL);
- perf_evsel__output_resort(evsel, NULL);
+ evsel__output_resort(evsel, NULL);
if (verbose > 2) {
pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -309,7 +309,7 @@ static int test3(struct evsel *evsel, struct machine *machine)
goto out;
hists__collapse_resort(hists, NULL);
- perf_evsel__output_resort(evsel, NULL);
+ evsel__output_resort(evsel, NULL);
if (verbose > 2) {
pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -387,7 +387,7 @@ static int test4(struct evsel *evsel, struct machine *machine)
goto out;
hists__collapse_resort(hists, NULL);
- perf_evsel__output_resort(evsel, NULL);
+ evsel__output_resort(evsel, NULL);
if (verbose > 2) {
pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -490,7 +490,7 @@ static int test5(struct evsel *evsel, struct machine *machine)
goto out;
hists__collapse_resort(hists, NULL);
- perf_evsel__output_resort(evsel, NULL);
+ evsel__output_resort(evsel, NULL);
if (verbose > 2) {
pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index 5d0c3a9c47a1..a90fa043c066 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -84,10 +84,14 @@ make_no_libaudit := NO_LIBAUDIT=1
make_no_libbionic := NO_LIBBIONIC=1
make_no_auxtrace := NO_AUXTRACE=1
make_no_libbpf := NO_LIBBPF=1
+make_no_libbpf_DEBUG := NO_LIBBPF=1 DEBUG=1
make_no_libcrypto := NO_LIBCRYPTO=1
make_with_babeltrace:= LIBBABELTRACE=1
make_no_sdt := NO_SDT=1
+make_no_syscall_tbl := NO_SYSCALL_TABLE=1
make_with_clangllvm := LIBCLANGLLVM=1
+make_with_libpfm4 := LIBPFM4=1
+make_with_gtk2 := GTK2=1
make_tags := tags
make_cscope := cscope
make_help := help
@@ -112,7 +116,7 @@ make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1
-make_minimal += NO_LIBCAP=1
+make_minimal += NO_LIBCAP=1 NO_SYSCALL_TABLE=1
# $(run) contains all available tests
run := make_pure
@@ -144,8 +148,14 @@ run += make_no_libaudit
run += make_no_libbionic
run += make_no_auxtrace
run += make_no_libbpf
+run += make_no_libbpf_DEBUG
+run += make_no_libcrypto
+run += make_no_sdt
+run += make_no_syscall_tbl
run += make_with_babeltrace
run += make_with_clangllvm
+run += make_with_libpfm4
+run += make_with_gtk2
run += make_help
run += make_doc
run += make_perf_o
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 5f4c0dbb4715..7b0dbfc0e17d 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -79,14 +79,14 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
char name[64];
snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
- evsels[i] = perf_evsel__newtp("syscalls", name);
+ evsels[i] = evsel__newtp("syscalls", name);
if (IS_ERR(evsels[i])) {
- pr_debug("perf_evsel__new(%s)\n", name);
+ pr_debug("evsel__new(%s)\n", name);
goto out_delete_evlist;
}
evsels[i]->core.attr.wakeup_events = 1;
- perf_evsel__set_sample_id(evsels[i], false);
+ evsel__set_sample_id(evsels[i], false);
evlist__add(evlist, evsels[i]);
@@ -150,7 +150,7 @@ out_init:
if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
pr_debug("expected %d %s events, got %d\n",
expected_nr_events[evsel->idx],
- perf_evsel__name(evsel), nr_events[evsel->idx]);
+ evsel__name(evsel), nr_events[evsel->idx]);
err = -1;
goto out_delete_evlist;
}
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index 93c176523e38..71f85e2cc127 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -44,7 +44,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int
CPU_ZERO(&cpu_set);
- evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
+ evsel = evsel__newtp("syscalls", "sys_enter_openat");
if (IS_ERR(evsel)) {
tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat");
pr_debug("%s\n", errbuf);
@@ -90,8 +90,8 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int
* we use the auto allocation it will allocate just for 1 cpu,
* as we start by cpu 0.
*/
- if (perf_evsel__alloc_counts(evsel, cpus->nr, 1) < 0) {
- pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
+ if (evsel__alloc_counts(evsel, cpus->nr, 1) < 0) {
+ pr_debug("evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
goto out_close_fd;
}
@@ -103,21 +103,21 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int
if (cpus->map[cpu] >= CPU_SETSIZE)
continue;
- if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
- pr_debug("perf_evsel__read_on_cpu\n");
+ if (evsel__read_on_cpu(evsel, cpu, 0) < 0) {
+ pr_debug("evsel__read_on_cpu\n");
err = -1;
break;
}
expected = nr_openat_calls + cpu;
if (perf_counts(evsel->counts, cpu, 0)->val != expected) {
- pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
+ pr_debug("evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val);
err = -1;
}
}
- perf_evsel__free_counts(evsel);
+ evsel__free_counts(evsel);
out_close_fd:
perf_evsel__close_fd(&evsel->core);
out_evsel_delete:
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index c6b2d7aab608..1f5f5e79ae25 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -46,9 +46,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
goto out;
}
- evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
+ evsel = evsel__newtp("syscalls", "sys_enter_openat");
if (IS_ERR(evsel)) {
- pr_debug("%s: perf_evsel__newtp\n", __func__);
+ pr_debug("%s: evsel__newtp\n", __func__);
goto out_delete_evlist;
}
@@ -60,7 +60,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
goto out_delete_evlist;
}
- perf_evsel__config(evsel, &opts, NULL);
+ evsel__config(evsel, &opts, NULL);
perf_thread_map__set_pid(evlist->core.threads, 0, getpid());
@@ -108,13 +108,13 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
continue;
}
- err = perf_evsel__parse_sample(evsel, event, &sample);
+ err = evsel__parse_sample(evsel, event, &sample);
if (err) {
pr_debug("Can't parse sample, err = %d\n", err);
goto out_delete_evlist;
}
- tp_flags = perf_evsel__intval(evsel, &sample, "flags");
+ tp_flags = evsel__intval(evsel, &sample, "flags");
if (flags != tp_flags) {
pr_debug("%s: Expected flags=%#x, got %#x\n",
diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c
index 5ebffae18605..85a8f0fe7aea 100644
--- a/tools/perf/tests/openat-syscall.c
+++ b/tools/perf/tests/openat-syscall.c
@@ -27,14 +27,14 @@ int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __m
return -1;
}
- evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
+ evsel = evsel__newtp("syscalls", "sys_enter_openat");
if (IS_ERR(evsel)) {
tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat");
pr_debug("%s\n", errbuf);
goto out_thread_map_delete;
}
- if (perf_evsel__open_per_thread(evsel, threads) < 0) {
+ if (evsel__open_per_thread(evsel, threads) < 0) {
pr_debug("failed to open counter: %s, "
"tweak /proc/sys/kernel/perf_event_paranoid?\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
@@ -46,13 +46,13 @@ int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __m
close(fd);
}
- if (perf_evsel__read_on_cpu(evsel, 0, 0) < 0) {
- pr_debug("perf_evsel__read_on_cpu\n");
+ if (evsel__read_on_cpu(evsel, 0, 0) < 0) {
+ pr_debug("evsel__read_on_cpu\n");
goto out_close_fd;
}
if (perf_counts(evsel->counts, 0, 0)->val != nr_openat_calls) {
- pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n",
+ pr_debug("evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n",
nr_openat_calls, perf_counts(evsel->counts, 0, 0)->val);
goto out_close_fd;
}
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 091c3aeccc27..611512f22b34 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -371,7 +371,7 @@ static int test__checkevent_breakpoint_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "mem:0:u"));
+ !strcmp(evsel__name(evsel), "mem:0:u"));
return test__checkevent_breakpoint(evlist);
}
@@ -385,7 +385,7 @@ static int test__checkevent_breakpoint_x_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "mem:0:x:k"));
+ !strcmp(evsel__name(evsel), "mem:0:x:k"));
return test__checkevent_breakpoint_x(evlist);
}
@@ -399,7 +399,7 @@ static int test__checkevent_breakpoint_r_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "mem:0:r:hp"));
+ !strcmp(evsel__name(evsel), "mem:0:r:hp"));
return test__checkevent_breakpoint_r(evlist);
}
@@ -413,7 +413,7 @@ static int test__checkevent_breakpoint_w_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "mem:0:w:up"));
+ !strcmp(evsel__name(evsel), "mem:0:w:up"));
return test__checkevent_breakpoint_w(evlist);
}
@@ -427,7 +427,7 @@ static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "mem:0:rw:kp"));
+ !strcmp(evsel__name(evsel), "mem:0:rw:kp"));
return test__checkevent_breakpoint_rw(evlist);
}
@@ -468,7 +468,7 @@ static int test__checkevent_list(struct evlist *evlist)
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
/* syscalls:sys_enter_openat:k */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong sample_type",
PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type);
@@ -479,7 +479,7 @@ static int test__checkevent_list(struct evlist *evlist)
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
/* 1:1:hp */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
@@ -498,15 +498,15 @@ static int test__checkevent_pmu_name(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava"));
+ TEST_ASSERT_VAL("wrong name", !strcmp(evsel__name(evsel), "krava"));
/* cpu/config=2/u" */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "cpu/config=2/u"));
+ !strcmp(evsel__name(evsel), "cpu/config=2/u"));
return 0;
}
@@ -529,7 +529,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist)
TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->core.attr.sample_type));
/* cpu/config=2,call-graph=no,time=0,period=2000/ */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config);
/*
@@ -557,6 +557,7 @@ static int test__checkevent_pmu_events(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
+ TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
return 0;
}
@@ -575,9 +576,10 @@ static int test__checkevent_pmu_events_mix(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
+ TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
/* cpu/pmu-event/u*/
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong exclude_user",
@@ -587,6 +589,7 @@ static int test__checkevent_pmu_events_mix(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
+ TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
return 0;
}
@@ -631,6 +634,34 @@ static int test__checkterms_simple(struct list_head *terms)
TEST_ASSERT_VAL("wrong val", term->val.num == 1);
TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "umask"));
+ /*
+ * read
+ *
+ * The perf_pmu__test_parse_init injects the 'read' term into
+ * perf_pmu_events_list, so 'read' is evaluated as a 'read' term
+ * and not as a raw event with the 'ead' hex value.
+ */
+ term = list_entry(term->list.next, struct parse_events_term, list);
+ TEST_ASSERT_VAL("wrong type term",
+ term->type_term == PARSE_EVENTS__TERM_TYPE_USER);
+ TEST_ASSERT_VAL("wrong type val",
+ term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ TEST_ASSERT_VAL("wrong val", term->val.num == 1);
+ TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "read"));
+
+ /*
+ * r0xead
+ *
+ * To still be able to pass the 'ead' value with the 'r' syntax,
+ * we added support for parsing 'r0xHEX' events.
+ */
+ term = list_entry(term->list.next, struct parse_events_term, list);
+ TEST_ASSERT_VAL("wrong type term",
+ term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG);
+ TEST_ASSERT_VAL("wrong type val",
+ term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ TEST_ASSERT_VAL("wrong val", term->val.num == 0xead);
+ TEST_ASSERT_VAL("wrong config", !term->config);
return 0;
}
@@ -652,13 +683,13 @@ static int test__group1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* cycles:upp */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -670,7 +701,7 @@ static int test__group1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
return 0;
@@ -691,16 +722,16 @@ static int test__group2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* cache-references + :u modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_REFERENCES == evsel->core.attr.config);
@@ -711,11 +742,11 @@ static int test__group2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* cycles:k */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -725,7 +756,7 @@ static int test__group2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
return 0;
@@ -750,15 +781,15 @@ static int test__group3(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong group name",
!strcmp(leader->group_name, "group1"));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* group1 cycles:kppp */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -771,11 +802,11 @@ static int test__group3(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 3);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* group2 cycles + G modifier */
- evsel = leader = perf_evsel__next(evsel);
+ evsel = leader = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -785,15 +816,15 @@ static int test__group3(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong group name",
!strcmp(leader->group_name, "group2"));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* group2 1:3 + G modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", 3 == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
@@ -803,21 +834,21 @@ static int test__group3(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* instructions:u */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
return 0;
@@ -843,13 +874,13 @@ static int test__group4(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 1);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* instructions:kp + p */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
@@ -861,7 +892,7 @@ static int test__group4(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
return 0;
@@ -886,13 +917,13 @@ static int test__group5(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* instructions + G */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
@@ -903,11 +934,11 @@ static int test__group5(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* cycles:G */
- evsel = leader = perf_evsel__next(evsel);
+ evsel = leader = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -918,13 +949,13 @@ static int test__group5(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* instructions:G */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
@@ -935,10 +966,10 @@ static int test__group5(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
/* cycles */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -948,7 +979,7 @@ static int test__group5(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
return 0;
}
@@ -972,12 +1003,12 @@ static int test__group_gh1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
/* cache-misses:G + :H group modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
@@ -988,7 +1019,7 @@ static int test__group_gh1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
return 0;
}
@@ -1012,12 +1043,12 @@ static int test__group_gh2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
/* cache-misses:H + :G group modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
@@ -1028,7 +1059,7 @@ static int test__group_gh2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
return 0;
}
@@ -1052,12 +1083,12 @@ static int test__group_gh3(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
/* cache-misses:H + :u group modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
@@ -1068,7 +1099,7 @@ static int test__group_gh3(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
return 0;
}
@@ -1092,12 +1123,12 @@ static int test__group_gh4(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
/* cache-misses:H + :uG group modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
@@ -1108,7 +1139,7 @@ static int test__group_gh4(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
return 0;
}
@@ -1135,7 +1166,7 @@ static int test__leader_sample1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
/* cache-misses - not sampling */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
@@ -1149,7 +1180,7 @@ static int test__leader_sample1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
/* branch-misses - not sampling */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config);
@@ -1188,7 +1219,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
/* branch-misses - not sampling */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config);
@@ -1234,14 +1265,14 @@ static int test__pinned_group(struct evlist *evlist)
TEST_ASSERT_VAL("wrong pinned", evsel->core.attr.pinned);
/* cache-misses - can not be pinned, but will go on with the leader */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
/* branch-misses - ditto */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
@@ -1249,6 +1280,49 @@ static int test__pinned_group(struct evlist *evlist)
return 0;
}
+static int test__checkevent_exclusive_modifier(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
+ TEST_ASSERT_VAL("wrong exclusive", evsel->core.attr.exclusive);
+
+ return test__checkevent_symbolic_name(evlist);
+}
+
+static int test__exclusive_group(struct evlist *evlist)
+{
+ struct evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries);
+
+ /* cycles - group leader */
+ evsel = leader = evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong exclusive", evsel->core.attr.exclusive);
+
+ /* cache-misses - can not be exclusive, but will go on with the leader */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
+
+ /* branch-misses - ditto */
+ evsel = evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config);
+ TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
+
+ return 0;
+}
static int test__checkevent_breakpoint_len(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
@@ -1356,6 +1430,16 @@ static int test__checkevent_complex_name(struct evlist *evlist)
return 0;
}
+static int test__checkevent_raw_pmu(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config);
+ return 0;
+}
+
static int test__sym_event_slash(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
@@ -1727,7 +1811,17 @@ static struct evlist_test test__events[] = {
.name = "cycles:k",
.check = test__sym_event_dc,
.id = 55,
- }
+ },
+ {
+ .name = "instructions:uep",
+ .check = test__checkevent_exclusive_modifier,
+ .id = 56,
+ },
+ {
+ .name = "{cycles,cache-misses,branch-misses}:e",
+ .check = test__exclusive_group,
+ .id = 57,
+ },
};
static struct evlist_test test__events_pmu[] = {
@@ -1750,7 +1844,17 @@ static struct evlist_test test__events_pmu[] = {
.name = "cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp",
.check = test__checkevent_complex_name,
.id = 3,
- }
+ },
+ {
+ .name = "software/r1a/",
+ .check = test__checkevent_raw_pmu,
+ .id = 4,
+ },
+ {
+ .name = "software/r0x1a/",
+ .check = test__checkevent_raw_pmu,
+ .id = 5,
+ },
};
struct terms_test {
@@ -1761,7 +1865,7 @@ struct terms_test {
static struct terms_test test__terms[] = {
[0] = {
- .str = "config=10,config1,config2=3,umask=1",
+ .str = "config=10,config1,config2=3,umask=1,read,r0xead",
.check = test__checkterms_simple,
},
};
@@ -1821,6 +1925,13 @@ static int test_term(struct terms_test *t)
INIT_LIST_HEAD(&terms);
+ /*
+ * The perf_pmu__test_parse_init prepares perf_pmu_events_list
+ * which gets freed in parse_events_terms.
+ */
+ if (perf_pmu__test_parse_init())
+ return -1;
+
ret = parse_events_terms(&terms, t->str);
if (ret) {
pr_debug("failed to parse terms '%s', err %d\n",
diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c
new file mode 100644
index 000000000000..7c1bde01cb50
--- /dev/null
+++ b/tools/perf/tests/parse-metric.c
@@ -0,0 +1,357 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <string.h>
+#include <perf/cpumap.h>
+#include <perf/evlist.h>
+#include "metricgroup.h"
+#include "tests.h"
+#include "pmu-events/pmu-events.h"
+#include "evlist.h"
+#include "rblist.h"
+#include "debug.h"
+#include "expr.h"
+#include "stat.h"
+
+static struct pmu_event pme_test[] = {
+{
+ .metric_expr = "inst_retired.any / cpu_clk_unhalted.thread",
+ .metric_name = "IPC",
+ .metric_group = "group1",
+},
+{
+ .metric_expr = "idq_uops_not_delivered.core / (4 * (( ( cpu_clk_unhalted.thread / 2 ) * "
+ "( 1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk ) )))",
+ .metric_name = "Frontend_Bound_SMT",
+},
+{
+ .metric_expr = "l1d\\-loads\\-misses / inst_retired.any",
+ .metric_name = "dcache_miss_cpi",
+},
+{
+ .metric_expr = "l1i\\-loads\\-misses / inst_retired.any",
+ .metric_name = "icache_miss_cycles",
+},
+{
+ .metric_expr = "(dcache_miss_cpi + icache_miss_cycles)",
+ .metric_name = "cache_miss_cycles",
+ .metric_group = "group1",
+},
+{
+ .metric_expr = "l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit",
+ .metric_name = "DCache_L2_All_Hits",
+},
+{
+ .metric_expr = "max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + "
+ "l2_rqsts.pf_miss + l2_rqsts.rfo_miss",
+ .metric_name = "DCache_L2_All_Miss",
+},
+{
+ .metric_expr = "dcache_l2_all_hits + dcache_l2_all_miss",
+ .metric_name = "DCache_L2_All",
+},
+{
+ .metric_expr = "d_ratio(dcache_l2_all_hits, dcache_l2_all)",
+ .metric_name = "DCache_L2_Hits",
+},
+{
+ .metric_expr = "d_ratio(dcache_l2_all_miss, dcache_l2_all)",
+ .metric_name = "DCache_L2_Misses",
+},
+{
+ .metric_expr = "ipc + m2",
+ .metric_name = "M1",
+},
+{
+ .metric_expr = "ipc + m1",
+ .metric_name = "M2",
+},
+{
+ .metric_expr = "1/m3",
+ .metric_name = "M3",
+},
+{
+ .name = NULL,
+}
+};
+
+static struct pmu_events_map map = {
+ .cpuid = "test",
+ .version = "1",
+ .type = "core",
+ .table = pme_test,
+};
+
+struct value {
+ const char *event;
+ u64 val;
+};
+
+static u64 find_value(const char *name, struct value *values)
+{
+ struct value *v = values;
+
+ while (v->event) {
+ if (!strcmp(name, v->event))
+ return v->val;
+ v++;
+ }
+ return 0;
+}
+
+static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist,
+ struct value *vals)
+{
+ struct evsel *evsel;
+ u64 count;
+
+ evlist__for_each_entry(evlist, evsel) {
+ count = find_value(evsel->name, vals);
+ perf_stat__update_shadow_stats(evsel, count, 0, st);
+ }
+}
+
+static double compute_single(struct rblist *metric_events, struct evlist *evlist,
+ struct runtime_stat *st, const char *name)
+{
+ struct metric_expr *mexp;
+ struct metric_event *me;
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ me = metricgroup__lookup(metric_events, evsel, false);
+ if (me != NULL) {
+ list_for_each_entry(mexp, &me->head, nd) {
+ if (strcmp(mexp->metric_name, name))
+ continue;
+ return test_generic_metric(mexp, 0, st);
+ }
+ }
+ }
+ return 0.;
+}
+
+static int __compute_metric(const char *name, struct value *vals,
+ const char *name1, double *ratio1,
+ const char *name2, double *ratio2)
+{
+ struct rblist metric_events = {
+ .nr_entries = 0,
+ };
+ struct perf_cpu_map *cpus;
+ struct runtime_stat st;
+ struct evlist *evlist;
+ int err;
+
+ /*
+ * We need to prepare evlist for stat mode running on CPU 0
+ * because that's where all the stats are going to be created.
+ */
+ evlist = evlist__new();
+ if (!evlist)
+ return -ENOMEM;
+
+ cpus = perf_cpu_map__new("0");
+ if (!cpus) {
+ evlist__delete(evlist);
+ return -ENOMEM;
+ }
+
+ perf_evlist__set_maps(&evlist->core, cpus, NULL);
+ runtime_stat__init(&st);
+
+ /* Parse the metric into metric_events list. */
+ err = metricgroup__parse_groups_test(evlist, &map, name,
+ false, false,
+ &metric_events);
+ if (err)
+ goto out;
+
+ err = perf_evlist__alloc_stats(evlist, false);
+ if (err)
+ goto out;
+
+ /* Load the runtime stats with given numbers for events. */
+ load_runtime_stat(&st, evlist, vals);
+
+ /* And execute the metric */
+ if (name1 && ratio1)
+ *ratio1 = compute_single(&metric_events, evlist, &st, name1);
+ if (name2 && ratio2)
+ *ratio2 = compute_single(&metric_events, evlist, &st, name2);
+
+out:
+ /* ... cleanup. */
+ metricgroup__rblist_exit(&metric_events);
+ runtime_stat__exit(&st);
+ perf_evlist__free_stats(evlist);
+ perf_cpu_map__put(cpus);
+ evlist__delete(evlist);
+ return err;
+}
+
+static int compute_metric(const char *name, struct value *vals, double *ratio)
+{
+ return __compute_metric(name, vals, name, ratio, NULL, NULL);
+}
+
+static int compute_metric_group(const char *name, struct value *vals,
+ const char *name1, double *ratio1,
+ const char *name2, double *ratio2)
+{
+ return __compute_metric(name, vals, name1, ratio1, name2, ratio2);
+}
+
+static int test_ipc(void)
+{
+ double ratio;
+ struct value vals[] = {
+ { .event = "inst_retired.any", .val = 300 },
+ { .event = "cpu_clk_unhalted.thread", .val = 200 },
+ { .event = NULL, },
+ };
+
+ TEST_ASSERT_VAL("failed to compute metric",
+ compute_metric("IPC", vals, &ratio) == 0);
+
+ TEST_ASSERT_VAL("IPC failed, wrong ratio",
+ ratio == 1.5);
+ return 0;
+}
+
+static int test_frontend(void)
+{
+ double ratio;
+ struct value vals[] = {
+ { .event = "idq_uops_not_delivered.core", .val = 300 },
+ { .event = "cpu_clk_unhalted.thread", .val = 200 },
+ { .event = "cpu_clk_unhalted.one_thread_active", .val = 400 },
+ { .event = "cpu_clk_unhalted.ref_xclk", .val = 600 },
+ { .event = NULL, },
+ };
+
+ TEST_ASSERT_VAL("failed to compute metric",
+ compute_metric("Frontend_Bound_SMT", vals, &ratio) == 0);
+
+ TEST_ASSERT_VAL("Frontend_Bound_SMT failed, wrong ratio",
+ ratio == 0.45);
+ return 0;
+}
+
+static int test_cache_miss_cycles(void)
+{
+ double ratio;
+ struct value vals[] = {
+ { .event = "l1d-loads-misses", .val = 300 },
+ { .event = "l1i-loads-misses", .val = 200 },
+ { .event = "inst_retired.any", .val = 400 },
+ { .event = NULL, },
+ };
+
+ TEST_ASSERT_VAL("failed to compute metric",
+ compute_metric("cache_miss_cycles", vals, &ratio) == 0);
+
+ TEST_ASSERT_VAL("cache_miss_cycles failed, wrong ratio",
+ ratio == 1.25);
+ return 0;
+}
+
+
+/*
+ * DCache_L2_All_Hits = l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit
+ * DCache_L2_All_Miss = max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) +
+ * l2_rqsts.pf_miss + l2_rqsts.rfo_miss
+ * DCache_L2_All = dcache_l2_all_hits + dcache_l2_all_miss
+ * DCache_L2_Hits = d_ratio(dcache_l2_all_hits, dcache_l2_all)
+ * DCache_L2_Misses = d_ratio(dcache_l2_all_miss, dcache_l2_all)
+ *
+ * l2_rqsts.demand_data_rd_hit = 100
+ * l2_rqsts.pf_hit = 200
+ * l2_rqsts.rfo_hit = 300
+ * l2_rqsts.all_demand_data_rd = 400
+ * l2_rqsts.pf_miss = 500
+ * l2_rqsts.rfo_miss = 600
+ *
+ * DCache_L2_All_Hits = 600
+ * DCache_L2_All_Miss = MAX(400 - 100, 0) + 500 + 600 = 1400
+ * DCache_L2_All = 600 + 1400 = 2000
+ * DCache_L2_Hits = 600 / 2000 = 0.3
+ * DCache_L2_Misses = 1400 / 2000 = 0.7
+ */
+static int test_dcache_l2(void)
+{
+ double ratio;
+ struct value vals[] = {
+ { .event = "l2_rqsts.demand_data_rd_hit", .val = 100 },
+ { .event = "l2_rqsts.pf_hit", .val = 200 },
+ { .event = "l2_rqsts.rfo_hit", .val = 300 },
+ { .event = "l2_rqsts.all_demand_data_rd", .val = 400 },
+ { .event = "l2_rqsts.pf_miss", .val = 500 },
+ { .event = "l2_rqsts.rfo_miss", .val = 600 },
+ { .event = NULL, },
+ };
+
+ TEST_ASSERT_VAL("failed to compute metric",
+ compute_metric("DCache_L2_Hits", vals, &ratio) == 0);
+
+ TEST_ASSERT_VAL("DCache_L2_Hits failed, wrong ratio",
+ ratio == 0.3);
+
+ TEST_ASSERT_VAL("failed to compute metric",
+ compute_metric("DCache_L2_Misses", vals, &ratio) == 0);
+
+ TEST_ASSERT_VAL("DCache_L2_Misses failed, wrong ratio",
+ ratio == 0.7);
+ return 0;
+}
+
+static int test_recursion_fail(void)
+{
+ double ratio;
+ struct value vals[] = {
+ { .event = "inst_retired.any", .val = 300 },
+ { .event = "cpu_clk_unhalted.thread", .val = 200 },
+ { .event = NULL, },
+ };
+
+ TEST_ASSERT_VAL("failed to find recursion",
+ compute_metric("M1", vals, &ratio) == -1);
+
+ TEST_ASSERT_VAL("failed to find recursion",
+ compute_metric("M3", vals, &ratio) == -1);
+ return 0;
+}
+
+static int test_metric_group(void)
+{
+ double ratio1, ratio2;
+ struct value vals[] = {
+ { .event = "cpu_clk_unhalted.thread", .val = 200 },
+ { .event = "l1d-loads-misses", .val = 300 },
+ { .event = "l1i-loads-misses", .val = 200 },
+ { .event = "inst_retired.any", .val = 400 },
+ { .event = NULL, },
+ };
+
+ TEST_ASSERT_VAL("failed to find recursion",
+ compute_metric_group("group1", vals,
+ "IPC", &ratio1,
+ "cache_miss_cycles", &ratio2) == 0);
+
+ TEST_ASSERT_VAL("group IPC failed, wrong ratio",
+ ratio1 == 2.0);
+
+ TEST_ASSERT_VAL("group cache_miss_cycles failed, wrong ratio",
+ ratio2 == 1.25);
+ return 0;
+}
+
+int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ TEST_ASSERT_VAL("IPC failed", test_ipc() == 0);
+ TEST_ASSERT_VAL("frontend failed", test_frontend() == 0);
+ TEST_ASSERT_VAL("cache_miss_cycles failed", test_cache_miss_cycles() == 0);
+ TEST_ASSERT_VAL("DCache_L2 failed", test_dcache_l2() == 0);
+ TEST_ASSERT_VAL("recursion fail failed", test_recursion_fail() == 0);
+ TEST_ASSERT_VAL("test metric group", test_metric_group() == 0);
+ return 0;
+}
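For reference, the asserted ratios follow directly from the test values and the expressions in pme_test (a worked check, using nothing beyond the numbers above):

    IPC                = inst_retired.any / cpu_clk_unhalted.thread
                       = 300 / 200 = 1.5
    Frontend_Bound_SMT = 300 / (4 * ((200 / 2) * (1 + 400 / 600)))
                       = 300 / (4 * 100 * 5/3) = 0.45
    cache_miss_cycles  = 300 / 400 + 200 / 400 = 1.25

In test_metric_group the same IPC expression evaluates to 400 / 200 = 2.0, since that run loads inst_retired.any = 400.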
diff --git a/tools/perf/tests/pe-file-parsing.c b/tools/perf/tests/pe-file-parsing.c
new file mode 100644
index 000000000000..58b90c42eb38
--- /dev/null
+++ b/tools/perf/tests/pe-file-parsing.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <subcmd/exec-cmd.h>
+
+#include "debug.h"
+#include "util/build-id.h"
+#include "util/symbol.h"
+#include "util/dso.h"
+
+#include "tests.h"
+
+#ifdef HAVE_LIBBFD_SUPPORT
+
+static int run_dir(const char *d)
+{
+ char filename[PATH_MAX];
+ char debugfile[PATH_MAX];
+ struct build_id bid;
+ char debuglink[PATH_MAX];
+ char expect_build_id[] = {
+ 0x5a, 0x0f, 0xd8, 0x82, 0xb5, 0x30, 0x84, 0x22,
+ 0x4b, 0xa4, 0x7b, 0x62, 0x4c, 0x55, 0xa4, 0x69,
+ };
+ char expect_debuglink[PATH_MAX] = "pe-file.exe.debug";
+ struct dso *dso;
+ struct symbol *sym;
+ int ret;
+
+ scnprintf(filename, PATH_MAX, "%s/pe-file.exe", d);
+ ret = filename__read_build_id(filename, &bid);
+ TEST_ASSERT_VAL("Failed to read build_id",
+ ret == sizeof(expect_build_id));
+ TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id,
+ sizeof(expect_build_id)));
+
+ ret = filename__read_debuglink(filename, debuglink, PATH_MAX);
+ TEST_ASSERT_VAL("Failed to read debuglink", ret == 0);
+ TEST_ASSERT_VAL("Wrong debuglink",
+ !strcmp(debuglink, expect_debuglink));
+
+ scnprintf(debugfile, PATH_MAX, "%s/%s", d, debuglink);
+ ret = filename__read_build_id(debugfile, &bid);
+ TEST_ASSERT_VAL("Failed to read debug file build_id",
+ ret == sizeof(expect_build_id));
+ TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id,
+ sizeof(expect_build_id)));
+
+ dso = dso__new(filename);
+ TEST_ASSERT_VAL("Failed to get dso", dso);
+
+ ret = dso__load_bfd_symbols(dso, debugfile);
+ TEST_ASSERT_VAL("Failed to load symbols", ret == 0);
+
+ dso__sort_by_name(dso);
+ sym = dso__find_symbol_by_name(dso, "main");
+ TEST_ASSERT_VAL("Failed to find main", sym);
+ dso__delete(dso);
+
+ return TEST_OK;
+}
+
+int test__pe_file_parsing(struct test *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ struct stat st;
+ char path_dir[PATH_MAX];
+
+ /* First try development tree tests. */
+ if (!lstat("./tests", &st))
+ return run_dir("./tests");
+
+ /* Then installed path. */
+ snprintf(path_dir, PATH_MAX, "%s/tests", get_argv_exec_path());
+
+ if (!lstat(path_dir, &st))
+ return run_dir(path_dir);
+
+ return TEST_SKIP;
+}
+
+#else
+
+int test__pe_file_parsing(struct test *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ return TEST_SKIP;
+}
+
+#endif
diff --git a/tools/perf/tests/pe-file.c b/tools/perf/tests/pe-file.c
new file mode 100644
index 000000000000..eb3df5e9886f
--- /dev/null
+++ b/tools/perf/tests/pe-file.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// pe-file.exe and pe-file.exe.debug built with:
+// x86_64-w64-mingw32-gcc -o pe-file.exe pe-file.c
+// -Wl,--file-alignment,4096 -Wl,--build-id
+// x86_64-w64-mingw32-objcopy --only-keep-debug
+// --compress-debug-sections pe-file.exe pe-file.exe.debug
+// x86_64-w64-mingw32-objcopy --strip-debug
+// --add-gnu-debuglink=pe-file.exe.debug pe-file.exe
+
+int main(int argc, char const *argv[])
+{
+ return 0;
+}
diff --git a/tools/perf/tests/pe-file.exe b/tools/perf/tests/pe-file.exe
new file mode 100644
index 000000000000..838a46dae724
--- /dev/null
+++ b/tools/perf/tests/pe-file.exe
Binary files differ
diff --git a/tools/perf/tests/pe-file.exe.debug b/tools/perf/tests/pe-file.exe.debug
new file mode 100644
index 000000000000..287d6718d6c9
--- /dev/null
+++ b/tools/perf/tests/pe-file.exe.debug
Binary files differ
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 2195fc205e72..67d3f5aad016 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -106,9 +106,9 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
* Config the evsels, setting attr->comm on the first one, etc.
*/
evsel = evlist__first(evlist);
- perf_evsel__set_sample_bit(evsel, CPU);
- perf_evsel__set_sample_bit(evsel, TID);
- perf_evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, CPU);
+ evsel__set_sample_bit(evsel, TID);
+ evsel__set_sample_bit(evsel, TIME);
perf_evlist__config(evlist, &opts, NULL);
err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
@@ -185,14 +185,14 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
err = perf_evlist__parse_sample(evlist, event, &sample);
if (err < 0) {
if (verbose > 0)
- perf_event__fprintf(event, stderr);
+ perf_event__fprintf(event, NULL, stderr);
pr_debug("Couldn't parse sample\n");
goto out_delete_evlist;
}
if (verbose > 0) {
pr_info("%" PRIu64" %d ", sample.time, sample.cpu);
- perf_event__fprintf(event, stderr);
+ perf_event__fprintf(event, NULL, stderr);
}
if (prev_time > sample.time) {
diff --git a/tools/perf/tests/pfm.c b/tools/perf/tests/pfm.c
new file mode 100644
index 000000000000..76a53126efdf
--- /dev/null
+++ b/tools/perf/tests/pfm.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test support for libpfm4 event encodings.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include "tests.h"
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/pfm.h"
+
+#include <linux/kernel.h>
+
+#ifdef HAVE_LIBPFM
+static int test__pfm_events(void);
+static int test__pfm_group(void);
+#endif
+
+static const struct {
+ int (*func)(void);
+ const char *desc;
+} pfm_testcase_table[] = {
+#ifdef HAVE_LIBPFM
+ {
+ .func = test__pfm_events,
+ .desc = "test of individual --pfm-events",
+ },
+ {
+ .func = test__pfm_group,
+ .desc = "test groups of --pfm-events",
+ },
+#endif
+};
+
+#ifdef HAVE_LIBPFM
+static int count_pfm_events(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ int count = 0;
+
+ perf_evlist__for_each_entry(evlist, evsel) {
+ count++;
+ }
+ return count;
+}
+
+static int test__pfm_events(void)
+{
+ struct evlist *evlist;
+ struct option opt;
+ size_t i;
+ const struct {
+ const char *events;
+ int nr_events;
+ } table[] = {
+ {
+ .events = "",
+ .nr_events = 0,
+ },
+ {
+ .events = "instructions",
+ .nr_events = 1,
+ },
+ {
+ .events = "instructions,cycles",
+ .nr_events = 2,
+ },
+ {
+ .events = "stereolab",
+ .nr_events = 0,
+ },
+ {
+ .events = "instructions,instructions",
+ .nr_events = 2,
+ },
+ {
+ .events = "stereolab,instructions",
+ .nr_events = 0,
+ },
+ {
+ .events = "instructions,stereolab",
+ .nr_events = 1,
+ },
+ };
+
+ for (i = 0; i < ARRAY_SIZE(table); i++) {
+ evlist = evlist__new();
+ if (evlist == NULL)
+ return -ENOMEM;
+
+ opt.value = evlist;
+ parse_libpfm_events_option(&opt,
+ table[i].events,
+ 0);
+ TEST_ASSERT_EQUAL(table[i].events,
+ count_pfm_events(&evlist->core),
+ table[i].nr_events);
+ TEST_ASSERT_EQUAL(table[i].events,
+ evlist->nr_groups,
+ 0);
+
+ evlist__delete(evlist);
+ }
+ return 0;
+}
+
+static int test__pfm_group(void)
+{
+ struct evlist *evlist;
+ struct option opt;
+ size_t i;
+ const struct {
+ const char *events;
+ int nr_events;
+ int nr_groups;
+ } table[] = {
+ {
+ .events = "{},",
+ .nr_events = 0,
+ .nr_groups = 0,
+ },
+ {
+ .events = "{instructions}",
+ .nr_events = 1,
+ .nr_groups = 1,
+ },
+ {
+ .events = "{instructions},{}",
+ .nr_events = 1,
+ .nr_groups = 1,
+ },
+ {
+ .events = "{},{instructions}",
+ .nr_events = 0,
+ .nr_groups = 0,
+ },
+ {
+ .events = "{instructions},{instructions}",
+ .nr_events = 2,
+ .nr_groups = 2,
+ },
+ {
+ .events = "{instructions,cycles},{instructions,cycles}",
+ .nr_events = 4,
+ .nr_groups = 2,
+ },
+ {
+ .events = "{stereolab}",
+ .nr_events = 0,
+ .nr_groups = 0,
+ },
+ {
+ .events =
+ "{instructions,cycles},{instructions,stereolab}",
+ .nr_events = 3,
+ .nr_groups = 1,
+ },
+ };
+
+ for (i = 0; i < ARRAY_SIZE(table); i++) {
+ evlist = evlist__new();
+ if (evlist == NULL)
+ return -ENOMEM;
+
+ opt.value = evlist;
+ parse_libpfm_events_option(&opt,
+ table[i].events,
+ 0);
+ TEST_ASSERT_EQUAL(table[i].events,
+ count_pfm_events(&evlist->core),
+ table[i].nr_events);
+ TEST_ASSERT_EQUAL(table[i].events,
+ evlist->nr_groups,
+ table[i].nr_groups);
+
+ evlist__delete(evlist);
+ }
+ return 0;
+}
+#endif
+
+const char *test__pfm_subtest_get_desc(int i)
+{
+ if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table))
+ return NULL;
+ return pfm_testcase_table[i].desc;
+}
+
+int test__pfm_subtest_get_nr(void)
+{
+ return (int)ARRAY_SIZE(pfm_testcase_table);
+}
+
+int test__pfm(struct test *test __maybe_unused, int i __maybe_unused)
+{
+#ifdef HAVE_LIBPFM
+ if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table))
+ return TEST_FAIL;
+ return pfm_testcase_table[i].func();
+#else
+ return TEST_SKIP;
+#endif
+}
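The tables above read as a specification of stop-on-error parsing: assuming parse_libpfm_events_option() consumes its argument left to right, "{instructions,cycles},{instructions,stereolab}" keeps the first complete group plus the one good event of the second (3 events, 1 group), while "{},{instructions}" fails on the empty leading group and yields nothing. On the command line this corresponds to e.g. 'perf stat --pfm-events "{instructions,cycles}"', which programs both counters as a single group.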
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index d64261da8bf7..d3517a74d95e 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include "math.h"
#include "parse-events.h"
#include "pmu.h"
#include "tests.h"
@@ -8,6 +9,9 @@
#include <linux/zalloc.h>
#include "debug.h"
#include "../pmu-events/pmu-events.h"
+#include "util/evlist.h"
+#include "util/expr.h"
+#include "util/parse-events.h"
struct perf_pmu_test_event {
struct pmu_event event;
@@ -144,7 +148,7 @@ static struct pmu_events_map *__test_pmu_get_events_map(void)
}
/* Verify generated events from pmu-events.c is as expected */
-static int __test_pmu_event_table(void)
+static int test_pmu_event_table(void)
{
struct pmu_events_map *map = __test_pmu_get_events_map();
struct pmu_event *table;
@@ -270,6 +274,7 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count)
int res = 0;
bool use_uncore_table;
struct pmu_events_map *map = __test_pmu_get_events_map();
+ struct perf_pmu_alias *a, *tmp;
if (!map)
return -1;
@@ -343,18 +348,19 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count)
pmu_name, alias->name);
}
+ list_for_each_entry_safe(a, tmp, &aliases, list) {
+ list_del(&a->list);
+ perf_pmu_free_alias(a);
+ }
free(pmu);
return res;
}
-int test__pmu_events(struct test *test __maybe_unused,
- int subtest __maybe_unused)
+
+static int test_aliases(void)
{
struct perf_pmu *pmu = NULL;
- if (__test_pmu_event_table())
- return -1;
-
while ((pmu = perf_pmu__scan(pmu)) != NULL) {
int count = 0;
@@ -377,3 +383,282 @@ int test__pmu_events(struct test *test __maybe_unused,
return 0;
}
+
+static bool is_number(const char *str)
+{
+ char *end_ptr;
+ double v;
+
+ errno = 0;
+ v = strtod(str, &end_ptr);
+ (void)v; // We're not interested in this value, only if it is valid
+ return errno == 0 && end_ptr != str;
+}
+
+static int check_parse_id(const char *id, struct parse_events_error *error,
+ struct perf_pmu *fake_pmu)
+{
+ struct evlist *evlist;
+ int ret;
+
+ /* Numbers are always valid. */
+ if (is_number(id))
+ return 0;
+
+ evlist = evlist__new();
+ if (!evlist)
+ return -ENOMEM;
+ ret = __parse_events(evlist, id, error, fake_pmu);
+ evlist__delete(evlist);
+ return ret;
+}
+
+static int check_parse_cpu(const char *id, bool same_cpu, struct pmu_event *pe)
+{
+ struct parse_events_error error = { .idx = 0, };
+
+ int ret = check_parse_id(id, &error, NULL);
+ if (ret && same_cpu) {
+ pr_warning("Parse event failed metric '%s' id '%s' expr '%s'\n",
+ pe->metric_name, id, pe->metric_expr);
+ pr_warning("Error string '%s' help '%s'\n", error.str,
+ error.help);
+ } else if (ret) {
+ pr_debug3("Parse event failed, but for an event that may not be supported by this CPU.\nid '%s' metric '%s' expr '%s'\n",
+ id, pe->metric_name, pe->metric_expr);
+ ret = 0;
+ }
+ free(error.str);
+ free(error.help);
+ free(error.first_str);
+ free(error.first_help);
+ return ret;
+}
+
+static int check_parse_fake(const char *id)
+{
+ struct parse_events_error error = { .idx = 0, };
+ int ret = check_parse_id(id, &error, &perf_pmu__fake);
+
+ free(error.str);
+ free(error.help);
+ free(error.first_str);
+ free(error.first_help);
+ return ret;
+}
+
+static void expr_failure(const char *msg,
+ const struct pmu_events_map *map,
+ const struct pmu_event *pe)
+{
+ pr_debug("%s for map %s %s %s\n",
+ msg, map->cpuid, map->version, map->type);
+ pr_debug("On metric %s\n", pe->metric_name);
+ pr_debug("On expression %s\n", pe->metric_expr);
+}
+
+static int test_parsing(void)
+{
+ struct pmu_events_map *cpus_map = perf_pmu__find_map(NULL);
+ struct pmu_events_map *map;
+ struct pmu_event *pe;
+ int i, j, k;
+ int ret = 0;
+ struct expr_parse_ctx ctx;
+ double result;
+
+ i = 0;
+ for (;;) {
+ map = &pmu_events_map[i++];
+ if (!map->table)
+ break;
+ j = 0;
+ for (;;) {
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ pe = &map->table[j++];
+ if (!pe->name && !pe->metric_group && !pe->metric_name)
+ break;
+ if (!pe->metric_expr)
+ continue;
+ expr__ctx_init(&ctx);
+ if (expr__find_other(pe->metric_expr, NULL, &ctx, 0)
+ < 0) {
+ expr_failure("Parse other failed", map, pe);
+ ret++;
+ continue;
+ }
+
+ /*
+ * Add all ids with a made-up value. Equal values may
+ * trigger a divide by zero when subtracted, so make
+ * them unique.
+ */
+ k = 1;
+ hashmap__for_each_entry((&ctx.ids), cur, bkt)
+ expr__add_id_val(&ctx, strdup(cur->key), k++);
+
+ hashmap__for_each_entry((&ctx.ids), cur, bkt) {
+ if (check_parse_cpu(cur->key, map == cpus_map,
+ pe))
+ ret++;
+ }
+
+ if (expr__parse(&result, &ctx, pe->metric_expr, 0)) {
+ expr_failure("Parse failed", map, pe);
+ ret++;
+ }
+ expr__ctx_clear(&ctx);
+ }
+ }
+ /* TODO: fail when not ok */
+ return ret == 0 ? TEST_OK : TEST_SKIP;
+}
+
+struct test_metric {
+ const char *str;
+};
+
+static struct test_metric metrics[] = {
+ { "(unc_p_power_state_occupancy.cores_c0 / unc_p_clockticks) * 100." },
+ { "imx8_ddr0@read\\-cycles@ * 4 * 4", },
+ { "imx8_ddr0@axid\\-read\\,axi_mask\\=0xffff\\,axi_id\\=0x0000@ * 4", },
+ { "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", },
+ { "(imx8_ddr0@read\\-cycles@ + imx8_ddr0@write\\-cycles@)", },
+};
+
+static int metric_parse_fake(const char *str)
+{
+ struct expr_parse_ctx ctx;
+ struct hashmap_entry *cur;
+ double result;
+ int ret = -1;
+ size_t bkt;
+ int i;
+
+ pr_debug("parsing '%s'\n", str);
+
+ expr__ctx_init(&ctx);
+ if (expr__find_other(str, NULL, &ctx, 0) < 0) {
+ pr_err("expr__find_other failed\n");
+ return -1;
+ }
+
+ /*
+ * Add all ids with a made-up value. Equal values may
+ * trigger a divide by zero when subtracted, so make
+ * them unique.
+ */
+ i = 1;
+ hashmap__for_each_entry((&ctx.ids), cur, bkt)
+ expr__add_id_val(&ctx, strdup(cur->key), i++);
+
+ hashmap__for_each_entry((&ctx.ids), cur, bkt) {
+ if (check_parse_fake(cur->key)) {
+ pr_err("check_parse_fake failed\n");
+ goto out;
+ }
+ }
+
+ if (expr__parse(&result, &ctx, str, 1))
+ pr_err("expr__parse failed\n");
+ else
+ ret = 0;
+
+out:
+ expr__ctx_clear(&ctx);
+ return ret;
+}
+
+/*
+ * Parse all the metrics for the current architecture,
+ * or for all defined CPUs via the 'fake_pmu'
+ * in parse_events.
+ */
+static int test_parsing_fake(void)
+{
+ struct pmu_events_map *map;
+ struct pmu_event *pe;
+ unsigned int i, j;
+ int err = 0;
+
+ for (i = 0; i < ARRAY_SIZE(metrics); i++) {
+ err = metric_parse_fake(metrics[i].str);
+ if (err)
+ return err;
+ }
+
+ i = 0;
+ for (;;) {
+ map = &pmu_events_map[i++];
+ if (!map->table)
+ break;
+ j = 0;
+ for (;;) {
+ pe = &map->table[j++];
+ if (!pe->name && !pe->metric_group && !pe->metric_name)
+ break;
+ if (!pe->metric_expr)
+ continue;
+ err = metric_parse_fake(pe->metric_expr);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static const struct {
+ int (*func)(void);
+ const char *desc;
+} pmu_events_testcase_table[] = {
+ {
+ .func = test_pmu_event_table,
+ .desc = "PMU event table sanity",
+ },
+ {
+ .func = test_aliases,
+ .desc = "PMU event map aliases",
+ },
+ {
+ .func = test_parsing,
+ .desc = "Parsing of PMU event table metrics",
+ },
+ {
+ .func = test_parsing_fake,
+ .desc = "Parsing of PMU event table metrics with fake PMUs",
+ },
+};
+
+const char *test__pmu_events_subtest_get_desc(int subtest)
+{
+ if (subtest < 0 ||
+ subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table))
+ return NULL;
+ return pmu_events_testcase_table[subtest].desc;
+}
+
+const char *test__pmu_events_subtest_skip_reason(int subtest)
+{
+ if (subtest < 0 ||
+ subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table))
+ return NULL;
+ if (pmu_events_testcase_table[subtest].func != test_parsing)
+ return NULL;
+ return "some metrics failed";
+}
+
+int test__pmu_events_subtest_get_nr(void)
+{
+ return (int)ARRAY_SIZE(pmu_events_testcase_table);
+}
+
+int test__pmu_events(struct test *test __maybe_unused, int subtest)
+{
+ if (subtest < 0 ||
+ subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table))
+ return TEST_FAIL;
+ return pmu_events_testcase_table[subtest].func();
+}
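The unique k++/i++ values assigned in test_parsing and metric_parse_fake guard against a concrete failure mode. A minimal standalone sketch (plain C, not perf code; names are illustrative) of what identical placeholders would do to a metric that divides by a difference of events:

    #include <stdio.h>

    int main(void)
    {
            /* Colliding placeholders: any metric shaped like
             * x / (a - b) hits a divide by zero. */
            double a = 1.0, b = 1.0;
            /* Unique placeholders, as the k++/i++ loops assign. */
            double a2 = 1.0, b2 = 2.0;

            printf("%f\n", 100.0 / (a - b));   /* inf */
            printf("%f\n", 100.0 / (a2 - b2)); /* -100.0 */
            return 0;
    }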
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index 74379ff1f7fa..714e6830a758 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -156,8 +156,8 @@ int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused)
if (ret)
break;
- ret = perf_pmu__config_terms(&formats, &attr, terms,
- false, NULL);
+ ret = perf_pmu__config_terms("perf-pmu-test", &formats, &attr,
+ terms, false, NULL);
if (ret)
break;
@@ -173,6 +173,7 @@ int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused)
ret = 0;
} while (0);
+ perf_pmu__del_formats(&formats);
test_format_dir_put(format);
return ret;
}
diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c
index 40ab72149ce1..98c6d474aa6f 100644
--- a/tools/perf/tests/python-use.c
+++ b/tools/perf/tests/python-use.c
@@ -18,6 +18,7 @@ int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unuse
PYTHONPATH, PYTHON, verbose > 0 ? "" : "2> /dev/null") < 0)
return -1;
+ pr_debug("python usage test: \"%s\"\n", cmd);
ret = system(cmd) ? -1 : 0;
free(cmd);
return ret;
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 61865699c3f4..a0bdaf390ac8 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -296,12 +296,12 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
goto out_free;
}
- evsel.sample_size = __perf_evsel__sample_size(sample_type);
+ evsel.sample_size = __evsel__sample_size(sample_type);
- err = perf_evsel__parse_sample(&evsel, event, &sample_out);
+ err = evsel__parse_sample(&evsel, event, &sample_out);
if (err) {
pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
- "perf_evsel__parse_sample", sample_type, err);
+ "evsel__parse_sample", sample_type, err);
goto out_free;
}
diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c
index 60f0e9ee04fb..ed76c693f65e 100644
--- a/tools/perf/tests/sdt.c
+++ b/tools/perf/tests/sdt.c
@@ -28,16 +28,16 @@ static int target_function(void)
static int build_id_cache__add_file(const char *filename)
{
char sbuild_id[SBUILD_ID_SIZE];
- u8 build_id[BUILD_ID_SIZE];
+ struct build_id bid;
int err;
- err = filename__read_build_id(filename, &build_id, sizeof(build_id));
+ err = filename__read_build_id(filename, &bid);
if (err < 0) {
pr_debug("Failed to read build id of %s\n", filename);
return err;
}
- build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ build_id__sprintf(&bid, sbuild_id);
err = build_id_cache__add_s(sbuild_id, filename, NULL, false, false);
if (err < 0)
pr_debug("Failed to add build id cache of %s\n", filename);
diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh
new file mode 100755
index 000000000000..4861a20edee2
--- /dev/null
+++ b/tools/perf/tests/shell/buildid.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+# build id cache operations
+# SPDX-License-Identifier: GPL-2.0
+
+# skip if there's no readelf
+if ! [ -x "$(command -v readelf)" ]; then
+ echo "failed: no readelf, install binutils"
+ exit 2
+fi
+
+# skip if there's no compiler
+if ! [ -x "$(command -v cc)" ]; then
+ echo "failed: no compiler, install gcc"
+ exit 2
+fi
+
+ex_md5=$(mktemp /tmp/perf.ex.MD5.XXX)
+ex_sha1=$(mktemp /tmp/perf.ex.SHA1.XXX)
+
+echo 'int main(void) { return 0; }' | cc -Wl,--build-id=sha1 -o ${ex_sha1} -x c -
+echo 'int main(void) { return 0; }' | cc -Wl,--build-id=md5 -o ${ex_md5} -x c -
+
+echo "test binaries: ${ex_sha1} ${ex_md5}"
+
+check()
+{
+ id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'`
+
+ echo "build id: ${id}"
+
+ link=${build_id_dir}/.build-id/${id:0:2}/${id:2}
+ echo "link: ${link}"
+
+ if [ ! -h $link ]; then
+ echo "failed: link ${link} does not exist"
+ exit 1
+ fi
+
+ file=${build_id_dir}/.build-id/${id:0:2}/`readlink ${link}`/elf
+ echo "file: ${file}"
+
+ if [ ! -x $file ]; then
+ echo "failed: file ${file} does not exist"
+ exit 1
+ fi
+
+ diff ${file} ${1}
+ if [ $? -ne 0 ]; then
+ echo "failed: ${file} do not match"
+ exit 1
+ fi
+
+ echo "OK for ${1}"
+}
+
+test_add()
+{
+ build_id_dir=$(mktemp -d /tmp/perf.debug.XXX)
+ perf="perf --buildid-dir ${build_id_dir}"
+
+ ${perf} buildid-cache -v -a ${1}
+ if [ $? -ne 0 ]; then
+ echo "failed: add ${1} to build id cache"
+ exit 1
+ fi
+
+ check ${1}
+
+ rm -rf ${build_id_dir}
+}
+
+test_record()
+{
+ data=$(mktemp /tmp/perf.data.XXX)
+ build_id_dir=$(mktemp -d /tmp/perf.debug.XXX)
+ perf="perf --buildid-dir ${build_id_dir}"
+
+ ${perf} record --buildid-all -o ${data} ${1}
+ if [ $? -ne 0 ]; then
+ echo "failed: record ${1}"
+ exit 1
+ fi
+
+ check ${1}
+
+ rm -rf ${build_id_dir}
+ rm -rf ${data}
+}
+
+# add binaries manually via perf buildid-cache -a
+test_add ${ex_sha1}
+test_add ${ex_md5}
+
+# add binaries via perf record post processing
+test_record ${ex_sha1}
+test_record ${ex_md5}
+
+# cleanup
+rm ${ex_sha1} ${ex_md5}
+
+exit 0
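check() encodes the on-disk layout of the build id cache: the first two hex digits of the build id become a fan-out directory and the rest the symlink name, so the link lives at ${build_id_dir}/.build-id/${id:0:2}/${id:2} and points into a per-binary directory whose elf file must be byte-identical to the original binary, which is what the final diff verifies.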
diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
index 54030c18bfc2..bf9e729b3ecf 100755
--- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
@@ -20,13 +20,13 @@ file=$(mktemp /tmp/temporary_file.XXXXX)
record_open_file() {
echo "Recording open file:"
- perf record -o ${perfdata} -e probe:vfs_getname touch $file
+ perf record -o ${perfdata} -e probe:vfs_getname\* touch $file
}
perf_script_filenames() {
echo "Looking at perf.data file for vfs_getname records for the file we touched:"
perf script -i ${perfdata} | \
- egrep " +touch +[0-9]+ +\[[0-9]+\] +[0-9]+\.[0-9]+: +probe:vfs_getname: +\([[:xdigit:]]+\) +pathname=\"${file}\""
+ egrep " +touch +[0-9]+ +\[[0-9]+\] +[0-9]+\.[0-9]+: +probe:vfs_getname[_0-9]*: +\([[:xdigit:]]+\) +pathname=\"${file}\""
}
add_probe_vfs_getname || skip_if_no_debuginfo
diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
index 63a91ec473bb..045723b3d992 100755
--- a/tools/perf/tests/shell/record+zstd_comp_decomp.sh
+++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
@@ -12,7 +12,8 @@ skip_if_no_z_record() {
collect_z_record() {
echo "Collecting compressed record file:"
- $perf_tool record -o $trace_file -g -z -F 5000 -- \
+ [[ "$(uname -m)" != s390x ]] && gflag='-g'
+ $perf_tool record -o $trace_file $gflag -z -F 5000 -- \
dd count=500 if=/dev/urandom of=/dev/null
}
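The new guard leaves $gflag empty on s390x, so the compressed-record test still runs there, just without call-graph collection (-g); on every other architecture the record command is unchanged.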
diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh
new file mode 100755
index 000000000000..8d84fdbed6a6
--- /dev/null
+++ b/tools/perf/tests/shell/test_arm_coresight.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+# Check Arm CoreSight trace data recording and synthesized samples
+
+# Uses 'perf record' to record trace data with Arm CoreSight sinks;
+# then verifies that branch samples and instruction samples are
+# generated by CoreSight, using the 'perf script' and 'perf report'
+# commands.
+
+# SPDX-License-Identifier: GPL-2.0
+# Leo Yan <leo.yan@linaro.org>, 2020
+
+perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+file=$(mktemp /tmp/temporary_file.XXXXX)
+
+skip_if_no_cs_etm_event() {
+ perf list | grep -q 'cs_etm//' && return 0
+
+ # cs_etm event doesn't exist
+ return 2
+}
+
+skip_if_no_cs_etm_event || exit 2
+
+cleanup_files()
+{
+ rm -f ${perfdata}
+ rm -f ${file}
+}
+
+trap cleanup_files exit
+
+record_touch_file() {
+ echo "Recording trace (only user mode) with path: CPU$2 => $1"
+ rm -f $file
+ perf record -o ${perfdata} -e cs_etm/@$1/u --per-thread \
+ -- taskset -c $2 touch $file
+}
+
+perf_script_branch_samples() {
+ echo "Looking at perf.data file for dumping branch samples:"
+
+ # Below is an example of the dumped branch samples:
+ # touch 6512 1 branches:u: ffffb220824c strcmp+0xc (/lib/aarch64-linux-gnu/ld-2.27.so)
+ # touch 6512 1 branches:u: ffffb22082e0 strcmp+0xa0 (/lib/aarch64-linux-gnu/ld-2.27.so)
+ # touch 6512 1 branches:u: ffffb2208320 strcmp+0xe0 (/lib/aarch64-linux-gnu/ld-2.27.so)
+ perf script -F,-time -i ${perfdata} | \
+ egrep " +$1 +[0-9]+ .* +branches:([u|k]:)? +"
+}
+
+perf_report_branch_samples() {
+ echo "Looking at perf.data file for reporting branch samples:"
+
+ # Below is an example of the reported branch samples:
+ # 73.04% 73.04% touch libc-2.27.so [.] _dl_addr
+ # 7.71% 7.71% touch libc-2.27.so [.] getenv
+ # 2.59% 2.59% touch ld-2.27.so [.] strcmp
+ perf report --stdio -i ${perfdata} | \
+ egrep " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 "
+}
+
+perf_report_instruction_samples() {
+ echo "Looking at perf.data file for instruction samples:"
+
+ # Below is an example of the reported instruction samples:
+ # 68.12% touch libc-2.27.so [.] _dl_addr
+ # 5.80% touch libc-2.27.so [.] getenv
+ # 4.35% touch ld-2.27.so [.] _dl_fixup
+ perf report --itrace=i1000i --stdio -i ${perfdata} | \
+ egrep " +[0-9]+\.[0-9]+% +$1"
+}
+
+is_device_sink() {
+ # If an "enable_sink" node exists under the device path, the
+ # device is a sink device. Exclude 'tpiu' since it does not
+ # support the perf PMU.
+ echo "$1" | egrep -q -v "tpiu"
+
+ if [ $? -eq 0 -a -e "$1/enable_sink" ]; then
+
+ pmu_dev="/sys/bus/event_source/devices/cs_etm/sinks/$2"
+
+ # Warn if the device is not supported by PMU
+ if ! [ -f $pmu_dev ]; then
+ echo "PMU doesn't support $pmu_dev"
+ fi
+
+ return 0
+ fi
+
+ # Otherwise, it's not a sink device
+ return 1
+}
+
+arm_cs_iterate_devices() {
+ for dev in $1/connections/out\:*; do
+
+ # Skip testing if it's not a directory
+ ! [ -d $dev ] && continue;
+
+ # Resolve the symlink to get the real device path
+ path=`readlink -f $dev`
+
+ # Extract device name from path, e.g.
+ # path = '/sys/devices/platform/20010000.etf/tmc_etf0'
+ # `> device_name = 'tmc_etf0'
+ device_name=$(basename $path)
+
+ if is_device_sink $path $device_name; then
+
+ record_touch_file $device_name $2 &&
+ perf_script_branch_samples touch &&
+ perf_report_branch_samples touch &&
+ perf_report_instruction_samples touch
+
+ err=$?
+
+ # Exit on failure
+ [ $err != 0 ] && exit $err
+ fi
+
+ arm_cs_iterate_devices $dev $2
+ done
+}
+
+arm_cs_etm_traverse_path_test() {
+ # Iterate for every ETM device
+ for dev in /sys/bus/coresight/devices/etm*; do
+
+ # Find the ETM device belonging to which CPU
+ cpu=`cat $dev/cpu`
+
+ echo $dev
+ echo $cpu
+
+ # Use depth-first search (DFS) to iterate outputs
+ arm_cs_iterate_devices $dev $cpu
+ done
+}
+
+arm_cs_etm_system_wide_test() {
+ echo "Recording trace with system wide mode"
+ perf record -o ${perfdata} -e cs_etm// -a -- ls
+
+ perf_script_branch_samples perf &&
+ perf_report_branch_samples perf &&
+ perf_report_instruction_samples perf
+
+ err=$?
+
+ # Exit on failure
+ [ $err != 0 ] && exit $err
+}
+
+arm_cs_etm_snapshot_test() {
+ echo "Recording trace with snapshot mode"
+ perf record -o ${perfdata} -e cs_etm// -S \
+ -- dd if=/dev/zero of=/dev/null &
+ PERFPID=$!
+
+ # Wait for perf program
+ sleep 1
+
+ # Send signal to snapshot trace data
+ kill -USR2 $PERFPID
+
+ # Stop perf program
+ kill $PERFPID
+ wait $PERFPID
+
+ perf_script_branch_samples dd &&
+ perf_report_branch_samples dd &&
+ perf_report_instruction_samples dd
+
+ err=$?
+
+ # Exit on failure
+ [ $err != 0 ] && exit $err
+}
+
+arm_cs_etm_traverse_path_test
+arm_cs_etm_system_wide_test
+arm_cs_etm_snapshot_test
+exit 0
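The traversal is driven purely by sysfs topology: starting from each /sys/bus/coresight/devices/etm* node, arm_cs_iterate_devices() follows every connections/out:* symlink depth-first (e.g. through a funnel to an ETF/ETR) and runs one user-mode recording per reachable sink, pinned to that ETM's CPU, so a chain like etm0 -> funnel0 -> tmc_etf0 produces a recording with sink tmc_etf0.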
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index bfb9986093d8..4b9b731977c8 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -56,7 +56,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
evsel = evsel__new(&attr);
if (evsel == NULL) {
- pr_debug("perf_evsel__new\n");
+ pr_debug("evsel__new\n");
goto out_delete_evlist;
}
evlist__add(evlist, evsel);
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index fcb0d03dba4e..db5e1f70053a 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -135,8 +135,8 @@ static int process_sample_event(struct evlist *evlist,
evsel = perf_evlist__id2evsel(evlist, sample.id);
if (evsel == switch_tracking->switch_evsel) {
- next_tid = perf_evsel__intval(evsel, &sample, "next_pid");
- prev_tid = perf_evsel__intval(evsel, &sample, "prev_pid");
+ next_tid = evsel__intval(evsel, &sample, "next_pid");
+ prev_tid = evsel__intval(evsel, &sample, "prev_pid");
cpu = sample.cpu;
pr_debug3("sched_switch: cpu: %d prev_tid %d next_tid %d\n",
cpu, prev_tid, next_tid);
@@ -394,8 +394,8 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
switch_evsel = evlist__last(evlist);
- perf_evsel__set_sample_bit(switch_evsel, CPU);
- perf_evsel__set_sample_bit(switch_evsel, TIME);
+ evsel__set_sample_bit(switch_evsel, CPU);
+ evsel__set_sample_bit(switch_evsel, TIME);
switch_evsel->core.system_wide = true;
switch_evsel->no_aux_samples = true;
@@ -412,8 +412,8 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
goto out_err;
}
- perf_evsel__set_sample_bit(cycles_evsel, CPU);
- perf_evsel__set_sample_bit(cycles_evsel, TIME);
+ evsel__set_sample_bit(cycles_evsel, CPU);
+ evsel__set_sample_bit(cycles_evsel, TIME);
/* Fourth event */
err = parse_events(evlist, "dummy:u", NULL);
@@ -429,7 +429,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
tracking_evsel->core.attr.freq = 0;
tracking_evsel->core.attr.sample_period = 1;
- perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ evsel__set_sample_bit(tracking_evsel, TIME);
/* Config events */
perf_evlist__config(evlist, &opts, NULL);
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 61a1ab032080..c85a2c08e407 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -34,6 +34,7 @@ struct test {
bool skip_if_fail;
int (*get_nr)(void);
const char *(*get_desc)(int subtest);
+ const char *(*skip_reason)(int subtest);
} subtest;
bool (*is_supported)(void);
void *priv;
@@ -50,6 +51,9 @@ int test__perf_evsel__tp_sched_test(struct test *test, int subtest);
int test__syscall_openat_tp_fields(struct test *test, int subtest);
int test__pmu(struct test *test, int subtest);
int test__pmu_events(struct test *test, int subtest);
+const char *test__pmu_events_subtest_get_desc(int subtest);
+const char *test__pmu_events_subtest_skip_reason(int subtest);
+int test__pmu_events_subtest_get_nr(void);
int test__attr(struct test *test, int subtest);
int test__dso_data(struct test *test, int subtest);
int test__dso_data_cache(struct test *test, int subtest);
@@ -112,6 +116,14 @@ int test__mem2node(struct test *t, int subtest);
int test__maps__merge_in(struct test *t, int subtest);
int test__time_utils(struct test *t, int subtest);
int test__jit_write_elf(struct test *test, int subtest);
+int test__api_io(struct test *test, int subtest);
+int test__demangle_java(struct test *test, int subtest);
+int test__pfm(struct test *test, int subtest);
+const char *test__pfm_subtest_get_desc(int subtest);
+int test__pfm_subtest_get_nr(void);
+int test__parse_metric(struct test *test, int subtest);
+int test__pe_file_parsing(struct test *test, int subtest);
+int test__expand_cgroup_events(struct test *test, int subtest);
bool test__bp_signal_is_supported(void);
bool test__bp_account_is_supported(void);
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 4a800499d7c3..22daf2bdf5fa 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -33,10 +33,8 @@ static int session_write_header(char *path)
{
struct perf_session *session;
struct perf_data data = {
- .file = {
- .path = path,
- },
- .mode = PERF_DATA_MODE_WRITE,
+ .path = path,
+ .mode = PERF_DATA_MODE_WRITE,
};
session = perf_session__new(&data, false, NULL);
@@ -63,10 +61,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
{
struct perf_session *session;
struct perf_data data = {
- .file = {
- .path = path,
- },
- .mode = PERF_DATA_MODE_READ,
+ .path = path,
+ .mode = PERF_DATA_MODE_READ,
};
int i;
diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh
index 22c9fc900c84..9f9ea45cddc4 100755
--- a/tools/perf/trace/beauty/arch_errno_names.sh
+++ b/tools/perf/trace/beauty/arch_errno_names.sh
@@ -57,7 +57,7 @@ process_arch()
local arch="$1"
local asm_errno=$(asm_errno_file "$arch")
- $gcc $include_path -E -dM -x c $asm_errno \
+ $gcc $CFLAGS $include_path -E -dM -x c $asm_errno \
|grep -hE '^#define[[:blank:]]+(E[^[:blank:]]+)[[:blank:]]+([[:digit:]]+).*' \
|awk '{ print $2","$3; }' \
|sort -t, -k2 -nu \
@@ -91,7 +91,7 @@ EoHEADER
# in tools/perf/arch
archlist=""
for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | grep -v x86 | sort); do
- test -d arch/$arch && archlist="$archlist $arch"
+ test -d $toolsdir/perf/arch/$arch && archlist="$archlist $arch"
done
for arch in x86 $archlist generic; do
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
new file mode 100644
index 000000000000..e9cb30d8cbfb
--- /dev/null
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -0,0 +1,442 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SOCKET_H
+#define _LINUX_SOCKET_H
+
+
+#include <asm/socket.h> /* arch-dependent defines */
+#include <linux/sockios.h> /* the SIOCxxx I/O controls */
+#include <linux/uio.h> /* iovec support */
+#include <linux/types.h> /* pid_t */
+#include <linux/compiler.h> /* __user */
+#include <uapi/linux/socket.h>
+
+struct file;
+struct pid;
+struct cred;
+struct socket;
+
+#define __sockaddr_check_size(size) \
+ BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage)))
+
+#ifdef CONFIG_PROC_FS
+struct seq_file;
+extern void socket_seq_show(struct seq_file *seq);
+#endif
+
+typedef __kernel_sa_family_t sa_family_t;
+
+/*
+ * 1003.1g requires sa_family_t and that sa_data is char.
+ */
+
+struct sockaddr {
+ sa_family_t sa_family; /* address family, AF_xxx */
+ char sa_data[14]; /* 14 bytes of protocol address */
+};
+
+struct linger {
+ int l_onoff; /* Linger active */
+ int l_linger; /* How long to linger for */
+};
+
+#define sockaddr_storage __kernel_sockaddr_storage
+
+/*
+ * As we do 4.4BSD message passing we use a 4.4BSD message passing
+ * system, not 4.3. Thus msg_accrights(len) are now missing. They
+ * belong in an obscure libc emulation or the bin.
+ */
+
+struct msghdr {
+ void *msg_name; /* ptr to socket address structure */
+ int msg_namelen; /* size of socket address structure */
+ struct iov_iter msg_iter; /* data */
+
+ /*
+ * Ancillary data. msg_control_user is the user buffer used for the
+ * recv* side when msg_control_is_user is set, msg_control is the kernel
+ * buffer used for all other cases.
+ */
+ union {
+ void *msg_control;
+ void __user *msg_control_user;
+ };
+ bool msg_control_is_user : 1;
+ __kernel_size_t msg_controllen; /* ancillary data buffer length */
+ unsigned int msg_flags; /* flags on received message */
+ struct kiocb *msg_iocb; /* ptr to iocb for async requests */
+};
+
+struct user_msghdr {
+ void __user *msg_name; /* ptr to socket address structure */
+ int msg_namelen; /* size of socket address structure */
+ struct iovec __user *msg_iov; /* scatter/gather array */
+ __kernel_size_t msg_iovlen; /* # elements in msg_iov */
+ void __user *msg_control; /* ancillary data */
+ __kernel_size_t msg_controllen; /* ancillary data buffer length */
+ unsigned int msg_flags; /* flags on received message */
+};
+
+/* For recvmmsg/sendmmsg */
+struct mmsghdr {
+ struct user_msghdr msg_hdr;
+ unsigned int msg_len;
+};
+
+/*
+ * POSIX 1003.1g - ancillary data object information
+ * Ancillary data consists of a sequence of pairs of
+ * (cmsghdr, cmsg_data[])
+ */
+
+struct cmsghdr {
+ __kernel_size_t cmsg_len; /* data byte count, including hdr */
+ int cmsg_level; /* originating protocol */
+ int cmsg_type; /* protocol-specific type */
+};
+
+/*
+ * Ancillary data object information MACROS
+ * Table 5-14 of POSIX 1003.1g
+ */
+
+#define __CMSG_NXTHDR(ctl, len, cmsg) __cmsg_nxthdr((ctl),(len),(cmsg))
+#define CMSG_NXTHDR(mhdr, cmsg) cmsg_nxthdr((mhdr), (cmsg))
+
+#define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) )
+
+#define CMSG_DATA(cmsg) \
+ ((void *)(cmsg) + sizeof(struct cmsghdr))
+#define CMSG_USER_DATA(cmsg) \
+ ((void __user *)(cmsg) + sizeof(struct cmsghdr))
+#define CMSG_SPACE(len) (sizeof(struct cmsghdr) + CMSG_ALIGN(len))
+#define CMSG_LEN(len) (sizeof(struct cmsghdr) + (len))
+
+#define __CMSG_FIRSTHDR(ctl,len) ((len) >= sizeof(struct cmsghdr) ? \
+ (struct cmsghdr *)(ctl) : \
+ (struct cmsghdr *)NULL)
+#define CMSG_FIRSTHDR(msg) __CMSG_FIRSTHDR((msg)->msg_control, (msg)->msg_controllen)
+#define CMSG_OK(mhdr, cmsg) ((cmsg)->cmsg_len >= sizeof(struct cmsghdr) && \
+ (cmsg)->cmsg_len <= (unsigned long) \
+ ((mhdr)->msg_controllen - \
+ ((char *)(cmsg) - (char *)(mhdr)->msg_control)))
+#define for_each_cmsghdr(cmsg, msg) \
+ for (cmsg = CMSG_FIRSTHDR(msg); \
+ cmsg; \
+ cmsg = CMSG_NXTHDR(msg, cmsg))
+
+/*
+ * Get the next cmsg header
+ *
+ * PLEASE, do not touch this function. If you think, that it is
+ * incorrect, grep kernel sources and think about consequences
+ * before trying to improve it.
+ *
+ * Now it always returns valid, not truncated ancillary object
+ * HEADER. But caller still MUST check, that cmsg->cmsg_len is
+ * inside range, given by msg->msg_controllen before using
+ * ancillary object DATA. --ANK (980731)
+ */
+
+static inline struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size,
+ struct cmsghdr *__cmsg)
+{
+ struct cmsghdr * __ptr;
+
+ __ptr = (struct cmsghdr*)(((unsigned char *) __cmsg) + CMSG_ALIGN(__cmsg->cmsg_len));
+ if ((unsigned long)((char*)(__ptr+1) - (char *) __ctl) > __size)
+ return (struct cmsghdr *)0;
+
+ return __ptr;
+}
+
+static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr *__cmsg)
+{
+ return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
+}
+
+static inline size_t msg_data_left(struct msghdr *msg)
+{
+ return iov_iter_count(&msg->msg_iter);
+}
+
+/* "Socket"-level control message types: */
+
+#define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */
+#define SCM_CREDENTIALS 0x02 /* rw: struct ucred */
+#define SCM_SECURITY 0x03 /* rw: security label */
+
+struct ucred {
+ __u32 pid;
+ __u32 uid;
+ __u32 gid;
+};
+
+/* Supported address families. */
+#define AF_UNSPEC 0
+#define AF_UNIX 1 /* Unix domain sockets */
+#define AF_LOCAL 1 /* POSIX name for AF_UNIX */
+#define AF_INET 2 /* Internet IP Protocol */
+#define AF_AX25 3 /* Amateur Radio AX.25 */
+#define AF_IPX 4 /* Novell IPX */
+#define AF_APPLETALK 5 /* AppleTalk DDP */
+#define AF_NETROM 6 /* Amateur Radio NET/ROM */
+#define AF_BRIDGE 7 /* Multiprotocol bridge */
+#define AF_ATMPVC 8 /* ATM PVCs */
+#define AF_X25 9 /* Reserved for X.25 project */
+#define AF_INET6 10 /* IP version 6 */
+#define AF_ROSE 11 /* Amateur Radio X.25 PLP */
+#define AF_DECnet 12 /* Reserved for DECnet project */
+#define AF_NETBEUI 13 /* Reserved for 802.2LLC project*/
+#define AF_SECURITY 14 /* Security callback pseudo AF */
+#define AF_KEY 15 /* PF_KEY key management API */
+#define AF_NETLINK 16
+#define AF_ROUTE AF_NETLINK /* Alias to emulate 4.4BSD */
+#define AF_PACKET 17 /* Packet family */
+#define AF_ASH 18 /* Ash */
+#define AF_ECONET 19 /* Acorn Econet */
+#define AF_ATMSVC 20 /* ATM SVCs */
+#define AF_RDS 21 /* RDS sockets */
+#define AF_SNA 22 /* Linux SNA Project (nutters!) */
+#define AF_IRDA 23 /* IRDA sockets */
+#define AF_PPPOX 24 /* PPPoX sockets */
+#define AF_WANPIPE 25 /* Wanpipe API Sockets */
+#define AF_LLC 26 /* Linux LLC */
+#define AF_IB 27 /* Native InfiniBand address */
+#define AF_MPLS 28 /* MPLS */
+#define AF_CAN 29 /* Controller Area Network */
+#define AF_TIPC 30 /* TIPC sockets */
+#define AF_BLUETOOTH 31 /* Bluetooth sockets */
+#define AF_IUCV 32 /* IUCV sockets */
+#define AF_RXRPC 33 /* RxRPC sockets */
+#define AF_ISDN 34 /* mISDN sockets */
+#define AF_PHONET 35 /* Phonet sockets */
+#define AF_IEEE802154 36 /* IEEE802154 sockets */
+#define AF_CAIF 37 /* CAIF sockets */
+#define AF_ALG 38 /* Algorithm sockets */
+#define AF_NFC 39 /* NFC sockets */
+#define AF_VSOCK 40 /* vSockets */
+#define AF_KCM 41 /* Kernel Connection Multiplexor*/
+#define AF_QIPCRTR 42 /* Qualcomm IPC Router */
+#define AF_SMC 43 /* smc sockets: reserve number for
+ * PF_SMC protocol family that
+ * reuses AF_INET address family
+ */
+#define AF_XDP 44 /* XDP sockets */
+
+#define AF_MAX 45 /* For now.. */
+
+/* Protocol families, same as address families. */
+#define PF_UNSPEC AF_UNSPEC
+#define PF_UNIX AF_UNIX
+#define PF_LOCAL AF_LOCAL
+#define PF_INET AF_INET
+#define PF_AX25 AF_AX25
+#define PF_IPX AF_IPX
+#define PF_APPLETALK AF_APPLETALK
+#define PF_NETROM AF_NETROM
+#define PF_BRIDGE AF_BRIDGE
+#define PF_ATMPVC AF_ATMPVC
+#define PF_X25 AF_X25
+#define PF_INET6 AF_INET6
+#define PF_ROSE AF_ROSE
+#define PF_DECnet AF_DECnet
+#define PF_NETBEUI AF_NETBEUI
+#define PF_SECURITY AF_SECURITY
+#define PF_KEY AF_KEY
+#define PF_NETLINK AF_NETLINK
+#define PF_ROUTE AF_ROUTE
+#define PF_PACKET AF_PACKET
+#define PF_ASH AF_ASH
+#define PF_ECONET AF_ECONET
+#define PF_ATMSVC AF_ATMSVC
+#define PF_RDS AF_RDS
+#define PF_SNA AF_SNA
+#define PF_IRDA AF_IRDA
+#define PF_PPPOX AF_PPPOX
+#define PF_WANPIPE AF_WANPIPE
+#define PF_LLC AF_LLC
+#define PF_IB AF_IB
+#define PF_MPLS AF_MPLS
+#define PF_CAN AF_CAN
+#define PF_TIPC AF_TIPC
+#define PF_BLUETOOTH AF_BLUETOOTH
+#define PF_IUCV AF_IUCV
+#define PF_RXRPC AF_RXRPC
+#define PF_ISDN AF_ISDN
+#define PF_PHONET AF_PHONET
+#define PF_IEEE802154 AF_IEEE802154
+#define PF_CAIF AF_CAIF
+#define PF_ALG AF_ALG
+#define PF_NFC AF_NFC
+#define PF_VSOCK AF_VSOCK
+#define PF_KCM AF_KCM
+#define PF_QIPCRTR AF_QIPCRTR
+#define PF_SMC AF_SMC
+#define PF_XDP AF_XDP
+#define PF_MAX AF_MAX
+
+/* Maximum queue length specifiable by listen. */
+#define SOMAXCONN 4096
+
+/* Flags we can use with send() and recv().
+ Added those for 1003.1g; not all are supported yet.
+ */
+
+#define MSG_OOB 1
+#define MSG_PEEK 2
+#define MSG_DONTROUTE 4
+#define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */
+#define MSG_CTRUNC 8
+#define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. for MTU */
+#define MSG_TRUNC 0x20
+#define MSG_DONTWAIT 0x40 /* Nonblocking io */
+#define MSG_EOR 0x80 /* End of record */
+#define MSG_WAITALL 0x100 /* Wait for a full request */
+#define MSG_FIN 0x200
+#define MSG_SYN 0x400
+#define MSG_CONFIRM 0x800 /* Confirm path validity */
+#define MSG_RST 0x1000
+#define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */
+#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */
+#define MSG_MORE 0x8000 /* Sender will send more */
+#define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */
+#define MSG_SENDPAGE_NOPOLICY 0x10000 /* sendpage() internal : do not apply policy */
+#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
+#define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */
+#define MSG_EOF MSG_FIN
+#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */
+#define MSG_SENDPAGE_DECRYPTED 0x100000 /* sendpage() internal : page may carry
+ * plain text and require encryption
+ */
+
+#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */
+#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */
+#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file
+ descriptor received through
+ SCM_RIGHTS */
+#if defined(CONFIG_COMPAT)
+#define MSG_CMSG_COMPAT 0x80000000 /* This message needs 32 bit fixups */
+#else
+#define MSG_CMSG_COMPAT 0 /* We never have 32 bit fixups */
+#endif
+
+
+/* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */
+#define SOL_IP 0
+/* #define SOL_ICMP 1 No-no-no! Due to Linux :-) we cannot use SOL_ICMP=1 */
+#define SOL_TCP 6
+#define SOL_UDP 17
+#define SOL_IPV6 41
+#define SOL_ICMPV6 58
+#define SOL_SCTP 132
+#define SOL_UDPLITE 136 /* UDP-Lite (RFC 3828) */
+#define SOL_RAW 255
+#define SOL_IPX 256
+#define SOL_AX25 257
+#define SOL_ATALK 258
+#define SOL_NETROM 259
+#define SOL_ROSE 260
+#define SOL_DECNET 261
+#define SOL_X25 262
+#define SOL_PACKET 263
+#define SOL_ATM 264 /* ATM layer (cell level) */
+#define SOL_AAL 265 /* ATM Adaption Layer (packet level) */
+#define SOL_IRDA 266
+#define SOL_NETBEUI 267
+#define SOL_LLC 268
+#define SOL_DCCP 269
+#define SOL_NETLINK 270
+#define SOL_TIPC 271
+#define SOL_RXRPC 272
+#define SOL_PPPOL2TP 273
+#define SOL_BLUETOOTH 274
+#define SOL_PNPIPE 275
+#define SOL_RDS 276
+#define SOL_IUCV 277
+#define SOL_CAIF 278
+#define SOL_ALG 279
+#define SOL_NFC 280
+#define SOL_KCM 281
+#define SOL_TLS 282
+#define SOL_XDP 283
+
+/* IPX options */
+#define IPX_TYPE 1
+
+extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
+extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
+
+struct timespec64;
+struct __kernel_timespec;
+struct old_timespec32;
+
+struct scm_timestamping_internal {
+ struct timespec64 ts[3];
+};
+
+extern void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss);
+extern void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss);
+
+/* The __sys_...msg variants allow MSG_CMSG_COMPAT iff
+ * forbid_cmsg_compat==false
+ */
+extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg,
+ unsigned int flags, bool forbid_cmsg_compat);
+extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg,
+ unsigned int flags, bool forbid_cmsg_compat);
+extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
+ unsigned int vlen, unsigned int flags,
+ struct __kernel_timespec __user *timeout,
+ struct old_timespec32 __user *timeout32);
+extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
+ unsigned int vlen, unsigned int flags,
+ bool forbid_cmsg_compat);
+extern long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
+ unsigned int flags);
+extern long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
+ struct user_msghdr __user *umsg,
+ struct sockaddr __user *uaddr,
+ unsigned int flags);
+extern int sendmsg_copy_msghdr(struct msghdr *msg,
+ struct user_msghdr __user *umsg, unsigned flags,
+ struct iovec **iov);
+extern int recvmsg_copy_msghdr(struct msghdr *msg,
+ struct user_msghdr __user *umsg, unsigned flags,
+ struct sockaddr __user **uaddr,
+ struct iovec **iov);
+extern int __copy_msghdr_from_user(struct msghdr *kmsg,
+ struct user_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ struct iovec __user **uiov, size_t *nsegs);
+
+/* helpers which do the actual work for syscalls */
+extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size,
+ unsigned int flags, struct sockaddr __user *addr,
+ int __user *addr_len);
+extern int __sys_sendto(int fd, void __user *buff, size_t len,
+ unsigned int flags, struct sockaddr __user *addr,
+ int addr_len);
+extern int __sys_accept4_file(struct file *file, unsigned file_flags,
+ struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags,
+ unsigned long nofile);
+extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags);
+extern int __sys_socket(int family, int type, int protocol);
+extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
+extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
+ int addrlen, int file_flags);
+extern int __sys_connect(int fd, struct sockaddr __user *uservaddr,
+ int addrlen);
+extern int __sys_listen(int fd, int backlog);
+extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
+ int __user *usockaddr_len);
+extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
+ int __user *usockaddr_len);
+extern int __sys_socketpair(int family, int type, int protocol,
+ int __user *usockvec);
+extern int __sys_shutdown(int fd, int how);
+
+extern struct ns_common *get_net_ns(struct ns_common *ns);
+#endif /* _LINUX_SOCKET_H */
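
Note on usage (not part of the patch): these SOL_* levels are the same numeric values userspace passes as the 'level' argument of setsockopt()/getsockopt(); for TCP the level is 6, i.e. SOL_TCP == IPPROTO_TCP. A minimal, hypothetical sketch for a Linux/glibc system:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int one = 1;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return 1;
	/* level 6: IPPROTO_TCP in userspace, SOL_TCP in the header above */
	if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)))
		perror("setsockopt");
	close(fd);
	return 0;
}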
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index 862c8331dded..3c5e97b93dd5 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -1,40 +1,28 @@
// SPDX-License-Identifier: LGPL-2.1
-#include <uapi/linux/mman.h>
#include <linux/log2.h>
-static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
- struct syscall_arg *arg)
+#include "trace/beauty/generated/mmap_prot_array.c"
+static DEFINE_STRARRAY(mmap_prot, "PROT_");
+
+static size_t mmap__scnprintf_prot(unsigned long prot, char *bf, size_t size, bool show_prefix)
+{
+ return strarray__scnprintf_flags(&strarray__mmap_prot, bf, size, show_prefix, prot);
+}
+
+static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, struct syscall_arg *arg)
{
- const char *prot_prefix = "PROT_";
- int printed = 0, prot = arg->val;
- bool show_prefix = arg->show_string_prefix;
-
- if (prot == PROT_NONE)
- return scnprintf(bf, size, "%sNONE", show_prefix ? prot_prefix : "");
-#define P_MMAP_PROT(n) \
- if (prot & PROT_##n) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prot_prefix :"", #n); \
- prot &= ~PROT_##n; \
- }
-
- P_MMAP_PROT(READ);
- P_MMAP_PROT(WRITE);
- P_MMAP_PROT(EXEC);
- P_MMAP_PROT(SEM);
- P_MMAP_PROT(GROWSDOWN);
- P_MMAP_PROT(GROWSUP);
-#undef P_MMAP_PROT
-
- if (prot)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
-
- return printed;
+ unsigned long prot = arg->val;
+
+ if (prot == 0)
+ return scnprintf(bf, size, "%sNONE", arg->show_string_prefix ? strarray__mmap_prot.prefix : "");
+
+ return mmap__scnprintf_prot(prot, bf, size, arg->show_string_prefix);
}
#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
#include "trace/beauty/generated/mmap_flags_array.c"
- static DEFINE_STRARRAY(mmap_flags, "MAP_");
+static DEFINE_STRARRAY(mmap_flags, "MAP_");
static size_t mmap__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
{
@@ -54,28 +42,22 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
-static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
- struct syscall_arg *arg)
-{
- const char *flags_prefix = "MREMAP_";
- bool show_prefix = arg->show_string_prefix;
- int printed = 0, flags = arg->val;
+#include "trace/beauty/generated/mremap_flags_array.c"
+static DEFINE_STRARRAY(mremap_flags, "MREMAP_");
-#define P_MREMAP_FLAG(n) \
- if (flags & MREMAP_##n) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? flags_prefix : "", #n); \
- flags &= ~MREMAP_##n; \
- }
+static size_t mremap__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
+{
+ return strarray__scnprintf_flags(&strarray__mremap_flags, bf, size, show_prefix, flags);
+}
- P_MREMAP_FLAG(MAYMOVE);
- P_MREMAP_FLAG(FIXED);
- P_MREMAP_FLAG(DONTUNMAP);
-#undef P_MREMAP_FLAG
+static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+ unsigned long flags = arg->val;
- if (flags)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+ if (!(flags & MREMAP_FIXED))
+ arg->mask |= (1 << 5); /* Mask the 5th ('new_address') arg, ignored */
- return printed;
+ return mremap__scnprintf_flags(flags, bf, size, arg->show_string_prefix);
}
#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
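
The rewrite above drops the open-coded P_MMAP_PROT()/P_MREMAP_FLAG() chains in favour of tables generated at build time plus the generic strarray__scnprintf_flags() printer. A rough stand-alone sketch of what such a flags printer does (illustrative names only, not the actual perf strarray API): walk the set bits, join the known names with '|', and fall back to hex for bits without a name.

#include <stdio.h>

/* Illustrative only: names[i] is the name of bit i of 'flags'. */
static size_t flags__scnprintf(char *bf, size_t size, unsigned long flags,
			       const char * const *names, int nr_names)
{
	size_t printed = 0;
	int i;

	for (i = 0; i < nr_names && printed < size; i++) {
		if (!(flags & (1UL << i)) || !names[i])
			continue;
		printed += snprintf(bf + printed, size - printed, "%s%s",
				    printed ? "|" : "", names[i]);
		flags &= ~(1UL << i);
	}
	if (flags && printed < size)	/* leftover bits with no known name */
		printed += snprintf(bf + printed, size - printed, "%s%#lx",
				    printed ? "|" : "", flags);
	return printed;
}

int main(void)
{
	static const char * const prot[] = { "READ", "WRITE", "EXEC" };
	char bf[64];

	flags__scnprintf(bf, sizeof(bf), 0x5, prot, 3);
	printf("%s\n", bf);	/* prints READ|EXEC, i.e. PROT_READ|PROT_EXEC */
	return 0;
}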
diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh
index 5f5eefcb3c74..39eb2595983b 100755
--- a/tools/perf/trace/beauty/mmap_flags.sh
+++ b/tools/perf/trace/beauty/mmap_flags.sh
@@ -21,20 +21,20 @@ printf "static const char *mmap_flags[] = {\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MAP_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
egrep -q $regex ${arch_mman} && \
(egrep $regex ${arch_mman} | \
- sed -r "s/$regex/\2 \1/g" | \
- xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
egrep -q $regex ${linux_mman} && \
(egrep $regex ${linux_mman} | \
egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
- sed -r "s/$regex/\2 \1/g" | \
- xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman}) &&
(egrep $regex ${header_dir}/mman-common.h | \
egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
- sed -r "s/$regex/\2 \1/g" | \
- xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.h>.*' ${arch_mman}) &&
(egrep $regex ${header_dir}/mman.h | \
- sed -r "s/$regex/\2 \1/g" | \
- xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
printf "};\n"
diff --git a/tools/perf/trace/beauty/mmap_prot.sh b/tools/perf/trace/beauty/mmap_prot.sh
new file mode 100755
index 000000000000..28f638f8d216
--- /dev/null
+++ b/tools/perf/trace/beauty/mmap_prot.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+if [ $# -ne 2 ] ; then
+ [ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/`
+ asm_header_dir=tools/include/uapi/asm-generic
+ arch_header_dir=tools/arch/${hostarch}/include/uapi/asm
+else
+ asm_header_dir=$1
+ arch_header_dir=$2
+fi
+
+common_mman=${asm_header_dir}/mman-common.h
+arch_mman=${arch_header_dir}/mman.h
+
+prefix="PROT"
+
+printf "static const char *mmap_prot[] = {\n"
+regex=`printf '^[[:space:]]*#[[:space:]]*define[[:space:]]+%s_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' ${prefix}`
+([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman}) &&
+(egrep $regex ${common_mman} | \
+ egrep -vw PROT_NONE | \
+ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef ${prefix}_%s\n#define ${prefix}_%s %s\n#endif\n")
+[ -f ${arch_mman} ] && egrep -q $regex ${arch_mman} &&
+(egrep $regex ${arch_mman} | \
+ egrep -vw PROT_NONE | \
+ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef ${prefix}_%s\n#define ${prefix}_%s %s\n#endif\n")
+printf "};\n"
diff --git a/tools/perf/trace/beauty/mremap_flags.sh b/tools/perf/trace/beauty/mremap_flags.sh
new file mode 100755
index 000000000000..d58182300bb1
--- /dev/null
+++ b/tools/perf/trace/beauty/mremap_flags.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+if [ $# -ne 1 ] ; then
+ linux_header_dir=tools/include/uapi/linux
+else
+ linux_header_dir=$1
+fi
+
+linux_mman=${linux_header_dir}/mman.h
+
+printf "static const char *mremap_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MREMAP_([[:alnum:]_]+)[[:space:]]+((0x)?[[:xdigit:]]+)[[:space:]]*.*'
+egrep -q $regex ${linux_mman} && \
+(egrep $regex ${linux_mman} | \
+ sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MREMAP_%s\n#define MREMAP_%s %s\n#endif\n")
+printf "};\n"
diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c
index e0c13e6a5788..cd110634ab09 100644
--- a/tools/perf/trace/beauty/sockaddr.c
+++ b/tools/perf/trace/beauty/sockaddr.c
@@ -7,14 +7,7 @@
#include <sys/un.h>
#include <arpa/inet.h>
-static const char *socket_families[] = {
- "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
- "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
- "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
- "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
- "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
- "ALG", "NFC", "VSOCK",
-};
+#include "trace/beauty/generated/socket_arrays.c"
DEFINE_STRARRAY(socket_families, "PF_");
static size_t af_inet__scnprintf(struct sockaddr *sa, char *bf, size_t size)
diff --git a/tools/perf/trace/beauty/socket.sh b/tools/perf/trace/beauty/socket.sh
new file mode 100755
index 000000000000..3820e5c82293
--- /dev/null
+++ b/tools/perf/trace/beauty/socket.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+# This one uses a copy of the kernel source headers that is kept in a
+# place used just by these tools/perf/trace/beauty/ scripts; we shouldn't
+# put it in tools/include/linux, otherwise it would be used in the
+# normal compiler building process and would drag needless stuff from the
+# kernel.
+
+# When what these scripts need is already in tools/include/ then use it,
+# otherwise grab and check the copy from the kernel sources just for these
+# string table building scripts.
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/perf/trace/beauty/include/linux/
+
+printf "static const char *socket_families[] = {\n"
+# #define AF_LOCAL 1 /* POSIX name for AF_UNIX */
+regex='^#define[[:space:]]+AF_(\w+)[[:space:]]+([[:digit:]]+).*'
+
+egrep $regex ${header_dir}/socket.h | \
+ sed -r "s/$regex/\2 \1/g" | \
+ xargs printf "\t[%s] = \"%s\",\n" | \
+ egrep -v "\"(UNIX|MAX)\""
+printf "};\n"
diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c
index 811cc0eeb2d5..110f0c609d84 100644
--- a/tools/perf/trace/beauty/statx.c
+++ b/tools/perf/trace/beauty/statx.c
@@ -65,6 +65,7 @@ size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_a
P_FLAG(SIZE);
P_FLAG(BLOCKS);
P_FLAG(BTIME);
+ P_FLAG(MNT_ID);
#undef P_FLAG
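
STATX_MNT_ID is the mask bit for the mount ID field added to statx() around Linux 5.8; the added P_FLAG() line lets 'perf trace' decode it in the mask argument. A minimal usage sketch, assuming uapi headers that define STATX_MNT_ID and a glibc that provides the statx() wrapper:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	struct statx stx;

	/* STATX_MNT_ID needs Linux >= 5.8 uapi headers (assumption) */
	if (statx(AT_FDCWD, "/", 0, STATX_BASIC_STATS | STATX_MNT_ID, &stx)) {
		perror("statx");
		return 1;
	}
	printf("mnt_id: %llu\n", (unsigned long long)stx.stx_mnt_id);
	return 0;
}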
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 9023267e5643..bd77825fd5a1 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -209,7 +209,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
ui_browser__mark_fused(browser,
pcnt_width + 3 + notes->widths.addr + width,
from - 1,
- to > from ? true : false);
+ to > from);
}
}
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 487e54ef56a9..a07626f07208 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2288,6 +2288,11 @@ static struct thread *hist_browser__selected_thread(struct hist_browser *browser
return browser->he_selection->thread;
}
+static struct res_sample *hist_browser__selected_res_sample(struct hist_browser *browser)
+{
+ return browser->he_selection ? browser->he_selection->res_samples : NULL;
+}
+
/* Check whether the browser is for 'top' or 'report' */
static inline bool is_report_browser(void *timer)
{
@@ -3357,16 +3362,16 @@ skip_annotation:
&options[nr_options], NULL, NULL, evsel);
nr_options += add_res_sample_opt(browser, &actions[nr_options],
&options[nr_options],
- hist_browser__selected_entry(browser)->res_samples,
- evsel, A_NORMAL);
+ hist_browser__selected_res_sample(browser),
+ evsel, A_NORMAL);
nr_options += add_res_sample_opt(browser, &actions[nr_options],
&options[nr_options],
- hist_browser__selected_entry(browser)->res_samples,
- evsel, A_ASM);
+ hist_browser__selected_res_sample(browser),
+ evsel, A_ASM);
nr_options += add_res_sample_opt(browser, &actions[nr_options],
&options[nr_options],
- hist_browser__selected_entry(browser)->res_samples,
- evsel, A_SOURCE);
+ hist_browser__selected_res_sample(browser),
+ evsel, A_SOURCE);
nr_options += add_switch_opt(browser, &actions[nr_options],
&options[nr_options]);
skip_scripting:
@@ -3416,7 +3421,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
struct hists *hists = evsel__hists(evsel);
bool current_entry = ui_browser__is_current_entry(browser, row);
unsigned long nr_events = hists->stats.nr_events[PERF_RECORD_SAMPLE];
- const char *ev_name = perf_evsel__name(evsel);
+ const char *ev_name = evsel__name(evsel);
char bf[256], unit;
const char *warn = " ";
size_t printed;
@@ -3424,10 +3429,10 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
HE_COLORSET_NORMAL);
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
struct evsel *pos;
- ev_name = perf_evsel__group_name(evsel);
+ ev_name = evsel__group_name(evsel);
for_each_group_member(pos, evsel) {
struct hists *pos_hists = evsel__hists(pos);
@@ -3512,13 +3517,13 @@ browse_hists:
if (pos->core.node.next == &evlist->core.entries)
pos = evlist__first(evlist);
else
- pos = perf_evsel__next(pos);
+ pos = evsel__next(pos);
goto browse_hists;
case K_UNTAB:
if (pos->core.node.prev == &evlist->core.entries)
pos = evlist__last(evlist);
else
- pos = perf_evsel__prev(pos);
+ pos = evsel__prev(pos);
goto browse_hists;
case K_SWITCH_INPUT_DATA:
case K_RELOAD:
@@ -3554,7 +3559,7 @@ static bool filter_group_entries(struct ui_browser *browser __maybe_unused,
{
struct evsel *evsel = list_entry(entry, struct evsel, core.node);
- if (symbol_conf.event_group && !perf_evsel__is_group_leader(evsel))
+ if (symbol_conf.event_group && !evsel__is_group_leader(evsel))
return true;
return false;
@@ -3587,7 +3592,7 @@ static int __perf_evlist__tui_browse_hists(struct evlist *evlist,
ui_helpline__push("Press ESC to exit");
evlist__for_each_entry(evlist, pos) {
- const char *ev_name = perf_evsel__name(pos);
+ const char *ev_name = evsel__name(pos);
size_t line_len = strlen(ev_name) + 7;
if (menu.b.width < line_len)
@@ -3598,6 +3603,23 @@ static int __perf_evlist__tui_browse_hists(struct evlist *evlist,
hbt, warn_lost_event);
}
+static bool perf_evlist__single_entry(struct evlist *evlist)
+{
+ int nr_entries = evlist->core.nr_entries;
+
+ if (nr_entries == 1)
+ return true;
+
+ if (nr_entries == 2) {
+ struct evsel *last = evlist__last(evlist);
+
+ if (evsel__is_dummy_event(last))
+ return true;
+ }
+
+ return false;
+}
+
int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help,
struct hist_browser_timer *hbt,
float min_pcnt,
@@ -3607,8 +3629,8 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help,
{
int nr_entries = evlist->core.nr_entries;
-single_entry:
- if (nr_entries == 1) {
+ if (perf_evlist__single_entry(evlist)) {
+single_entry: {
struct evsel *first = evlist__first(evlist);
return perf_evsel__hists_browse(first, nr_entries, help,
@@ -3616,13 +3638,14 @@ single_entry:
env, warn_lost_event,
annotation_opts);
}
+ }
if (symbol_conf.event_group) {
struct evsel *pos;
nr_entries = 0;
evlist__for_each_entry(evlist, pos) {
- if (perf_evsel__is_group_leader(pos))
+ if (evsel__is_group_leader(pos))
nr_entries++;
}
@@ -3640,7 +3663,7 @@ static int block_hists_browser__title(struct hist_browser *browser, char *bf,
size_t size)
{
struct hists *hists = evsel__hists(browser->block_evsel);
- const char *evname = perf_evsel__name(browser->block_evsel);
+ const char *evname = evsel__name(browser->block_evsel);
unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
int ret;
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 35f9641bf670..a7dff77f2018 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -130,7 +130,7 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct map_symbol *ms,
gtk_list_store_append(store, &iter);
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
for (i = 0; i < evsel->core.nr_members; i++) {
ret += perf_gtk__get_percent(s + ret,
sizeof(s) - ret,
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index ed1a97b2c4b0..53ef71a1b15d 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -635,18 +635,18 @@ int perf_evlist__gtk_browse_hists(struct evlist *evlist,
evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
- const char *evname = perf_evsel__name(pos);
+ const char *evname = evsel__name(pos);
GtkWidget *scrolled_window;
GtkWidget *tab_label;
char buf[512];
size_t size = sizeof(buf);
if (symbol_conf.event_group) {
- if (!perf_evsel__is_group_leader(pos))
+ if (!evsel__is_group_leader(pos))
continue;
if (pos->core.nr_members > 1) {
- perf_evsel__group_desc(pos, buf, size);
+ evsel__group_desc(pos, buf, size);
evname = buf;
}
}
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 025f4c7f96bf..c1f24d004852 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -43,12 +43,12 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
} else
ret = hpp__call_print_fn(hpp, print_fn, fmt, len, get_field(he));
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
int prev_idx, idx_delta;
struct hist_entry *pair;
int nr_members = evsel->core.nr_members;
- prev_idx = perf_evsel__group_idx(evsel);
+ prev_idx = evsel__group_idx(evsel);
list_for_each_entry(pair, &he->pairs.head, pairs.node) {
u64 period = get_field(pair);
@@ -58,7 +58,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
continue;
evsel = hists_to_evsel(pair->hists);
- idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1;
+ idx_delta = evsel__group_idx(evsel) - prev_idx - 1;
while (idx_delta--) {
/*
@@ -82,7 +82,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
len, period);
}
- prev_idx = perf_evsel__group_idx(evsel);
+ prev_idx = evsel__group_idx(evsel);
}
idx_delta = nr_members - prev_idx - 1;
@@ -164,12 +164,12 @@ static int hist_entry__new_pair(struct hist_entry *a, struct hist_entry *b,
list_for_each_entry(pair, &a->pairs.head, pairs.node) {
struct evsel *evsel = hists_to_evsel(pair->hists);
- fa[perf_evsel__group_idx(evsel)] = get_field(pair);
+ fa[evsel__group_idx(evsel)] = get_field(pair);
}
list_for_each_entry(pair, &b->pairs.head, pairs.node) {
struct evsel *evsel = hists_to_evsel(pair->hists);
- fb[perf_evsel__group_idx(evsel)] = get_field(pair);
+ fb[evsel__group_idx(evsel)] = get_field(pair);
}
*fields_a = fa;
@@ -190,7 +190,7 @@ static int __hpp__group_sort_idx(struct hist_entry *a, struct hist_entry *b,
int cmp, nr_members, ret, i;
cmp = field_cmp(get_field(a), get_field(b));
- if (!perf_evsel__is_group_event(evsel))
+ if (!evsel__is_group_event(evsel))
return cmp;
nr_members = evsel->core.nr_members;
@@ -240,7 +240,7 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b,
return ret;
evsel = hists_to_evsel(a->hists);
- if (!perf_evsel__is_group_event(evsel))
+ if (!evsel__is_group_event(evsel))
return ret;
nr_members = evsel->core.nr_members;
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index c0cf8dff694e..e2563d0154eb 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -10,6 +10,7 @@ perf-y += db-export.o
perf-y += env.o
perf-y += event.o
perf-y += evlist.o
+perf-y += sideband_evlist.o
perf-y += evsel.o
perf-y += evsel_fprintf.o
perf-y += perf_event_attr_fprintf.o
@@ -88,6 +89,7 @@ perf-y += counts.o
perf-y += stat.o
perf-y += stat-shadow.o
perf-y += stat-display.o
+perf-y += perf_api_probe.o
perf-y += record.o
perf-y += srcline.o
perf-y += srccode.o
@@ -99,12 +101,14 @@ perf-y += call-path.o
perf-y += rwsem.o
perf-y += thread-stack.o
perf-y += spark.o
+perf-y += topdown.o
+perf-y += stream.o
perf-$(CONFIG_AUXTRACE) += auxtrace.o
perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
perf-$(CONFIG_AUXTRACE) += intel-pt.o
perf-$(CONFIG_AUXTRACE) += intel-bts.o
perf-$(CONFIG_AUXTRACE) += arm-spe.o
-perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
+perf-$(CONFIG_AUXTRACE) += arm-spe-decoder/
perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
ifdef CONFIG_LIBOPENCSD
@@ -115,6 +119,7 @@ endif
perf-y += parse-branch-options.o
perf-y += dump-insn.o
perf-y += parse-regs-options.o
+perf-y += parse-sublevel-options.o
perf-y += term.o
perf-y += help-unknown-cmd.o
perf-y += mem-events.o
@@ -126,6 +131,7 @@ perf-y += expr-bison.o
perf-y += expr.o
perf-y += branch.o
perf-y += mem2node.o
+perf-y += clockid.o
perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_LIBBPF) += bpf_map.o
@@ -134,6 +140,10 @@ perf-$(CONFIG_LIBELF) += symbol-elf.o
perf-$(CONFIG_LIBELF) += probe-file.o
perf-$(CONFIG_LIBELF) += probe-event.o
+ifndef CONFIG_LIBBPF
+perf-y += hashmap.o
+endif
+
ifndef CONFIG_LIBELF
perf-y += symbol-minimal.o
endif
@@ -177,42 +187,68 @@ perf-$(CONFIG_LIBBPF) += bpf-event.o
perf-$(CONFIG_CXX) += c++/
+perf-$(CONFIG_LIBPFM4) += pfm.o
+
CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
# avoid compiler warnings in 32-bit mode
CFLAGS_genelf_debug.o += -Wno-packed
-$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
+$(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-flex.h: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
$(call rule_mkdir)
- $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/parse-events-flex.c \
+ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) $<
-$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
+$(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/parse-events-bison.h: util/parse-events.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \
+ -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
-$(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c
+$(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-flex.h: util/expr.l $(OUTPUT)util/expr-bison.c
$(call rule_mkdir)
- $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/expr-flex.c \
+ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) $<
-$(OUTPUT)util/expr-bison.c: util/expr.y
+$(OUTPUT)util/expr-bison.c $(OUTPUT)util/expr-bison.h: util/expr.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \
+ -o $(OUTPUT)util/expr-bison.c -p expr_
-$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
+$(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-flex.h: util/pmu.l $(OUTPUT)util/pmu-bison.c
$(call rule_mkdir)
- $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/pmu-flex.c \
+ --header-file=$(OUTPUT)util/pmu-flex.h $(PARSER_DEBUG_FLEX) $<
-$(OUTPUT)util/pmu-bison.c: util/pmu.y
+$(OUTPUT)util/pmu-bison.c $(OUTPUT)util/pmu-bison.h: util/pmu.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_
-
-CFLAGS_parse-events-flex.o += -w
-CFLAGS_pmu-flex.o += -w
-CFLAGS_expr-flex.o += -w
-CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
-CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
-CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \
+ -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
+
+FLEX_GE_26 := $(shell expr $(shell $(FLEX) --version | sed -e 's/flex \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 26)
+ifeq ($(FLEX_GE_26),1)
+ flex_flags := -Wno-switch-enum -Wno-switch-default -Wno-unused-function -Wno-redundant-decls -Wno-sign-compare -Wno-unused-parameter -Wno-missing-prototypes -Wno-missing-declarations
+ CC_HASNT_MISLEADING_INDENTATION := $(shell echo "int main(void) { return 0 }" | $(CC) -Werror -Wno-misleading-indentation -o /dev/null -xc - 2>&1 | grep -q -- -Wno-misleading-indentation ; echo $$?)
+ ifeq ($(CC_HASNT_MISLEADING_INDENTATION), 1)
+ flex_flags += -Wno-misleading-indentation
+ endif
+else
+ flex_flags := -w
+endif
+CFLAGS_parse-events-flex.o += $(flex_flags)
+CFLAGS_pmu-flex.o += $(flex_flags)
+CFLAGS_expr-flex.o += $(flex_flags)
+
+bison_flags := -DYYENABLE_NLS=0
+BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35)
+ifeq ($(BISON_GE_35),1)
+ bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum
+else
+ bison_flags += -w
+endif
+CFLAGS_parse-events-bison.o += $(bison_flags)
+CFLAGS_pmu-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
+CFLAGS_expr-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
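
A note on the version probing added above: the sed expressions simply glue the major and minor digits together, so 'flex 2.6.4' is reduced to '26.4' and 'bison (GNU Bison) 3.5.1' to '35.1', which expr then compares against 26 and 35. On new enough tools the generated scanners and parsers are built with a targeted set of -Wno-* options instead of the old blanket -w.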
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index f1ea0d61eb5b..6c8575e182ed 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -41,7 +41,6 @@
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/string.h>
-#include <bpf/libbpf.h>
#include <subcmd/parse-options.h>
#include <subcmd/run-command.h>
@@ -1191,7 +1190,7 @@ static struct disasm_line *disasm_line__new(struct annotate_args *args)
struct disasm_line *dl = NULL;
int nr = 1;
- if (perf_evsel__is_group_event(args->evsel))
+ if (evsel__is_group_event(args->evsel))
nr = args->evsel->core.nr_members;
dl = zalloc(disasm_line_size(nr));
@@ -1437,7 +1436,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
if (queue)
return -1;
- if (perf_evsel__is_group_event(evsel))
+ if (evsel__is_group_event(evsel))
width *= evsel->core.nr_members;
if (!*al->line)
@@ -1579,8 +1578,7 @@ int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, s
char *build_id_msg = NULL;
if (dso->has_build_id) {
- build_id__sprintf(dso->build_id,
- sizeof(dso->build_id), bf + 15);
+ build_id__sprintf(&dso->bid, bf + 15);
build_id_msg = bf;
}
scnprintf(buf, buflen,
@@ -1622,6 +1620,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
char *build_id_filename;
char *build_id_path = NULL;
char *pos;
+ int len;
if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
!dso__is_kcore(dso))
@@ -1650,10 +1649,16 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
dirname(build_id_path);
- if (dso__is_kcore(dso) ||
- readlink(build_id_path, linkname, sizeof(linkname)) < 0 ||
- strstr(linkname, DSO__NAME_KALLSYMS) ||
- access(filename, R_OK)) {
+ if (dso__is_kcore(dso))
+ goto fallback;
+
+ len = readlink(build_id_path, linkname, sizeof(linkname) - 1);
+ if (len < 0)
+ goto fallback;
+
+ linkname[len] = '\0';
+ if (strstr(linkname, DSO__NAME_KALLSYMS) ||
+ access(filename, R_OK)) {
fallback:
/*
* If we don't have build-ids or the build-id file isn't in the
@@ -1821,6 +1826,24 @@ static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused,
}
#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+static int
+symbol__disassemble_bpf_image(struct symbol *sym,
+ struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct disasm_line *dl;
+
+ args->offset = -1;
+ args->line = strdup("to be implemented");
+ args->line_nr = 0;
+ dl = disasm_line__new(args);
+ if (dl)
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ free(args->line);
+ return 0;
+}
+
/*
* Possibly create a new version of line with tabs expanded. Returns the
* existing or new line, storage is updated if a new line is allocated. If
@@ -1920,6 +1943,8 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) {
return symbol__disassemble_bpf(sym, args);
+ } else if (dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE) {
+ return symbol__disassemble_bpf_image(sym, args);
} else if (dso__is_kcore(dso)) {
kce.kcore_filename = symfs_filename;
kce.addr = map__rip_2objdump(map, sym->start);
@@ -2136,7 +2161,7 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
.evsel = evsel,
.options = options,
};
- struct perf_env *env = perf_evsel__env(evsel);
+ struct perf_env *env = evsel__env(evsel);
const char *arch_name = perf_env__arch(env);
struct arch *arch;
int err;
@@ -2324,7 +2349,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel,
struct dso *dso = map->dso;
char *filename;
const char *d_filename;
- const char *evsel_name = perf_evsel__name(evsel);
+ const char *evsel_name = evsel__name(evsel);
struct annotation *notes = symbol__annotation(sym);
struct sym_hist *h = annotation__histogram(notes, evsel->idx);
struct annotation_line *pos, *queue = NULL;
@@ -2348,9 +2373,9 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel,
len = symbol__size(sym);
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
width *= evsel->core.nr_members;
- perf_evsel__group_desc(evsel, buf, sizeof(buf));
+ evsel__group_desc(evsel, buf, sizeof(buf));
evsel_name = buf;
}
@@ -2485,7 +2510,7 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp,
int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
struct annotation_options *opts)
{
- const char *ev_name = perf_evsel__name(evsel);
+ const char *ev_name = evsel__name(evsel);
char buf[1024];
char *filename;
int err = -1;
@@ -2498,8 +2523,8 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
if (fp == NULL)
goto out_free_filename;
- if (perf_evsel__is_group_event(evsel)) {
- perf_evsel__group_desc(evsel, buf, sizeof(buf));
+ if (evsel__is_group_event(evsel)) {
+ evsel__group_desc(evsel, buf, sizeof(buf));
ev_name = buf;
}
@@ -3044,7 +3069,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
if (notes->offsets == NULL)
return ENOMEM;
- if (perf_evsel__is_group_event(evsel))
+ if (evsel__is_group_event(evsel))
nr_pcnt = evsel->core.nr_members;
err = symbol__annotate(ms, evsel, options, parch);
@@ -3101,6 +3126,8 @@ static int annotation__config(const char *var, const char *value, void *data)
value);
} else if (!strcmp(var, "annotate.use_offset")) {
opt->use_offset = perf_config_bool("use_offset", value);
+ } else if (!strcmp(var, "annotate.disassembler_style")) {
+ opt->disassembler_style = value;
} else {
pr_debug("%s variable unknown, ignoring...", var);
}
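
Besides the evsel__*() renames and the new symbol__disassemble_bpf_image() stub, the hunk above fixes a readlink() pitfall: the old code passed the full buffer size and never NUL-terminated the result before handing it to strstr(). The patch now reserves one byte and terminates explicitly, the canonical pattern shown in this small stand-alone illustration (hypothetical link target):

#include <limits.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char linkname[PATH_MAX];
	/* readlink() does not NUL-terminate, so leave room and terminate it */
	ssize_t len = readlink("/proc/self/exe", linkname, sizeof(linkname) - 1);

	if (len < 0) {
		perror("readlink");
		return 1;
	}
	linkname[len] = '\0';
	printf("%s\n", linkname);
	return 0;
}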
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 2d88069d6428..0a0cd4f32175 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -144,7 +144,7 @@ struct annotation_line {
u32 idx;
int idx_asm;
int data_nr;
- struct annotation_data data[0];
+ struct annotation_data data[];
};
struct disasm_line {
@@ -227,7 +227,7 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel);
struct sym_hist {
u64 nr_samples;
u64 period;
- struct sym_hist_entry addr[0];
+ struct sym_hist_entry addr[];
};
struct cyc_hist {
diff --git a/tools/perf/util/arm-spe-decoder/Build b/tools/perf/util/arm-spe-decoder/Build
new file mode 100644
index 000000000000..f8dae13fc876
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/Build
@@ -0,0 +1 @@
+perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
new file mode 100644
index 000000000000..93e063f22be5
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arm_spe_decoder.c: ARM SPE support
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include <linux/zalloc.h>
+
+#include "../auxtrace.h"
+#include "../debug.h"
+#include "../util.h"
+
+#include "arm-spe-decoder.h"
+
+#ifndef BIT
+#define BIT(n) (1UL << (n))
+#endif
+
+static u64 arm_spe_calc_ip(int index, u64 payload)
+{
+ u8 *addr = (u8 *)&payload;
+ int ns, el;
+
+ /* Instruction virtual address or Branch target address */
+ if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
+ index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
+ ns = addr[7] & SPE_ADDR_PKT_NS;
+ el = (addr[7] & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET;
+
+ /* Fill highest byte for EL1 or EL2 (VHE) mode */
+ if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2))
+ addr[7] = 0xff;
+ /* Clean highest byte for other cases */
+ else
+ addr[7] = 0x0;
+
+ /* Data access virtual address */
+ } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) {
+
+ /* Fill highest byte if bits [48..55] is 0xff */
+ if (addr[6] == 0xff)
+ addr[7] = 0xff;
+ /* Otherwise, cleanup tags */
+ else
+ addr[7] = 0x0;
+
+ /* Data access physical address */
+ } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) {
+ /* Cleanup byte 7 */
+ addr[7] = 0x0;
+ } else {
+ pr_err("unsupported address packet index: 0x%x\n", index);
+ }
+
+ return payload;
+}
+
+struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params)
+{
+ struct arm_spe_decoder *decoder;
+
+ if (!params->get_trace)
+ return NULL;
+
+ decoder = zalloc(sizeof(struct arm_spe_decoder));
+ if (!decoder)
+ return NULL;
+
+ decoder->get_trace = params->get_trace;
+ decoder->data = params->data;
+
+ return decoder;
+}
+
+void arm_spe_decoder_free(struct arm_spe_decoder *decoder)
+{
+ free(decoder);
+}
+
+static int arm_spe_get_data(struct arm_spe_decoder *decoder)
+{
+ struct arm_spe_buffer buffer = { .buf = 0, };
+ int ret;
+
+ pr_debug("Getting more data\n");
+ ret = decoder->get_trace(&buffer, decoder->data);
+ if (ret < 0)
+ return ret;
+
+ decoder->buf = buffer.buf;
+ decoder->len = buffer.len;
+
+ if (!decoder->len)
+ pr_debug("No more data\n");
+
+ return decoder->len;
+}
+
+static int arm_spe_get_next_packet(struct arm_spe_decoder *decoder)
+{
+ int ret;
+
+ do {
+ if (!decoder->len) {
+ ret = arm_spe_get_data(decoder);
+
+ /* Failed to read out trace data */
+ if (ret <= 0)
+ return ret;
+ }
+
+ ret = arm_spe_get_packet(decoder->buf, decoder->len,
+ &decoder->packet);
+ if (ret <= 0) {
+ /* Move forward for 1 byte */
+ decoder->buf += 1;
+ decoder->len -= 1;
+ return -EBADMSG;
+ }
+
+ decoder->buf += ret;
+ decoder->len -= ret;
+ } while (decoder->packet.type == ARM_SPE_PAD);
+
+ return 1;
+}
+
+static int arm_spe_read_record(struct arm_spe_decoder *decoder)
+{
+ int err;
+ int idx;
+ u64 payload, ip;
+
+ memset(&decoder->record, 0x0, sizeof(decoder->record));
+
+ while (1) {
+ err = arm_spe_get_next_packet(decoder);
+ if (err <= 0)
+ return err;
+
+ idx = decoder->packet.index;
+ payload = decoder->packet.payload;
+
+ switch (decoder->packet.type) {
+ case ARM_SPE_TIMESTAMP:
+ decoder->record.timestamp = payload;
+ return 1;
+ case ARM_SPE_END:
+ return 1;
+ case ARM_SPE_ADDRESS:
+ ip = arm_spe_calc_ip(idx, payload);
+ if (idx == SPE_ADDR_PKT_HDR_INDEX_INS)
+ decoder->record.from_ip = ip;
+ else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH)
+ decoder->record.to_ip = ip;
+ break;
+ case ARM_SPE_COUNTER:
+ break;
+ case ARM_SPE_CONTEXT:
+ break;
+ case ARM_SPE_OP_TYPE:
+ break;
+ case ARM_SPE_EVENTS:
+ if (payload & BIT(EV_L1D_REFILL))
+ decoder->record.type |= ARM_SPE_L1D_MISS;
+
+ if (payload & BIT(EV_L1D_ACCESS))
+ decoder->record.type |= ARM_SPE_L1D_ACCESS;
+
+ if (payload & BIT(EV_TLB_WALK))
+ decoder->record.type |= ARM_SPE_TLB_MISS;
+
+ if (payload & BIT(EV_TLB_ACCESS))
+ decoder->record.type |= ARM_SPE_TLB_ACCESS;
+
+ if ((idx == 2 || idx == 4 || idx == 8) &&
+ (payload & BIT(EV_LLC_MISS)))
+ decoder->record.type |= ARM_SPE_LLC_MISS;
+
+ if ((idx == 2 || idx == 4 || idx == 8) &&
+ (payload & BIT(EV_LLC_ACCESS)))
+ decoder->record.type |= ARM_SPE_LLC_ACCESS;
+
+ if ((idx == 2 || idx == 4 || idx == 8) &&
+ (payload & BIT(EV_REMOTE_ACCESS)))
+ decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
+
+ if (payload & BIT(EV_MISPRED))
+ decoder->record.type |= ARM_SPE_BRANCH_MISS;
+
+ break;
+ case ARM_SPE_DATA_SOURCE:
+ break;
+ case ARM_SPE_BAD:
+ break;
+ case ARM_SPE_PAD:
+ break;
+ default:
+ pr_err("Get packet error!\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int arm_spe_decode(struct arm_spe_decoder *decoder)
+{
+ return arm_spe_read_record(decoder);
+}
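
For reference, the address packets handled by arm_spe_calc_ip() above carry a 56-bit virtual address plus NS/EL bits in the top byte; the decoder rebuilds a canonical 64-bit pointer by forcing byte 7 to 0xff for non-secure EL1/EL2 addresses and clearing it otherwise. A toy little-endian illustration of that top-byte fix-up (hypothetical payload value, not the real decoder):

#include <stdint.h>
#include <stdio.h>

static uint64_t canonicalize_kernel_va(uint64_t payload)
{
	uint8_t *addr = (uint8_t *)&payload;

	/* byte 7 is the most significant byte on a little-endian host */
	addr[7] = 0xff;
	return payload;
}

int main(void)
{
	uint64_t payload = 0x00ffff800010fe24ULL;	/* hypothetical SPE payload */

	printf("%#llx\n", (unsigned long long)canonicalize_kernel_va(payload));
	return 0;
}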
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
new file mode 100644
index 000000000000..a5111a8d4360
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arm_spe_decoder.h: Arm Statistical Profiling Extensions support
+ * Copyright (c) 2019-2020, Arm Ltd.
+ */
+
+#ifndef INCLUDE__ARM_SPE_DECODER_H__
+#define INCLUDE__ARM_SPE_DECODER_H__
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "arm-spe-pkt-decoder.h"
+
+enum arm_spe_events {
+ EV_EXCEPTION_GEN = 0,
+ EV_RETIRED = 1,
+ EV_L1D_ACCESS = 2,
+ EV_L1D_REFILL = 3,
+ EV_TLB_ACCESS = 4,
+ EV_TLB_WALK = 5,
+ EV_NOT_TAKEN = 6,
+ EV_MISPRED = 7,
+ EV_LLC_ACCESS = 8,
+ EV_LLC_MISS = 9,
+ EV_REMOTE_ACCESS = 10,
+ EV_ALIGNMENT = 11,
+ EV_PARTIAL_PREDICATE = 17,
+ EV_EMPTY_PREDICATE = 18,
+};
+
+enum arm_spe_sample_type {
+ ARM_SPE_L1D_ACCESS = 1 << 0,
+ ARM_SPE_L1D_MISS = 1 << 1,
+ ARM_SPE_LLC_ACCESS = 1 << 2,
+ ARM_SPE_LLC_MISS = 1 << 3,
+ ARM_SPE_TLB_ACCESS = 1 << 4,
+ ARM_SPE_TLB_MISS = 1 << 5,
+ ARM_SPE_BRANCH_MISS = 1 << 6,
+ ARM_SPE_REMOTE_ACCESS = 1 << 7,
+};
+
+struct arm_spe_record {
+ enum arm_spe_sample_type type;
+ int err;
+ u64 from_ip;
+ u64 to_ip;
+ u64 timestamp;
+};
+
+struct arm_spe_insn;
+
+struct arm_spe_buffer {
+ const unsigned char *buf;
+ size_t len;
+ u64 offset;
+ u64 trace_nr;
+};
+
+struct arm_spe_params {
+ int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
+ void *data;
+};
+
+struct arm_spe_decoder {
+ int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
+ void *data;
+ struct arm_spe_record record;
+
+ const unsigned char *buf;
+ size_t len;
+
+ struct arm_spe_pkt packet;
+};
+
+struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params);
+void arm_spe_decoder_free(struct arm_spe_decoder *decoder);
+
+int arm_spe_decode(struct arm_spe_decoder *decoder);
+
+#endif
diff --git a/tools/perf/util/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index b94001b756c7..b94001b756c7 100644
--- a/tools/perf/util/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
diff --git a/tools/perf/util/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index d786ef65113f..4c870521b8eb 100644
--- a/tools/perf/util/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -15,6 +15,8 @@
#define ARM_SPE_NEED_MORE_BYTES -1
#define ARM_SPE_BAD_PACKET -2
+#define ARM_SPE_PKT_MAX_SZ 16
+
enum arm_spe_pkt_type {
ARM_SPE_BAD,
ARM_SPE_PAD,
@@ -34,6 +36,20 @@ struct arm_spe_pkt {
uint64_t payload;
};
+#define SPE_ADDR_PKT_HDR_INDEX_INS (0x0)
+#define SPE_ADDR_PKT_HDR_INDEX_BRANCH (0x1)
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT (0x2)
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS (0x3)
+
+#define SPE_ADDR_PKT_NS BIT(7)
+#define SPE_ADDR_PKT_CH BIT(6)
+#define SPE_ADDR_PKT_EL_OFFSET (5)
+#define SPE_ADDR_PKT_EL_MASK (0x3 << SPE_ADDR_PKT_EL_OFFSET)
+#define SPE_ADDR_PKT_EL0 (0)
+#define SPE_ADDR_PKT_EL1 (1)
+#define SPE_ADDR_PKT_EL2 (2)
+#define SPE_ADDR_PKT_EL3 (3)
+
const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
int arm_spe_get_packet(const unsigned char *buf, size_t len,
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 53be12b23ff4..3882a5360ada 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -4,46 +4,85 @@
* Copyright (c) 2017-2018, Arm Ltd.
*/
+#include <byteswap.h>
#include <endian.h>
#include <errno.h>
-#include <byteswap.h>
#include <inttypes.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
#include <linux/bitops.h>
+#include <linux/kernel.h>
#include <linux/log2.h>
+#include <linux/types.h>
#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "auxtrace.h"
#include "color.h"
+#include "debug.h"
+#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
-#include "debug.h"
-#include "auxtrace.h"
+#include "symbol.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "tool.h"
+#include "util/synthetic-events.h"
+
#include "arm-spe.h"
-#include "arm-spe-pkt-decoder.h"
+#include "arm-spe-decoder/arm-spe-decoder.h"
+#include "arm-spe-decoder/arm-spe-pkt-decoder.h"
+
+#define MAX_TIMESTAMP (~0ULL)
struct arm_spe {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
struct auxtrace_heap heap;
+ struct itrace_synth_opts synth_opts;
u32 auxtrace_type;
struct perf_session *session;
struct machine *machine;
u32 pmu_type;
+
+ u8 timeless_decoding;
+ u8 data_queued;
+
+ u8 sample_flc;
+ u8 sample_llc;
+ u8 sample_tlb;
+ u8 sample_branch;
+ u8 sample_remote_access;
+
+ u64 l1d_miss_id;
+ u64 l1d_access_id;
+ u64 llc_miss_id;
+ u64 llc_access_id;
+ u64 tlb_miss_id;
+ u64 tlb_access_id;
+ u64 branch_miss_id;
+ u64 remote_access_id;
+
+ u64 kernel_start;
+
+ unsigned long num_events;
};
struct arm_spe_queue {
- struct arm_spe *spe;
- unsigned int queue_nr;
- struct auxtrace_buffer *buffer;
- bool on_heap;
- bool done;
- pid_t pid;
- pid_t tid;
- int cpu;
+ struct arm_spe *spe;
+ unsigned int queue_nr;
+ struct auxtrace_buffer *buffer;
+ struct auxtrace_buffer *old_buffer;
+ union perf_event *event_buf;
+ bool on_heap;
+ bool done;
+ pid_t pid;
+ pid_t tid;
+ int cpu;
+ struct arm_spe_decoder *decoder;
+ u64 time;
+ u64 timestamp;
+ struct thread *thread;
};
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
@@ -92,44 +131,520 @@ static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
arm_spe_dump(spe, buf, len);
}
-static int arm_spe_process_event(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused,
- struct perf_sample *sample __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
+{
+ struct arm_spe_queue *speq = data;
+ struct auxtrace_buffer *buffer = speq->buffer;
+ struct auxtrace_buffer *old_buffer = speq->old_buffer;
+ struct auxtrace_queue *queue;
+
+ queue = &speq->spe->queues.queue_array[speq->queue_nr];
+
+ buffer = auxtrace_buffer__next(queue, buffer);
+ /* If no more data, drop the previous auxtrace_buffer and return */
+ if (!buffer) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ b->len = 0;
+ return 0;
+ }
+
+ speq->buffer = buffer;
+
+ /* If the aux_buffer doesn't have data associated, try to load it */
+ if (!buffer->data) {
+ /* get the file desc associated with the perf data file */
+ int fd = perf_data__fd(speq->spe->session->data);
+
+ buffer->data = auxtrace_buffer__get_data(buffer, fd);
+ if (!buffer->data)
+ return -ENOMEM;
+ }
+
+ b->len = buffer->size;
+ b->buf = buffer->data;
+
+ if (b->len) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ speq->old_buffer = buffer;
+ } else {
+ auxtrace_buffer__drop_data(buffer);
+ return arm_spe_get_trace(b, data);
+ }
+
+ return 0;
+}
+
+static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
+ unsigned int queue_nr)
+{
+ struct arm_spe_params params = { .get_trace = 0, };
+ struct arm_spe_queue *speq;
+
+ speq = zalloc(sizeof(*speq));
+ if (!speq)
+ return NULL;
+
+ speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+ if (!speq->event_buf)
+ goto out_free;
+
+ speq->spe = spe;
+ speq->queue_nr = queue_nr;
+ speq->pid = -1;
+ speq->tid = -1;
+ speq->cpu = -1;
+
+ /* params set */
+ params.get_trace = arm_spe_get_trace;
+ params.data = speq;
+
+ /* create new decoder */
+ speq->decoder = arm_spe_decoder_new(&params);
+ if (!speq->decoder)
+ goto out_free;
+
+ return speq;
+
+out_free:
+ zfree(&speq->event_buf);
+ free(speq);
+
+ return NULL;
+}
+
+static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
+{
+ return ip >= spe->kernel_start ?
+ PERF_RECORD_MISC_KERNEL :
+ PERF_RECORD_MISC_USER;
+}
+
+static void arm_spe_prep_sample(struct arm_spe *spe,
+ struct arm_spe_queue *speq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct arm_spe_record *record = &speq->decoder->record;
+
+ if (!spe->timeless_decoding)
+ sample->time = speq->timestamp;
+
+ sample->ip = record->from_ip;
+ sample->cpumode = arm_spe_cpumode(spe, sample->ip);
+ sample->pid = speq->pid;
+ sample->tid = speq->tid;
+ sample->addr = record->to_ip;
+ sample->period = 1;
+ sample->cpu = speq->cpu;
+
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.misc = sample->cpumode;
+ event->sample.header.size = sizeof(struct perf_event_header);
+}
+
+static inline int
+arm_spe_deliver_synth_event(struct arm_spe *spe,
+ struct arm_spe_queue *speq __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ int ret;
+
+ ret = perf_session__deliver_synth_event(spe->session, event, sample);
+ if (ret)
+ pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
+
+ return ret;
+}
+
+static int
+arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq,
+ u64 spe_events_id)
+{
+ struct arm_spe *spe = speq->spe;
+ union perf_event *event = speq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ arm_spe_prep_sample(spe, speq, event, &sample);
+
+ sample.id = spe_events_id;
+ sample.stream_id = spe_events_id;
+
+ return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
+static int arm_spe_sample(struct arm_spe_queue *speq)
+{
+ const struct arm_spe_record *record = &speq->decoder->record;
+ struct arm_spe *spe = speq->spe;
+ int err;
+
+ if (spe->sample_flc) {
+ if (record->type & ARM_SPE_L1D_MISS) {
+ err = arm_spe_synth_spe_events_sample(
+ speq, spe->l1d_miss_id);
+ if (err)
+ return err;
+ }
+
+ if (record->type & ARM_SPE_L1D_ACCESS) {
+ err = arm_spe_synth_spe_events_sample(
+ speq, spe->l1d_access_id);
+ if (err)
+ return err;
+ }
+ }
+
+ if (spe->sample_llc) {
+ if (record->type & ARM_SPE_LLC_MISS) {
+ err = arm_spe_synth_spe_events_sample(
+ speq, spe->llc_miss_id);
+ if (err)
+ return err;
+ }
+
+ if (record->type & ARM_SPE_LLC_ACCESS) {
+ err = arm_spe_synth_spe_events_sample(
+ speq, spe->llc_access_id);
+ if (err)
+ return err;
+ }
+ }
+
+ if (spe->sample_tlb) {
+ if (record->type & ARM_SPE_TLB_MISS) {
+ err = arm_spe_synth_spe_events_sample(
+ speq, spe->tlb_miss_id);
+ if (err)
+ return err;
+ }
+
+ if (record->type & ARM_SPE_TLB_ACCESS) {
+ err = arm_spe_synth_spe_events_sample(
+ speq, spe->tlb_access_id);
+ if (err)
+ return err;
+ }
+ }
+
+ if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
+ err = arm_spe_synth_spe_events_sample(speq,
+ spe->branch_miss_id);
+ if (err)
+ return err;
+ }
+
+ if (spe->sample_remote_access &&
+ (record->type & ARM_SPE_REMOTE_ACCESS)) {
+ err = arm_spe_synth_spe_events_sample(speq,
+ spe->remote_access_id);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
+{
+ struct arm_spe *spe = speq->spe;
+ int ret;
+
+ if (!spe->kernel_start)
+ spe->kernel_start = machine__kernel_start(spe->machine);
+
+ while (1) {
+ ret = arm_spe_decode(speq->decoder);
+ if (!ret) {
+ pr_debug("No data or all data has been processed.\n");
+ return 1;
+ }
+
+ /*
+ * If an error was detected while decoding the SPE trace data,
+ * continue to the next data and try to find more records.
+ */
+ if (ret < 0)
+ continue;
+
+ ret = arm_spe_sample(speq);
+ if (ret)
+ return ret;
+
+ if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
+ *timestamp = speq->timestamp;
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+static int arm_spe__setup_queue(struct arm_spe *spe,
+ struct auxtrace_queue *queue,
+ unsigned int queue_nr)
+{
+ struct arm_spe_queue *speq = queue->priv;
+ struct arm_spe_record *record;
+
+ if (list_empty(&queue->head) || speq)
+ return 0;
+
+ speq = arm_spe__alloc_queue(spe, queue_nr);
+
+ if (!speq)
+ return -ENOMEM;
+
+ queue->priv = speq;
+
+ if (queue->cpu != -1)
+ speq->cpu = queue->cpu;
+
+ if (!speq->on_heap) {
+ int ret;
+
+ if (spe->timeless_decoding)
+ return 0;
+
+retry:
+ ret = arm_spe_decode(speq->decoder);
+
+ if (!ret)
+ return 0;
+
+ if (ret < 0)
+ goto retry;
+
+ record = &speq->decoder->record;
+
+ speq->timestamp = record->timestamp;
+ ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
+ if (ret)
+ return ret;
+ speq->on_heap = true;
+ }
+
+ return 0;
+}
+
+static int arm_spe__setup_queues(struct arm_spe *spe)
+{
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < spe->queues.nr_queues; i++) {
+ ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int arm_spe__update_queues(struct arm_spe *spe)
+{
+ if (spe->queues.new_data) {
+ spe->queues.new_data = false;
+ return arm_spe__setup_queues(spe);
+ }
+
+ return 0;
+}
+
+static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
+{
+ struct evsel *evsel;
+ struct evlist *evlist = spe->session->evlist;
+ bool timeless_decoding = true;
+
+ /*
+ * Cycle through the list of events; if any of them has the time
+ * bit set, the trace cannot be decoded as timeless.
+ */
+ evlist__for_each_entry(evlist, evsel) {
+ if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
+ timeless_decoding = false;
+ }
+
+ return timeless_decoding;
+}
+
+static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
+ struct auxtrace_queue *queue)
+{
+ struct arm_spe_queue *speq = queue->priv;
+ pid_t tid;
+
+ tid = machine__get_current_tid(spe->machine, speq->cpu);
+ if (tid != -1) {
+ speq->tid = tid;
+ thread__zput(speq->thread);
+ } else
+ speq->tid = queue->tid;
+
+ if ((!speq->thread) && (speq->tid != -1)) {
+ speq->thread = machine__find_thread(spe->machine, -1,
+ speq->tid);
+ }
+
+ if (speq->thread) {
+ speq->pid = speq->thread->pid_;
+ if (queue->cpu == -1)
+ speq->cpu = speq->thread->cpu;
+ }
+}
+
+static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
+{
+ unsigned int queue_nr;
+ u64 ts;
+ int ret;
+
+ while (1) {
+ struct auxtrace_queue *queue;
+ struct arm_spe_queue *speq;
+
+ if (!spe->heap.heap_cnt)
+ return 0;
+
+ if (spe->heap.heap_array[0].ordinal >= timestamp)
+ return 0;
+
+ queue_nr = spe->heap.heap_array[0].queue_nr;
+ queue = &spe->queues.queue_array[queue_nr];
+ speq = queue->priv;
+
+ auxtrace_heap__pop(&spe->heap);
+
+ if (spe->heap.heap_cnt) {
+ ts = spe->heap.heap_array[0].ordinal + 1;
+ if (ts > timestamp)
+ ts = timestamp;
+ } else {
+ ts = timestamp;
+ }
+
+ arm_spe_set_pid_tid_cpu(spe, queue);
+
+ ret = arm_spe_run_decoder(speq, &ts);
+ if (ret < 0) {
+ auxtrace_heap__add(&spe->heap, queue_nr, ts);
+ return ret;
+ }
+
+ if (!ret) {
+ ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
+ if (ret < 0)
+ return ret;
+ } else {
+ speq->on_heap = false;
+ }
+ }
+
+ return 0;
+}
+
+static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
+ u64 time_)
{
+ struct auxtrace_queues *queues = &spe->queues;
+ unsigned int i;
+ u64 ts = 0;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ struct auxtrace_queue *queue = &spe->queues.queue_array[i];
+ struct arm_spe_queue *speq = queue->priv;
+
+ if (speq && (tid == -1 || speq->tid == tid)) {
+ speq->time = time_;
+ arm_spe_set_pid_tid_cpu(spe, queue);
+ arm_spe_run_decoder(speq, &ts);
+ }
+ }
return 0;
}
+static int arm_spe_process_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool)
+{
+ int err = 0;
+ u64 timestamp;
+ struct arm_spe *spe = container_of(session->auxtrace,
+ struct arm_spe, auxtrace);
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events) {
+ pr_err("SPE trace requires ordered events\n");
+ return -EINVAL;
+ }
+
+ if (sample->time && (sample->time != (u64) -1))
+ timestamp = sample->time;
+ else
+ timestamp = 0;
+
+ if (timestamp || spe->timeless_decoding) {
+ err = arm_spe__update_queues(spe);
+ if (err)
+ return err;
+ }
+
+ if (spe->timeless_decoding) {
+ if (event->header.type == PERF_RECORD_EXIT) {
+ err = arm_spe_process_timeless_queues(spe,
+ event->fork.tid,
+ sample->time);
+ }
+ } else if (timestamp) {
+ if (event->header.type == PERF_RECORD_EXIT) {
+ err = arm_spe_process_queues(spe, timestamp);
+ if (err)
+ return err;
+ }
+ }
+
+ return err;
+}
+
static int arm_spe_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
struct perf_tool *tool __maybe_unused)
{
struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
auxtrace);
- struct auxtrace_buffer *buffer;
- off_t data_offset;
- int fd = perf_data__fd(session->data);
- int err;
- if (perf_data__is_pipe(session->data)) {
- data_offset = 0;
- } else {
- data_offset = lseek(fd, 0, SEEK_CUR);
- if (data_offset == -1)
- return -errno;
- }
+ if (!spe->data_queued) {
+ struct auxtrace_buffer *buffer;
+ off_t data_offset;
+ int fd = perf_data__fd(session->data);
+ int err;
- err = auxtrace_queues__add_event(&spe->queues, session, event,
- data_offset, &buffer);
- if (err)
- return err;
+ if (perf_data__is_pipe(session->data)) {
+ data_offset = 0;
+ } else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
- /* Dump here now we have copied a piped trace out of the pipe */
- if (dump_trace) {
- if (auxtrace_buffer__get_data(buffer, fd)) {
- arm_spe_dump_event(spe, buffer->data,
- buffer->size);
- auxtrace_buffer__put_data(buffer);
+ err = auxtrace_queues__add_event(&spe->queues, session, event,
+ data_offset, &buffer);
+ if (err)
+ return err;
+
+ /* Dump here now we have copied a piped trace out of the pipe */
+ if (dump_trace) {
+ if (auxtrace_buffer__get_data(buffer, fd)) {
+ arm_spe_dump_event(spe, buffer->data,
+ buffer->size);
+ auxtrace_buffer__put_data(buffer);
+ }
}
}
@@ -139,7 +654,25 @@ static int arm_spe_process_auxtrace_event(struct perf_session *session,
static int arm_spe_flush(struct perf_session *session __maybe_unused,
struct perf_tool *tool __maybe_unused)
{
- return 0;
+ struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+ auxtrace);
+ int ret;
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events)
+ return -EINVAL;
+
+ ret = arm_spe__update_queues(spe);
+ if (ret < 0)
+ return ret;
+
+ if (spe->timeless_decoding)
+ return arm_spe_process_timeless_queues(spe, -1,
+ MAX_TIMESTAMP - 1);
+
+ return arm_spe_process_queues(spe, MAX_TIMESTAMP);
}
static void arm_spe_free_queue(void *priv)
@@ -148,6 +681,9 @@ static void arm_spe_free_queue(void *priv)
if (!speq)
return;
+ thread__zput(speq->thread);
+ arm_spe_decoder_free(speq->decoder);
+ zfree(&speq->event_buf);
free(speq);
}
@@ -176,6 +712,14 @@ static void arm_spe_free(struct perf_session *session)
free(spe);
}
+static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
+
+ return evsel->core.attr.type == spe->pmu_type;
+}
+
static const char * const arm_spe_info_fmts[] = {
[ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n",
};
@@ -188,11 +732,189 @@ static void arm_spe_print_info(__u64 *arr)
fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
}
+struct arm_spe_synth {
+ struct perf_tool dummy_tool;
+ struct perf_session *session;
+};
+
+static int arm_spe_event_synth(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct arm_spe_synth *arm_spe_synth =
+ container_of(tool, struct arm_spe_synth, dummy_tool);
+
+ return perf_session__deliver_synth_event(arm_spe_synth->session,
+ event, NULL);
+}
+
+static int arm_spe_synth_event(struct perf_session *session,
+ struct perf_event_attr *attr, u64 id)
+{
+ struct arm_spe_synth arm_spe_synth;
+
+ memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
+ arm_spe_synth.session = session;
+
+ return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
+ &id, arm_spe_event_synth);
+}
+
+static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
+ const char *name)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.id && evsel->core.id[0] == id) {
+ if (evsel->name)
+ zfree(&evsel->name);
+ evsel->name = strdup(name);
+ break;
+ }
+ }
+}
+
+static int
+arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
+{
+ struct evlist *evlist = session->evlist;
+ struct evsel *evsel;
+ struct perf_event_attr attr;
+ bool found = false;
+ u64 id;
+ int err;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type == spe->pmu_type) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ pr_debug("No selected events with SPE trace data\n");
+ return 0;
+ }
+
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+ attr.size = sizeof(struct perf_event_attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+ attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+ PERF_SAMPLE_PERIOD;
+ if (spe->timeless_decoding)
+ attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+ else
+ attr.sample_type |= PERF_SAMPLE_TIME;
+
+ attr.exclude_user = evsel->core.attr.exclude_user;
+ attr.exclude_kernel = evsel->core.attr.exclude_kernel;
+ attr.exclude_hv = evsel->core.attr.exclude_hv;
+ attr.exclude_host = evsel->core.attr.exclude_host;
+ attr.exclude_guest = evsel->core.attr.exclude_guest;
+ attr.sample_id_all = evsel->core.attr.sample_id_all;
+ attr.read_format = evsel->core.attr.read_format;
+
+ /* create new id val to be a fixed offset from evsel id */
+ id = evsel->core.id[0] + 1000000000;
+
+ if (!id)
+ id = 1;
+
+ if (spe->synth_opts.flc) {
+ spe->sample_flc = true;
+
+ /* Level 1 data cache miss */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->l1d_miss_id = id;
+ arm_spe_set_event_name(evlist, id, "l1d-miss");
+ id += 1;
+
+ /* Level 1 data cache access */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->l1d_access_id = id;
+ arm_spe_set_event_name(evlist, id, "l1d-access");
+ id += 1;
+ }
+
+ if (spe->synth_opts.llc) {
+ spe->sample_llc = true;
+
+ /* Last level cache miss */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->llc_miss_id = id;
+ arm_spe_set_event_name(evlist, id, "llc-miss");
+ id += 1;
+
+ /* Last level cache access */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->llc_access_id = id;
+ arm_spe_set_event_name(evlist, id, "llc-access");
+ id += 1;
+ }
+
+ if (spe->synth_opts.tlb) {
+ spe->sample_tlb = true;
+
+ /* TLB miss */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->tlb_miss_id = id;
+ arm_spe_set_event_name(evlist, id, "tlb-miss");
+ id += 1;
+
+ /* TLB access */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->tlb_access_id = id;
+ arm_spe_set_event_name(evlist, id, "tlb-access");
+ id += 1;
+ }
+
+ if (spe->synth_opts.branches) {
+ spe->sample_branch = true;
+
+ /* Branch miss */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->branch_miss_id = id;
+ arm_spe_set_event_name(evlist, id, "branch-miss");
+ id += 1;
+ }
+
+ if (spe->synth_opts.remote_access) {
+ spe->sample_remote_access = true;
+
+ /* Remote access */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->remote_access_id = id;
+ arm_spe_set_event_name(evlist, id, "remote-access");
+ id += 1;
+ }
+
+ return 0;
+}
+
int arm_spe_process_auxtrace_info(union perf_event *event,
struct perf_session *session)
{
struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
- size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE;
+ size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
struct arm_spe *spe;
int err;
@@ -213,17 +935,41 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
spe->auxtrace_type = auxtrace_info->type;
spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
+ spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
spe->auxtrace.process_event = arm_spe_process_event;
spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
spe->auxtrace.flush_events = arm_spe_flush;
spe->auxtrace.free_events = arm_spe_free_events;
spe->auxtrace.free = arm_spe_free;
+ spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
session->auxtrace = &spe->auxtrace;
arm_spe_print_info(&auxtrace_info->priv[0]);
+ if (dump_trace)
+ return 0;
+
+ if (session->itrace_synth_opts && session->itrace_synth_opts->set)
+ spe->synth_opts = *session->itrace_synth_opts;
+ else
+ itrace_synth_opts__set_default(&spe->synth_opts, false);
+
+ err = arm_spe_synth_events(spe, session);
+ if (err)
+ goto err_free_queues;
+
+ err = auxtrace_queues__process_index(&spe->queues, session);
+ if (err)
+ goto err_free_queues;
+
+ if (spe->queues.populated)
+ spe->data_queued = true;
+
return 0;
+err_free_queues:
+ auxtrace_queues__free(&spe->queues);
+ session->auxtrace = NULL;
err_free:
free(spe);
return err;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 3571ce72ca28..42a85c86421d 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -33,6 +33,7 @@
#include "evsel.h"
#include "evsel_config.h"
#include "symbol.h"
+#include "util/perf_api_probe.h"
#include "util/synthetic-events.h"
#include "thread_map.h"
#include "asm/bug.h"
@@ -54,29 +55,9 @@
#include "util/mmap.h"
#include <linux/ctype.h>
-#include <linux/kernel.h>
#include "symbol/kallsyms.h"
#include <internal/lib.h>
-static struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel)
-{
- struct perf_pmu *pmu = NULL;
-
- while ((pmu = perf_pmu__scan(pmu)) != NULL) {
- if (pmu->type == evsel->core.attr.type)
- break;
- }
-
- return pmu;
-}
-
-static bool perf_evsel__is_aux_event(struct evsel *evsel)
-{
- struct perf_pmu *pmu = perf_evsel__find_pmu(evsel);
-
- return pmu && pmu->auxtrace;
-}
-
/*
* Make a group from 'leader' to 'last', requiring that the events were not
* already grouped to a different leader.
@@ -88,7 +69,7 @@ static int perf_evlist__regroup(struct evlist *evlist,
struct evsel *evsel;
bool grp;
- if (!perf_evsel__is_group_leader(leader))
+ if (!evsel__is_group_leader(leader))
return -EINVAL;
grp = false;
@@ -703,8 +684,8 @@ static int auxtrace_validate_aux_sample_size(struct evlist *evlist,
evlist__for_each_entry(evlist, evsel) {
sz = evsel->core.attr.aux_sample_size;
- if (perf_evsel__is_group_leader(evsel)) {
- has_aux_leader = perf_evsel__is_aux_event(evsel);
+ if (evsel__is_group_leader(evsel)) {
+ has_aux_leader = evsel__is_aux_event(evsel);
if (sz) {
if (has_aux_leader)
pr_err("Cannot add AUX area sampling to an AUX area event\n");
@@ -723,10 +704,10 @@ static int auxtrace_validate_aux_sample_size(struct evlist *evlist,
pr_err("Cannot add AUX area sampling because group leader is not an AUX area event\n");
return -EINVAL;
}
- perf_evsel__set_sample_bit(evsel, AUX);
+ evsel__set_sample_bit(evsel, AUX);
opts->auxtrace_sample_mode = true;
} else {
- perf_evsel__reset_sample_bit(evsel, AUX);
+ evsel__reset_sample_bit(evsel, AUX);
}
}
@@ -747,7 +728,7 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr,
struct evlist *evlist,
struct record_opts *opts, const char *str)
{
- struct perf_evsel_config_term *term;
+ struct evsel_config_term *term;
struct evsel *aux_evsel;
bool has_aux_sample_size = false;
bool has_aux_leader = false;
@@ -777,8 +758,8 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr,
/* Set aux_sample_size based on --aux-sample option */
evlist__for_each_entry(evlist, evsel) {
- if (perf_evsel__is_group_leader(evsel)) {
- has_aux_leader = perf_evsel__is_aux_event(evsel);
+ if (evsel__is_group_leader(evsel)) {
+ has_aux_leader = evsel__is_aux_event(evsel);
} else if (has_aux_leader) {
evsel->core.attr.aux_sample_size = sz;
}
@@ -787,9 +768,9 @@ no_opt:
aux_evsel = NULL;
/* Override with aux_sample_size from config term */
evlist__for_each_entry(evlist, evsel) {
- if (perf_evsel__is_aux_event(evsel))
+ if (evsel__is_aux_event(evsel))
aux_evsel = evsel;
- term = perf_evsel__get_config_term(evsel, AUX_SAMPLE_SIZE);
+ term = evsel__get_config_term(evsel, AUX_SAMPLE_SIZE);
if (term) {
has_aux_sample_size = true;
evsel->core.attr.aux_sample_size = term->val.aux_sample_size;
@@ -1234,29 +1215,79 @@ out_free:
return err;
}
+static void unleader_evsel(struct evlist *evlist, struct evsel *leader)
+{
+ struct evsel *new_leader = NULL;
+ struct evsel *evsel;
+
+ /* Find new leader for the group */
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->leader != leader || evsel == leader)
+ continue;
+ if (!new_leader)
+ new_leader = evsel;
+ evsel->leader = new_leader;
+ }
+
+ /* Update group information */
+ if (new_leader) {
+ zfree(&new_leader->group_name);
+ new_leader->group_name = leader->group_name;
+ leader->group_name = NULL;
+
+ new_leader->core.nr_members = leader->core.nr_members - 1;
+ leader->core.nr_members = 1;
+ }
+}
+
+static void unleader_auxtrace(struct perf_session *session)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (auxtrace__evsel_is_auxtrace(session, evsel) &&
+ evsel__is_group_leader(evsel)) {
+ unleader_evsel(session->evlist, evsel);
+ }
+ }
+}
+
int perf_event__process_auxtrace_info(struct perf_session *session,
union perf_event *event)
{
enum auxtrace_type type = event->auxtrace_info.type;
+ int err;
if (dump_trace)
fprintf(stdout, " type: %u\n", type);
switch (type) {
case PERF_AUXTRACE_INTEL_PT:
- return intel_pt_process_auxtrace_info(event, session);
+ err = intel_pt_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_INTEL_BTS:
- return intel_bts_process_auxtrace_info(event, session);
+ err = intel_bts_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_ARM_SPE:
- return arm_spe_process_auxtrace_info(event, session);
+ err = arm_spe_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_CS_ETM:
- return cs_etm__process_auxtrace_info(event, session);
+ err = cs_etm__process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_S390_CPUMSF:
- return s390_cpumsf_process_auxtrace_info(event, session);
+ err = s390_cpumsf_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_UNKNOWN:
default:
return -EINVAL;
}
+
+ if (err)
+ return err;
+
+ unleader_auxtrace(session);
+
+ return 0;
}
s64 perf_event__process_auxtrace(struct perf_session *session,
@@ -1299,6 +1330,11 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
synth_opts->pwr_events = true;
synth_opts->other_events = true;
synth_opts->errors = true;
+ synth_opts->flc = true;
+ synth_opts->llc = true;
+ synth_opts->tlb = true;
+ synth_opts->remote_access = true;
+
if (no_sample) {
synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
synth_opts->period = 1;
@@ -1313,6 +1349,47 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
synth_opts->initial_skip = 0;
}
+static int get_flag(const char **ptr, unsigned int *flags)
+{
+ while (1) {
+ char c = **ptr;
+
+ if (c >= 'a' && c <= 'z') {
+ *flags |= 1 << (c - 'a');
+ ++*ptr;
+ return 0;
+ } else if (c == ' ') {
+ ++*ptr;
+ continue;
+ } else {
+ return -1;
+ }
+ }
+}
+
+static int get_flags(const char **ptr, unsigned int *plus_flags, unsigned int *minus_flags)
+{
+ while (1) {
+ switch (**ptr) {
+ case '+':
+ ++*ptr;
+ if (get_flag(ptr, plus_flags))
+ return -1;
+ break;
+ case '-':
+ ++*ptr;
+ if (get_flag(ptr, minus_flags))
+ return -1;
+ break;
+ case ' ':
+ ++*ptr;
+ break;
+ default:
+ return 0;
+ }
+ }
+}
+
/*
* Please check tools/perf/Documentation/perf-script.txt for information
* about the options parsed here, which is introduced after this cset,
@@ -1400,9 +1477,15 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
break;
case 'e':
synth_opts->errors = true;
+ if (get_flags(&p, &synth_opts->error_plus_flags,
+ &synth_opts->error_minus_flags))
+ goto out_err;
break;
case 'd':
synth_opts->log = true;
+ if (get_flags(&p, &synth_opts->log_plus_flags,
+ &synth_opts->log_minus_flags))
+ goto out_err;
break;
case 'c':
synth_opts->branches = true;
@@ -1412,8 +1495,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
synth_opts->branches = true;
synth_opts->returns = true;
break;
+ case 'G':
case 'g':
- synth_opts->callchain = true;
+ if (p[-1] == 'G')
+ synth_opts->add_callchain = true;
+ else
+ synth_opts->callchain = true;
synth_opts->callchain_sz =
PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
while (*p == ' ' || *p == ',')
@@ -1428,8 +1515,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
synth_opts->callchain_sz = val;
}
break;
+ case 'L':
case 'l':
- synth_opts->last_branch = true;
+ if (p[-1] == 'L')
+ synth_opts->add_last_branch = true;
+ else
+ synth_opts->last_branch = true;
synth_opts->last_branch_sz =
PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
while (*p == ' ' || *p == ',')
@@ -1451,6 +1542,21 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
goto out_err;
p = endptr;
break;
+ case 'f':
+ synth_opts->flc = true;
+ break;
+ case 'm':
+ synth_opts->llc = true;
+ break;
+ case 't':
+ synth_opts->tlb = true;
+ break;
+ case 'a':
+ synth_opts->remote_access = true;
+ break;
+ case 'q':
+ synth_opts->quick += 1;
+ break;
case ' ':
case ',':
break;
@@ -2482,7 +2588,7 @@ static int parse_addr_filter(struct evsel *evsel, const char *filter,
goto out_exit;
}
- if (perf_evsel__append_addr_filter(evsel, new_filter)) {
+ if (evsel__append_addr_filter(evsel, new_filter)) {
err = -ENOMEM;
goto out_exit;
}
@@ -2500,9 +2606,9 @@ out_exit:
return err;
}
-static int perf_evsel__nr_addr_filter(struct evsel *evsel)
+static int evsel__nr_addr_filter(struct evsel *evsel)
{
- struct perf_pmu *pmu = perf_evsel__find_pmu(evsel);
+ struct perf_pmu *pmu = evsel__find_pmu(evsel);
int nr_addr_filters = 0;
if (!pmu)
@@ -2521,7 +2627,7 @@ int auxtrace_parse_filters(struct evlist *evlist)
evlist__for_each_entry(evlist, evsel) {
filter = evsel->filter;
- max_nr = perf_evsel__nr_addr_filter(evsel);
+ max_nr = evsel__nr_addr_filter(evsel);
if (!filter || !max_nr)
continue;
evsel->filter = NULL;
@@ -2577,3 +2683,12 @@ void auxtrace__free(struct perf_session *session)
return session->auxtrace->free(session);
}
+
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ if (!session->auxtrace || !session->auxtrace->evsel_is_auxtrace)
+ return false;
+
+ return session->auxtrace->evsel_is_auxtrace(session, evsel);
+}
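The get_flag()/get_flags() helpers added above let itrace options such as 'e' and 'd' carry a suffix of '+'/'-' prefixed letters, each letter selecting one bit ('a' = bit 0 .. 'z' = bit 25) in a plus or minus mask. Below is a standalone sketch, not part of the patch, that copies the two helpers verbatim and shows the masks produced for the suffix "+o-l" (as in --itrace=e+o-l):

#include <stdio.h>

static int get_flag(const char **ptr, unsigned int *flags)
{
	while (1) {
		char c = **ptr;

		if (c >= 'a' && c <= 'z') {
			*flags |= 1 << (c - 'a');
			++*ptr;
			return 0;
		} else if (c == ' ') {
			++*ptr;
			continue;
		} else {
			return -1;
		}
	}
}

static int get_flags(const char **ptr, unsigned int *plus_flags, unsigned int *minus_flags)
{
	while (1) {
		switch (**ptr) {
		case '+':
			++*ptr;
			if (get_flag(ptr, plus_flags))
				return -1;
			break;
		case '-':
			++*ptr;
			if (get_flag(ptr, minus_flags))
				return -1;
			break;
		case ' ':
			++*ptr;
			break;
		default:
			return 0;
		}
	}
}

int main(void)
{
	const char *p = "+o-l";
	unsigned int plus = 0, minus = 0;

	get_flags(&p, &plus, &minus);
	/* 'o' sets bit 14 in plus (overflow), 'l' sets bit 11 in minus (data lost) */
	printf("plus=0x%x minus=0x%x\n", plus, minus);	/* plus=0x4000 minus=0x800 */
	return 0;
}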
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index e58ef160b599..951d2d14cf24 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -21,6 +21,7 @@
union perf_event;
struct perf_session;
struct evlist;
+struct evsel;
struct perf_tool;
struct mmap;
struct perf_sample;
@@ -54,6 +55,11 @@ enum itrace_period_type {
PERF_ITRACE_PERIOD_NANOSECS,
};
+#define AUXTRACE_ERR_FLG_OVERFLOW (1 << ('o' - 'a'))
+#define AUXTRACE_ERR_FLG_DATA_LOST (1 << ('l' - 'a'))
+
+#define AUXTRACE_LOG_FLG_ALL_PERF_EVTS (1 << ('a' - 'a'))
+
/**
* struct itrace_synth_opts - AUX area tracing synthesis options.
* @set: indicates whether or not options have been set
@@ -62,6 +68,7 @@ enum itrace_period_type {
* because 'perf inject' will write it out
* @instructions: whether to synthesize 'instructions' events
* @branches: whether to synthesize 'branches' events
+ * (branch misses only for Arm SPE)
* @transactions: whether to synthesize events for transactions
* @ptwrites: whether to synthesize events for ptwrites
* @pwr_events: whether to synthesize power events
@@ -73,8 +80,14 @@ enum itrace_period_type {
* @calls: limit branch samples to calls (can be combined with @returns)
* @returns: limit branch samples to returns (can be combined with @calls)
* @callchain: add callchain to 'instructions' events
+ * @add_callchain: add callchain to existing event records
* @thread_stack: feed branches to the thread_stack
* @last_branch: add branch context to 'instruction' events
+ * @add_last_branch: add branch context to existing event records
+ * @flc: whether to synthesize first level cache events
+ * @llc: whether to synthesize last level cache events
+ * @tlb: whether to synthesize TLB events
+ * @remote_access: whether to synthesize remote access events
* @callchain_sz: maximum callchain size
* @last_branch_sz: branch context size
* @period: 'instructions' events period
@@ -83,6 +96,11 @@ enum itrace_period_type {
* @cpu_bitmap: CPUs for which to synthesize events, or NULL for all
* @ptime_range: time intervals to trace or NULL
* @range_num: number of time intervals to trace
+ * @error_plus_flags: flags to affect what errors are reported
+ * @error_minus_flags: flags to affect what errors are reported
+ * @log_plus_flags: flags to affect what is logged
+ * @log_minus_flags: flags to affect what is logged
+ * @quick: quicker (less detailed) decoding
*/
struct itrace_synth_opts {
bool set;
@@ -100,8 +118,14 @@ struct itrace_synth_opts {
bool calls;
bool returns;
bool callchain;
+ bool add_callchain;
bool thread_stack;
bool last_branch;
+ bool add_last_branch;
+ bool flc;
+ bool llc;
+ bool tlb;
+ bool remote_access;
unsigned int callchain_sz;
unsigned int last_branch_sz;
unsigned long long period;
@@ -110,6 +134,11 @@ struct itrace_synth_opts {
unsigned long *cpu_bitmap;
struct perf_time_interval *ptime_range;
int range_num;
+ unsigned int error_plus_flags;
+ unsigned int error_minus_flags;
+ unsigned int log_plus_flags;
+ unsigned int log_minus_flags;
+ unsigned int quick;
};
/**
@@ -166,6 +195,8 @@ struct auxtrace {
struct perf_tool *tool);
void (*free_events)(struct perf_session *session);
void (*free)(struct perf_session *session);
+ bool (*evsel_is_auxtrace)(struct perf_session *session,
+ struct evsel *evsel);
};
/**
@@ -584,20 +615,36 @@ void auxtrace__dump_auxtrace_sample(struct perf_session *session,
int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool);
void auxtrace__free_events(struct perf_session *session);
void auxtrace__free(struct perf_session *session);
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel);
#define ITRACE_HELP \
-" i: synthesize instructions events\n" \
-" b: synthesize branches events\n" \
+" i[period]: synthesize instructions events\n" \
+" b: synthesize branches events (branch misses for Arm SPE)\n" \
" c: synthesize branches events (calls only)\n" \
" r: synthesize branches events (returns only)\n" \
" x: synthesize transactions events\n" \
" w: synthesize ptwrite events\n" \
" p: synthesize power events\n" \
-" e: synthesize error events\n" \
-" d: create a debug log\n" \
+" o: synthesize other events recorded due to the use\n" \
+" of aux-output (refer to perf record)\n" \
+" e[flags]: synthesize error events\n" \
+" each flag must be preceded by + or -\n" \
+" error flags are: o (overflow)\n" \
+" l (data lost)\n" \
+" d[flags]: create a debug log\n" \
+" each flag must be preceded by + or -\n" \
+" log flags are: a (all perf events)\n" \
+" f: synthesize first level cache events\n" \
+" m: synthesize last level cache events\n" \
+" t: synthesize TLB events\n" \
+" a: synthesize remote access events\n" \
" g[len]: synthesize a call chain (use with i or x)\n" \
+" G[len]: synthesize a call chain on existing event records\n" \
" l[len]: synthesize last branch entries (use with i or x)\n" \
+" L[len]: synthesize last branch entries on existing event records\n" \
" sNUMBER: skip initial number of events\n" \
+" q: quicker (less detailed) decoding\n" \
" PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \
" concatenate multiple options. Default is ibxwpe or cewp\n"
@@ -750,6 +797,13 @@ void auxtrace_index__free(struct list_head *head __maybe_unused)
}
static inline
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session __maybe_unused,
+ struct evsel *evsel __maybe_unused)
+{
+ return false;
+}
+
+static inline
int auxtrace_parse_filters(struct evlist *evlist __maybe_unused)
{
return 0;
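The AUXTRACE_ERR_FLG_*/AUXTRACE_LOG_FLG_* bits and the new error/log flag masks in struct itrace_synth_opts give decoders a per-error-type override on top of the plain 'e' option. A hedged sketch of how a consumer might combine them; itrace_want_overflow_errors() is a made-up helper for illustration, not something this patch adds:

static bool itrace_want_overflow_errors(const struct itrace_synth_opts *opts)
{
	if (opts->error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW)
		return false;		/* suppressed with "e-o" */
	if (opts->error_plus_flags & AUXTRACE_ERR_FLG_OVERFLOW)
		return true;		/* requested with "e+o" */
	return opts->errors;		/* plain "e" default */
}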
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index a3207d900339..3742511a08d1 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -6,6 +6,9 @@
#include <bpf/libbpf.h>
#include <linux/btf.h>
#include <linux/err.h>
+#include <linux/string.h>
+#include <internal/lib.h>
+#include <symbol/kallsyms.h>
#include "bpf-event.h"
#include "debug.h"
#include "dso.h"
@@ -290,11 +293,82 @@ out:
return err ? -1 : 0;
}
+struct kallsyms_parse {
+ union perf_event *event;
+ perf_event__handler_t process;
+ struct machine *machine;
+ struct perf_tool *tool;
+};
+
+static int
+process_bpf_image(char *name, u64 addr, struct kallsyms_parse *data)
+{
+ struct machine *machine = data->machine;
+ union perf_event *event = data->event;
+ struct perf_record_ksymbol *ksymbol;
+ int len;
+
+ ksymbol = &event->ksymbol;
+
+ *ksymbol = (struct perf_record_ksymbol) {
+ .header = {
+ .type = PERF_RECORD_KSYMBOL,
+ .size = offsetof(struct perf_record_ksymbol, name),
+ },
+ .addr = addr,
+ .len = page_size,
+ .ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF,
+ .flags = 0,
+ };
+
+ len = scnprintf(ksymbol->name, KSYM_NAME_LEN, "%s", name);
+ ksymbol->header.size += PERF_ALIGN(len + 1, sizeof(u64));
+ memset((void *) event + event->header.size, 0, machine->id_hdr_size);
+ event->header.size += machine->id_hdr_size;
+
+ return perf_tool__process_synth_event(data->tool, event, machine,
+ data->process);
+}
+
+static int
+kallsyms_process_symbol(void *data, const char *_name,
+ char type __maybe_unused, u64 start)
+{
+ char disp[KSYM_NAME_LEN];
+ char *module, *name;
+ unsigned long id;
+ int err = 0;
+
+ module = strchr(_name, '\t');
+ if (!module)
+ return 0;
+
+ /* We are going after [bpf] module ... */
+ if (strcmp(module + 1, "[bpf]"))
+ return 0;
+
+ name = memdup(_name, (module - _name) + 1);
+ if (!name)
+ return -ENOMEM;
+
+ name[module - _name] = 0;
+
+ /* .. and only for trampolines and dispatchers */
+ if ((sscanf(name, "bpf_trampoline_%lu", &id) == 1) ||
+ (sscanf(name, "bpf_dispatcher_%s", disp) == 1))
+ err = process_bpf_image(name, start, data);
+
+ free(name);
+ return err;
+}
+
int perf_event__synthesize_bpf_events(struct perf_session *session,
perf_event__handler_t process,
struct machine *machine,
struct record_opts *opts)
{
+ const char *kallsyms_filename = "/proc/kallsyms";
+ struct kallsyms_parse arg;
union perf_event *event;
__u32 id = 0;
int err;
@@ -303,6 +377,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size);
if (!event)
return -1;
+
+ /* Synthesize all the bpf programs in the system. */
while (true) {
err = bpf_prog_get_next_id(id, &id);
if (err) {
@@ -335,6 +411,23 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
break;
}
}
+
+ /* Synthesize all the bpf images - trampolines/dispatchers. */
+ if (symbol_conf.kallsyms_name != NULL)
+ kallsyms_filename = symbol_conf.kallsyms_name;
+
+ arg = (struct kallsyms_parse) {
+ .event = event,
+ .process = process,
+ .machine = machine,
+ .tool = session->tool,
+ };
+
+ if (kallsyms__parse(kallsyms_filename, &arg, kallsyms_process_symbol)) {
+ pr_err("%s: failed to synthesize bpf images: %s\n",
+ __func__, strerror(errno));
+ }
+
free(event);
return err;
}
@@ -416,8 +509,7 @@ static int bpf_event__sb_cb(union perf_event *event, void *data)
return 0;
}
-int bpf_event__add_sb_event(struct evlist **evlist,
- struct perf_env *env)
+int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_SOFTWARE,
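For reference, kallsyms_process_symbol() in the bpf-event.c hunks above only reacts to kallsyms entries whose module field is "[bpf]" and whose names match the trampoline/dispatcher patterns. An illustrative pair of /proc/kallsyms lines it would accept (addresses and ids are made up; the module field is tab separated):

/*
 *   ffffffffc0123000 t bpf_trampoline_42	[bpf]
 *   ffffffffc0456000 t bpf_dispatcher_xdp	[bpf]
 */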
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index 81fdc88e6c1a..68f315c3df5b 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -33,8 +33,7 @@ struct btf_node {
#ifdef HAVE_LIBBPF_SUPPORT
int machine__process_bpf(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
-int bpf_event__add_sb_event(struct evlist **evlist,
- struct perf_env *env);
+int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env);
void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
struct perf_env *env,
FILE *fp);
@@ -46,8 +45,8 @@ static inline int machine__process_bpf(struct machine *machine __maybe_unused,
return 0;
}
-static inline int bpf_event__add_sb_event(struct evlist **evlist __maybe_unused,
- struct perf_env *env __maybe_unused)
+static inline int evlist__add_bpf_sb_event(struct evlist *evlist __maybe_unused,
+ struct perf_env *env __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 10c187b8b8ea..0374adcb223c 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -328,12 +328,6 @@ config_bpf_program(struct bpf_program *prog)
probe_conf.no_inlines = false;
probe_conf.force_add = false;
- config_str = bpf_program__title(prog, false);
- if (IS_ERR(config_str)) {
- pr_debug("bpf: unable to get title for program\n");
- return PTR_ERR(config_str);
- }
-
priv = calloc(sizeof(*priv), 1);
if (!priv) {
pr_debug("bpf: failed to alloc priv\n");
@@ -341,6 +335,7 @@ config_bpf_program(struct bpf_program *prog)
}
pev = &priv->pev;
+ config_str = bpf_program__section_name(prog);
pr_debug("bpf: config program '%s'\n", config_str);
err = parse_prog_config(config_str, &main_str, &is_tp, pev);
if (err)
@@ -454,10 +449,7 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
if (err) {
const char *title;
- title = bpf_program__title(prog, false);
- if (!title)
- title = "[unknown]";
-
+ title = bpf_program__section_name(prog);
pr_debug("Failed to generate prologue for program %s\n",
title);
return err;
@@ -1225,7 +1217,7 @@ bpf__obj_config_map(struct bpf_object *obj,
out:
free(map_name);
if (!err)
- key_scan_pos += strlen(map_opt);
+ *key_scan_pos += strlen(map_opt);
return err;
}
@@ -1430,7 +1422,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
}
- if (perf_evsel__is_bpf_output(evsel))
+ if (evsel__is_bpf_output(evsel))
check_pass = true;
if (attr->type == PERF_TYPE_RAW)
check_pass = true;
diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c
index b020a8678eb9..9887ae09242d 100644
--- a/tools/perf/util/bpf-prologue.c
+++ b/tools/perf/util/bpf-prologue.c
@@ -142,7 +142,8 @@ static int
gen_read_mem(struct bpf_insn_pos *pos,
int src_base_addr_reg,
int dst_addr_reg,
- long offset)
+ long offset,
+ int probeid)
{
/* mov arg3, src_base_addr_reg */
if (src_base_addr_reg != BPF_REG_ARG3)
@@ -159,7 +160,7 @@ gen_read_mem(struct bpf_insn_pos *pos,
ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos);
/* Call probe_read */
- ins(BPF_EMIT_CALL(BPF_FUNC_probe_read), pos);
+ ins(BPF_EMIT_CALL(probeid), pos);
/*
* Error processing: if read fail, goto error code,
* will be relocated. Target should be the start of
@@ -241,7 +242,7 @@ static int
gen_prologue_slowpath(struct bpf_insn_pos *pos,
struct probe_trace_arg *args, int nargs)
{
- int err, i;
+ int err, i, probeid;
for (i = 0; i < nargs; i++) {
struct probe_trace_arg *arg = &args[i];
@@ -276,11 +277,16 @@ gen_prologue_slowpath(struct bpf_insn_pos *pos,
stack_offset), pos);
ref = arg->ref;
+ probeid = BPF_FUNC_probe_read_kernel;
while (ref) {
pr_debug("prologue: arg %d: offset %ld\n",
i, ref->offset);
+
+ if (ref->user_access)
+ probeid = BPF_FUNC_probe_read_user;
+
err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7,
- ref->offset);
+ ref->offset, probeid);
if (err) {
pr_err("prologue: failed to generate probe_read function call\n");
goto errout;
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 154a05cd03af..17b2ccc61094 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -15,13 +15,18 @@
#include "event.h"
struct branch_flags {
- u64 mispred:1;
- u64 predicted:1;
- u64 in_tx:1;
- u64 abort:1;
- u64 cycles:16;
- u64 type:4;
- u64 reserved:40;
+ union {
+ u64 value;
+ struct {
+ u64 mispred:1;
+ u64 predicted:1;
+ u64 in_tx:1;
+ u64 abort:1;
+ u64 cycles:16;
+ u64 type:4;
+ u64 reserved:40;
+ };
+ };
};
struct branch_info {
@@ -41,7 +46,7 @@ struct branch_entry {
struct branch_stack {
u64 nr;
u64 hw_idx;
- struct branch_entry entries[0];
+ struct branch_entry entries[];
};
/*
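The anonymous union added to struct branch_flags above exposes all of the bit-fields through a single 'value' word. A hedged sketch, not part of the patch, of the kind of whole-flags comparison this enables (branch_flags_equal() is a made-up name):

static inline bool branch_flags_equal(const struct branch_flags *a,
				      const struct branch_flags *b)
{
	/* compare mispred, predicted, in_tx, abort, cycles and type in one go */
	return a->value == b->value;
}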
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index c076fc7fe025..8763772f1095 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -31,8 +31,13 @@
#include "probe-file.h"
#include "strlist.h"
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+#include <elfutils/debuginfod.h>
+#endif
+
#include <linux/ctype.h>
#include <linux/zalloc.h>
+#include <asm/bug.h>
static bool no_buildid_cache;
@@ -91,13 +96,13 @@ struct perf_tool build_id__mark_dso_hit_ops = {
.ordered_events = true,
};
-int build_id__sprintf(const u8 *build_id, int len, char *bf)
+int build_id__sprintf(const struct build_id *build_id, char *bf)
{
char *bid = bf;
- const u8 *raw = build_id;
- int i;
+ const u8 *raw = build_id->data;
+ size_t i;
- for (i = 0; i < len; ++i) {
+ for (i = 0; i < build_id->size; ++i) {
sprintf(bid, "%02x", *raw);
++raw;
bid += 2;
@@ -109,7 +114,7 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf)
int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id)
{
char notes[PATH_MAX];
- u8 build_id[BUILD_ID_SIZE];
+ struct build_id bid;
int ret;
if (!root_dir)
@@ -117,25 +122,23 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id)
scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir);
- ret = sysfs__read_build_id(notes, build_id, sizeof(build_id));
+ ret = sysfs__read_build_id(notes, &bid);
if (ret < 0)
return ret;
- return build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ return build_id__sprintf(&bid, sbuild_id);
}
int filename__sprintf_build_id(const char *pathname, char *sbuild_id)
{
- u8 build_id[BUILD_ID_SIZE];
+ struct build_id bid;
int ret;
- ret = filename__read_build_id(pathname, build_id, sizeof(build_id));
+ ret = filename__read_build_id(pathname, &bid);
if (ret < 0)
return ret;
- else if (ret != sizeof(build_id))
- return -EINVAL;
- return build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ return build_id__sprintf(&bid, sbuild_id);
}
/* asnprintf consolidates asprintf and snprintf */
@@ -268,7 +271,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
if (!dso->has_build_id)
return NULL;
- build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+ build_id__sprintf(&dso->bid, sbuild_id);
linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
if (!linkname)
return NULL;
@@ -293,7 +296,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
continue; \
else
-static int write_buildid(const char *name, size_t name_len, u8 *build_id,
+static int write_buildid(const char *name, size_t name_len, struct build_id *bid,
pid_t pid, u16 misc, struct feat_fd *fd)
{
int err;
@@ -304,7 +307,9 @@ static int write_buildid(const char *name, size_t name_len, u8 *build_id,
len = PERF_ALIGN(len, NAME_ALIGN);
memset(&b, 0, sizeof(b));
- memcpy(&b.build_id, build_id, BUILD_ID_SIZE);
+ memcpy(&b.data, bid->data, bid->size);
+ b.size = (u8) bid->size;
+ misc |= PERF_RECORD_MISC_BUILD_ID_SIZE;
b.pid = pid;
b.header.misc = misc;
b.header.size = sizeof(b) + len;
@@ -351,7 +356,7 @@ static int machine__write_buildid_table(struct machine *machine,
in_kernel = pos->kernel ||
is_kernel_module(name,
PERF_RECORD_MISC_CPUMODE_UNKNOWN);
- err = write_buildid(name, name_len, pos->build_id, machine->pid,
+ err = write_buildid(name, name_len, &pos->bid, machine->pid,
in_kernel ? kmisc : umisc, fd);
if (err)
break;
@@ -636,6 +641,21 @@ static char *build_id_cache__find_debug(const char *sbuild_id,
if (realname && access(realname, R_OK))
zfree(&realname);
nsinfo__mountns_exit(&nsc);
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+ if (realname == NULL) {
+ debuginfod_client* c = debuginfod_begin();
+ if (c != NULL) {
+ int fd = debuginfod_find_debuginfo(c,
+ (const unsigned char*)sbuild_id, 0,
+ &realname);
+ if (fd >= 0)
+ close(fd); /* retaining reference by realname */
+ debuginfod_end(c);
+ }
+ }
+#endif
+
out:
free(debugfile);
return realname;
@@ -750,13 +770,13 @@ out_free:
return err;
}
-static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
+static int build_id_cache__add_b(const struct build_id *bid,
const char *name, struct nsinfo *nsi,
bool is_kallsyms, bool is_vdso)
{
char sbuild_id[SBUILD_ID_SIZE];
- build_id__sprintf(build_id, build_id_size, sbuild_id);
+ build_id__sprintf(bid, sbuild_id);
return build_id_cache__add_s(sbuild_id, name, nsi, is_kallsyms,
is_vdso);
@@ -822,8 +842,8 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine)
is_kallsyms = true;
name = machine->mmap_name;
}
- return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name,
- dso->nsinfo, is_kallsyms, is_vdso);
+ return build_id_cache__add_b(&dso->bid, name, dso->nsinfo,
+ is_kallsyms, is_vdso);
}
static int __dsos__cache_build_ids(struct list_head *head,
@@ -883,3 +903,10 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
return ret;
}
+
+void build_id__init(struct build_id *bid, const u8 *data, size_t size)
+{
+ WARN_ON(size > BUILD_ID_SIZE);
+ memcpy(bid->data, data, size);
+ bid->size = size;
+}
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index aad419bb165c..f293f99d5dba 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -8,13 +8,19 @@
#include "tool.h"
#include <linux/types.h>
+struct build_id {
+ u8 data[BUILD_ID_SIZE];
+ size_t size;
+};
+
struct nsinfo;
extern struct perf_tool build_id__mark_dso_hit_ops;
struct dso;
struct feat_fd;
-int build_id__sprintf(const u8 *build_id, int len, char *bf);
+void build_id__init(struct build_id *bid, const u8 *data, size_t size);
+int build_id__sprintf(const struct build_id *build_id, char *bf);
int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
@@ -29,6 +35,10 @@ int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
int dsos__hit_all(struct perf_session *session);
+int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct evsel *evsel,
+ struct machine *machine);
+
bool perf_session__read_build_ids(struct perf_session *session, bool with_hits);
int perf_session__write_buildid_table(struct perf_session *session,
struct feat_fd *fd);
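A hedged usage sketch for the struct build_id helpers declared above, not part of the patch: the raw bytes are made up, and sbuild_id is assumed to be SBUILD_ID_SIZE bytes, i.e. two hex digits per byte plus the terminating NUL.

static void build_id_example(void)
{
	static const u8 raw[] = { 0xde, 0xad, 0xbe, 0xef };
	char sbuild_id[SBUILD_ID_SIZE];
	struct build_id bid;

	build_id__init(&bid, raw, sizeof(raw));	/* copies the bytes, records the size */
	build_id__sprintf(&bid, sbuild_id);	/* sbuild_id becomes "deadbeef" */
	pr_debug("build id: %s\n", sbuild_id);
}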
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 818aa4efd386..1b60985690bb 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1599,3 +1599,116 @@ void callchain_cursor_reset(struct callchain_cursor *cursor)
for (node = cursor->first; node != NULL; node = node->next)
map__zput(node->ms.map);
}
+
+void callchain_param_setup(u64 sample_type)
+{
+ if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
+ if ((sample_type & PERF_SAMPLE_REGS_USER) &&
+ (sample_type & PERF_SAMPLE_STACK_USER)) {
+ callchain_param.record_mode = CALLCHAIN_DWARF;
+ dwarf_callchain_users = true;
+ } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ callchain_param.record_mode = CALLCHAIN_LBR;
+ else
+ callchain_param.record_mode = CALLCHAIN_FP;
+ }
+}
+
+static bool chain_match(struct callchain_list *base_chain,
+ struct callchain_list *pair_chain)
+{
+ enum match_result match;
+
+ match = match_chain_strings(base_chain->srcline,
+ pair_chain->srcline);
+ if (match != MATCH_ERROR)
+ return match == MATCH_EQ;
+
+ match = match_chain_dso_addresses(base_chain->ms.map,
+ base_chain->ip,
+ pair_chain->ms.map,
+ pair_chain->ip);
+
+ return match == MATCH_EQ;
+}
+
+bool callchain_cnode_matched(struct callchain_node *base_cnode,
+ struct callchain_node *pair_cnode)
+{
+ struct callchain_list *base_chain, *pair_chain;
+ bool match = false;
+
+ pair_chain = list_first_entry(&pair_cnode->val,
+ struct callchain_list,
+ list);
+
+ list_for_each_entry(base_chain, &base_cnode->val, list) {
+ if (&pair_chain->list == &pair_cnode->val)
+ return false;
+
+ if (!base_chain->srcline || !pair_chain->srcline) {
+ pair_chain = list_next_entry(pair_chain, list);
+ continue;
+ }
+
+ match = chain_match(base_chain, pair_chain);
+ if (!match)
+ return false;
+
+ pair_chain = list_next_entry(pair_chain, list);
+ }
+
+ /*
+ * Say chain1 is ABC and chain2 is ABCD; we consider them
+ * not fully matched.
+ */
+ if (pair_chain && (&pair_chain->list != &pair_cnode->val))
+ return false;
+
+ return match;
+}
+
+static u64 count_callchain_hits(struct hist_entry *he)
+{
+ struct rb_root *root = &he->sorted_chain;
+ struct rb_node *rb_node = rb_first(root);
+ struct callchain_node *node;
+ u64 chain_hits = 0;
+
+ while (rb_node) {
+ node = rb_entry(rb_node, struct callchain_node, rb_node);
+ chain_hits += node->hit;
+ rb_node = rb_next(rb_node);
+ }
+
+ return chain_hits;
+}
+
+u64 callchain_total_hits(struct hists *hists)
+{
+ struct rb_node *next = rb_first_cached(&hists->entries);
+ u64 chain_hits = 0;
+
+ while (next) {
+ struct hist_entry *he = rb_entry(next, struct hist_entry,
+ rb_node);
+
+ chain_hits += count_callchain_hits(he);
+ next = rb_next(&he->rb_node);
+ }
+
+ return chain_hits;
+}
+
+s64 callchain_avg_cycles(struct callchain_node *cnode)
+{
+ struct callchain_list *chain;
+ s64 cycles = 0;
+
+ list_for_each_entry(chain, &cnode->val, list) {
+ if (chain->srcline && chain->branch_count)
+ cycles += chain->cycles_count / chain->branch_count;
+ }
+
+ return cycles;
+}
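callchain_avg_cycles() above adds up one per-entry average (cycles_count / branch_count) for every chain entry that has both a srcline and a non-zero branch count. A worked example with made-up numbers:

/*
 * entry A: cycles_count = 300, branch_count = 3  ->  300 / 3 = 100
 * entry B: cycles_count = 100, branch_count = 2  ->  100 / 2 =  50
 * entry C: srcline == NULL or branch_count == 0  ->  skipped
 * returned value: 100 + 50 = 150
 */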
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 706bb7bbe1e1..5824134f983b 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -13,6 +13,7 @@ struct ip_callchain;
struct map;
struct perf_sample;
struct thread;
+struct hists;
#define HELP_PAD "\t\t\t\t"
@@ -143,6 +144,9 @@ struct callchain_cursor_node {
u64 ip;
struct map_symbol ms;
const char *srcline;
+ /* Indicate valid cursor node for LBR stitch */
+ bool valid;
+
bool branch;
struct branch_flags branch_flags;
u64 branch_from;
@@ -151,6 +155,11 @@ struct callchain_cursor_node {
struct callchain_cursor_node *next;
};
+struct stitch_list {
+ struct list_head node;
+ struct callchain_cursor_node cursor;
+};
+
struct callchain_cursor {
u64 nr;
struct callchain_cursor_node *first;
@@ -289,4 +298,13 @@ int callchain_branch_counts(struct callchain_root *root,
u64 *branch_count, u64 *predicted_count,
u64 *abort_count, u64 *cycles_count);
+void callchain_param_setup(u64 sample_type);
+
+bool callchain_cnode_matched(struct callchain_node *base_cnode,
+ struct callchain_node *pair_cnode);
+
+u64 callchain_total_hits(struct hists *hists);
+
+s64 callchain_avg_cycles(struct callchain_node *cnode);
+
#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cap.h b/tools/perf/util/cap.h
index 051dc590ceee..ae52878c0b2e 100644
--- a/tools/perf/util/cap.h
+++ b/tools/perf/util/cap.h
@@ -29,4 +29,8 @@ static inline bool perf_cap__capable(int cap __maybe_unused)
#define CAP_SYSLOG 34
#endif
+#ifndef CAP_PERFMON
+#define CAP_PERFMON 38
+#endif
+
#endif /* __PERF_CAP_H */
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index b73fb7823048..b81324a13a2b 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -3,6 +3,9 @@
#include "evsel.h"
#include "cgroup.h"
#include "evlist.h"
+#include "rblist.h"
+#include "metricgroup.h"
+#include "stat.h"
#include <linux/zalloc.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -48,7 +51,7 @@ static struct cgroup *evlist__find_cgroup(struct evlist *evlist, const char *str
return NULL;
}
-static struct cgroup *cgroup__new(const char *name)
+static struct cgroup *cgroup__new(const char *name, bool do_open)
{
struct cgroup *cgroup = zalloc(sizeof(*cgroup));
@@ -58,9 +61,14 @@ static struct cgroup *cgroup__new(const char *name)
cgroup->name = strdup(name);
if (!cgroup->name)
goto out_err;
- cgroup->fd = open_cgroup(name);
- if (cgroup->fd == -1)
- goto out_free_name;
+
+ if (do_open) {
+ cgroup->fd = open_cgroup(name);
+ if (cgroup->fd == -1)
+ goto out_free_name;
+ } else {
+ cgroup->fd = -1;
+ }
}
return cgroup;
@@ -76,7 +84,7 @@ struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name)
{
struct cgroup *cgroup = evlist__find_cgroup(evlist, name);
- return cgroup ?: cgroup__new(name);
+ return cgroup ?: cgroup__new(name, true);
}
static int add_cgroup(struct evlist *evlist, const char *str)
@@ -107,7 +115,8 @@ found:
static void cgroup__delete(struct cgroup *cgroup)
{
- close(cgroup->fd);
+ if (cgroup->fd >= 0)
+ close(cgroup->fd);
zfree(&cgroup->name);
free(cgroup);
}
@@ -192,6 +201,103 @@ int parse_cgroups(const struct option *opt, const char *str,
return 0;
}
+int evlist__expand_cgroup(struct evlist *evlist, const char *str,
+ struct rblist *metric_events, bool open_cgroup)
+{
+ struct evlist *orig_list, *tmp_list;
+ struct evsel *pos, *evsel, *leader;
+ struct rblist orig_metric_events;
+ struct cgroup *cgrp = NULL;
+ const char *p, *e, *eos = str + strlen(str);
+ int ret = -1;
+
+ if (evlist->core.nr_entries == 0) {
+ fprintf(stderr, "must define events before cgroups\n");
+ return -EINVAL;
+ }
+
+ orig_list = evlist__new();
+ tmp_list = evlist__new();
+ if (orig_list == NULL || tmp_list == NULL) {
+ fprintf(stderr, "memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ /* save original events and init evlist */
+ perf_evlist__splice_list_tail(orig_list, &evlist->core.entries);
+ evlist->core.nr_entries = 0;
+
+ if (metric_events) {
+ orig_metric_events = *metric_events;
+ rblist__init(metric_events);
+ } else {
+ rblist__init(&orig_metric_events);
+ }
+
+ for (;;) {
+ p = strchr(str, ',');
+ e = p ? p : eos;
+
+ /* allow empty cgroups, i.e., skip */
+ if (e - str) {
+ /* termination added */
+ char *name = strndup(str, e - str);
+ if (!name)
+ goto out_err;
+
+ cgrp = cgroup__new(name, open_cgroup);
+ free(name);
+ if (cgrp == NULL)
+ goto out_err;
+ } else {
+ cgrp = NULL;
+ }
+
+ leader = NULL;
+ evlist__for_each_entry(orig_list, pos) {
+ evsel = evsel__clone(pos);
+ if (evsel == NULL)
+ goto out_err;
+
+ cgroup__put(evsel->cgrp);
+ evsel->cgrp = cgroup__get(cgrp);
+
+ if (evsel__is_group_leader(pos))
+ leader = evsel;
+ evsel->leader = leader;
+
+ evlist__add(tmp_list, evsel);
+ }
+ /* cgroup__new() has a refcount, release it here */
+ cgroup__put(cgrp);
+ nr_cgroups++;
+
+ if (metric_events) {
+ perf_stat__collect_metric_expr(tmp_list);
+ if (metricgroup__copy_metric_events(tmp_list, cgrp,
+ metric_events,
+ &orig_metric_events) < 0)
+ break;
+ }
+
+ perf_evlist__splice_list_tail(evlist, &tmp_list->core.entries);
+ tmp_list->core.nr_entries = 0;
+
+ if (!p) {
+ ret = 0;
+ break;
+ }
+ str = p+1;
+ }
+
+out_err:
+ evlist__delete(orig_list);
+ evlist__delete(tmp_list);
+ rblist__exit(&orig_metric_events);
+
+ return ret;
+}
+
static struct cgroup *__cgroup__findnew(struct rb_root *root, uint64_t id,
bool create, const char *path)
{
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index e98d5975fe55..162906f3412a 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -22,8 +22,11 @@ struct cgroup *cgroup__get(struct cgroup *cgroup);
void cgroup__put(struct cgroup *cgroup);
struct evlist;
+struct rblist;
struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name);
+int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups,
+ struct rblist *metric_events, bool open_cgroup);
void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup);
diff --git a/tools/perf/util/clockid.c b/tools/perf/util/clockid.c
new file mode 100644
index 000000000000..74365a5d99c1
--- /dev/null
+++ b/tools/perf/util/clockid.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <subcmd/parse-options.h>
+#include <stdio.h>
+#include <time.h>
+#include <strings.h>
+#include <linux/time64.h>
+#include "debug.h"
+#include "clockid.h"
+#include "record.h"
+
+struct clockid_map {
+ const char *name;
+ int clockid;
+};
+
+#define CLOCKID_MAP(n, c) \
+ { .name = n, .clockid = (c), }
+
+#define CLOCKID_END { .name = NULL, }
+
+
+/*
+ * Add the missing ones, we need to build on many distros...
+ */
+#ifndef CLOCK_MONOTONIC_RAW
+#define CLOCK_MONOTONIC_RAW 4
+#endif
+#ifndef CLOCK_BOOTTIME
+#define CLOCK_BOOTTIME 7
+#endif
+#ifndef CLOCK_TAI
+#define CLOCK_TAI 11
+#endif
+
+static const struct clockid_map clockids[] = {
+ /* available for all events, NMI safe */
+ CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
+ CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
+
+ /* available for some events */
+ CLOCKID_MAP("realtime", CLOCK_REALTIME),
+ CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
+ CLOCKID_MAP("tai", CLOCK_TAI),
+
+ /* available for the lazy */
+ CLOCKID_MAP("mono", CLOCK_MONOTONIC),
+ CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
+ CLOCKID_MAP("real", CLOCK_REALTIME),
+ CLOCKID_MAP("boot", CLOCK_BOOTTIME),
+
+ CLOCKID_END,
+};
+
+static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
+{
+ struct timespec res;
+
+ *res_ns = 0;
+ if (!clock_getres(clk_id, &res))
+ *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
+ else
+ pr_warning("WARNING: Failed to determine specified clock resolution.\n");
+
+ return 0;
+}
+
+int parse_clockid(const struct option *opt, const char *str, int unset)
+{
+ struct record_opts *opts = (struct record_opts *)opt->value;
+ const struct clockid_map *cm;
+ const char *ostr = str;
+
+ if (unset) {
+ opts->use_clockid = 0;
+ return 0;
+ }
+
+ /* no arg passed */
+ if (!str)
+ return 0;
+
+ /* no setting it twice */
+ if (opts->use_clockid)
+ return -1;
+
+ opts->use_clockid = true;
+
+ /* if it's a number, we're done */
+ if (sscanf(str, "%d", &opts->clockid) == 1)
+ return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
+
+ /* allow a "CLOCK_" prefix to the name */
+ if (!strncasecmp(str, "CLOCK_", 6))
+ str += 6;
+
+ for (cm = clockids; cm->name; cm++) {
+ if (!strcasecmp(str, cm->name)) {
+ opts->clockid = cm->clockid;
+ return get_clockid_res(opts->clockid,
+ &opts->clockid_res_ns);
+ }
+ }
+
+ opts->use_clockid = false;
+ ui__warning("unknown clockid %s, check man page\n", ostr);
+ return -1;
+}
+
+const char *clockid_name(clockid_t clk_id)
+{
+ const struct clockid_map *cm;
+
+ for (cm = clockids; cm->name; cm++) {
+ if (cm->clockid == clk_id)
+ return cm->name;
+ }
+ return "(not found)";
+}
diff --git a/tools/perf/util/clockid.h b/tools/perf/util/clockid.h
new file mode 100644
index 000000000000..9b49b4711c76
--- /dev/null
+++ b/tools/perf/util/clockid.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __PERF_CLOCKID_H
+#define __PERF_CLOCKID_H
+
+struct option;
+int parse_clockid(const struct option *opt, const char *str, int unset);
+
+const char *clockid_name(clockid_t clk_id);
+
+#endif
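A hedged sketch of the new clockid helpers, not part of the patch; it assumes util/clockid.h (added above) and <time.h> are included. clockid_name() maps a clockid back to the canonical name in the clockids[] table, and unknown ids yield "(not found)":

static const char *monotonic_name_example(void)
{
	return clockid_name(CLOCK_MONOTONIC);	/* "monotonic" */
}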
diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c
index a12872f2856a..fa8248aadb59 100644
--- a/tools/perf/util/cloexec.c
+++ b/tools/perf/util/cloexec.c
@@ -28,7 +28,7 @@ int __weak sched_getcpu(void)
static int perf_flag_probe(void)
{
- /* use 'safest' configuration as used in perf_evsel__fallback() */
+ /* use 'safest' configuration as used in evsel__fallback() */
struct perf_event_attr attr = {
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_CPU_CLOCK,
@@ -65,7 +65,7 @@ static int perf_flag_probe(void)
return 1;
}
- WARN_ONCE(err != EINVAL && err != EBUSY,
+ WARN_ONCE(err != EINVAL && err != EBUSY && err != EACCES,
"perf_event_open(..., PERF_FLAG_FD_CLOEXEC) failed with unexpected error %d (%s)\n",
err, str_error_r(err, sbuf, sizeof(sbuf)));
@@ -83,7 +83,7 @@ static int perf_flag_probe(void)
if (fd >= 0)
close(fd);
- if (WARN_ONCE(fd < 0 && err != EBUSY,
+ if (WARN_ONCE(fd < 0 && err != EBUSY && err != EACCES,
"perf_event_open(..., 0) failed unexpectedly with error %d (%s)\n",
err, str_error_r(err, sbuf, sizeof(sbuf))))
return -1;
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index ef38eba56ed0..6969f82843ee 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -17,10 +17,10 @@
#include "util/event.h" /* proc_map_timeout */
#include "util/hist.h" /* perf_hist_config */
#include "util/llvm-utils.h" /* perf_llvm_config */
+#include "util/stat.h" /* perf_stat__set_big_num */
#include "build-id.h"
#include "debug.h"
#include "config.h"
-#include "debug.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <stdlib.h>
@@ -452,6 +452,15 @@ static int perf_ui_config(const char *var, const char *value)
return 0;
}
+static int perf_stat_config(const char *var, const char *value)
+{
+ if (!strcmp(var, "stat.big-num"))
+ perf_stat__set_big_num(perf_config_bool(var, value));
+
+ /* Add other config variables here. */
+ return 0;
+}
+
int perf_default_config(const char *var, const char *value,
void *dummy __maybe_unused)
{
@@ -473,11 +482,14 @@ int perf_default_config(const char *var, const char *value,
if (strstarts(var, "buildid."))
return perf_buildid_config(var, value);
+ if (strstarts(var, "stat."))
+ return perf_stat_config(var, value);
+
/* Add other config variables here. */
return 0;
}
-static int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
+int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
{
int ret;
FILE *f = fopen(filename, "r");
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index c10b66dde2f3..8c881e3a3ec3 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -26,6 +26,8 @@ struct perf_config_set {
extern const char *config_exclusive_filename;
typedef int (*config_fn_t)(const char *, const char *, void *);
+
+int perf_config_from_file(config_fn_t fn, const char *filename, void *data);
int perf_default_config(const char *, const char *, void *);
int perf_config(config_fn_t fn, void *);
int perf_config_int(int *dest, const char *, const char *);
diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
index f94e1a23dad6..582f3aeaf5e4 100644
--- a/tools/perf/util/counts.c
+++ b/tools/perf/util/counts.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <stdlib.h>
+#include <string.h>
#include "evsel.h"
#include "counts.h"
#include <linux/zalloc.h>
@@ -42,24 +43,25 @@ void perf_counts__delete(struct perf_counts *counts)
}
}
-static void perf_counts__reset(struct perf_counts *counts)
+void perf_counts__reset(struct perf_counts *counts)
{
xyarray__reset(counts->loaded);
xyarray__reset(counts->values);
+ memset(&counts->aggr, 0, sizeof(struct perf_counts_values));
}
-void perf_evsel__reset_counts(struct evsel *evsel)
+void evsel__reset_counts(struct evsel *evsel)
{
perf_counts__reset(evsel->counts);
}
-int perf_evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads)
+int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads)
{
evsel->counts = perf_counts__new(ncpus, nthreads);
return evsel->counts != NULL ? 0 : -ENOMEM;
}
-void perf_evsel__free_counts(struct evsel *evsel)
+void evsel__free_counts(struct evsel *evsel)
{
perf_counts__delete(evsel->counts);
evsel->counts = NULL;
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
index 92196df4945f..7ff36bf6d644 100644
--- a/tools/perf/util/counts.h
+++ b/tools/perf/util/counts.h
@@ -37,9 +37,10 @@ perf_counts__set_loaded(struct perf_counts *counts, int cpu, int thread, bool lo
struct perf_counts *perf_counts__new(int ncpus, int nthreads);
void perf_counts__delete(struct perf_counts *counts);
+void perf_counts__reset(struct perf_counts *counts);
-void perf_evsel__reset_counts(struct evsel *evsel);
-int perf_evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads);
-void perf_evsel__free_counts(struct evsel *evsel);
+void evsel__reset_counts(struct evsel *evsel);
+int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads);
+void evsel__free_counts(struct evsel *evsel);
#endif /* __PERF_COUNTS_H */
diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h
index 7bf6b811f715..6201c3790d86 100644
--- a/tools/perf/util/cputopo.h
+++ b/tools/perf/util/cputopo.h
@@ -22,7 +22,7 @@ struct numa_topology_node {
struct numa_topology {
u32 nr;
- struct numa_topology_node nodes[0];
+ struct numa_topology_node nodes[];
};
struct cpu_topology *cpu_topology__new(void);
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index cd92a99eb89d..cd007cc9c283 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -564,6 +564,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
resp = cs_etm_decoder__set_tid(etmq, packet_queue,
elem, trace_chan_id);
break;
+ /* Unused packet types */
+ case OCSD_GEN_TRC_ELEM_I_RANGE_NOPATH:
case OCSD_GEN_TRC_ELEM_ADDR_NACC:
case OCSD_GEN_TRC_ELEM_CYCLE_COUNT:
case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN:
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 62d2f9b9ce1b..a2a369e2fbb6 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -94,6 +94,9 @@ struct cs_etm_queue {
struct cs_etm_traceid_queue **traceid_queues;
};
+/* RB tree for quick conversion between traceID and metadata pointers */
+static struct intlist *traceid_list;
+
static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
@@ -631,6 +634,16 @@ static void cs_etm__free(struct perf_session *session)
zfree(&aux);
}
+static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+
+ return evsel->core.attr.type == aux->pmu_type;
+}
+
static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
{
struct machine *machine;
@@ -1331,8 +1344,15 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
}
- if (etm->synth_opts.last_branch)
+ if (etm->synth_opts.last_branch) {
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ /*
+ * We don't use the hardware index, but the sample generation
+ * code uses the new format branch_stack with this field,
+ * so the event attributes must indicate that it's present.
+ */
+ attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+ }
if (etm->synth_opts.instructions) {
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
@@ -2618,6 +2638,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
etm->auxtrace.flush_events = cs_etm__flush_events;
etm->auxtrace.free_events = cs_etm__free_events;
etm->auxtrace.free = cs_etm__free;
+ etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
session->auxtrace = &etm->auxtrace;
etm->unknown_thread = thread__new(999999999, 999999999);
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 650ecc2a6349..4ad925d6d799 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -114,9 +114,6 @@ enum cs_etm_isa {
CS_ETM_ISA_T32,
};
-/* RB tree for quick conversion between traceID and metadata pointers */
-struct intlist *traceid_list;
-
struct cs_etm_queue;
struct cs_etm_packet {
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index dbc772bfb04e..27c5fef9ad54 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -31,6 +31,9 @@
#include "config.h"
#include <linux/ctype.h>
#include <linux/err.h>
+#include <linux/time64.h>
+#include "util.h"
+#include "clockid.h"
#define pr_N(n, fmt, ...) \
eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
@@ -835,7 +838,7 @@ static int process_sample_event(struct perf_tool *tool,
return -1;
}
- if (perf_evsel__is_bpf_output(evsel)) {
+ if (evsel__is_bpf_output(evsel)) {
ret = add_bpf_output_values(event_class, event, sample);
if (ret)
return -1;
@@ -1155,7 +1158,7 @@ static int add_event(struct ctf_writer *cw, struct evsel *evsel)
{
struct bt_ctf_event_class *event_class;
struct evsel_priv *priv;
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
int ret;
pr("Adding event '%s' (type %d)\n", name, evsel->core.attr.type);
@@ -1174,7 +1177,7 @@ static int add_event(struct ctf_writer *cw, struct evsel *evsel)
goto err;
}
- if (perf_evsel__is_bpf_output(evsel)) {
+ if (evsel__is_bpf_output(evsel)) {
ret = add_bpf_output_types(cw, event_class);
if (ret)
goto err;
@@ -1381,11 +1384,26 @@ do { \
return 0;
}
-static int ctf_writer__setup_clock(struct ctf_writer *cw)
+static int ctf_writer__setup_clock(struct ctf_writer *cw,
+ struct perf_session *session,
+ bool tod)
{
struct bt_ctf_clock *clock = cw->clock;
+ const char *desc = "perf clock";
+ int64_t offset = 0;
- bt_ctf_clock_set_description(clock, "perf clock");
+ if (tod) {
+ struct perf_env *env = &session->header.env;
+
+ if (!env->clock.enabled) {
+ pr_err("Can't provide --tod time, missing clock data. "
+ "Please record with -k/--clockid option.\n");
+ return -1;
+ }
+
+ desc = clockid_name(env->clock.clockid);
+ offset = env->clock.tod_ns - env->clock.clockid_ns;
+ }
#define SET(__n, __v) \
do { \
@@ -1394,8 +1412,8 @@ do { \
} while (0)
SET(frequency, 1000000000);
- SET(offset_s, 0);
- SET(offset, 0);
+ SET(offset, offset);
+ SET(description, desc);
SET(precision, 10);
SET(is_absolute, 0);
@@ -1481,7 +1499,8 @@ static void ctf_writer__cleanup(struct ctf_writer *cw)
memset(cw, 0, sizeof(*cw));
}
-static int ctf_writer__init(struct ctf_writer *cw, const char *path)
+static int ctf_writer__init(struct ctf_writer *cw, const char *path,
+ struct perf_session *session, bool tod)
{
struct bt_ctf_writer *writer;
struct bt_ctf_stream_class *stream_class;
@@ -1505,7 +1524,7 @@ static int ctf_writer__init(struct ctf_writer *cw, const char *path)
cw->clock = clock;
- if (ctf_writer__setup_clock(cw)) {
+ if (ctf_writer__setup_clock(cw, session, tod)) {
pr("Failed to setup CTF clock.\n");
goto err_cleanup;
}
@@ -1613,17 +1632,15 @@ int bt_convert__perf2ctf(const char *input, const char *path,
if (err)
return err;
- /* CTF writer */
- if (ctf_writer__init(cw, path))
- return -1;
-
err = -1;
/* perf.data session */
session = perf_session__new(&data, 0, &c.tool);
- if (IS_ERR(session)) {
- err = PTR_ERR(session);
- goto free_writer;
- }
+ if (IS_ERR(session))
+ return PTR_ERR(session);
+
+ /* CTF writer */
+ if (ctf_writer__init(cw, path, session, opts->tod))
+ goto free_session;
if (c.queue_size) {
ordered_events__set_alloc_size(&session->ordered_events,
@@ -1632,17 +1649,17 @@ int bt_convert__perf2ctf(const char *input, const char *path,
/* CTF writer env/clock setup */
if (ctf_writer__setup_env(cw, session))
- goto free_session;
+ goto free_writer;
/* CTF events setup */
if (setup_events(cw, session))
- goto free_session;
+ goto free_writer;
if (opts->all && setup_non_sample_events(cw, session))
- goto free_session;
+ goto free_writer;
if (setup_streams(cw, session))
- goto free_session;
+ goto free_writer;
err = perf_session__process_events(session);
if (!err)
@@ -1670,10 +1687,10 @@ int bt_convert__perf2ctf(const char *input, const char *path,
return err;
-free_session:
- perf_session__delete(session);
free_writer:
ctf_writer__cleanup(cw);
+free_session:
+ perf_session__delete(session);
pr_err("Error during conversion setup.\n");
return err;
}
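
As an aside on the --tod change above: ctf_writer__setup_clock() now stores offset = tod_ns - clockid_ns on the CTF clock (frequency 1 GHz), so perf-clock timestamps can be read back as wall-clock nanoseconds. A minimal standalone sketch of that arithmetic follows; the values are made up and nothing below is part of the patch.

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	/* Illustrative values: a TOD reading and a -k/--clockid reading taken
	 * at (roughly) the same instant when the session started. */
	uint64_t tod_ns      = 1600000000000000000ULL;
	uint64_t clockid_ns  = 5000000000ULL;
	/* PERF_SAMPLE_TIME of some event, taken on the same clockid */
	uint64_t sample_time = 5000123456ULL;

	/* Same expression as env->clock.tod_ns - env->clock.clockid_ns above */
	int64_t offset = (int64_t)(tod_ns - clockid_ns);

	/* With a 1 GHz CTF clock, adding the offset yields wall-clock ns */
	printf("wall clock ns: %" PRIu64 "\n", sample_time + (uint64_t)offset);
	return 0;
}
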
diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h
index af90b6076c06..feab5f114e37 100644
--- a/tools/perf/util/data-convert.h
+++ b/tools/perf/util/data-convert.h
@@ -5,6 +5,7 @@
struct perf_data_convert_opts {
bool force;
bool all;
+ bool tod;
};
#endif /* __DATA_CONVERT_H */
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index adb656745ecc..5cda5565777a 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -20,6 +20,7 @@
#include "target.h"
#include "ui/helpline.h"
#include "ui/ui.h"
+#include "util/parse-sublevel-options.h"
#include <linux/ctype.h>
@@ -173,65 +174,37 @@ void trace_event(union perf_event *event)
trace_event_printer, event);
}
-static struct debug_variable {
- const char *name;
- int *ptr;
-} debug_variables[] = {
- { .name = "verbose", .ptr = &verbose },
- { .name = "ordered-events", .ptr = &debug_ordered_events},
- { .name = "stderr", .ptr = &redirect_to_stderr},
- { .name = "data-convert", .ptr = &debug_data_convert },
- { .name = "perf-event-open", .ptr = &debug_peo_args },
+static struct sublevel_option debug_opts[] = {
+ { .name = "verbose", .value_ptr = &verbose },
+ { .name = "ordered-events", .value_ptr = &debug_ordered_events},
+ { .name = "stderr", .value_ptr = &redirect_to_stderr},
+ { .name = "data-convert", .value_ptr = &debug_data_convert },
+ { .name = "perf-event-open", .value_ptr = &debug_peo_args },
{ .name = NULL, }
};
int perf_debug_option(const char *str)
{
- struct debug_variable *var = &debug_variables[0];
- char *vstr, *s = strdup(str);
- int v = 1;
-
- vstr = strchr(s, '=');
- if (vstr)
- *vstr++ = 0;
-
- while (var->name) {
- if (!strcmp(s, var->name))
- break;
- var++;
- }
-
- if (!var->name) {
- pr_err("Unknown debug variable name '%s'\n", s);
- free(s);
- return -1;
- }
+ int ret;
- if (vstr) {
- v = atoi(vstr);
- /*
- * Allow only values in range (0, 10),
- * otherwise set 0.
- */
- v = (v < 0) || (v > 10) ? 0 : v;
- }
+ ret = perf_parse_sublevel_options(str, debug_opts);
+ if (ret)
+ return ret;
- if (quiet)
- v = -1;
+ /* Allow only verbose values in the range [0, 10]; otherwise set it to 0. */
+ verbose = (verbose < 0) || (verbose > 10) ? 0 : verbose;
- *var->ptr = v;
- free(s);
return 0;
}
int perf_quiet_option(void)
{
- struct debug_variable *var = &debug_variables[0];
+ struct sublevel_option *opt = &debug_opts[0];
/* disable all debug messages */
- while (var->name) {
- *var->ptr = -1;
- var++;
+ while (opt->name) {
+ *opt->value_ptr = -1;
+ opt++;
}
return 0;
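
perf_debug_option() now defers the "name[=value],name2,..." splitting to perf_parse_sublevel_options(), which is not part of this hunk. As a rough idea of the behaviour it is expected to provide (a missing "=value" defaults to 1, as in the removed code), here is a simplified standalone sketch; the parse_sublevels() helper below is hypothetical, not the real parser.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct sublevel_option {
	const char *name;
	int *value_ptr;
};

static int verbose, debug_peo_args;

static struct sublevel_option opts[] = {
	{ "verbose",         &verbose },
	{ "perf-event-open", &debug_peo_args },
	{ NULL, NULL },
};

/* Simplified stand-in: split on ',', then on '=', defaulting missing
 * values to 1. Not the real perf_parse_sublevel_options(). */
static int parse_sublevels(char *str, struct sublevel_option *o)
{
	char *tok;

	for (tok = strtok(str, ","); tok; tok = strtok(NULL, ",")) {
		char *eq = strchr(tok, '=');
		int val = 1, found = 0;

		if (eq) {
			*eq = '\0';
			val = atoi(eq + 1);
		}
		for (struct sublevel_option *p = o; p->name; p++) {
			if (!strcmp(tok, p->name)) {
				*p->value_ptr = val;
				found = 1;
			}
		}
		if (!found)
			return -1;
	}
	return 0;
}

int main(void)
{
	char arg[] = "verbose=2,perf-event-open";

	parse_sublevels(arg, opts);
	printf("verbose=%d peo=%d\n", verbose, debug_peo_args); /* 2 and 1 */
	return 0;
}
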
diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c
index 6fb7f34c0814..39c05200ed65 100644
--- a/tools/perf/util/demangle-java.c
+++ b/tools/perf/util/demangle-java.c
@@ -15,7 +15,7 @@ enum {
MODE_CLASS = 1,
MODE_FUNC = 2,
MODE_TYPE = 3,
- MODE_CTYPE = 3, /* class arg */
+ MODE_CTYPE = 4, /* class arg */
};
#define BASE_ENT(c, n) [c - 'A']=n
@@ -27,7 +27,7 @@ static const char *base_types['Z' - 'A' + 1] = {
BASE_ENT('I', "int" ),
BASE_ENT('J', "long" ),
BASE_ENT('S', "short" ),
- BASE_ENT('Z', "bool" ),
+ BASE_ENT('Z', "boolean" ),
};
/*
@@ -59,15 +59,16 @@ __demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int
switch (*q) {
case 'L':
- if (mode == MODE_PREFIX || mode == MODE_CTYPE) {
- if (mode == MODE_CTYPE) {
+ if (mode == MODE_PREFIX || mode == MODE_TYPE) {
+ if (mode == MODE_TYPE) {
if (narg)
rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
narg++;
}
- rlen += scnprintf(buf + rlen, maxlen - rlen, "class ");
if (mode == MODE_PREFIX)
mode = MODE_CLASS;
+ else
+ mode = MODE_CTYPE;
} else
buf[rlen++] = *q;
break;
@@ -120,7 +121,7 @@ __demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int
if (mode != MODE_CLASS && mode != MODE_CTYPE)
goto error;
/* safe because at least one other char to process */
- if (isalpha(*(q + 1)))
+ if (isalpha(*(q + 1)) && mode == MODE_CLASS)
rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
if (mode == MODE_CLASS)
mode = MODE_FUNC;
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 91f21239608b..55c11e854fe4 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -47,6 +47,7 @@ char dso__symtab_origin(const struct dso *dso)
[DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO] = 'D',
[DSO_BINARY_TYPE__FEDORA_DEBUGINFO] = 'f',
[DSO_BINARY_TYPE__UBUNTU_DEBUGINFO] = 'u',
+ [DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO] = 'x',
[DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO] = 'o',
[DSO_BINARY_TYPE__BUILDID_DEBUGINFO] = 'b',
[DSO_BINARY_TYPE__SYSTEM_PATH_DSO] = 'd',
@@ -129,6 +130,21 @@ int dso__read_binary_type_filename(const struct dso *dso,
snprintf(filename + len, size - len, "%s", dso->long_name);
break;
+ case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
+ /*
+ * Ubuntu can mix up /usr/lib with /lib, putting debuginfo in
+ * /usr/lib/debug/lib when it is expected to be in
+ * /usr/lib/debug/usr/lib
+ */
+ if (strlen(dso->long_name) < 9 ||
+ strncmp(dso->long_name, "/usr/lib/", 9)) {
+ ret = -1;
+ break;
+ }
+ len = __symbol__join_symfs(filename, size, "/usr/lib/debug");
+ snprintf(filename + len, size - len, "%s", dso->long_name + 4);
+ break;
+
case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
{
const char *last_slash;
@@ -156,9 +172,7 @@ int dso__read_binary_type_filename(const struct dso *dso,
break;
}
- build_id__sprintf(dso->build_id,
- sizeof(dso->build_id),
- build_id_hex);
+ build_id__sprintf(&dso->bid, build_id_hex);
len = __symbol__join_symfs(filename, size, "/usr/lib/debug/.build-id/");
snprintf(filename + len, size - len, "%.2s/%s.debug",
build_id_hex, build_id_hex + 2);
@@ -191,6 +205,8 @@ int dso__read_binary_type_filename(const struct dso *dso,
case DSO_BINARY_TYPE__GUEST_KALLSYMS:
case DSO_BINARY_TYPE__JAVA_JIT:
case DSO_BINARY_TYPE__BPF_PROG_INFO:
+ case DSO_BINARY_TYPE__BPF_IMAGE:
+ case DSO_BINARY_TYPE__OOL:
case DSO_BINARY_TYPE__NOT_FOUND:
ret = -1;
break;
@@ -881,6 +897,8 @@ static struct dso_cache *dso_cache__populate(struct dso *dso,
if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
*ret = bpf_read(dso, cache_offset, cache->data);
+ else if (dso->binary_type == DSO_BINARY_TYPE__OOL)
+ *ret = DSO__DATA_CACHE_SIZE;
else
*ret = file_read(dso, machine, cache_offset, cache->data);
@@ -1245,7 +1263,7 @@ struct dso *dso__new_id(const char *name, struct dso_id *id)
dso->has_build_id = 0;
dso->has_srcline = 1;
dso->a2l_fails = 1;
- dso->kernel = DSO_TYPE_USER;
+ dso->kernel = DSO_SPACE__USER;
dso->needs_swap = DSO_SWAP__UNSET;
dso->comp = COMP_ID__NONE;
RB_CLEAR_NODE(&dso->rb_node);
@@ -1308,15 +1326,16 @@ void dso__put(struct dso *dso)
dso__delete(dso);
}
-void dso__set_build_id(struct dso *dso, void *build_id)
+void dso__set_build_id(struct dso *dso, struct build_id *bid)
{
- memcpy(dso->build_id, build_id, sizeof(dso->build_id));
+ dso->bid = *bid;
dso->has_build_id = 1;
}
-bool dso__build_id_equal(const struct dso *dso, u8 *build_id)
+bool dso__build_id_equal(const struct dso *dso, struct build_id *bid)
{
- return memcmp(dso->build_id, build_id, sizeof(dso->build_id)) == 0;
+ return dso->bid.size == bid->size &&
+ memcmp(dso->bid.data, bid->data, dso->bid.size) == 0;
}
void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine)
@@ -1326,8 +1345,7 @@ void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine)
if (machine__is_default_guest(machine))
return;
sprintf(path, "%s/sys/kernel/notes", machine->root_dir);
- if (sysfs__read_build_id(path, dso->build_id,
- sizeof(dso->build_id)) == 0)
+ if (sysfs__read_build_id(path, &dso->bid) == 0)
dso->has_build_id = true;
}
@@ -1345,18 +1363,17 @@ int dso__kernel_module_get_build_id(struct dso *dso,
"%s/sys/module/%.*s/notes/.note.gnu.build-id",
root_dir, (int)strlen(name) - 1, name);
- if (sysfs__read_build_id(filename, dso->build_id,
- sizeof(dso->build_id)) == 0)
+ if (sysfs__read_build_id(filename, &dso->bid) == 0)
dso->has_build_id = true;
return 0;
}
-size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
+static size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
{
char sbuild_id[SBUILD_ID_SIZE];
- build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+ build_id__sprintf(&dso->bid, sbuild_id);
return fprintf(fp, "%s", sbuild_id);
}
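
For the new DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO lookup above, the interesting detail is the string arithmetic: keep the "/usr/lib/debug" prefix but append the DSO name with its leading "/usr" stripped (long_name + 4). A tiny standalone illustration with a made-up library path, ignoring the symfs handling done by __symbol__join_symfs():

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Illustrative DSO long_name; must start with "/usr/lib/" */
	const char *long_name = "/usr/lib/x86_64-linux-gnu/libc.so.6";
	char filename[256];

	if (strncmp(long_name, "/usr/lib/", 9))
		return 1;

	/* long_name + 4 skips "/usr", so the result lands under .../debug/lib/ */
	snprintf(filename, sizeof(filename), "%s%s", "/usr/lib/debug", long_name + 4);
	printf("%s\n", filename);
	/* -> /usr/lib/debug/lib/x86_64-linux-gnu/libc.so.6 */
	return 0;
}
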
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 2db64b79617a..d8cb4f5680a4 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -30,6 +30,7 @@ enum dso_binary_type {
DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO,
DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+ DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
DSO_BINARY_TYPE__GUEST_KMODULE,
@@ -40,13 +41,15 @@ enum dso_binary_type {
DSO_BINARY_TYPE__GUEST_KCORE,
DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
DSO_BINARY_TYPE__BPF_PROG_INFO,
+ DSO_BINARY_TYPE__BPF_IMAGE,
+ DSO_BINARY_TYPE__OOL,
DSO_BINARY_TYPE__NOT_FOUND,
};
-enum dso_kernel_type {
- DSO_TYPE_USER = 0,
- DSO_TYPE_KERNEL,
- DSO_TYPE_GUEST_KERNEL
+enum dso_space_type {
+ DSO_SPACE__USER = 0,
+ DSO_SPACE__KERNEL,
+ DSO_SPACE__KERNEL_GUEST
};
enum dso_swap_type {
@@ -136,7 +139,7 @@ struct dso_cache {
struct rb_node rb_node;
u64 offset;
u64 size;
- char data[0];
+ char data[];
};
struct auxtrace_cache;
@@ -157,7 +160,7 @@ struct dso {
void *a2l;
char *symsrc_filename;
unsigned int a2l_fails;
- enum dso_kernel_type kernel;
+ enum dso_space_type kernel;
enum dso_swap_type needs_swap;
enum dso_binary_type symtab_type;
enum dso_binary_type binary_type;
@@ -173,7 +176,7 @@ struct dso {
bool sorted_by_name;
bool loaded;
u8 rel;
- u8 build_id[BUILD_ID_SIZE];
+ struct build_id bid;
u64 text_offset;
const char *short_name;
const char *long_name;
@@ -208,7 +211,7 @@ struct dso {
struct nsinfo *nsinfo;
struct dso_id id;
refcount_t refcnt;
- char name[0];
+ char name[];
};
/* dso__for_each_symbol - iterate over the symbols of given type
@@ -257,8 +260,8 @@ bool dso__sorted_by_name(const struct dso *dso);
void dso__set_sorted_by_name(struct dso *dso);
void dso__sort_by_name(struct dso *dso);
-void dso__set_build_id(struct dso *dso, void *build_id);
-bool dso__build_id_equal(const struct dso *dso, u8 *build_id);
+void dso__set_build_id(struct dso *dso, struct build_id *bid);
+bool dso__build_id_equal(const struct dso *dso, struct build_id *bid);
void dso__read_running_kernel_build_id(struct dso *dso,
struct machine *machine);
int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir);
@@ -359,7 +362,6 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
void dso__reset_find_symbol_cache(struct dso *dso);
-size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp);
size_t dso__fprintf(struct dso *dso, FILE *fp);
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
index 939471731ea6..183a81d5b2f9 100644
--- a/tools/perf/util/dsos.c
+++ b/tools/perf/util/dsos.c
@@ -73,8 +73,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
continue;
}
nsinfo__mountns_enter(pos->nsinfo, &nsc);
- if (filename__read_build_id(pos->long_name, pos->build_id,
- sizeof(pos->build_id)) > 0) {
+ if (filename__read_build_id(pos->long_name, &pos->bid) > 0) {
have_build_id = true;
pos->has_build_id = true;
}
@@ -288,10 +287,12 @@ size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
size_t ret = 0;
list_for_each_entry(pos, head, node) {
+ char sbuild_id[SBUILD_ID_SIZE];
+
if (skip && skip(pos, parm))
continue;
- ret += dso__fprintf_buildid(pos, fp);
- ret += fprintf(fp, " %s\n", pos->long_name);
+ build_id__sprintf(&pos->bid, sbuild_id);
+ ret += fprintf(fp, "%-40s %s\n", sbuild_id, pos->long_name);
}
return ret;
}
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 7632075a8792..a12972652006 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -48,6 +48,7 @@ struct perf_env {
char *cpuid;
unsigned long long total_mem;
unsigned int msr_pmu_type;
+ unsigned int max_branches;
int nr_cmdline;
int nr_sibling_cores;
@@ -57,12 +58,14 @@ struct perf_env {
int nr_memory_nodes;
int nr_pmu_mappings;
int nr_groups;
+ int nr_cpu_pmu_caps;
char *cmdline;
const char **cmdline_argv;
char *sibling_cores;
char *sibling_dies;
char *sibling_threads;
char *pmu_mappings;
+ char *cpu_pmu_caps;
struct cpu_topology_map *cpu;
struct cpu_cache_level *caches;
int caches_cnt;
@@ -74,7 +77,6 @@ struct perf_env {
struct numa_node *numa_nodes;
struct memory_node *memory_nodes;
unsigned long long memory_bsize;
- u64 clockid_res_ns;
/*
* bpf_info_lock protects bpf rbtrees. This is needed because the
@@ -97,6 +99,19 @@ struct perf_env {
/* For fast cpu to numa node lookup via perf_env__numa_node */
int *numa_map;
int nr_numa_map;
+
+ /* For real clock time reference. */
+ struct {
+ u64 tod_ns;
+ u64 clockid_ns;
+ u64 clockid_res_ns;
+ int clockid;
+ /*
+ * enabled is valid in report mode and is true if the above
+ * values are set; it is set in process_clock_data().
+ */
+ bool enabled;
+ } clock;
};
enum perf_compress_type {
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index dc0e11214ae1..05616d4138a9 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -31,6 +31,7 @@
#include "stat.h"
#include "session.h"
#include "bpf-event.h"
+#include "print_binary.h"
#include "tool.h"
#include "../perf.h"
@@ -55,6 +56,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_KSYMBOL] = "KSYMBOL",
[PERF_RECORD_BPF_EVENT] = "BPF_EVENT",
[PERF_RECORD_CGROUP] = "CGROUP",
+ [PERF_RECORD_TEXT_POKE] = "TEXT_POKE",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
@@ -267,6 +269,14 @@ int perf_event__process_bpf(struct perf_tool *tool __maybe_unused,
return machine__process_bpf(machine, event, sample);
}
+int perf_event__process_text_poke(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_text_poke(machine, event, sample);
+}
+
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
{
return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 "]: %c %s\n",
@@ -388,7 +398,7 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp)
if (event->header.type == PERF_RECORD_SWITCH)
return fprintf(fp, " %s\n", in_out);
- return fprintf(fp, " %s %s pid/tid: %5u/%-5u\n",
+ return fprintf(fp, " %s %s pid/tid: %5d/%-5d\n",
in_out, out ? "next" : "prev",
event->context_switch.next_prev_pid,
event->context_switch.next_prev_tid);
@@ -413,7 +423,52 @@ size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp)
event->bpf.type, event->bpf.flags, event->bpf.id);
}
-size_t perf_event__fprintf(union perf_event *event, FILE *fp)
+static int text_poke_printer(enum binary_printer_ops op, unsigned int val,
+ void *extra, FILE *fp)
+{
+ bool old = *(bool *)extra;
+
+ switch ((int)op) {
+ case BINARY_PRINT_LINE_BEGIN:
+ return fprintf(fp, " %s bytes:", old ? "Old" : "New");
+ case BINARY_PRINT_NUM_DATA:
+ return fprintf(fp, " %02x", val);
+ case BINARY_PRINT_LINE_END:
+ return fprintf(fp, "\n");
+ default:
+ return 0;
+ }
+}
+
+size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine, FILE *fp)
+{
+ struct perf_record_text_poke_event *tp = &event->text_poke;
+ size_t ret;
+ bool old;
+
+ ret = fprintf(fp, " %" PRI_lx64 " ", tp->addr);
+ if (machine) {
+ struct addr_location al;
+
+ al.map = maps__find(&machine->kmaps, tp->addr);
+ if (al.map && map__load(al.map) >= 0) {
+ al.addr = al.map->map_ip(al.map, tp->addr);
+ al.sym = map__find_symbol(al.map, al.addr);
+ if (al.sym)
+ ret += symbol__fprintf_symname_offs(al.sym, &al, fp);
+ }
+ }
+ ret += fprintf(fp, " old len %u new len %u\n", tp->old_len, tp->new_len);
+ old = true;
+ ret += binary__fprintf(tp->bytes, tp->old_len, 16, text_poke_printer,
+ &old, fp);
+ old = false;
+ ret += binary__fprintf(tp->bytes + tp->old_len, tp->new_len, 16,
+ text_poke_printer, &old, fp);
+ return ret;
+}
+
+size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp)
{
size_t ret = fprintf(fp, "PERF_RECORD_%s",
perf_event__name(event->header.type));
@@ -457,6 +512,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
case PERF_RECORD_BPF_EVENT:
ret += perf_event__fprintf_bpf(event, fp);
break;
+ case PERF_RECORD_TEXT_POKE:
+ ret += perf_event__fprintf_text_poke(event, machine, fp);
+ break;
default:
ret += fprintf(fp, "\n");
}
@@ -626,7 +684,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
ret = strlist__has_entry(symbol_conf.sym_list,
al->sym->name);
}
- if (!(ret && al->sym)) {
+ if (!ret && al->sym) {
snprintf(al_addr_str, sz, "0x%"PRIx64,
al->map->unmap_ip(al->map, al->sym->start));
ret = strlist__has_entry(symbol_conf.sym_list,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index b8289f160f07..b828b99176f4 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -79,7 +79,7 @@ struct sample_read {
struct ip_callchain {
u64 nr;
- u64 ips[0];
+ u64 ips[];
};
struct branch_stack;
@@ -351,6 +351,10 @@ int perf_event__process_bpf(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
+int perf_event__process_text_poke(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
int perf_event__process(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -385,7 +389,8 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp);
-size_t perf_event__fprintf(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine, FILE *fp);
+size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp);
int kallsyms__get_function_start(const char *kallsyms_filename,
const char *symbol_name, u64 *addr);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 1548237b6558..8bdf3d2c907c 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -23,6 +23,7 @@
#include "asm/bug.h"
#include "bpf-event.h"
#include "util/string2.h"
+#include "util/perf_api_probe.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
@@ -62,6 +63,9 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
perf_evlist__set_maps(&evlist->core, cpus, threads);
evlist->workload.pid = -1;
evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
+ evlist->ctl_fd.fd = -1;
+ evlist->ctl_fd.ack = -1;
+ evlist->ctl_fd.pos = -1;
}
struct evlist *evlist__new(void)
@@ -78,7 +82,7 @@ struct evlist *perf_evlist__new_default(void)
{
struct evlist *evlist = evlist__new();
- if (evlist && perf_evlist__add_default(evlist)) {
+ if (evlist && evlist__add_default(evlist)) {
evlist__delete(evlist);
evlist = NULL;
}
@@ -90,7 +94,7 @@ struct evlist *perf_evlist__new_dummy(void)
{
struct evlist *evlist = evlist__new();
- if (evlist && perf_evlist__add_dummy(evlist)) {
+ if (evlist && evlist__add_dummy(evlist)) {
evlist__delete(evlist);
evlist = NULL;
}
@@ -118,7 +122,7 @@ static void perf_evlist__update_id_pos(struct evlist *evlist)
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel)
- perf_evsel__calc_id_pos(evsel);
+ evsel__calc_id_pos(evsel);
perf_evlist__set_id_pos(evlist);
}
@@ -230,9 +234,9 @@ void perf_evlist__set_leader(struct evlist *evlist)
}
}
-int __perf_evlist__add_default(struct evlist *evlist, bool precise)
+int __evlist__add_default(struct evlist *evlist, bool precise)
{
- struct evsel *evsel = perf_evsel__new_cycles(precise);
+ struct evsel *evsel = evsel__new_cycles(precise);
if (evsel == NULL)
return -ENOMEM;
@@ -241,14 +245,14 @@ int __perf_evlist__add_default(struct evlist *evlist, bool precise)
return 0;
}
-int perf_evlist__add_dummy(struct evlist *evlist)
+int evlist__add_dummy(struct evlist *evlist)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_DUMMY,
.size = sizeof(attr), /* to capture ABI version */
};
- struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries);
+ struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries);
if (evsel == NULL)
return -ENOMEM;
@@ -257,15 +261,14 @@ int perf_evlist__add_dummy(struct evlist *evlist)
return 0;
}
-static int evlist__add_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs, size_t nr_attrs)
+static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
{
struct evsel *evsel, *n;
LIST_HEAD(head);
size_t i;
for (i = 0; i < nr_attrs; i++) {
- evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
+ evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
if (evsel == NULL)
goto out_delete_partial_list;
list_add_tail(&evsel->core.node, &head);
@@ -281,8 +284,7 @@ out_delete_partial_list:
return -1;
}
-int __perf_evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs, size_t nr_attrs)
+int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
{
size_t i;
@@ -321,10 +323,9 @@ perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
return NULL;
}
-int perf_evlist__add_newtp(struct evlist *evlist,
- const char *sys, const char *name, void *handler)
+int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler)
{
- struct evsel *evsel = perf_evsel__newtp(sys, name);
+ struct evsel *evsel = evsel__newtp(sys, name);
if (IS_ERR(evsel))
return -1;
@@ -379,25 +380,36 @@ void evlist__disable(struct evlist *evlist)
{
struct evsel *pos;
struct affinity affinity;
- int cpu, i;
+ int cpu, i, imm = 0;
+ bool has_imm = false;
if (affinity__setup(&affinity) < 0)
return;
- evlist__for_each_cpu(evlist, i, cpu) {
- affinity__set(&affinity, cpu);
-
- evlist__for_each_entry(evlist, pos) {
- if (evsel__cpu_iter_skip(pos, cpu))
- continue;
- if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
- continue;
- evsel__disable_cpu(pos, pos->cpu_iter - 1);
+ /* Disable 'immediate' events last */
+ for (imm = 0; imm <= 1; imm++) {
+ evlist__for_each_cpu(evlist, i, cpu) {
+ affinity__set(&affinity, cpu);
+
+ evlist__for_each_entry(evlist, pos) {
+ if (evsel__cpu_iter_skip(pos, cpu))
+ continue;
+ if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ if (pos->immediate)
+ has_imm = true;
+ if (pos->immediate != imm)
+ continue;
+ evsel__disable_cpu(pos, pos->cpu_iter - 1);
+ }
}
+ if (!has_imm)
+ break;
}
+
affinity__cleanup(&affinity);
evlist__for_each_entry(evlist, pos) {
- if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
pos->disabled = true;
}
@@ -420,14 +432,14 @@ void evlist__enable(struct evlist *evlist)
evlist__for_each_entry(evlist, pos) {
if (evsel__cpu_iter_skip(pos, cpu))
continue;
- if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
evsel__enable_cpu(pos, pos->cpu_iter - 1);
}
}
affinity__cleanup(&affinity);
evlist__for_each_entry(evlist, pos) {
- if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
pos->disabled = false;
}
@@ -488,7 +500,7 @@ int perf_evlist__enable_event_idx(struct evlist *evlist,
int evlist__add_pollfd(struct evlist *evlist, int fd)
{
- return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN);
+ return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, fdarray_flag__default);
}
int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
@@ -528,7 +540,7 @@ struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
if (sid)
return container_of(sid->evsel, struct evsel, core);
- if (!perf_evlist__sample_id_all(evlist))
+ if (!evlist__sample_id_all(evlist))
return evlist__first(evlist);
return NULL;
@@ -934,6 +946,10 @@ int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
perf_evlist__set_maps(&evlist->core, cpus, threads);
+ /* as evlist now has references, put count here */
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
+
return 0;
out_delete_threads:
@@ -947,7 +963,7 @@ void __perf_evlist__set_sample_bit(struct evlist *evlist,
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel)
- __perf_evsel__set_sample_bit(evsel, bit);
+ __evsel__set_sample_bit(evsel, bit);
}
void __perf_evlist__reset_sample_bit(struct evlist *evlist,
@@ -956,7 +972,7 @@ void __perf_evlist__reset_sample_bit(struct evlist *evlist,
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel)
- __perf_evsel__reset_sample_bit(evsel, bit);
+ __evsel__reset_sample_bit(evsel, bit);
}
int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
@@ -994,7 +1010,7 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
continue;
- err = perf_evsel__set_filter(evsel, filter);
+ err = evsel__set_filter(evsel, filter);
if (err)
break;
}
@@ -1014,7 +1030,7 @@ int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter)
if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
continue;
- err = perf_evsel__append_tp_filter(evsel, filter);
+ err = evsel__append_tp_filter(evsel, filter);
if (err)
break;
}
@@ -1076,7 +1092,7 @@ int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
return perf_evlist__append_tp_filter_pids(evlist, 1, &pid);
}
-bool perf_evlist__valid_sample_type(struct evlist *evlist)
+bool evlist__valid_sample_type(struct evlist *evlist)
{
struct evsel *pos;
@@ -1095,7 +1111,7 @@ bool perf_evlist__valid_sample_type(struct evlist *evlist)
return true;
}
-u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
+u64 __evlist__combined_sample_type(struct evlist *evlist)
{
struct evsel *evsel;
@@ -1108,13 +1124,13 @@ u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
return evlist->combined_sample_type;
}
-u64 perf_evlist__combined_sample_type(struct evlist *evlist)
+u64 evlist__combined_sample_type(struct evlist *evlist)
{
evlist->combined_sample_type = 0;
- return __perf_evlist__combined_sample_type(evlist);
+ return __evlist__combined_sample_type(evlist);
}
-u64 perf_evlist__combined_branch_type(struct evlist *evlist)
+u64 evlist__combined_branch_type(struct evlist *evlist)
{
struct evsel *evsel;
u64 branch_type = 0;
@@ -1131,8 +1147,10 @@ bool perf_evlist__valid_read_format(struct evlist *evlist)
u64 sample_type = first->core.attr.sample_type;
evlist__for_each_entry(evlist, pos) {
- if (read_format != pos->core.attr.read_format)
- return false;
+ if (read_format != pos->core.attr.read_format) {
+ pr_debug("Read format differs %#" PRIx64 " vs %#" PRIx64 "\n",
+ read_format, (u64)pos->core.attr.read_format);
+ }
}
/* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */
@@ -1177,7 +1195,7 @@ out:
return size;
}
-bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
+bool evlist__valid_sample_id_all(struct evlist *evlist)
{
struct evsel *first = evlist__first(evlist), *pos = first;
@@ -1189,7 +1207,7 @@ bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
return true;
}
-bool perf_evlist__sample_id_all(struct evlist *evlist)
+bool evlist__sample_id_all(struct evlist *evlist)
{
struct evsel *first = evlist__first(evlist);
return first->core.attr.sample_id_all;
@@ -1259,11 +1277,12 @@ static int perf_evlist__create_syswide_maps(struct evlist *evlist)
goto out_put;
perf_evlist__set_maps(&evlist->core, cpus, threads);
-out:
- return err;
+
+ perf_thread_map__put(threads);
out_put:
perf_cpu_map__put(cpus);
- goto out;
+out:
+ return err;
}
int evlist__open(struct evlist *evlist)
@@ -1436,7 +1455,7 @@ int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
if (!evsel)
return -EFAULT;
- return perf_evsel__parse_sample(evsel, event, sample);
+ return evsel__parse_sample(evsel, event, sample);
}
int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
@@ -1447,11 +1466,10 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
if (!evsel)
return -EFAULT;
- return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
+ return evsel__parse_sample_timestamp(evsel, event, timestamp);
}
-int perf_evlist__strerror_open(struct evlist *evlist,
- int err, char *buf, size_t size)
+int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size)
{
int printed, value;
char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
@@ -1504,7 +1522,7 @@ out_default:
return 0;
}
-int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
+int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
{
char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0;
@@ -1552,6 +1570,18 @@ void perf_evlist__to_front(struct evlist *evlist,
list_splice(&move, &evlist->core.entries);
}
+struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->tracking)
+ return evsel;
+ }
+
+ return evlist__first(evlist);
+}
+
void perf_evlist__set_tracking_event(struct evlist *evlist,
struct evsel *tracking_evsel)
{
@@ -1702,132 +1732,251 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
return leader;
}
-int perf_evlist__add_sb_event(struct evlist **evlist,
- struct perf_event_attr *attr,
- perf_evsel__sb_cb_t cb,
- void *data)
+static int evlist__parse_control_fifo(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close)
{
- struct evsel *evsel;
- bool new_evlist = (*evlist) == NULL;
+ char *s, *p;
+ int ret = 0, fd;
- if (*evlist == NULL)
- *evlist = evlist__new();
- if (*evlist == NULL)
- return -1;
+ if (strncmp(str, "fifo:", 5))
+ return -EINVAL;
+
+ str += 5;
+ if (!*str || *str == ',')
+ return -EINVAL;
+
+ s = strdup(str);
+ if (!s)
+ return -ENOMEM;
+
+ p = strchr(s, ',');
+ if (p)
+ *p = '\0';
- if (!attr->sample_id_all) {
- pr_warning("enabling sample_id_all for all side band events\n");
- attr->sample_id_all = 1;
+ /*
+ * O_RDWR avoids POLLHUPs, which is necessary to allow the other
+ * end of a FIFO to be repeatedly opened and closed.
+ */
+ fd = open(s, O_RDWR | O_NONBLOCK | O_CLOEXEC);
+ if (fd < 0) {
+ pr_err("Failed to open '%s'\n", s);
+ ret = -errno;
+ goto out_free;
+ }
+ *ctl_fd = fd;
+ *ctl_fd_close = true;
+
+ if (p && *++p) {
+ /* O_RDWR | O_NONBLOCK means the other end need not be open */
+ fd = open(p, O_RDWR | O_NONBLOCK | O_CLOEXEC);
+ if (fd < 0) {
+ pr_err("Failed to open '%s'\n", p);
+ ret = -errno;
+ goto out_free;
+ }
+ *ctl_fd_ack = fd;
}
- evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
- if (!evsel)
- goto out_err;
+out_free:
+ free(s);
+ return ret;
+}
+
+int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close)
+{
+ char *comma = NULL, *endptr = NULL;
+
+ *ctl_fd_close = false;
+
+ if (strncmp(str, "fd:", 3))
+ return evlist__parse_control_fifo(str, ctl_fd, ctl_fd_ack, ctl_fd_close);
+
+ *ctl_fd = strtoul(&str[3], &endptr, 0);
+ if (endptr == &str[3])
+ return -EINVAL;
+
+ comma = strchr(str, ',');
+ if (comma) {
+ if (endptr != comma)
+ return -EINVAL;
+
+ *ctl_fd_ack = strtoul(comma + 1, &endptr, 0);
+ if (endptr == comma + 1 || *endptr != '\0')
+ return -EINVAL;
+ }
- evsel->side_band.cb = cb;
- evsel->side_band.data = data;
- evlist__add(*evlist, evsel);
return 0;
+}
-out_err:
- if (new_evlist) {
- evlist__delete(*evlist);
- *evlist = NULL;
+void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close)
+{
+ if (*ctl_fd_close) {
+ *ctl_fd_close = false;
+ close(ctl_fd);
+ if (ctl_fd_ack >= 0)
+ close(ctl_fd_ack);
}
- return -1;
}
-static void *perf_evlist__poll_thread(void *arg)
+int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack)
{
- struct evlist *evlist = arg;
- bool draining = false;
- int i, done = 0;
- /*
- * In order to read symbols from other namespaces perf to needs to call
- * setns(2). This isn't permitted if the struct_fs has multiple users.
- * unshare(2) the fs so that we may continue to setns into namespaces
- * that we're observing when, for instance, reading the build-ids at
- * the end of a 'perf record' session.
- */
- unshare(CLONE_FS);
+ if (fd == -1) {
+ pr_debug("Control descriptor is not initialized\n");
+ return 0;
+ }
- while (!done) {
- bool got_data = false;
+ evlist->ctl_fd.pos = perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN,
+ fdarray_flag__nonfilterable);
+ if (evlist->ctl_fd.pos < 0) {
+ evlist->ctl_fd.pos = -1;
+ pr_err("Failed to add ctl fd entry: %m\n");
+ return -1;
+ }
+
+ evlist->ctl_fd.fd = fd;
+ evlist->ctl_fd.ack = ack;
+
+ return 0;
+}
- if (evlist->thread.done)
- draining = true;
+bool evlist__ctlfd_initialized(struct evlist *evlist)
+{
+ return evlist->ctl_fd.pos >= 0;
+}
- if (!draining)
- evlist__poll(evlist, 1000);
+int evlist__finalize_ctlfd(struct evlist *evlist)
+{
+ struct pollfd *entries = evlist->core.pollfd.entries;
- for (i = 0; i < evlist->core.nr_mmaps; i++) {
- struct mmap *map = &evlist->mmap[i];
- union perf_event *event;
+ if (!evlist__ctlfd_initialized(evlist))
+ return 0;
- if (perf_mmap__read_init(&map->core))
- continue;
- while ((event = perf_mmap__read_event(&map->core)) != NULL) {
- struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
+ entries[evlist->ctl_fd.pos].fd = -1;
+ entries[evlist->ctl_fd.pos].events = 0;
+ entries[evlist->ctl_fd.pos].revents = 0;
- if (evsel && evsel->side_band.cb)
- evsel->side_band.cb(event, evsel->side_band.data);
- else
- pr_warning("cannot locate proper evsel for the side band event\n");
+ evlist->ctl_fd.pos = -1;
+ evlist->ctl_fd.ack = -1;
+ evlist->ctl_fd.fd = -1;
- perf_mmap__consume(&map->core);
- got_data = true;
- }
- perf_mmap__read_done(&map->core);
- }
+ return 0;
+}
- if (draining && !got_data)
- break;
+static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd,
+ char *cmd_data, size_t data_size)
+{
+ int err;
+ char c;
+ size_t bytes_read = 0;
+
+ *cmd = EVLIST_CTL_CMD_UNSUPPORTED;
+ memset(cmd_data, 0, data_size);
+ data_size--;
+
+ do {
+ err = read(evlist->ctl_fd.fd, &c, 1);
+ if (err > 0) {
+ if (c == '\n' || c == '\0')
+ break;
+ cmd_data[bytes_read++] = c;
+ if (bytes_read == data_size)
+ break;
+ continue;
+ } else if (err == -1) {
+ if (errno == EINTR)
+ continue;
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ err = 0;
+ else
+ pr_err("Failed to read from ctlfd %d: %m\n", evlist->ctl_fd.fd);
+ }
+ break;
+ } while (1);
+
+ pr_debug("Message from ctl_fd: \"%s%s\"\n", cmd_data,
+ bytes_read == data_size ? "" : c == '\n' ? "\\n" : "\\0");
+
+ if (bytes_read > 0) {
+ if (!strncmp(cmd_data, EVLIST_CTL_CMD_ENABLE_TAG,
+ (sizeof(EVLIST_CTL_CMD_ENABLE_TAG)-1))) {
+ *cmd = EVLIST_CTL_CMD_ENABLE;
+ } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_DISABLE_TAG,
+ (sizeof(EVLIST_CTL_CMD_DISABLE_TAG)-1))) {
+ *cmd = EVLIST_CTL_CMD_DISABLE;
+ } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_SNAPSHOT_TAG,
+ (sizeof(EVLIST_CTL_CMD_SNAPSHOT_TAG)-1))) {
+ *cmd = EVLIST_CTL_CMD_SNAPSHOT;
+ pr_debug("is snapshot\n");
+ }
}
- return NULL;
+
+ return bytes_read ? (int)bytes_read : err;
}
-int perf_evlist__start_sb_thread(struct evlist *evlist,
- struct target *target)
+int evlist__ctlfd_ack(struct evlist *evlist)
{
- struct evsel *counter;
+ int err;
- if (!evlist)
+ if (evlist->ctl_fd.ack == -1)
return 0;
- if (perf_evlist__create_maps(evlist, target))
- goto out_delete_evlist;
+ err = write(evlist->ctl_fd.ack, EVLIST_CTL_CMD_ACK_TAG,
+ sizeof(EVLIST_CTL_CMD_ACK_TAG));
+ if (err == -1)
+ pr_err("failed to write to ctl_ack_fd %d: %m\n", evlist->ctl_fd.ack);
- evlist__for_each_entry(evlist, counter) {
- if (evsel__open(counter, evlist->core.cpus,
- evlist->core.threads) < 0)
- goto out_delete_evlist;
- }
+ return err;
+}
- if (evlist__mmap(evlist, UINT_MAX))
- goto out_delete_evlist;
+int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd)
+{
+ int err = 0;
+ char cmd_data[EVLIST_CTL_CMD_MAX_LEN];
+ int ctlfd_pos = evlist->ctl_fd.pos;
+ struct pollfd *entries = evlist->core.pollfd.entries;
- evlist__for_each_entry(evlist, counter) {
- if (evsel__enable(counter))
- goto out_delete_evlist;
- }
+ if (!evlist__ctlfd_initialized(evlist) || !entries[ctlfd_pos].revents)
+ return 0;
- evlist->thread.done = 0;
- if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
- goto out_delete_evlist;
+ if (entries[ctlfd_pos].revents & POLLIN) {
+ err = evlist__ctlfd_recv(evlist, cmd, cmd_data,
+ EVLIST_CTL_CMD_MAX_LEN);
+ if (err > 0) {
+ switch (*cmd) {
+ case EVLIST_CTL_CMD_ENABLE:
+ evlist__enable(evlist);
+ break;
+ case EVLIST_CTL_CMD_DISABLE:
+ evlist__disable(evlist);
+ break;
+ case EVLIST_CTL_CMD_SNAPSHOT:
+ break;
+ case EVLIST_CTL_CMD_ACK:
+ case EVLIST_CTL_CMD_UNSUPPORTED:
+ default:
+ pr_debug("ctlfd: unsupported %d\n", *cmd);
+ break;
+ }
+ if (!(*cmd == EVLIST_CTL_CMD_ACK || *cmd == EVLIST_CTL_CMD_UNSUPPORTED ||
+ *cmd == EVLIST_CTL_CMD_SNAPSHOT))
+ evlist__ctlfd_ack(evlist);
+ }
+ }
- return 0;
+ if (entries[ctlfd_pos].revents & (POLLHUP | POLLERR))
+ evlist__finalize_ctlfd(evlist);
+ else
+ entries[ctlfd_pos].revents = 0;
-out_delete_evlist:
- evlist__delete(evlist);
- evlist = NULL;
- return -1;
+ return err;
}
-void perf_evlist__stop_sb_thread(struct evlist *evlist)
+struct evsel *evlist__find_evsel(struct evlist *evlist, int idx)
{
- if (!evlist)
- return;
- evlist->thread.done = 1;
- pthread_join(evlist->thread.th, NULL);
- evlist__delete(evlist);
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->idx == idx)
+ return evsel;
+ }
+ return NULL;
}
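
The control-descriptor helpers added to evlist.c are meant to be driven from a tool's main loop. Below is a hedged sketch of how they might be wired together; the function name, the spec string and the loop structure are illustrative and not taken from this patch, while the evlist__* calls are the ones defined above.

#include <stdbool.h>
#include "evlist.h"	/* tools/perf/util header providing the API above */

static int run_with_control(struct evlist *evlist, const char *ctl_spec,
			    volatile int *done)
{
	int ctl_fd = -1, ctl_fd_ack = -1;
	bool ctl_fd_close = false;
	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
	int ret = -1;

	/* ctl_spec is e.g. "fd:10,11" or "fifo:/tmp/perf.ctl,/tmp/perf.ack" */
	if (evlist__parse_control(ctl_spec, &ctl_fd, &ctl_fd_ack, &ctl_fd_close))
		goto out_close;
	if (evlist__initialize_ctlfd(evlist, ctl_fd, ctl_fd_ack))
		goto out_close;

	while (!*done) {
		/* Wait on the evlist pollfd array, which now includes ctl_fd */
		evlist__poll(evlist, 1000);
		/* Handles "enable"/"disable" and writes "ack\n" back */
		evlist__ctlfd_process(evlist, &cmd);
	}

	evlist__finalize_ctlfd(evlist);
	ret = 0;
out_close:
	evlist__close_control(ctl_fd, ctl_fd_ack, &ctl_fd_close);
	return ret;
}
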
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index f5bd5c386df1..e1a450322bc5 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -74,6 +74,11 @@ struct evlist {
pthread_t th;
volatile int done;
} thread;
+ struct {
+ int fd; /* control file descriptor */
+ int ack; /* ack file descriptor for control commands */
+ int pos; /* index at evlist core object to check signals */
+ } ctl_fd;
};
struct evsel_str_handler {
@@ -92,31 +97,31 @@ void evlist__delete(struct evlist *evlist);
void evlist__add(struct evlist *evlist, struct evsel *entry);
void evlist__remove(struct evlist *evlist, struct evsel *evsel);
-int __perf_evlist__add_default(struct evlist *evlist, bool precise);
+int __evlist__add_default(struct evlist *evlist, bool precise);
-static inline int perf_evlist__add_default(struct evlist *evlist)
+static inline int evlist__add_default(struct evlist *evlist)
{
- return __perf_evlist__add_default(evlist, true);
+ return __evlist__add_default(evlist, true);
}
-int __perf_evlist__add_default_attrs(struct evlist *evlist,
+int __evlist__add_default_attrs(struct evlist *evlist,
struct perf_event_attr *attrs, size_t nr_attrs);
-#define perf_evlist__add_default_attrs(evlist, array) \
- __perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
+#define evlist__add_default_attrs(evlist, array) \
+ __evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
-int perf_evlist__add_dummy(struct evlist *evlist);
+int evlist__add_dummy(struct evlist *evlist);
-int perf_evlist__add_sb_event(struct evlist **evlist,
+int perf_evlist__add_sb_event(struct evlist *evlist,
struct perf_event_attr *attr,
- perf_evsel__sb_cb_t cb,
+ evsel__sb_cb_t cb,
void *data);
+void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data);
int perf_evlist__start_sb_thread(struct evlist *evlist,
struct target *target);
void perf_evlist__stop_sb_thread(struct evlist *evlist);
-int perf_evlist__add_newtp(struct evlist *evlist,
- const char *sys, const char *name, void *handler);
+int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler);
int __evlist__set_tracepoints_handlers(struct evlist *evlist,
const struct evsel_str_handler *assocs,
@@ -173,10 +178,6 @@ void evlist__close(struct evlist *evlist);
struct callchain_param;
void perf_evlist__set_id_pos(struct evlist *evlist);
-bool perf_can_sample_identifier(void);
-bool perf_can_record_switch_events(void);
-bool perf_can_record_cpu_wide(void);
-bool perf_can_aux_sample(void);
void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
struct callchain_param *callchain);
int record_opts__config(struct record_opts *opts);
@@ -222,10 +223,10 @@ int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
void __perf_evlist__set_leader(struct list_head *list);
void perf_evlist__set_leader(struct evlist *evlist);
-u64 __perf_evlist__combined_sample_type(struct evlist *evlist);
-u64 perf_evlist__combined_sample_type(struct evlist *evlist);
-u64 perf_evlist__combined_branch_type(struct evlist *evlist);
-bool perf_evlist__sample_id_all(struct evlist *evlist);
+u64 __evlist__combined_sample_type(struct evlist *evlist);
+u64 evlist__combined_sample_type(struct evlist *evlist);
+u64 evlist__combined_branch_type(struct evlist *evlist);
+bool evlist__sample_id_all(struct evlist *evlist);
u16 perf_evlist__id_hdr_size(struct evlist *evlist);
int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
@@ -235,8 +236,8 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
union perf_event *event,
u64 *timestamp);
-bool perf_evlist__valid_sample_type(struct evlist *evlist);
-bool perf_evlist__valid_sample_id_all(struct evlist *evlist);
+bool evlist__valid_sample_type(struct evlist *evlist);
+bool evlist__valid_sample_id_all(struct evlist *evlist);
bool perf_evlist__valid_read_format(struct evlist *evlist);
void perf_evlist__splice_list_tail(struct evlist *evlist,
@@ -261,8 +262,8 @@ static inline struct evsel *evlist__last(struct evlist *evlist)
return container_of(evsel, struct evsel, core);
}
-int perf_evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size);
-int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size);
+int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size);
+int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size);
bool perf_evlist__can_select_event(struct evlist *evlist, const char *str);
void perf_evlist__to_front(struct evlist *evlist,
@@ -338,6 +339,7 @@ void perf_evlist__to_front(struct evlist *evlist,
evlist__cpu_iter_start(evlist); \
perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus)
+struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist);
void perf_evlist__set_tracking_event(struct evlist *evlist,
struct evsel *tracking_evsel);
@@ -358,4 +360,31 @@ void perf_evlist__force_leader(struct evlist *evlist);
struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist,
struct evsel *evsel,
bool close);
+#define EVLIST_CTL_CMD_ENABLE_TAG "enable"
+#define EVLIST_CTL_CMD_DISABLE_TAG "disable"
+#define EVLIST_CTL_CMD_ACK_TAG "ack\n"
+#define EVLIST_CTL_CMD_SNAPSHOT_TAG "snapshot"
+
+#define EVLIST_CTL_CMD_MAX_LEN 64
+
+enum evlist_ctl_cmd {
+ EVLIST_CTL_CMD_UNSUPPORTED = 0,
+ EVLIST_CTL_CMD_ENABLE,
+ EVLIST_CTL_CMD_DISABLE,
+ EVLIST_CTL_CMD_ACK,
+ EVLIST_CTL_CMD_SNAPSHOT,
+};
+
+int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close);
+void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close);
+int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack);
+int evlist__finalize_ctlfd(struct evlist *evlist);
+bool evlist__ctlfd_initialized(struct evlist *evlist);
+int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd);
+int evlist__ctlfd_ack(struct evlist *evlist);
+
+#define EVLIST_ENABLED_MSG "Events enabled\n"
+#define EVLIST_DISABLED_MSG "Events disabled\n"
+
+struct evsel *evlist__find_evsel(struct evlist *evlist, int idx);
#endif /* __PERF_EVLIST_H */
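
From the other end, a controlling process writes one of the command tags terminated by '\n' to the control FIFO and, for enable/disable, waits for "ack\n" on the ack FIFO. A self-contained sketch of that side; the FIFO paths are placeholders and assume something like "fifo:/tmp/perf.ctl,/tmp/perf.ack" was handed to the tool.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[8] = { 0 };
	int ctl = open("/tmp/perf.ctl", O_WRONLY);
	int ack = open("/tmp/perf.ack", O_RDONLY);

	if (ctl < 0 || ack < 0)
		return 1;

	/* EVLIST_CTL_CMD_ENABLE_TAG plus the '\n' terminator */
	if (write(ctl, "enable\n", 7) != 7)
		return 1;

	/* The tool answers with EVLIST_CTL_CMD_ACK_TAG ("ack\n") */
	if (read(ack, buf, sizeof(buf) - 1) <= 0)
		return 1;
	printf("got: %s", buf);

	close(ctl);
	close(ack);
	return 0;
}
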
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index eb880efbce16..1cad6051d8b0 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -56,14 +56,14 @@ struct perf_missing_features perf_missing_features;
static clockid_t clockid;
-static int perf_evsel__no_extra_init(struct evsel *evsel __maybe_unused)
+static int evsel__no_extra_init(struct evsel *evsel __maybe_unused)
{
return 0;
}
void __weak test_attr__ready(void) { }
-static void perf_evsel__no_extra_fini(struct evsel *evsel __maybe_unused)
+static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused)
{
}
@@ -73,13 +73,12 @@ static struct {
void (*fini)(struct evsel *evsel);
} perf_evsel__object = {
.size = sizeof(struct evsel),
- .init = perf_evsel__no_extra_init,
- .fini = perf_evsel__no_extra_fini,
+ .init = evsel__no_extra_init,
+ .fini = evsel__no_extra_fini,
};
-int perf_evsel__object_config(size_t object_size,
- int (*init)(struct evsel *evsel),
- void (*fini)(struct evsel *evsel))
+int evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel),
+ void (*fini)(struct evsel *evsel))
{
if (object_size == 0)
@@ -102,7 +101,7 @@ set_methods:
#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
-int __perf_evsel__sample_size(u64 sample_type)
+int __evsel__sample_size(u64 sample_type)
{
u64 mask = sample_type & PERF_SAMPLE_MASK;
int size = 0;
@@ -178,53 +177,53 @@ static int __perf_evsel__calc_is_pos(u64 sample_type)
return idx;
}
-void perf_evsel__calc_id_pos(struct evsel *evsel)
+void evsel__calc_id_pos(struct evsel *evsel)
{
evsel->id_pos = __perf_evsel__calc_id_pos(evsel->core.attr.sample_type);
evsel->is_pos = __perf_evsel__calc_is_pos(evsel->core.attr.sample_type);
}
-void __perf_evsel__set_sample_bit(struct evsel *evsel,
+void __evsel__set_sample_bit(struct evsel *evsel,
enum perf_event_sample_format bit)
{
if (!(evsel->core.attr.sample_type & bit)) {
evsel->core.attr.sample_type |= bit;
evsel->sample_size += sizeof(u64);
- perf_evsel__calc_id_pos(evsel);
+ evsel__calc_id_pos(evsel);
}
}
-void __perf_evsel__reset_sample_bit(struct evsel *evsel,
+void __evsel__reset_sample_bit(struct evsel *evsel,
enum perf_event_sample_format bit)
{
if (evsel->core.attr.sample_type & bit) {
evsel->core.attr.sample_type &= ~bit;
evsel->sample_size -= sizeof(u64);
- perf_evsel__calc_id_pos(evsel);
+ evsel__calc_id_pos(evsel);
}
}
-void perf_evsel__set_sample_id(struct evsel *evsel,
+void evsel__set_sample_id(struct evsel *evsel,
bool can_sample_identifier)
{
if (can_sample_identifier) {
- perf_evsel__reset_sample_bit(evsel, ID);
- perf_evsel__set_sample_bit(evsel, IDENTIFIER);
+ evsel__reset_sample_bit(evsel, ID);
+ evsel__set_sample_bit(evsel, IDENTIFIER);
} else {
- perf_evsel__set_sample_bit(evsel, ID);
+ evsel__set_sample_bit(evsel, ID);
}
evsel->core.attr.read_format |= PERF_FORMAT_ID;
}
/**
- * perf_evsel__is_function_event - Return whether given evsel is a function
+ * evsel__is_function_event - Return whether given evsel is a function
* trace event
*
* @evsel - evsel selector to be tested
*
* Return %true if event is function trace event
*/
-bool perf_evsel__is_function_event(struct evsel *evsel)
+bool evsel__is_function_event(struct evsel *evsel)
{
#define FUNCTION_EVENT "ftrace:function"
@@ -249,17 +248,18 @@ void evsel__init(struct evsel *evsel,
evsel->bpf_fd = -1;
INIT_LIST_HEAD(&evsel->config_terms);
perf_evsel__object.init(evsel);
- evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
- perf_evsel__calc_id_pos(evsel);
+ evsel->sample_size = __evsel__sample_size(attr->sample_type);
+ evsel__calc_id_pos(evsel);
evsel->cmdline_group_boundary = false;
evsel->metric_expr = NULL;
evsel->metric_name = NULL;
evsel->metric_events = NULL;
+ evsel->per_pkg_mask = NULL;
evsel->collect_stat = false;
evsel->pmu_name = NULL;
}
-struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
+struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
{
struct evsel *evsel = zalloc(perf_evsel__object.size);
@@ -267,13 +267,13 @@ struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
return NULL;
evsel__init(evsel, attr, idx);
- if (perf_evsel__is_bpf_output(evsel)) {
+ if (evsel__is_bpf_output(evsel)) {
evsel->core.attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
evsel->core.attr.sample_period = 1;
}
- if (perf_evsel__is_clock(evsel)) {
+ if (evsel__is_clock(evsel)) {
/*
* The evsel->unit points to static alias->unit
* so it's ok to use static string in here.
@@ -292,7 +292,7 @@ static bool perf_event_can_profile_kernel(void)
return perf_event_paranoid_check(1);
}
-struct evsel *perf_evsel__new_cycles(bool precise)
+struct evsel *evsel__new_cycles(bool precise)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
@@ -331,10 +331,114 @@ error_free:
goto out;
}
+static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
+{
+ struct evsel_config_term *pos, *tmp;
+
+ list_for_each_entry(pos, &src->config_terms, list) {
+ tmp = malloc(sizeof(*tmp));
+ if (tmp == NULL)
+ return -ENOMEM;
+
+ *tmp = *pos;
+ if (tmp->free_str) {
+ tmp->val.str = strdup(pos->val.str);
+ if (tmp->val.str == NULL) {
+ free(tmp);
+ return -ENOMEM;
+ }
+ }
+ list_add_tail(&tmp->list, &dst->config_terms);
+ }
+ return 0;
+}
+
+/**
+ * evsel__clone - create a new evsel copied from @orig
+ * @orig: original evsel
+ *
+ * The assumption is that @orig is neither configured nor opened yet,
+ * so we only care about the attributes that can be set during parsing.
+ */
+struct evsel *evsel__clone(struct evsel *orig)
+{
+ struct evsel *evsel;
+
+ BUG_ON(orig->core.fd);
+ BUG_ON(orig->counts);
+ BUG_ON(orig->priv);
+ BUG_ON(orig->per_pkg_mask);
+
+ /* cannot handle BPF objects for now */
+ if (orig->bpf_obj)
+ return NULL;
+
+ evsel = evsel__new(&orig->core.attr);
+ if (evsel == NULL)
+ return NULL;
+
+ evsel->core.cpus = perf_cpu_map__get(orig->core.cpus);
+ evsel->core.own_cpus = perf_cpu_map__get(orig->core.own_cpus);
+ evsel->core.threads = perf_thread_map__get(orig->core.threads);
+ evsel->core.nr_members = orig->core.nr_members;
+ evsel->core.system_wide = orig->core.system_wide;
+
+ if (orig->name) {
+ evsel->name = strdup(orig->name);
+ if (evsel->name == NULL)
+ goto out_err;
+ }
+ if (orig->group_name) {
+ evsel->group_name = strdup(orig->group_name);
+ if (evsel->group_name == NULL)
+ goto out_err;
+ }
+ if (orig->pmu_name) {
+ evsel->pmu_name = strdup(orig->pmu_name);
+ if (evsel->pmu_name == NULL)
+ goto out_err;
+ }
+ if (orig->filter) {
+ evsel->filter = strdup(orig->filter);
+ if (evsel->filter == NULL)
+ goto out_err;
+ }
+ evsel->cgrp = cgroup__get(orig->cgrp);
+ evsel->tp_format = orig->tp_format;
+ evsel->handler = orig->handler;
+ evsel->leader = orig->leader;
+
+ evsel->max_events = orig->max_events;
+ evsel->tool_event = orig->tool_event;
+ evsel->unit = orig->unit;
+ evsel->scale = orig->scale;
+ evsel->snapshot = orig->snapshot;
+ evsel->per_pkg = orig->per_pkg;
+ evsel->percore = orig->percore;
+ evsel->precise_max = orig->precise_max;
+ evsel->use_uncore_alias = orig->use_uncore_alias;
+ evsel->is_libpfm_event = orig->is_libpfm_event;
+
+ evsel->exclude_GH = orig->exclude_GH;
+ evsel->sample_read = orig->sample_read;
+ evsel->auto_merge_stats = orig->auto_merge_stats;
+ evsel->collect_stat = orig->collect_stat;
+ evsel->weak_group = orig->weak_group;
+
+ if (evsel__copy_config_terms(evsel, orig) < 0)
+ goto out_err;
+
+ return evsel;
+
+out_err:
+ evsel__delete(evsel);
+ return NULL;
+}
+
/*
* Returns pointer with encoded error via <linux/err.h> interface.
*/
-struct evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx)
+struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx)
{
struct evsel *evsel = zalloc(perf_evsel__object.size);
int err = -ENOMEM;
@@ -372,7 +476,7 @@ out_err:
return ERR_PTR(err);
}
-const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
+const char *evsel__hw_names[PERF_COUNT_HW_MAX] = {
"cycles",
"instructions",
"cache-references",
@@ -385,10 +489,10 @@ const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
"ref-cycles",
};
-static const char *__perf_evsel__hw_name(u64 config)
+static const char *__evsel__hw_name(u64 config)
{
- if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
- return perf_evsel__hw_names[config];
+ if (config < PERF_COUNT_HW_MAX && evsel__hw_names[config])
+ return evsel__hw_names[config];
return "unknown-hardware";
}
@@ -429,13 +533,13 @@ static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
return r;
}
-static int perf_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
{
- int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->core.attr.config));
+ int r = scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config));
return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}
-const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
+const char *evsel__sw_names[PERF_COUNT_SW_MAX] = {
"cpu-clock",
"task-clock",
"page-faults",
@@ -448,20 +552,20 @@ const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
"dummy",
};
-static const char *__perf_evsel__sw_name(u64 config)
+static const char *__evsel__sw_name(u64 config)
{
- if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
- return perf_evsel__sw_names[config];
+ if (config < PERF_COUNT_SW_MAX && evsel__sw_names[config])
+ return evsel__sw_names[config];
return "unknown-software";
}
-static int perf_evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
{
- int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->core.attr.config));
+ int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config));
return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}
-static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
+static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
{
int r;
@@ -479,15 +583,14 @@ static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
return r;
}
-static int perf_evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
{
struct perf_event_attr *attr = &evsel->core.attr;
- int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
+ int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}
-const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
- [PERF_EVSEL__MAX_ALIASES] = {
+const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
{ "L1-dcache", "l1-d", "l1d", "L1-data", },
{ "L1-icache", "l1-i", "l1i", "L1-instruction", },
{ "LLC", "L2", },
@@ -497,15 +600,13 @@ const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
{ "node", },
};
-const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_EVSEL__MAX_ALIASES] = {
+const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = {
{ "load", "loads", "read", },
{ "store", "stores", "write", },
{ "prefetch", "prefetches", "speculative-read", "speculative-load", },
};
-const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
- [PERF_EVSEL__MAX_ALIASES] = {
+const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = {
{ "refs", "Reference", "ops", "access", },
{ "misses", "miss", },
};
@@ -521,7 +622,7 @@ const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
* L1I : Read and prefetch only
* ITLB and BPU : Read-only
*/
-static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
+static unsigned long evsel__hw_cache_stat[C(MAX)] = {
[C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
[C(L1I)] = (CACHE_READ | CACHE_PREFETCH),
[C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
@@ -531,28 +632,27 @@ static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
[C(NODE)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
};
-bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
+bool evsel__is_cache_op_valid(u8 type, u8 op)
{
- if (perf_evsel__hw_cache_stat[type] & COP(op))
+ if (evsel__hw_cache_stat[type] & COP(op))
return true; /* valid */
else
return false; /* invalid */
}
-int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
- char *bf, size_t size)
+int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size)
{
if (result) {
- return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
- perf_evsel__hw_cache_op[op][0],
- perf_evsel__hw_cache_result[result][0]);
+ return scnprintf(bf, size, "%s-%s-%s", evsel__hw_cache[type][0],
+ evsel__hw_cache_op[op][0],
+ evsel__hw_cache_result[result][0]);
}
- return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0],
- perf_evsel__hw_cache_op[op][1]);
+ return scnprintf(bf, size, "%s-%s", evsel__hw_cache[type][0],
+ evsel__hw_cache_op[op][1]);
}
-static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
+static int __evsel__hw_cache_name(u64 config, char *bf, size_t size)
{
u8 op, result, type = (config >> 0) & 0xff;
const char *err = "unknown-ext-hardware-cache-type";
@@ -571,33 +671,33 @@ static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
goto out_err;
err = "invalid-cache";
- if (!perf_evsel__is_cache_op_valid(type, op))
+ if (!evsel__is_cache_op_valid(type, op))
goto out_err;
- return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
+ return __evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
out_err:
return scnprintf(bf, size, "%s", err);
}
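
For context, __evsel__hw_cache_name() relies on the PERF_TYPE_HW_CACHE config layout defined by the perf_event ABI: cache id in bits 0-7, operation id in bits 8-15 and result id in bits 16-23. A minimal standalone sketch of that encoding and decoding, independent of the perf tool sources:

    #include <linux/perf_event.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* LLC read misses, encoded as the ABI documents for PERF_TYPE_HW_CACHE */
        uint64_t config = PERF_COUNT_HW_CACHE_LL |
                          (PERF_COUNT_HW_CACHE_OP_READ << 8) |
                          (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);

        /* the same unpacking __evsel__hw_cache_name() performs */
        uint8_t type   = config & 0xff;
        uint8_t op     = (config >> 8) & 0xff;
        uint8_t result = (config >> 16) & 0xff;

        printf("type=%u op=%u result=%u\n", type, op, result);
        return 0;
    }
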
-static int perf_evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
{
- int ret = __perf_evsel__hw_cache_name(evsel->core.attr.config, bf, size);
+ int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size);
return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
}
-static int perf_evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
{
int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config);
return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
}
-static int perf_evsel__tool_name(char *bf, size_t size)
+static int evsel__tool_name(char *bf, size_t size)
{
int ret = scnprintf(bf, size, "duration_time");
return ret;
}
-const char *perf_evsel__name(struct evsel *evsel)
+const char *evsel__name(struct evsel *evsel)
{
char bf[128];
@@ -609,22 +709,22 @@ const char *perf_evsel__name(struct evsel *evsel)
switch (evsel->core.attr.type) {
case PERF_TYPE_RAW:
- perf_evsel__raw_name(evsel, bf, sizeof(bf));
+ evsel__raw_name(evsel, bf, sizeof(bf));
break;
case PERF_TYPE_HARDWARE:
- perf_evsel__hw_name(evsel, bf, sizeof(bf));
+ evsel__hw_name(evsel, bf, sizeof(bf));
break;
case PERF_TYPE_HW_CACHE:
- perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));
+ evsel__hw_cache_name(evsel, bf, sizeof(bf));
break;
case PERF_TYPE_SOFTWARE:
if (evsel->tool_event)
- perf_evsel__tool_name(bf, sizeof(bf));
+ evsel__tool_name(bf, sizeof(bf));
else
- perf_evsel__sw_name(evsel, bf, sizeof(bf));
+ evsel__sw_name(evsel, bf, sizeof(bf));
break;
case PERF_TYPE_TRACEPOINT:
@@ -632,7 +732,7 @@ const char *perf_evsel__name(struct evsel *evsel)
break;
case PERF_TYPE_BREAKPOINT:
- perf_evsel__bp_name(evsel, bf, sizeof(bf));
+ evsel__bp_name(evsel, bf, sizeof(bf));
break;
default:
@@ -649,7 +749,7 @@ out_unknown:
return "unknown";
}
-const char *perf_evsel__group_name(struct evsel *evsel)
+const char *evsel__group_name(struct evsel *evsel)
{
return evsel->group_name ?: "anon group";
}
@@ -664,21 +764,19 @@ const char *perf_evsel__group_name(struct evsel *evsel)
* For record -e 'cycles,instructions' and report --group
* 'cycles:u, instructions:u'
*/
-int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
+int evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
{
int ret = 0;
struct evsel *pos;
- const char *group_name = perf_evsel__group_name(evsel);
+ const char *group_name = evsel__group_name(evsel);
if (!evsel->forced_leader)
ret = scnprintf(buf, size, "%s { ", group_name);
- ret += scnprintf(buf + ret, size - ret, "%s",
- perf_evsel__name(evsel));
+ ret += scnprintf(buf + ret, size - ret, "%s", evsel__name(evsel));
for_each_group_member(pos, evsel)
- ret += scnprintf(buf + ret, size - ret, ", %s",
- perf_evsel__name(pos));
+ ret += scnprintf(buf + ret, size - ret, ", %s", evsel__name(pos));
if (!evsel->forced_leader)
ret += scnprintf(buf + ret, size - ret, " }");
@@ -686,14 +784,13 @@ int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
return ret;
}
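
As the comment above illustrates, evsel__group_desc() renders a leader and its members as, for example, "{ cycles:u, instructions:u }" (the braces are skipped for a forced leader). A rough standalone illustration of the formatting, using plain snprintf() and hard-coded names purely for the example:

    #include <stdio.h>

    int main(void)
    {
        const char *members[] = { "cycles:u", "instructions:u" };
        char buf[128];
        int ret = 0;

        ret += snprintf(buf + ret, sizeof(buf) - ret, "%s { ", "anon group");
        ret += snprintf(buf + ret, sizeof(buf) - ret, "%s", members[0]);
        ret += snprintf(buf + ret, sizeof(buf) - ret, ", %s", members[1]);
        ret += snprintf(buf + ret, sizeof(buf) - ret, " }");

        puts(buf);  /* anon group { cycles:u, instructions:u } */
        return 0;
    }
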
-static void __perf_evsel__config_callchain(struct evsel *evsel,
- struct record_opts *opts,
- struct callchain_param *param)
+static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *param)
{
- bool function = perf_evsel__is_function_event(evsel);
+ bool function = evsel__is_function_event(evsel);
struct perf_event_attr *attr = &evsel->core.attr;
- perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+ evsel__set_sample_bit(evsel, CALLCHAIN);
attr->sample_max_stack = param->max_stack;
@@ -708,7 +805,7 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
"to get user callchain information. "
"Falling back to framepointers.\n");
} else {
- perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+ evsel__set_sample_bit(evsel, BRANCH_STACK);
attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
PERF_SAMPLE_BRANCH_CALL_STACK |
PERF_SAMPLE_BRANCH_NO_CYCLES |
@@ -722,8 +819,8 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
if (param->record_mode == CALLCHAIN_DWARF) {
if (!function) {
- perf_evsel__set_sample_bit(evsel, REGS_USER);
- perf_evsel__set_sample_bit(evsel, STACK_USER);
+ evsel__set_sample_bit(evsel, REGS_USER);
+ evsel__set_sample_bit(evsel, STACK_USER);
if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) {
attr->sample_regs_user |= DWARF_MINIMAL_REGS;
pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
@@ -746,12 +843,11 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
}
}
-void perf_evsel__config_callchain(struct evsel *evsel,
- struct record_opts *opts,
- struct callchain_param *param)
+void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *param)
{
if (param->enabled)
- return __perf_evsel__config_callchain(evsel, opts, param);
+ return __evsel__config_callchain(evsel, opts, param);
}
static void
@@ -760,23 +856,23 @@ perf_evsel__reset_callgraph(struct evsel *evsel,
{
struct perf_event_attr *attr = &evsel->core.attr;
- perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+ evsel__reset_sample_bit(evsel, CALLCHAIN);
if (param->record_mode == CALLCHAIN_LBR) {
- perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ evsel__reset_sample_bit(evsel, BRANCH_STACK);
attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
PERF_SAMPLE_BRANCH_CALL_STACK |
PERF_SAMPLE_BRANCH_HW_INDEX);
}
if (param->record_mode == CALLCHAIN_DWARF) {
- perf_evsel__reset_sample_bit(evsel, REGS_USER);
- perf_evsel__reset_sample_bit(evsel, STACK_USER);
+ evsel__reset_sample_bit(evsel, REGS_USER);
+ evsel__reset_sample_bit(evsel, STACK_USER);
}
}
-static void apply_config_terms(struct evsel *evsel,
- struct record_opts *opts, bool track)
+static void evsel__apply_config_terms(struct evsel *evsel,
+ struct record_opts *opts, bool track)
{
- struct perf_evsel_config_term *term;
+ struct evsel_config_term *term;
struct list_head *config_terms = &evsel->config_terms;
struct perf_event_attr *attr = &evsel->core.attr;
/* callgraph default */
@@ -789,69 +885,69 @@ static void apply_config_terms(struct evsel *evsel,
list_for_each_entry(term, config_terms, list) {
switch (term->type) {
- case PERF_EVSEL__CONFIG_TERM_PERIOD:
+ case EVSEL__CONFIG_TERM_PERIOD:
if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
attr->sample_period = term->val.period;
attr->freq = 0;
- perf_evsel__reset_sample_bit(evsel, PERIOD);
+ evsel__reset_sample_bit(evsel, PERIOD);
}
break;
- case PERF_EVSEL__CONFIG_TERM_FREQ:
+ case EVSEL__CONFIG_TERM_FREQ:
if (!(term->weak && opts->user_freq != UINT_MAX)) {
attr->sample_freq = term->val.freq;
attr->freq = 1;
- perf_evsel__set_sample_bit(evsel, PERIOD);
+ evsel__set_sample_bit(evsel, PERIOD);
}
break;
- case PERF_EVSEL__CONFIG_TERM_TIME:
+ case EVSEL__CONFIG_TERM_TIME:
if (term->val.time)
- perf_evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, TIME);
else
- perf_evsel__reset_sample_bit(evsel, TIME);
+ evsel__reset_sample_bit(evsel, TIME);
break;
- case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
+ case EVSEL__CONFIG_TERM_CALLGRAPH:
callgraph_buf = term->val.str;
break;
- case PERF_EVSEL__CONFIG_TERM_BRANCH:
+ case EVSEL__CONFIG_TERM_BRANCH:
if (term->val.str && strcmp(term->val.str, "no")) {
- perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+ evsel__set_sample_bit(evsel, BRANCH_STACK);
parse_branch_str(term->val.str,
&attr->branch_sample_type);
} else
- perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ evsel__reset_sample_bit(evsel, BRANCH_STACK);
break;
- case PERF_EVSEL__CONFIG_TERM_STACK_USER:
+ case EVSEL__CONFIG_TERM_STACK_USER:
dump_size = term->val.stack_user;
break;
- case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
+ case EVSEL__CONFIG_TERM_MAX_STACK:
max_stack = term->val.max_stack;
break;
- case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS:
+ case EVSEL__CONFIG_TERM_MAX_EVENTS:
evsel->max_events = term->val.max_events;
break;
- case PERF_EVSEL__CONFIG_TERM_INHERIT:
+ case EVSEL__CONFIG_TERM_INHERIT:
/*
 				 * attr->inherit should have already been set by
- * perf_evsel__config. If user explicitly set
+ * evsel__config. If user explicitly set
* inherit using config terms, override global
* opt->no_inherit setting.
*/
attr->inherit = term->val.inherit ? 1 : 0;
break;
- case PERF_EVSEL__CONFIG_TERM_OVERWRITE:
+ case EVSEL__CONFIG_TERM_OVERWRITE:
attr->write_backward = term->val.overwrite ? 1 : 0;
break;
- case PERF_EVSEL__CONFIG_TERM_DRV_CFG:
+ case EVSEL__CONFIG_TERM_DRV_CFG:
break;
- case PERF_EVSEL__CONFIG_TERM_PERCORE:
+ case EVSEL__CONFIG_TERM_PERCORE:
break;
- case PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT:
+ case EVSEL__CONFIG_TERM_AUX_OUTPUT:
attr->aux_output = term->val.aux_output ? 1 : 0;
break;
- case PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
+ case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
/* Already applied by auxtrace */
break;
- case PERF_EVSEL__CONFIG_TERM_CFG_CHG:
+ case EVSEL__CONFIG_TERM_CFG_CHG:
break;
default:
break;
@@ -897,25 +993,18 @@ static void apply_config_terms(struct evsel *evsel,
/* set perf-event callgraph */
if (param.enabled) {
if (sample_address) {
- perf_evsel__set_sample_bit(evsel, ADDR);
- perf_evsel__set_sample_bit(evsel, DATA_SRC);
+ evsel__set_sample_bit(evsel, ADDR);
+ evsel__set_sample_bit(evsel, DATA_SRC);
evsel->core.attr.mmap_data = track;
}
- perf_evsel__config_callchain(evsel, opts, &param);
+ evsel__config_callchain(evsel, opts, &param);
}
}
}
-static bool is_dummy_event(struct evsel *evsel)
-{
- return (evsel->core.attr.type == PERF_TYPE_SOFTWARE) &&
- (evsel->core.attr.config == PERF_COUNT_SW_DUMMY);
-}
-
-struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel,
- enum evsel_term_type type)
+struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type)
{
- struct perf_evsel_config_term *term, *found_term = NULL;
+ struct evsel_config_term *term, *found_term = NULL;
list_for_each_entry(term, &evsel->config_terms, list) {
if (term->type == type)
@@ -953,8 +1042,8 @@ struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel
* enable/disable events specifically, as there's no
* initial traced exec call.
*/
-void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
- struct callchain_param *callchain)
+void evsel__config(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *callchain)
{
struct evsel *leader = evsel->leader;
struct perf_event_attr *attr = &evsel->core.attr;
@@ -965,17 +1054,17 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
attr->inherit = !opts->no_inherit;
attr->write_backward = opts->overwrite ? 1 : 0;
- perf_evsel__set_sample_bit(evsel, IP);
- perf_evsel__set_sample_bit(evsel, TID);
+ evsel__set_sample_bit(evsel, IP);
+ evsel__set_sample_bit(evsel, TID);
if (evsel->sample_read) {
- perf_evsel__set_sample_bit(evsel, READ);
+ evsel__set_sample_bit(evsel, READ);
/*
* We need ID even in case of single event, because
 	 * PERF_SAMPLE_READ processes ID-specific data.
*/
- perf_evsel__set_sample_id(evsel, false);
+ evsel__set_sample_id(evsel, false);
/*
* Apply group format only if we belong to group
@@ -991,35 +1080,20 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
* We default some events to have a default interval. But keep
* it a weak assumption overridable by the user.
*/
- if (!attr->sample_period || (opts->user_freq != UINT_MAX ||
- opts->user_interval != ULLONG_MAX)) {
+ if (!attr->sample_period) {
if (opts->freq) {
- perf_evsel__set_sample_bit(evsel, PERIOD);
attr->freq = 1;
attr->sample_freq = opts->freq;
} else {
attr->sample_period = opts->default_interval;
}
}
-
/*
- * Disable sampling for all group members other
- * than leader in case leader 'leads' the sampling.
+ * If attr->freq was set (here or earlier), ask for period
+ * to be sampled.
*/
- if ((leader != evsel) && leader->sample_read) {
- attr->freq = 0;
- attr->sample_freq = 0;
- attr->sample_period = 0;
- attr->write_backward = 0;
-
- /*
- * We don't get sample for slave events, we make them
- * when delivering group leader sample. Set the slave
- * event to follow the master sample_type to ease up
- * report.
- */
- attr->sample_type = leader->core.attr.sample_type;
- }
+ if (attr->freq)
+ evsel__set_sample_bit(evsel, PERIOD);
if (opts->no_samples)
attr->sample_freq = 0;
@@ -1033,7 +1107,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
}
if (opts->sample_address) {
- perf_evsel__set_sample_bit(evsel, ADDR);
+ evsel__set_sample_bit(evsel, ADDR);
attr->mmap_data = track;
}
@@ -1042,24 +1116,26 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
* event, due to issues with page faults while tracing page
 	 * fault handler and its overall tricky nature.
*/
- if (perf_evsel__is_function_event(evsel))
+ if (evsel__is_function_event(evsel))
evsel->core.attr.exclude_callchain_user = 1;
if (callchain && callchain->enabled && !evsel->no_aux_samples)
- perf_evsel__config_callchain(evsel, opts, callchain);
+ evsel__config_callchain(evsel, opts, callchain);
- if (opts->sample_intr_regs) {
+ if (opts->sample_intr_regs && !evsel->no_aux_samples &&
+ !evsel__is_dummy_event(evsel)) {
attr->sample_regs_intr = opts->sample_intr_regs;
- perf_evsel__set_sample_bit(evsel, REGS_INTR);
+ evsel__set_sample_bit(evsel, REGS_INTR);
}
- if (opts->sample_user_regs) {
+ if (opts->sample_user_regs && !evsel->no_aux_samples &&
+ !evsel__is_dummy_event(evsel)) {
attr->sample_regs_user |= opts->sample_user_regs;
- perf_evsel__set_sample_bit(evsel, REGS_USER);
+ evsel__set_sample_bit(evsel, REGS_USER);
}
if (target__has_cpu(&opts->target) || opts->sample_cpu)
- perf_evsel__set_sample_bit(evsel, CPU);
+ evsel__set_sample_bit(evsel, CPU);
/*
* When the user explicitly disabled time don't force it here.
@@ -1068,37 +1144,42 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
(!perf_missing_features.sample_id_all &&
(!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
opts->sample_time_set)))
- perf_evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, TIME);
if (opts->raw_samples && !evsel->no_aux_samples) {
- perf_evsel__set_sample_bit(evsel, TIME);
- perf_evsel__set_sample_bit(evsel, RAW);
- perf_evsel__set_sample_bit(evsel, CPU);
+ evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, RAW);
+ evsel__set_sample_bit(evsel, CPU);
}
if (opts->sample_address)
- perf_evsel__set_sample_bit(evsel, DATA_SRC);
+ evsel__set_sample_bit(evsel, DATA_SRC);
if (opts->sample_phys_addr)
- perf_evsel__set_sample_bit(evsel, PHYS_ADDR);
+ evsel__set_sample_bit(evsel, PHYS_ADDR);
if (opts->no_buffering) {
attr->watermark = 0;
attr->wakeup_events = 1;
}
if (opts->branch_stack && !evsel->no_aux_samples) {
- perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+ evsel__set_sample_bit(evsel, BRANCH_STACK);
attr->branch_sample_type = opts->branch_stack;
}
if (opts->sample_weight)
- perf_evsel__set_sample_bit(evsel, WEIGHT);
+ evsel__set_sample_bit(evsel, WEIGHT);
attr->task = track;
attr->mmap = track;
attr->mmap2 = track && !perf_missing_features.mmap2;
attr->comm = track;
- attr->ksymbol = track && !perf_missing_features.ksymbol;
+ /*
+ * ksymbol is tracked separately with text poke because it needs to be
+ * system wide and enabled immediately.
+ */
+ if (!opts->text_poke)
+ attr->ksymbol = track && !perf_missing_features.ksymbol;
attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf;
if (opts->record_namespaces)
@@ -1106,14 +1187,14 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
if (opts->record_cgroup) {
attr->cgroup = track && !perf_missing_features.cgroup;
- perf_evsel__set_sample_bit(evsel, CGROUP);
+ evsel__set_sample_bit(evsel, CGROUP);
}
if (opts->record_switch_events)
attr->context_switch = track;
if (opts->sample_transaction)
- perf_evsel__set_sample_bit(evsel, TRANSACTION);
+ evsel__set_sample_bit(evsel, TRANSACTION);
if (opts->running_time) {
evsel->core.attr.read_format |=
@@ -1127,15 +1208,15 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
* Disabling only independent events or group leaders,
* keeping group members enabled.
*/
- if (perf_evsel__is_group_leader(evsel))
+ if (evsel__is_group_leader(evsel))
attr->disabled = 1;
/*
* Setting enable_on_exec for independent events and
 	 * group leaders when the workload is executed by perf.
*/
- if (target__none(&opts->target) && perf_evsel__is_group_leader(evsel) &&
- !opts->initial_delay)
+ if (target__none(&opts->target) && evsel__is_group_leader(evsel) &&
+ !opts->initial_delay)
attr->enable_on_exec = 1;
if (evsel->immediate) {
@@ -1169,28 +1250,31 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
* Apply event specific term settings,
* it overloads any global configuration.
*/
- apply_config_terms(evsel, opts, track);
+ evsel__apply_config_terms(evsel, opts, track);
evsel->ignore_missing_thread = opts->ignore_missing_thread;
/* The --period option takes the precedence. */
if (opts->period_set) {
if (opts->period)
- perf_evsel__set_sample_bit(evsel, PERIOD);
+ evsel__set_sample_bit(evsel, PERIOD);
else
- perf_evsel__reset_sample_bit(evsel, PERIOD);
+ evsel__reset_sample_bit(evsel, PERIOD);
}
/*
+ * A dummy event never triggers any actual counter and therefore
+ * cannot be used with branch_stack.
+ *
* For initial_delay, a dummy event is added implicitly.
 	 * The software event will error out with -EOPNOTSUPP
 	 * if the BRANCH_STACK bit is set.
*/
- if (opts->initial_delay && is_dummy_event(evsel))
- perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ if (evsel__is_dummy_event(evsel))
+ evsel__reset_sample_bit(evsel, BRANCH_STACK);
}
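
A "dummy" event here is simply PERF_TYPE_SOFTWARE with PERF_COUNT_SW_DUMMY: it never counts anything and only exists to carry side-band records, which is why requesting branch sampling on it would make the kernel return -EOPNOTSUPP. A minimal sketch of opening one directly via the raw syscall, assuming a Linux system with perf events available (illustrative only, not part of the patch):

    #define _GNU_SOURCE
    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <string.h>
    #include <unistd.h>
    #include <stdio.h>

    int main(void)
    {
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size   = sizeof(attr);
        attr.type   = PERF_TYPE_SOFTWARE;
        attr.config = PERF_COUNT_SW_DUMMY;  /* never fires as a counter */
        attr.task = 1;                      /* side-band records only */
        attr.mmap = 1;
        attr.comm = 1;

        /* pid = 0 (self), cpu = -1 (any), no group, no flags */
        int fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
            perror("perf_event_open");
        else
            close(fd);
        return 0;
    }
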
-int perf_evsel__set_filter(struct evsel *evsel, const char *filter)
+int evsel__set_filter(struct evsel *evsel, const char *filter)
{
char *new_filter = strdup(filter);
@@ -1203,13 +1287,12 @@ int perf_evsel__set_filter(struct evsel *evsel, const char *filter)
return -1;
}
-static int perf_evsel__append_filter(struct evsel *evsel,
- const char *fmt, const char *filter)
+static int evsel__append_filter(struct evsel *evsel, const char *fmt, const char *filter)
{
char *new_filter;
if (evsel->filter == NULL)
- return perf_evsel__set_filter(evsel, filter);
+ return evsel__set_filter(evsel, filter);
if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
free(evsel->filter);
@@ -1220,14 +1303,14 @@ static int perf_evsel__append_filter(struct evsel *evsel,
return -1;
}
-int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter)
+int evsel__append_tp_filter(struct evsel *evsel, const char *filter)
{
- return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter);
+ return evsel__append_filter(evsel, "(%s) && (%s)", filter);
}
-int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter)
+int evsel__append_addr_filter(struct evsel *evsel, const char *filter)
{
- return perf_evsel__append_filter(evsel, "%s,%s", filter);
+ return evsel__append_filter(evsel, "%s,%s", filter);
}
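
evsel__append_tp_filter() joins an existing tracepoint filter with a new one as "(old) && (new)", while the address-filter variant joins with a comma. A tiny standalone illustration of the tracepoint form, assuming GNU asprintf() and made-up filter strings:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        const char *old = "common_pid != 1";
        const char *new = "id == 59";
        char *combined;

        if (asprintf(&combined, "(%s) && (%s)", old, new) < 0)
            return 1;

        puts(combined);  /* (common_pid != 1) && (id == 59) */
        free(combined);
        return 0;
    }
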
/* Caller has to clear disabled after going through all CPUs. */
@@ -1266,9 +1349,9 @@ int evsel__disable(struct evsel *evsel)
return err;
}
-static void perf_evsel__free_config_terms(struct evsel *evsel)
+static void evsel__free_config_terms(struct evsel *evsel)
{
- struct perf_evsel_config_term *term, *h;
+ struct evsel_config_term *term, *h;
list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
list_del_init(&term->list);
@@ -1278,14 +1361,14 @@ static void perf_evsel__free_config_terms(struct evsel *evsel)
}
}
-void perf_evsel__exit(struct evsel *evsel)
+void evsel__exit(struct evsel *evsel)
{
assert(list_empty(&evsel->core.node));
assert(evsel->evlist == NULL);
- perf_evsel__free_counts(evsel);
+ evsel__free_counts(evsel);
perf_evsel__free_fd(&evsel->core);
perf_evsel__free_id(&evsel->core);
- perf_evsel__free_config_terms(evsel);
+ evsel__free_config_terms(evsel);
cgroup__put(evsel->cgrp);
perf_cpu_map__put(evsel->core.cpus);
perf_cpu_map__put(evsel->core.own_cpus);
@@ -1293,17 +1376,19 @@ void perf_evsel__exit(struct evsel *evsel)
zfree(&evsel->group_name);
zfree(&evsel->name);
zfree(&evsel->pmu_name);
+ zfree(&evsel->per_pkg_mask);
+ zfree(&evsel->metric_events);
perf_evsel__object.fini(evsel);
}
void evsel__delete(struct evsel *evsel)
{
- perf_evsel__exit(evsel);
+ evsel__exit(evsel);
free(evsel);
}
-void perf_evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
- struct perf_counts_values *count)
+void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
+ struct perf_counts_values *count)
{
struct perf_counts_values tmp;
@@ -1342,8 +1427,7 @@ void perf_counts_values__scale(struct perf_counts_values *count,
*pscaled = scaled;
}
-static int
-perf_evsel__read_one(struct evsel *evsel, int cpu, int thread)
+static int evsel__read_one(struct evsel *evsel, int cpu, int thread)
{
struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread);
@@ -1403,8 +1487,7 @@ perf_evsel__process_group_data(struct evsel *leader,
return 0;
}
-static int
-perf_evsel__read_group(struct evsel *leader, int cpu, int thread)
+static int evsel__read_group(struct evsel *leader, int cpu, int thread)
{
struct perf_stat_evsel *ps = leader->stats;
u64 read_format = leader->core.attr.read_format;
@@ -1414,7 +1497,7 @@ perf_evsel__read_group(struct evsel *leader, int cpu, int thread)
if (!(read_format & PERF_FORMAT_ID))
return -EINVAL;
- if (!perf_evsel__is_group_leader(leader))
+ if (!evsel__is_group_leader(leader))
return -EINVAL;
if (!data) {
@@ -1434,18 +1517,17 @@ perf_evsel__read_group(struct evsel *leader, int cpu, int thread)
return perf_evsel__process_group_data(leader, cpu, thread, data);
}
-int perf_evsel__read_counter(struct evsel *evsel, int cpu, int thread)
+int evsel__read_counter(struct evsel *evsel, int cpu, int thread)
{
u64 read_format = evsel->core.attr.read_format;
if (read_format & PERF_FORMAT_GROUP)
- return perf_evsel__read_group(evsel, cpu, thread);
- else
- return perf_evsel__read_one(evsel, cpu, thread);
+ return evsel__read_group(evsel, cpu, thread);
+
+ return evsel__read_one(evsel, cpu, thread);
}
-int __perf_evsel__read_on_cpu(struct evsel *evsel,
- int cpu, int thread, bool scale)
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale)
{
struct perf_counts_values count;
size_t nv = scale ? 3 : 1;
@@ -1453,13 +1535,13 @@ int __perf_evsel__read_on_cpu(struct evsel *evsel,
if (FD(evsel, cpu, thread) < 0)
return -EINVAL;
- if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
+ if (evsel->counts == NULL && evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
return -ENOMEM;
if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
return -errno;
- perf_evsel__compute_deltas(evsel, cpu, thread, &count);
+ evsel__compute_deltas(evsel, cpu, thread, &count);
perf_counts_values__scale(&count, scale, NULL);
*perf_counts(evsel->counts, cpu, thread) = count;
return 0;
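
The scaled read path works because the counter's read_format includes PERF_FORMAT_TOTAL_TIME_ENABLED and PERF_FORMAT_TOTAL_TIME_RUNNING, so read() returns three u64 values and the count can be scaled by enabled/running to compensate for multiplexing. A rough standalone sketch of that read, where read_scaled() is a hypothetical helper and fd is assumed to be a perf event descriptor opened with that read_format:

    #include <stdint.h>
    #include <unistd.h>

    struct read_values {
        uint64_t value;
        uint64_t time_enabled;   /* PERF_FORMAT_TOTAL_TIME_ENABLED */
        uint64_t time_running;   /* PERF_FORMAT_TOTAL_TIME_RUNNING */
    };

    /* hypothetical helper: read one counter and scale it for multiplexing */
    static int read_scaled(int fd, uint64_t *out)
    {
        struct read_values v;

        if (read(fd, &v, sizeof(v)) != sizeof(v))
            return -1;

        if (v.time_running && v.time_running < v.time_enabled)
            v.value = (uint64_t)((double)v.value *
                                 v.time_enabled / v.time_running);
        *out = v.value;
        return 0;
    }
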
@@ -1470,7 +1552,7 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread)
struct evsel *leader = evsel->leader;
int fd;
- if (perf_evsel__is_group_leader(evsel))
+ if (evsel__is_group_leader(evsel))
return -1;
/*
@@ -1706,6 +1788,11 @@ retry_open:
FD(evsel, cpu, thread) = fd;
+ if (unlikely(test_attr__enabled)) {
+ test_attr__open(&evsel->core.attr, pid, cpus->map[cpu],
+ fd, group_fd, flags);
+ }
+
if (fd < 0) {
err = -errno;
@@ -1749,8 +1836,7 @@ retry_open:
/*
* If we succeeded but had to kill clockid, fail and
- * have perf_evsel__open_strerror() print us a nice
- * error.
+ * have evsel__open_strerror() print us a nice error.
*/
if (perf_missing_features.clockid ||
perf_missing_features.clockid_wrong) {
@@ -1854,7 +1940,7 @@ try_fallback:
} else if (!perf_missing_features.group_read &&
evsel->core.attr.inherit &&
(evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
- perf_evsel__is_group_leader(evsel)) {
+ evsel__is_group_leader(evsel)) {
perf_missing_features.group_read = true;
pr_debug2_peo("switching off group read\n");
goto fallback_missing_features;
@@ -1888,9 +1974,7 @@ void evsel__close(struct evsel *evsel)
perf_evsel__free_id(&evsel->core);
}
-int perf_evsel__open_per_cpu(struct evsel *evsel,
- struct perf_cpu_map *cpus,
- int cpu)
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu)
{
if (cpu == -1)
return evsel__open_cpu(evsel, cpus, NULL, 0,
@@ -1899,8 +1983,7 @@ int perf_evsel__open_per_cpu(struct evsel *evsel,
return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1);
}
-int perf_evsel__open_per_thread(struct evsel *evsel,
- struct perf_thread_map *threads)
+int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
{
return evsel__open(evsel, NULL, threads);
}
@@ -1995,8 +2078,8 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size)
return 0;
}
-int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
- struct perf_sample *data)
+int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
+ struct perf_sample *data)
{
u64 type = evsel->core.attr.sample_type;
bool swapped = evsel->needs_swap;
@@ -2136,7 +2219,7 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
}
}
- if (evsel__has_callchain(evsel)) {
+ if (type & PERF_SAMPLE_CALLCHAIN) {
const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
OVERFLOW_CHECK_u64(array);
@@ -2190,7 +2273,7 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
return -EFAULT;
sz = data->branch_stack->nr * sizeof(struct branch_entry);
- if (perf_evsel__has_branch_hw_idx(evsel))
+ if (evsel__has_branch_hw_idx(evsel))
sz += sizeof(u64);
else
data->no_hw_idx = true;
@@ -2298,9 +2381,8 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
return 0;
}
-int perf_evsel__parse_sample_timestamp(struct evsel *evsel,
- union perf_event *event,
- u64 *timestamp)
+int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
+ u64 *timestamp)
{
u64 type = evsel->core.attr.sample_type;
const __u64 *array;
@@ -2342,15 +2424,14 @@ int perf_evsel__parse_sample_timestamp(struct evsel *evsel,
return 0;
}
-struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name)
+struct tep_format_field *evsel__field(struct evsel *evsel, const char *name)
{
return tep_find_field(evsel->tp_format, name);
}
-void *perf_evsel__rawptr(struct evsel *evsel, struct perf_sample *sample,
- const char *name)
+void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name)
{
- struct tep_format_field *field = perf_evsel__field(evsel, name);
+ struct tep_format_field *field = evsel__field(evsel, name);
int offset;
if (!field)
@@ -2405,10 +2486,9 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
return 0;
}
-u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample,
- const char *name)
+u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name)
{
- struct tep_format_field *field = perf_evsel__field(evsel, name);
+ struct tep_format_field *field = evsel__field(evsel, name);
if (!field)
return 0;
@@ -2416,8 +2496,7 @@ u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample,
return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
}
-bool perf_evsel__fallback(struct evsel *evsel, int err,
- char *msg, size_t msgsize)
+bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize)
{
int paranoid;
@@ -2442,13 +2521,17 @@ bool perf_evsel__fallback(struct evsel *evsel, int err,
return true;
} else if (err == EACCES && !evsel->core.attr.exclude_kernel &&
(paranoid = perf_event_paranoid()) > 1) {
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
char *new_name;
const char *sep = ":";
+ /* If event has exclude user then don't exclude kernel. */
+ if (evsel->core.attr.exclude_user)
+ return false;
+
 		/* Is the separator already present in the name? */
if (strchr(name, '/') ||
- strchr(name, ':'))
+ (strchr(name, ':') && !evsel->is_libpfm_event))
sep = "";
if (asprintf(&new_name, "%s%su", name, sep) < 0)
@@ -2505,39 +2588,48 @@ static bool find_process(const char *name)
return ret ? false : true;
}
-int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
- int err, char *msg, size_t size)
+int evsel__open_strerror(struct evsel *evsel, struct target *target,
+ int err, char *msg, size_t size)
{
char sbuf[STRERR_BUFSIZE];
- int printed = 0;
+ int printed = 0, enforced = 0;
switch (err) {
case EPERM:
case EACCES:
+ printed += scnprintf(msg + printed, size - printed,
+ "Access to performance monitoring and observability operations is limited.\n");
+
+ if (!sysfs__read_int("fs/selinux/enforce", &enforced)) {
+ if (enforced) {
+ printed += scnprintf(msg + printed, size - printed,
+ "Enforced MAC policy settings (SELinux) can limit access to performance\n"
+ "monitoring and observability operations. Inspect system audit records for\n"
+ "more perf_event access control information and adjusting the policy.\n");
+ }
+ }
+
if (err == EPERM)
- printed = scnprintf(msg, size,
- "No permission to enable %s event.\n\n",
- perf_evsel__name(evsel));
+			printed += scnprintf(msg + printed, size - printed,
+					     "No permission to enable %s event.\n\n", evsel__name(evsel));
return scnprintf(msg + printed, size - printed,
- "You may not have permission to collect %sstats.\n\n"
- "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
- "which controls use of the performance events system by\n"
- "unprivileged users (without CAP_SYS_ADMIN).\n\n"
- "The current value is %d:\n\n"
+ "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
+ "access to performance monitoring and observability operations for processes\n"
+ "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
+ "More information can be found at 'Perf events and tool security' document:\n"
+ "https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html\n"
+ "perf_event_paranoid setting is %d:\n"
" -1: Allow use of (almost) all events by all users\n"
" Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
- ">= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN\n"
- " Disallow raw tracepoint access by users without CAP_SYS_ADMIN\n"
- ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
- ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n"
- "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n"
- " kernel.perf_event_paranoid = -1\n" ,
- target->system_wide ? "system-wide " : "",
- perf_event_paranoid());
+ ">= 0: Disallow raw and ftrace function tracepoint access\n"
+ ">= 1: Disallow CPU event access\n"
+ ">= 2: Disallow kernel profiling\n"
+ "To make the adjusted perf_event_paranoid setting permanent preserve it\n"
+ "in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)",
+ perf_event_paranoid());
case ENOENT:
- return scnprintf(msg, size, "The %s event is not supported.",
- perf_evsel__name(evsel));
+ return scnprintf(msg, size, "The %s event is not supported.", evsel__name(evsel));
case EMFILE:
return scnprintf(msg, size, "%s",
"Too many events are opened.\n"
@@ -2558,10 +2650,14 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
"No such device - did you specify an out-of-range profile CPU?");
break;
case EOPNOTSUPP:
+ if (evsel->core.attr.aux_output)
+ return scnprintf(msg, size,
+ "%s: PMU Hardware doesn't support 'aux_output' feature",
+ evsel__name(evsel));
if (evsel->core.attr.sample_period != 0)
return scnprintf(msg, size,
"%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
- perf_evsel__name(evsel));
+ evsel__name(evsel));
if (evsel->core.attr.precise_ip)
return scnprintf(msg, size, "%s",
"\'precise\' request may not be supported. Try removing 'p' modifier.");
@@ -2594,11 +2690,10 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size,
"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
"/bin/dmesg | grep -i perf may provide additional information.\n",
- err, str_error_r(err, sbuf, sizeof(sbuf)),
- perf_evsel__name(evsel));
+ err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel));
}
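
The reworked error text points users at /proc/sys/kernel/perf_event_paranoid. A small standalone reader for that sysctl, mirroring the level descriptions used in the message above:

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/proc/sys/kernel/perf_event_paranoid", "r");
        int level;

        if (!f) {
            perror("fopen");
            return 1;
        }
        if (fscanf(f, "%d", &level) != 1) {
            fclose(f);
            return 1;
        }
        fclose(f);

        printf("perf_event_paranoid = %d\n", level);
        if (level >= 2)
            puts("kernel profiling disallowed for unprivileged users");
        else if (level >= 1)
            puts("CPU event access disallowed for unprivileged users");
        else if (level >= 0)
            puts("raw/ftrace tracepoint access disallowed for unprivileged users");
        else
            puts("(almost) all events allowed for all users");
        return 0;
    }
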
-struct perf_env *perf_evsel__env(struct evsel *evsel)
+struct perf_env *evsel__env(struct evsel *evsel)
{
if (evsel && evsel->evlist)
return evsel->evlist->env;
@@ -2623,7 +2718,7 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
return 0;
}
-int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
+int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
{
struct perf_cpu_map *cpus = evsel->core.cpus;
struct perf_thread_map *threads = evsel->core.threads;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 53187c501ee8..79a860d8e3ee 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -18,7 +18,7 @@ struct perf_counts;
struct perf_stat_evsel;
union perf_event;
-typedef int (perf_evsel__sb_cb_t)(union perf_event *event, void *data);
+typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
enum perf_tool_event {
PERF_TOOL_NONE = 0,
@@ -42,68 +42,91 @@ enum perf_tool_event {
*/
struct evsel {
struct perf_evsel core;
- struct evlist *evlist;
- char *filter;
+ struct evlist *evlist;
+ off_t id_offset;
+ int idx;
+ int id_pos;
+ int is_pos;
+ unsigned int sample_size;
+
+ /*
+ * These fields can be set in the parse-events code or similar.
+ * Please check evsel__clone() to copy them properly so that
+ * they can be released properly.
+ */
+ struct {
+ char *name;
+ char *group_name;
+ const char *pmu_name;
+ struct tep_event *tp_format;
+ char *filter;
+ unsigned long max_events;
+ double scale;
+ const char *unit;
+ struct cgroup *cgrp;
+ enum perf_tool_event tool_event;
+ /* parse modifier helper */
+ int exclude_GH;
+ int sample_read;
+ bool snapshot;
+ bool per_pkg;
+ bool percore;
+ bool precise_max;
+ bool use_uncore_alias;
+ bool is_libpfm_event;
+ bool auto_merge_stats;
+ bool collect_stat;
+ bool weak_group;
+ int bpf_fd;
+ struct bpf_object *bpf_obj;
+ };
+
+ /*
+	 * metric fields are similar, but need more care as they can have
+	 * references to other metrics (evsels).
+ */
+ const char * metric_expr;
+ const char * metric_name;
+ struct evsel **metric_events;
+ struct evsel *metric_leader;
+
+ void *handler;
struct perf_counts *counts;
struct perf_counts *prev_raw_counts;
- int idx;
- unsigned long max_events;
unsigned long nr_events_printed;
- char *name;
- double scale;
- const char *unit;
- struct tep_event *tp_format;
- off_t id_offset;
struct perf_stat_evsel *stats;
void *priv;
u64 db_id;
- struct cgroup *cgrp;
- void *handler;
- unsigned int sample_size;
- int id_pos;
- int is_pos;
- enum perf_tool_event tool_event;
bool uniquified_name;
- bool snapshot;
bool supported;
bool needs_swap;
bool disabled;
bool no_aux_samples;
bool immediate;
bool tracking;
- bool per_pkg;
- bool precise_max;
bool ignore_missing_thread;
bool forced_leader;
- bool use_uncore_alias;
- /* parse modifier helper */
- int exclude_GH;
- int sample_read;
- unsigned long *per_pkg_mask;
- struct evsel *leader;
- char *group_name;
bool cmdline_group_boundary;
- struct list_head config_terms;
- struct bpf_object *bpf_obj;
- int bpf_fd;
- int err;
- bool auto_merge_stats;
bool merged_stat;
- const char * metric_expr;
- const char * metric_name;
- struct evsel **metric_events;
- struct evsel *metric_leader;
- bool collect_stat;
- bool weak_group;
bool reset_group;
bool errored;
- bool percore;
+ unsigned long *per_pkg_mask;
+ struct evsel *leader;
+ struct list_head config_terms;
+ int err;
int cpu_iter;
- const char *pmu_name;
struct {
- perf_evsel__sb_cb_t *cb;
- void *data;
+ evsel__sb_cb_t *cb;
+ void *data;
} side_band;
+ /*
+ * For reporting purposes, an evsel sample can have a callchain
+ * synthesized from AUX area data. Keep track of synthesized sample
+ * types here. Note, the recorded sample_type cannot be changed because
+ * it is needed to continue to parse events.
+ * See also evsel__has_callchain().
+ */
+ __u64 synth_sample_type;
};
struct perf_missing_features {
@@ -135,7 +158,7 @@ static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel)
return perf_evsel__cpus(&evsel->core);
}
-static inline int perf_evsel__nr_cpus(struct evsel *evsel)
+static inline int evsel__nr_cpus(struct evsel *evsel)
{
return evsel__cpus(evsel)->nr;
}
@@ -143,233 +166,208 @@ static inline int perf_evsel__nr_cpus(struct evsel *evsel)
void perf_counts_values__scale(struct perf_counts_values *count,
bool scale, s8 *pscaled);
-void perf_evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
- struct perf_counts_values *count);
+void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
+ struct perf_counts_values *count);
-int perf_evsel__object_config(size_t object_size,
- int (*init)(struct evsel *evsel),
- void (*fini)(struct evsel *evsel));
+int evsel__object_config(size_t object_size,
+ int (*init)(struct evsel *evsel),
+ void (*fini)(struct evsel *evsel));
-struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx);
+struct perf_pmu *evsel__find_pmu(struct evsel *evsel);
+bool evsel__is_aux_event(struct evsel *evsel);
+
+struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx);
static inline struct evsel *evsel__new(struct perf_event_attr *attr)
{
- return perf_evsel__new_idx(attr, 0);
+ return evsel__new_idx(attr, 0);
}
-struct evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx);
+struct evsel *evsel__clone(struct evsel *orig);
+struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx);
/*
* Returns pointer with encoded error via <linux/err.h> interface.
*/
-static inline struct evsel *perf_evsel__newtp(const char *sys, const char *name)
+static inline struct evsel *evsel__newtp(const char *sys, const char *name)
{
- return perf_evsel__newtp_idx(sys, name, 0);
+ return evsel__newtp_idx(sys, name, 0);
}
-struct evsel *perf_evsel__new_cycles(bool precise);
+struct evsel *evsel__new_cycles(bool precise);
struct tep_event *event_format__new(const char *sys, const char *name);
void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx);
-void perf_evsel__exit(struct evsel *evsel);
+void evsel__exit(struct evsel *evsel);
void evsel__delete(struct evsel *evsel);
struct callchain_param;
-void perf_evsel__config(struct evsel *evsel,
- struct record_opts *opts,
- struct callchain_param *callchain);
-void perf_evsel__config_callchain(struct evsel *evsel,
- struct record_opts *opts,
- struct callchain_param *callchain);
-
-int __perf_evsel__sample_size(u64 sample_type);
-void perf_evsel__calc_id_pos(struct evsel *evsel);
-
-bool perf_evsel__is_cache_op_valid(u8 type, u8 op);
-
-#define PERF_EVSEL__MAX_ALIASES 8
-
-extern const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
- [PERF_EVSEL__MAX_ALIASES];
-extern const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_EVSEL__MAX_ALIASES];
-extern const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
- [PERF_EVSEL__MAX_ALIASES];
-extern const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX];
-extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX];
-int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
- char *bf, size_t size);
-const char *perf_evsel__name(struct evsel *evsel);
-
-const char *perf_evsel__group_name(struct evsel *evsel);
-int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
-
-void __perf_evsel__set_sample_bit(struct evsel *evsel,
- enum perf_event_sample_format bit);
-void __perf_evsel__reset_sample_bit(struct evsel *evsel,
- enum perf_event_sample_format bit);
-
-#define perf_evsel__set_sample_bit(evsel, bit) \
- __perf_evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit)
-
-#define perf_evsel__reset_sample_bit(evsel, bit) \
- __perf_evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit)
-
-void perf_evsel__set_sample_id(struct evsel *evsel,
- bool use_sample_identifier);
-
-int perf_evsel__set_filter(struct evsel *evsel, const char *filter);
-int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter);
-int perf_evsel__append_addr_filter(struct evsel *evsel,
- const char *filter);
+void evsel__config(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *callchain);
+void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *callchain);
+
+int __evsel__sample_size(u64 sample_type);
+void evsel__calc_id_pos(struct evsel *evsel);
+
+bool evsel__is_cache_op_valid(u8 type, u8 op);
+
+#define EVSEL__MAX_ALIASES 8
+
+extern const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES];
+extern const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES];
+extern const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES];
+extern const char *evsel__hw_names[PERF_COUNT_HW_MAX];
+extern const char *evsel__sw_names[PERF_COUNT_SW_MAX];
+int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size);
+const char *evsel__name(struct evsel *evsel);
+
+const char *evsel__group_name(struct evsel *evsel);
+int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
+
+void __evsel__set_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit);
+void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit);
+
+#define evsel__set_sample_bit(evsel, bit) \
+ __evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit)
+
+#define evsel__reset_sample_bit(evsel, bit) \
+ __evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit)
+
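
The evsel__set_sample_bit()/evsel__reset_sample_bit() macros simply paste PERF_SAMPLE_ onto the bit name and toggle it in the attribute's sample_type mask. A standalone sketch of the same token-pasting idea applied directly to a struct perf_event_attr (the attr_* macro names are invented for the example):

    #include <linux/perf_event.h>
    #include <stdio.h>

    /* same token-pasting idea, operating directly on a perf_event_attr */
    #define attr_set_sample_bit(attr, bit) \
        ((attr)->sample_type |= PERF_SAMPLE_##bit)
    #define attr_reset_sample_bit(attr, bit) \
        ((attr)->sample_type &= ~(__u64)PERF_SAMPLE_##bit)

    int main(void)
    {
        struct perf_event_attr attr = { .size = sizeof(attr) };

        attr_set_sample_bit(&attr, IP);
        attr_set_sample_bit(&attr, TID);
        attr_set_sample_bit(&attr, PERIOD);
        attr_reset_sample_bit(&attr, PERIOD);

        printf("sample_type = 0x%llx\n", (unsigned long long)attr.sample_type);
        return 0;
    }
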
+void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
+
+int evsel__set_filter(struct evsel *evsel, const char *filter);
+int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
+int evsel__append_addr_filter(struct evsel *evsel, const char *filter);
int evsel__enable_cpu(struct evsel *evsel, int cpu);
int evsel__enable(struct evsel *evsel);
int evsel__disable(struct evsel *evsel);
int evsel__disable_cpu(struct evsel *evsel, int cpu);
-int perf_evsel__open_per_cpu(struct evsel *evsel,
- struct perf_cpu_map *cpus,
- int cpu);
-int perf_evsel__open_per_thread(struct evsel *evsel,
- struct perf_thread_map *threads);
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu);
+int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads);
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
void evsel__close(struct evsel *evsel);
struct perf_sample;
-void *perf_evsel__rawptr(struct evsel *evsel, struct perf_sample *sample,
- const char *name);
-u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample,
- const char *name);
+void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name);
+u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name);
-static inline char *perf_evsel__strval(struct evsel *evsel,
- struct perf_sample *sample,
- const char *name)
+static inline char *evsel__strval(struct evsel *evsel, struct perf_sample *sample, const char *name)
{
- return perf_evsel__rawptr(evsel, sample, name);
+ return evsel__rawptr(evsel, sample, name);
}
struct tep_format_field;
u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample, bool needs_swap);
-struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name);
+struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
-#define perf_evsel__match(evsel, t, c) \
+#define evsel__match(evsel, t, c) \
(evsel->core.attr.type == PERF_TYPE_##t && \
evsel->core.attr.config == PERF_COUNT_##c)
-static inline bool perf_evsel__match2(struct evsel *e1,
- struct evsel *e2)
+static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
{
return (e1->core.attr.type == e2->core.attr.type) &&
(e1->core.attr.config == e2->core.attr.config);
}
-#define perf_evsel__cmp(a, b) \
- ((a) && \
- (b) && \
- (a)->core.attr.type == (b)->core.attr.type && \
- (a)->core.attr.config == (b)->core.attr.config)
-
-int perf_evsel__read_counter(struct evsel *evsel, int cpu, int thread);
+int evsel__read_counter(struct evsel *evsel, int cpu, int thread);
-int __perf_evsel__read_on_cpu(struct evsel *evsel,
- int cpu, int thread, bool scale);
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale);
/**
- * perf_evsel__read_on_cpu - Read out the results on a CPU and thread
+ * evsel__read_on_cpu - Read out the results on a CPU and thread
*
* @evsel - event selector to read value
* @cpu - CPU of interest
* @thread - thread of interest
*/
-static inline int perf_evsel__read_on_cpu(struct evsel *evsel,
- int cpu, int thread)
+static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread)
{
- return __perf_evsel__read_on_cpu(evsel, cpu, thread, false);
+ return __evsel__read_on_cpu(evsel, cpu, thread, false);
}
/**
- * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
+ * evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
*
* @evsel - event selector to read value
* @cpu - CPU of interest
* @thread - thread of interest
*/
-static inline int perf_evsel__read_on_cpu_scaled(struct evsel *evsel,
- int cpu, int thread)
+static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu, int thread)
{
- return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
+ return __evsel__read_on_cpu(evsel, cpu, thread, true);
}
-int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
- struct perf_sample *sample);
+int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
+ struct perf_sample *sample);
-int perf_evsel__parse_sample_timestamp(struct evsel *evsel,
- union perf_event *event,
- u64 *timestamp);
+int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
+ u64 *timestamp);
-static inline struct evsel *perf_evsel__next(struct evsel *evsel)
+static inline struct evsel *evsel__next(struct evsel *evsel)
{
return list_entry(evsel->core.node.next, struct evsel, core.node);
}
-static inline struct evsel *perf_evsel__prev(struct evsel *evsel)
+static inline struct evsel *evsel__prev(struct evsel *evsel)
{
return list_entry(evsel->core.node.prev, struct evsel, core.node);
}
/**
- * perf_evsel__is_group_leader - Return whether given evsel is a leader event
+ * evsel__is_group_leader - Return whether given evsel is a leader event
*
* @evsel - evsel selector to be tested
*
* Return %true if @evsel is a group leader or a stand-alone event
*/
-static inline bool perf_evsel__is_group_leader(const struct evsel *evsel)
+static inline bool evsel__is_group_leader(const struct evsel *evsel)
{
return evsel->leader == evsel;
}
/**
- * perf_evsel__is_group_event - Return whether given evsel is a group event
+ * evsel__is_group_event - Return whether given evsel is a group event
*
* @evsel - evsel selector to be tested
*
 	 * Return %true iff event group view is enabled and @evsel is an actual group
* leader which has other members in the group
*/
-static inline bool perf_evsel__is_group_event(struct evsel *evsel)
+static inline bool evsel__is_group_event(struct evsel *evsel)
{
if (!symbol_conf.event_group)
return false;
- return perf_evsel__is_group_leader(evsel) && evsel->core.nr_members > 1;
+ return evsel__is_group_leader(evsel) && evsel->core.nr_members > 1;
}
-bool perf_evsel__is_function_event(struct evsel *evsel);
+bool evsel__is_function_event(struct evsel *evsel);
-static inline bool perf_evsel__is_bpf_output(struct evsel *evsel)
+static inline bool evsel__is_bpf_output(struct evsel *evsel)
{
- return perf_evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
+ return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
}
-static inline bool perf_evsel__is_clock(struct evsel *evsel)
+static inline bool evsel__is_clock(struct evsel *evsel)
{
- return perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
- perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
+ return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
+ evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
}
-bool perf_evsel__fallback(struct evsel *evsel, int err,
- char *msg, size_t msgsize);
-int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
- int err, char *msg, size_t size);
+bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize);
+int evsel__open_strerror(struct evsel *evsel, struct target *target,
+ int err, char *msg, size_t size);
-static inline int perf_evsel__group_idx(struct evsel *evsel)
+static inline int evsel__group_idx(struct evsel *evsel)
{
return evsel->idx - evsel->leader->idx;
}
@@ -386,22 +384,43 @@ for ((_evsel) = _leader; \
(_evsel) && (_evsel)->leader == (_leader); \
(_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
-static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel)
+static inline bool evsel__has_branch_callstack(const struct evsel *evsel)
{
return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
}
-static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel)
+static inline bool evsel__has_branch_hw_idx(const struct evsel *evsel)
{
return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
}
static inline bool evsel__has_callchain(const struct evsel *evsel)
{
- return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0;
+ /*
+ * For reporting purposes, an evsel sample can have a recorded callchain
+ * or a callchain synthesized from AUX area data.
+ */
+ return evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN ||
+ evsel->synth_sample_type & PERF_SAMPLE_CALLCHAIN;
+}
+
+static inline bool evsel__has_br_stack(const struct evsel *evsel)
+{
+ /*
+ * For reporting purposes, an evsel sample can have a recorded branch
+ * stack or a branch stack synthesized from AUX area data.
+ */
+ return evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK ||
+ evsel->synth_sample_type & PERF_SAMPLE_BRANCH_STACK;
+}
+
+static inline bool evsel__is_dummy_event(struct evsel *evsel)
+{
+ return (evsel->core.attr.type == PERF_TYPE_SOFTWARE) &&
+ (evsel->core.attr.config == PERF_COUNT_SW_DUMMY);
}
-struct perf_env *perf_evsel__env(struct evsel *evsel);
+struct perf_env *evsel__env(struct evsel *evsel);
-int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
+int evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
#endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h
index e026ab67b008..aee6f808b512 100644
--- a/tools/perf/util/evsel_config.h
+++ b/tools/perf/util/evsel_config.h
@@ -6,30 +6,30 @@
#include <stdbool.h>
/*
- * The 'struct perf_evsel_config_term' is used to pass event
- * specific configuration data to perf_evsel__config routine.
+ * The 'struct evsel_config_term' is used to pass event
+ * specific configuration data to evsel__config routine.
* It is allocated within event parsing and attached to
- * perf_evsel::config_terms list head.
+ * evsel::config_terms list head.
*/
enum evsel_term_type {
- PERF_EVSEL__CONFIG_TERM_PERIOD,
- PERF_EVSEL__CONFIG_TERM_FREQ,
- PERF_EVSEL__CONFIG_TERM_TIME,
- PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
- PERF_EVSEL__CONFIG_TERM_STACK_USER,
- PERF_EVSEL__CONFIG_TERM_INHERIT,
- PERF_EVSEL__CONFIG_TERM_MAX_STACK,
- PERF_EVSEL__CONFIG_TERM_MAX_EVENTS,
- PERF_EVSEL__CONFIG_TERM_OVERWRITE,
- PERF_EVSEL__CONFIG_TERM_DRV_CFG,
- PERF_EVSEL__CONFIG_TERM_BRANCH,
- PERF_EVSEL__CONFIG_TERM_PERCORE,
- PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT,
- PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE,
- PERF_EVSEL__CONFIG_TERM_CFG_CHG,
+ EVSEL__CONFIG_TERM_PERIOD,
+ EVSEL__CONFIG_TERM_FREQ,
+ EVSEL__CONFIG_TERM_TIME,
+ EVSEL__CONFIG_TERM_CALLGRAPH,
+ EVSEL__CONFIG_TERM_STACK_USER,
+ EVSEL__CONFIG_TERM_INHERIT,
+ EVSEL__CONFIG_TERM_MAX_STACK,
+ EVSEL__CONFIG_TERM_MAX_EVENTS,
+ EVSEL__CONFIG_TERM_OVERWRITE,
+ EVSEL__CONFIG_TERM_DRV_CFG,
+ EVSEL__CONFIG_TERM_BRANCH,
+ EVSEL__CONFIG_TERM_PERCORE,
+ EVSEL__CONFIG_TERM_AUX_OUTPUT,
+ EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE,
+ EVSEL__CONFIG_TERM_CFG_CHG,
};
-struct perf_evsel_config_term {
+struct evsel_config_term {
struct list_head list;
enum evsel_term_type type;
bool free_str;
@@ -53,10 +53,9 @@ struct perf_evsel_config_term {
struct evsel;
-struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel,
- enum evsel_term_type type);
+struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type);
-#define perf_evsel__get_config_term(evsel, type) \
- __perf_evsel__get_config_term(evsel, PERF_EVSEL__CONFIG_TERM_ ## type)
+#define evsel__get_config_term(evsel, type) \
+ __evsel__get_config_term(evsel, EVSEL__CONFIG_TERM_ ## type)
#endif // __PERF_EVSEL_CONFIG_H
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 3b4842840db0..fb498a723a00 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -35,8 +35,7 @@ static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, vo
return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val);
}
-int perf_evsel__fprintf(struct evsel *evsel,
- struct perf_attr_details *details, FILE *fp)
+int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE *fp)
{
bool first = true;
int printed = 0;
@@ -44,22 +43,22 @@ int perf_evsel__fprintf(struct evsel *evsel,
if (details->event_group) {
struct evsel *pos;
- if (!perf_evsel__is_group_leader(evsel))
+ if (!evsel__is_group_leader(evsel))
return 0;
if (evsel->core.nr_members > 1)
printed += fprintf(fp, "%s{", evsel->group_name ?: "");
- printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+ printed += fprintf(fp, "%s", evsel__name(evsel));
for_each_group_member(pos, evsel)
- printed += fprintf(fp, ",%s", perf_evsel__name(pos));
+ printed += fprintf(fp, ",%s", evsel__name(pos));
if (evsel->core.nr_members > 1)
printed += fprintf(fp, "}");
goto out;
}
- printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+ printed += fprintf(fp, "%s", evsel__name(evsel));
if (details->verbose) {
printed += perf_event_attr__fprintf(fp, &evsel->core.attr,
diff --git a/tools/perf/util/evsel_fprintf.h b/tools/perf/util/evsel_fprintf.h
index 47e6c8456bb1..3093d096c29f 100644
--- a/tools/perf/util/evsel_fprintf.h
+++ b/tools/perf/util/evsel_fprintf.h
@@ -15,8 +15,7 @@ struct perf_attr_details {
bool trace_fields;
};
-int perf_evsel__fprintf(struct evsel *evsel,
- struct perf_attr_details *details, FILE *fp);
+int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE *fp);
#define EVSEL__PRINT_IP (1<<0)
#define EVSEL__PRINT_SYM (1<<1)
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index fd192ddf93c1..53482ef53c41 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -1,40 +1,212 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdbool.h>
#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include "metricgroup.h"
+#include "debug.h"
#include "expr.h"
#include "expr-bison.h"
-#define YY_EXTRA_TYPE int
#include "expr-flex.h"
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <ctype.h>
#ifdef PARSER_DEBUG
extern int expr_debug;
#endif
+static size_t key_hash(const void *key, void *ctx __maybe_unused)
+{
+ const char *str = (const char *)key;
+ size_t hash = 0;
+
+ while (*str != '\0') {
+ hash *= 31;
+ hash += *str;
+ str++;
+ }
+ return hash;
+}
+
+static bool key_equal(const void *key1, const void *key2,
+ void *ctx __maybe_unused)
+{
+ return !strcmp((const char *)key1, (const char *)key2);
+}
+
+/* Caller must make sure id is allocated */
+int expr__add_id(struct expr_parse_ctx *ctx, const char *id)
+{
+ struct expr_id_data *data_ptr = NULL, *old_data = NULL;
+ char *old_key = NULL;
+ int ret;
+
+ data_ptr = malloc(sizeof(*data_ptr));
+ if (!data_ptr)
+ return -ENOMEM;
+
+ data_ptr->parent = ctx->parent;
+
+ ret = hashmap__set(&ctx->ids, id, data_ptr,
+ (const void **)&old_key, (void **)&old_data);
+ if (ret)
+ free(data_ptr);
+ free(old_key);
+ free(old_data);
+ return ret;
+}
+
/* Caller must make sure id is allocated */
-void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
+int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val)
+{
+ struct expr_id_data *data_ptr = NULL, *old_data = NULL;
+ char *old_key = NULL;
+ int ret;
+
+ data_ptr = malloc(sizeof(*data_ptr));
+ if (!data_ptr)
+ return -ENOMEM;
+ data_ptr->val = val;
+ data_ptr->is_ref = false;
+
+ ret = hashmap__set(&ctx->ids, id, data_ptr,
+ (const void **)&old_key, (void **)&old_data);
+ if (ret)
+ free(data_ptr);
+ free(old_key);
+ free(old_data);
+ return ret;
+}
+
+int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
+{
+ struct expr_id_data *data_ptr = NULL, *old_data = NULL;
+ char *old_key = NULL;
+ char *name, *p;
+ int ret;
+
+ data_ptr = zalloc(sizeof(*data_ptr));
+ if (!data_ptr)
+ return -ENOMEM;
+
+ name = strdup(ref->metric_name);
+ if (!name) {
+ free(data_ptr);
+ return -ENOMEM;
+ }
+
+ /*
+ * The jevents tool converts all metric expressions
+ * to lowercase, including metric references, hence
+ * we need to add a lowercase name for the metric, so
+ * it is properly found.
+ */
+ for (p = name; *p; p++)
+ *p = tolower(*p);
+
+ /*
+ * Intentionally passing just const char pointers,
+ * originally from 'struct pmu_event' object.
+ * We don't need to change them, so there's no
+ * need to create our own copy.
+ */
+ data_ptr->ref.metric_name = ref->metric_name;
+ data_ptr->ref.metric_expr = ref->metric_expr;
+ data_ptr->ref.counted = false;
+ data_ptr->is_ref = true;
+
+ ret = hashmap__set(&ctx->ids, name, data_ptr,
+ (const void **)&old_key, (void **)&old_data);
+ if (ret)
+ free(data_ptr);
+
+ pr_debug2("adding ref metric %s: %s\n",
+ ref->metric_name, ref->metric_expr);
+
+ free(old_key);
+ free(old_data);
+ return ret;
+}
+
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **data)
+{
+ return hashmap__find(&ctx->ids, id, (void **)data) ? 0 : -1;
+}
+
+int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **datap)
+{
+ struct expr_id_data *data;
+
+ if (expr__get_id(ctx, id, datap) || !*datap) {
+ pr_debug("%s not found\n", id);
+ return -1;
+ }
+
+ data = *datap;
+
+ pr_debug2("lookup: is_ref %d, counted %d, val %f: %s\n",
+ data->is_ref, data->ref.counted, data->val, id);
+
+ if (data->is_ref && !data->ref.counted) {
+ data->ref.counted = true;
+ pr_debug("processing metric: %s ENTRY\n", id);
+ if (expr__parse(&data->val, ctx, data->ref.metric_expr, 1)) {
+ pr_debug("%s failed to count\n", id);
+ return -1;
+ }
+ pr_debug("processing metric: %s EXIT: %f\n", id, data->val);
+ }
+
+ return 0;
+}
+
+void expr__del_id(struct expr_parse_ctx *ctx, const char *id)
{
- int idx;
+ struct expr_id_data *old_val = NULL;
+ char *old_key = NULL;
- assert(ctx->num_ids < MAX_PARSE_ID);
- idx = ctx->num_ids++;
- ctx->ids[idx].name = name;
- ctx->ids[idx].val = val;
+ hashmap__delete(&ctx->ids, id,
+ (const void **)&old_key, (void **)&old_val);
+ free(old_key);
+ free(old_val);
}
-void expr__ctx_init(struct parse_ctx *ctx)
+void expr__ctx_init(struct expr_parse_ctx *ctx)
{
- ctx->num_ids = 0;
+ hashmap__init(&ctx->ids, key_hash, key_equal, NULL);
+}
+
+void expr__ctx_clear(struct expr_parse_ctx *ctx)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ hashmap__for_each_entry((&ctx->ids), cur, bkt) {
+ free((char *)cur->key);
+ free(cur->value);
+ }
+ hashmap__clear(&ctx->ids);
}
static int
-__expr__parse(double *val, struct parse_ctx *ctx, const char *expr,
- int start)
+__expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
+ int start, int runtime)
{
+ struct expr_scanner_ctx scanner_ctx = {
+ .start_token = start,
+ .runtime = runtime,
+ };
YY_BUFFER_STATE buffer;
void *scanner;
int ret;
- ret = expr_lex_init_extra(start, &scanner);
+ pr_debug2("parsing metric: %s\n", expr);
+
+ ret = expr_lex_init_extra(&scanner_ctx, &scanner);
if (ret)
return ret;
@@ -42,6 +214,7 @@ __expr__parse(double *val, struct parse_ctx *ctx, const char *expr,
#ifdef PARSER_DEBUG
expr_debug = 1;
+ expr_set_debug(1, scanner);
#endif
ret = expr_parse(val, ctx, scanner);
@@ -52,61 +225,19 @@ __expr__parse(double *val, struct parse_ctx *ctx, const char *expr,
return ret;
}
-int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr)
-{
- return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? -1 : 0;
-}
-
-static bool
-already_seen(const char *val, const char *one, const char **other,
- int num_other)
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
+ const char *expr, int runtime)
{
- int i;
-
- if (one && !strcasecmp(one, val))
- return true;
- for (i = 0; i < num_other; i++)
- if (!strcasecmp(other[i], val))
- return true;
- return false;
+ return __expr__parse(final_val, ctx, expr, EXPR_PARSE, runtime) ? -1 : 0;
}
-int expr__find_other(const char *expr, const char *one, const char ***other,
- int *num_other)
+int expr__find_other(const char *expr, const char *one,
+ struct expr_parse_ctx *ctx, int runtime)
{
- int err, i = 0, j = 0;
- struct parse_ctx ctx;
+ int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime);
- expr__ctx_init(&ctx);
- err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER);
- if (err)
- return -1;
-
- *other = malloc((ctx.num_ids + 1) * sizeof(char *));
- if (!*other)
- return -ENOMEM;
-
- for (i = 0, j = 0; i < ctx.num_ids; i++) {
- const char *str = ctx.ids[i].name;
-
- if (already_seen(str, one, *other, j))
- continue;
+ if (one)
+ expr__del_id(ctx, one);
- str = strdup(str);
- if (!str)
- goto out;
- (*other)[j++] = str;
- }
- (*other)[j] = NULL;
-
-out:
- if (i != ctx.num_ids) {
- while (--j)
- free((char *) (*other)[i]);
- free(*other);
- err = -1;
- }
-
- *num_other = j;
- return err;
+ return ret;
}
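
For orientation, a minimal caller sketch for the reworked expr API; the event names and values are invented, only the expr__*() signatures come from this patch. Note the "Caller must make sure id is allocated" contract above: the hashmap stores the key pointer and expr__ctx_clear() frees it, hence the strdup()s.

#include <stdio.h>
#include <stdlib.h>
#include "util/expr.h"

static int expr_demo(void)
{
	struct expr_parse_ctx ctx;
	double result;

	expr__ctx_init(&ctx);
	if (expr__add_id_val(&ctx, strdup("instructions"), 300.0) ||
	    expr__add_id_val(&ctx, strdup("cycles"), 200.0))
		return -1;

	/* runtime = 0: this expression carries no '?' parameter */
	if (expr__parse(&result, &ctx, "instructions / cycles", 0))
		return -1;

	printf("IPC = %f\n", result);	/* 1.500000 */
	expr__ctx_clear(&ctx);		/* frees the ids and their expr_id_data */
	return 0;
}
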
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index 9377538f4097..fc2b5e824a66 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -2,23 +2,59 @@
#ifndef PARSE_CTX_H
#define PARSE_CTX_H 1
-#define EXPR_MAX_OTHER 20
-#define MAX_PARSE_ID EXPR_MAX_OTHER
+// There are fixes that need to land upstream before we can use libbpf's headers,
+// for now use our copy unconditionally, since the data structures at this point
+// are exactly the same, no problem.
+//#ifdef HAVE_LIBBPF_SUPPORT
+//#include <bpf/hashmap.h>
+//#else
+#include "util/hashmap.h"
+//#endif
-struct parse_id {
- const char *name;
- double val;
+struct metric_ref;
+
+struct expr_id {
+ char *id;
+ struct expr_id *parent;
+};
+
+struct expr_parse_ctx {
+ struct hashmap ids;
+ struct expr_id *parent;
+};
+
+struct expr_id_data {
+ union {
+ double val;
+ struct {
+ const char *metric_name;
+ const char *metric_expr;
+ bool counted;
+ } ref;
+ struct expr_id *parent;
+ };
+
+ bool is_ref;
};
-struct parse_ctx {
- int num_ids;
- struct parse_id ids[MAX_PARSE_ID];
+struct expr_scanner_ctx {
+ int start_token;
+ int runtime;
};
-void expr__ctx_init(struct parse_ctx *ctx);
-void expr__add_id(struct parse_ctx *ctx, const char *id, double val);
-int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr);
-int expr__find_other(const char *expr, const char *one, const char ***other,
- int *num_other);
+void expr__ctx_init(struct expr_parse_ctx *ctx);
+void expr__ctx_clear(struct expr_parse_ctx *ctx);
+void expr__del_id(struct expr_parse_ctx *ctx, const char *id);
+int expr__add_id(struct expr_parse_ctx *ctx, const char *id);
+int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val);
+int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref);
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **data);
+int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **datap);
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
+ const char *expr, int runtime);
+int expr__find_other(const char *expr, const char *one,
+ struct expr_parse_ctx *ids, int runtime);
#endif
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index eaad29243c23..13e5e3c75f56 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -10,12 +10,12 @@
char *expr_get_text(yyscan_t yyscanner);
YYSTYPE *expr_get_lval(yyscan_t yyscanner);
-static int __value(YYSTYPE *yylval, char *str, int base, int token)
+static double __value(YYSTYPE *yylval, char *str, int token)
{
- u64 num;
+ double num;
errno = 0;
- num = strtoull(str, NULL, base);
+ num = strtod(str, NULL);
if (errno)
return EXPR_ERROR;
@@ -23,19 +23,19 @@ static int __value(YYSTYPE *yylval, char *str, int base, int token)
return token;
}
-static int value(yyscan_t scanner, int base)
+static int value(yyscan_t scanner)
{
YYSTYPE *yylval = expr_get_lval(scanner);
char *text = expr_get_text(scanner);
- return __value(yylval, text, base, NUMBER);
+ return __value(yylval, text, NUMBER);
}
/*
* Allow @ instead of / to be able to specify pmu/event/ without
* conflicts with normal division.
*/
-static char *normalize(char *str)
+static char *normalize(char *str, int runtime)
{
char *ret = str;
char *dst = str;
@@ -45,6 +45,19 @@ static char *normalize(char *str)
*dst++ = '/';
else if (*str == '\\')
*dst++ = *++str;
+ else if (*str == '?') {
+ char *paramval;
+ int i = 0;
+ int size = asprintf(&paramval, "%d", runtime);
+
+ if (size < 0)
+ *dst++ = '0';
+ else {
+ while (i < size)
+ *dst++ = paramval[i++];
+ free(paramval);
+ }
+ }
else
*dst++ = *str;
str++;
@@ -54,49 +67,52 @@ static char *normalize(char *str)
return ret;
}
-static int str(yyscan_t scanner, int token)
+static int str(yyscan_t scanner, int token, int runtime)
{
YYSTYPE *yylval = expr_get_lval(scanner);
char *text = expr_get_text(scanner);
- yylval->str = normalize(strdup(text));
+ yylval->str = normalize(strdup(text), runtime);
if (!yylval->str)
return EXPR_ERROR;
- yylval->str = normalize(yylval->str);
+ yylval->str = normalize(yylval->str, runtime);
return token;
}
%}
-number [0-9]+
+number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)
sch [-,=]
spec \\{sch}
-sym [0-9a-zA-Z_\.:@]+
-symbol {spec}*{sym}*{spec}*{sym}*
+sym [0-9a-zA-Z_\.:@?]+
+symbol ({spec}|{sym})+
%%
- {
- int start_token;
+ struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner);
- start_token = expr_get_extra(yyscanner);
+ {
+ int start_token = sctx->start_token;
- if (start_token) {
- expr_set_extra(NULL, yyscanner);
+ if (sctx->start_token) {
+ sctx->start_token = 0;
return start_token;
}
}
+d_ratio { return D_RATIO; }
max { return MAX; }
min { return MIN; }
if { return IF; }
else { return ELSE; }
#smt_on { return SMT_ON; }
-{number} { return value(yyscanner, 10); }
-{symbol} { return str(yyscanner, ID); }
+{number} { return value(yyscanner); }
+{symbol} { return str(yyscanner, ID, sctx->runtime); }
"|" { return '|'; }
"^" { return '^'; }
"&" { return '&'; }
+"<" { return '<'; }
+">" { return '>'; }
"-" { return '-'; }
"+" { return '+'; }
"*" { return '*'; }
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index 4720cbe79357..d34b370391c6 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -10,12 +10,20 @@
#include "smt.h"
#include <string.h>
+static double d_ratio(double val0, double val1)
+{
+ if (val1 == 0) {
+ return 0;
+ }
+ return val0 / val1;
+}
+
%}
%define api.pure full
%parse-param { double *final_val }
-%parse-param { struct parse_ctx *ctx }
+%parse-param { struct expr_parse_ctx *ctx }
%parse-param {void *scanner}
%lex-param {void* scanner}
@@ -27,11 +35,13 @@
%token EXPR_PARSE EXPR_OTHER EXPR_ERROR
%token <num> NUMBER
%token <str> ID
-%token MIN MAX IF ELSE SMT_ON
+%destructor { free ($$); } <str>
+%token MIN MAX IF ELSE SMT_ON D_RATIO
%left MIN MAX IF
%left '|'
%left '^'
%left '&'
+%left '<' '>'
%left '-' '+'
%left '*' '/' '%'
%left NEG NOT
@@ -39,26 +49,13 @@
%{
static void expr_error(double *final_val __maybe_unused,
- struct parse_ctx *ctx __maybe_unused,
+ struct expr_parse_ctx *ctx __maybe_unused,
void *scanner,
const char *s)
{
pr_debug("%s\n", s);
}
-static int lookup_id(struct parse_ctx *ctx, char *id, double *val)
-{
- int i;
-
- for (i = 0; i < ctx->num_ids; i++) {
- if (!strcasecmp(ctx->ids[i].name, id)) {
- *val = ctx->ids[i].val;
- return 0;
- }
- }
- return -1;
-}
-
%}
%%
@@ -72,16 +69,12 @@ all_other: all_other other
other: ID
{
- if (ctx->num_ids + 1 >= EXPR_MAX_OTHER) {
- pr_err("failed: way too many variables");
- YYABORT;
- }
-
- ctx->ids[ctx->num_ids++].name = $1;
+ expr__add_id(ctx, $1);
}
|
-MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')'
-
+MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ','
+|
+'<' | '>' | D_RATIO
all_expr: if_expr { *final_val = $1; }
;
@@ -92,24 +85,43 @@ if_expr:
;
expr: NUMBER
- | ID { if (lookup_id(ctx, $1, &$$) < 0) {
- pr_debug("%s not found\n", $1);
- YYABORT;
- }
+ | ID {
+ struct expr_id_data *data;
+
+ if (expr__resolve_id(ctx, $1, &data)) {
+ free($1);
+ YYABORT;
+ }
+
+ $$ = data->val;
+ free($1);
}
| expr '|' expr { $$ = (long)$1 | (long)$3; }
| expr '&' expr { $$ = (long)$1 & (long)$3; }
| expr '^' expr { $$ = (long)$1 ^ (long)$3; }
+ | expr '<' expr { $$ = $1 < $3; }
+ | expr '>' expr { $$ = $1 > $3; }
| expr '+' expr { $$ = $1 + $3; }
| expr '-' expr { $$ = $1 - $3; }
| expr '*' expr { $$ = $1 * $3; }
- | expr '/' expr { if ($3 == 0) YYABORT; $$ = $1 / $3; }
- | expr '%' expr { if ((long)$3 == 0) YYABORT; $$ = (long)$1 % (long)$3; }
+ | expr '/' expr { if ($3 == 0) {
+ pr_debug("division by zero\n");
+ YYABORT;
+ }
+ $$ = $1 / $3;
+ }
+ | expr '%' expr { if ((long)$3 == 0) {
+ pr_debug("division by zero\n");
+ YYABORT;
+ }
+ $$ = (long)$1 % (long)$3;
+ }
| '-' expr %prec NEG { $$ = -$2; }
| '(' if_expr ')' { $$ = $2; }
| MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; }
| MAX '(' expr ',' expr ')' { $$ = $3 > $5 ? $3 : $5; }
| SMT_ON { $$ = smt_on() > 0; }
+ | D_RATIO '(' expr ',' expr ')' { $$ = d_ratio($3,$5); }
;
%%
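
Of the new grammar rules, d_ratio() deserves a note: a plain '/' still aborts the parse on a zero denominator (see the YYABORT above), while d_ratio() degrades to 0, so a metric can stay defined when an event did not count. A self-contained check of that semantic, reusing the d_ratio() definition from this patch:

#include <assert.h>

static double d_ratio(double val0, double val1)
{
	if (val1 == 0)
		return 0;
	return val0 / val1;
}

int main(void)
{
	assert(d_ratio(4.0, 2.0) == 2.0);
	assert(d_ratio(4.0, 0.0) == 0.0);	/* '/' would YYABORT here */
	return 0;
}
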
diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c
index 30e9f618f6cd..dd40683bd4c0 100644
--- a/tools/perf/util/genelf_debug.c
+++ b/tools/perf/util/genelf_debug.c
@@ -342,7 +342,7 @@ static void emit_lineno_info(struct buffer_ext *be,
*/
/* start state of the state machine we take care of */
- unsigned long last_vma = code_addr;
+ unsigned long last_vma = 0;
char const *cur_filename = NULL;
unsigned long cur_file_idx = 0;
int last_line = 1;
@@ -473,7 +473,7 @@ jit_process_debug_info(uint64_t code_addr,
ent = debug_entry_next(ent);
}
add_compilation_unit(di, buffer_ext_size(dl));
- add_debug_line(dl, debug, nr_debug_entries, 0);
+ add_debug_line(dl, debug, nr_debug_entries, GEN_ELF_TEXT_OFFSET);
add_debug_abbrev(da);
if (0) buffer_ext_dump(da, "abbrev");
diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h
deleted file mode 100644
index f36c7e31780a..000000000000
--- a/tools/perf/util/group.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef GROUP_H
-#define GROUP_H 1
-
-bool arch_topdown_check_group(bool *warn);
-void arch_topdown_group_warn(void);
-
-#endif
diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c
new file mode 100644
index 000000000000..a405dad068f5
--- /dev/null
+++ b/tools/perf/util/hashmap.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+/* start with 4 buckets */
+#define HASHMAP_MIN_CAP_BITS 2
+
+static void hashmap_add_entry(struct hashmap_entry **pprev,
+ struct hashmap_entry *entry)
+{
+ entry->next = *pprev;
+ *pprev = entry;
+}
+
+static void hashmap_del_entry(struct hashmap_entry **pprev,
+ struct hashmap_entry *entry)
+{
+ *pprev = entry->next;
+ entry->next = NULL;
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn, void *ctx)
+{
+ map->hash_fn = hash_fn;
+ map->equal_fn = equal_fn;
+ map->ctx = ctx;
+
+ map->buckets = NULL;
+ map->cap = 0;
+ map->cap_bits = 0;
+ map->sz = 0;
+}
+
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn,
+ void *ctx)
+{
+ struct hashmap *map = malloc(sizeof(struct hashmap));
+
+ if (!map)
+ return ERR_PTR(-ENOMEM);
+ hashmap__init(map, hash_fn, equal_fn, ctx);
+ return map;
+}
+
+void hashmap__clear(struct hashmap *map)
+{
+ struct hashmap_entry *cur, *tmp;
+ size_t bkt;
+
+ hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+ free(cur);
+ }
+ free(map->buckets);
+ map->buckets = NULL;
+ map->cap = map->cap_bits = map->sz = 0;
+}
+
+void hashmap__free(struct hashmap *map)
+{
+ if (!map)
+ return;
+
+ hashmap__clear(map);
+ free(map);
+}
+
+size_t hashmap__size(const struct hashmap *map)
+{
+ return map->sz;
+}
+
+size_t hashmap__capacity(const struct hashmap *map)
+{
+ return map->cap;
+}
+
+static bool hashmap_needs_to_grow(struct hashmap *map)
+{
+ /* grow if empty or more than 75% filled */
+ return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap);
+}
+
+static int hashmap_grow(struct hashmap *map)
+{
+ struct hashmap_entry **new_buckets;
+ struct hashmap_entry *cur, *tmp;
+ size_t new_cap_bits, new_cap;
+ size_t h, bkt;
+
+ new_cap_bits = map->cap_bits + 1;
+ if (new_cap_bits < HASHMAP_MIN_CAP_BITS)
+ new_cap_bits = HASHMAP_MIN_CAP_BITS;
+
+ new_cap = 1UL << new_cap_bits;
+ new_buckets = calloc(new_cap, sizeof(new_buckets[0]));
+ if (!new_buckets)
+ return -ENOMEM;
+
+ hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+ h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits);
+ hashmap_add_entry(&new_buckets[h], cur);
+ }
+
+ map->cap = new_cap;
+ map->cap_bits = new_cap_bits;
+ free(map->buckets);
+ map->buckets = new_buckets;
+
+ return 0;
+}
+
+static bool hashmap_find_entry(const struct hashmap *map,
+ const void *key, size_t hash,
+ struct hashmap_entry ***pprev,
+ struct hashmap_entry **entry)
+{
+ struct hashmap_entry *cur, **prev_ptr;
+
+ if (!map->buckets)
+ return false;
+
+ for (prev_ptr = &map->buckets[hash], cur = *prev_ptr;
+ cur;
+ prev_ptr = &cur->next, cur = cur->next) {
+ if (map->equal_fn(cur->key, key, map->ctx)) {
+ if (pprev)
+ *pprev = prev_ptr;
+ *entry = cur;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+ enum hashmap_insert_strategy strategy,
+ const void **old_key, void **old_value)
+{
+ struct hashmap_entry *entry;
+ size_t h;
+ int err;
+
+ if (old_key)
+ *old_key = NULL;
+ if (old_value)
+ *old_value = NULL;
+
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ if (strategy != HASHMAP_APPEND &&
+ hashmap_find_entry(map, key, h, NULL, &entry)) {
+ if (old_key)
+ *old_key = entry->key;
+ if (old_value)
+ *old_value = entry->value;
+
+ if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) {
+ entry->key = key;
+ entry->value = value;
+ return 0;
+ } else if (strategy == HASHMAP_ADD) {
+ return -EEXIST;
+ }
+ }
+
+ if (strategy == HASHMAP_UPDATE)
+ return -ENOENT;
+
+ if (hashmap_needs_to_grow(map)) {
+ err = hashmap_grow(map);
+ if (err)
+ return err;
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ }
+
+ entry = malloc(sizeof(struct hashmap_entry));
+ if (!entry)
+ return -ENOMEM;
+
+ entry->key = key;
+ entry->value = value;
+ hashmap_add_entry(&map->buckets[h], entry);
+ map->sz++;
+
+ return 0;
+}
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value)
+{
+ struct hashmap_entry *entry;
+ size_t h;
+
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ if (!hashmap_find_entry(map, key, h, NULL, &entry))
+ return false;
+
+ if (value)
+ *value = entry->value;
+ return true;
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+ const void **old_key, void **old_value)
+{
+ struct hashmap_entry **pprev, *entry;
+ size_t h;
+
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ if (!hashmap_find_entry(map, key, h, &pprev, &entry))
+ return false;
+
+ if (old_key)
+ *old_key = entry->key;
+ if (old_value)
+ *old_value = entry->value;
+
+ hashmap_del_entry(pprev, entry);
+ free(entry);
+ map->sz--;
+
+ return true;
+}
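
The growth policy in hashmap_needs_to_grow() is easy to misread because of the integer arithmetic, so spelled out with concrete numbers:

/*
 * Grow when the table is empty or the next insert would push occupancy
 * past 75%: with cap = 4 buckets and sz = 3 entries, (3 + 1) * 4 / 3 = 5 > 4,
 * so hashmap_grow() bumps cap_bits from 2 to 3 (4 -> 8 buckets) and
 * rehashes every entry, since hash_bits() depends on cap_bits.
 */
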
+
diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h
new file mode 100644
index 000000000000..e0af36b0e5d8
--- /dev/null
+++ b/tools/perf/util/hashmap.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#ifndef __LIBBPF_HASHMAP_H
+#define __LIBBPF_HASHMAP_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <limits.h>
+
+static inline size_t hash_bits(size_t h, int bits)
+{
+ /* shuffle bits and return requested number of upper bits */
+#if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
+ /* LP64 case */
+ return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
+#elif (__SIZEOF_SIZE_T__ <= __SIZEOF_LONG__)
+ return (h * 2654435769lu) >> (__SIZEOF_LONG__ * 8 - bits);
+#else
+# error "Unsupported size_t size"
+#endif
+}
+
+typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
+typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
+
+struct hashmap_entry {
+ const void *key;
+ void *value;
+ struct hashmap_entry *next;
+};
+
+struct hashmap {
+ hashmap_hash_fn hash_fn;
+ hashmap_equal_fn equal_fn;
+ void *ctx;
+
+ struct hashmap_entry **buckets;
+ size_t cap;
+ size_t cap_bits;
+ size_t sz;
+};
+
+#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \
+ .hash_fn = (hash_fn), \
+ .equal_fn = (equal_fn), \
+ .ctx = (ctx), \
+ .buckets = NULL, \
+ .cap = 0, \
+ .cap_bits = 0, \
+ .sz = 0, \
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn, void *ctx);
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn,
+ void *ctx);
+void hashmap__clear(struct hashmap *map);
+void hashmap__free(struct hashmap *map);
+
+size_t hashmap__size(const struct hashmap *map);
+size_t hashmap__capacity(const struct hashmap *map);
+
+/*
+ * Hashmap insertion strategy:
+ * - HASHMAP_ADD - only add key/value if key doesn't exist yet;
+ * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise,
+ * update value;
+ * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do
+ * nothing and return -ENOENT;
+ * - HASHMAP_APPEND - always add key/value pair, even if key already exists.
+ * This turns hashmap into a multimap by allowing multiple values to be
+ *   associated with the same key. The most useful read API for such a
+ *   hashmap is hashmap__for_each_key_entry() iteration. If hashmap__find()
+ *   is still used, it will return the last inserted key/value entry (first
+ *   in a bucket chain).
+ */
+enum hashmap_insert_strategy {
+ HASHMAP_ADD,
+ HASHMAP_SET,
+ HASHMAP_UPDATE,
+ HASHMAP_APPEND,
+};
+
+/*
+ * hashmap__insert() adds key/value entry w/ various semantics, depending on
+ * provided strategy value. If a given key/value pair replaced already
+ * existing key/value pair, both old key and old value will be returned
+ * through old_key and old_value to allow calling code to do proper memory
+ * management.
+ */
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+ enum hashmap_insert_strategy strategy,
+ const void **old_key, void **old_value);
+
+static inline int hashmap__add(struct hashmap *map,
+ const void *key, void *value)
+{
+ return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL);
+}
+
+static inline int hashmap__set(struct hashmap *map,
+ const void *key, void *value,
+ const void **old_key, void **old_value)
+{
+ return hashmap__insert(map, key, value, HASHMAP_SET,
+ old_key, old_value);
+}
+
+static inline int hashmap__update(struct hashmap *map,
+ const void *key, void *value,
+ const void **old_key, void **old_value)
+{
+ return hashmap__insert(map, key, value, HASHMAP_UPDATE,
+ old_key, old_value);
+}
+
+static inline int hashmap__append(struct hashmap *map,
+ const void *key, void *value)
+{
+ return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL);
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+ const void **old_key, void **old_value);
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value);
+
+/*
+ * hashmap__for_each_entry - iterate over all entries in hashmap
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry(map, cur, bkt) \
+ for (bkt = 0; bkt < map->cap; bkt++) \
+ for (cur = map->buckets[bkt]; cur; cur = cur->next)
+
+/*
+ * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe
+ * against removals
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @tmp: struct hashmap_entry * used as a temporary next cursor storage
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \
+ for (bkt = 0; bkt < map->cap; bkt++) \
+ for (cur = map->buckets[bkt]; \
+ cur && ({tmp = cur->next; true; }); \
+ cur = tmp)
+
+/*
+ * hashmap__for_each_key_entry - iterate over entries associated with given key
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @key: key to iterate entries for
+ */
+#define hashmap__for_each_key_entry(map, cur, _key) \
+ for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
+ map->cap_bits); \
+ map->buckets ? map->buckets[bkt] : NULL; }); \
+ cur; \
+ cur = cur->next) \
+ if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \
+ for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
+ map->cap_bits); \
+ cur = map->buckets ? map->buckets[bkt] : NULL; }); \
+ cur && ({ tmp = cur->next; true; }); \
+ cur = tmp) \
+ if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#endif /* __LIBBPF_HASHMAP_H */
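
A minimal usage sketch for the copied hashmap: string keys, the HASHMAP_SET strategy, and the old_key/old_value contract that leaves memory management to the caller. Everything below is illustrative, not part of the patch:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "util/hashmap.h"

static size_t str_hash(const void *key, void *ctx __attribute__((unused)))
{
	const char *s;
	size_t h = 0;

	for (s = key; *s; s++)
		h = h * 31 + *s;
	return h;
}

static bool str_equal(const void *a, const void *b,
		      void *ctx __attribute__((unused)))
{
	return strcmp(a, b) == 0;
}

int main(void)
{
	struct hashmap map;
	struct hashmap_entry *cur;
	const void *old_key = NULL;
	void *old_val = NULL, *val = NULL;
	size_t bkt;

	hashmap__init(&map, str_hash, str_equal, NULL);

	/* HASHMAP_SET inserts or replaces; a replaced pair is handed back */
	hashmap__set(&map, strdup("cycles"), (void *)(long)100, &old_key, &old_val);
	hashmap__set(&map, strdup("cycles"), (void *)(long)200, &old_key, &old_val);
	free((void *)old_key);			/* the first strdup("cycles") */

	if (hashmap__find(&map, "cycles", &val))
		printf("cycles = %ld\n", (long)val);	/* prints 200 */

	/* keys are caller-owned: free them before clearing, the same way
	 * expr__ctx_clear() does for the expr ids */
	hashmap__for_each_entry((&map), cur, bkt)
		free((void *)cur->key);
	hashmap__clear(&map);
	return 0;
}
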
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index acbd046bf95c..be850e9f8852 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -46,6 +46,7 @@
#include "util/util.h" // perf_exe()
#include "cputopo.h"
#include "bpf-event.h"
+#include "clockid.h"
#include <linux/ctype.h>
#include <internal/lib.h>
@@ -525,7 +526,7 @@ static int write_event_desc(struct feat_fd *ff,
/*
* write event string as passed on cmdline
*/
- ret = do_write_string(ff, perf_evsel__name(evsel));
+ ret = do_write_string(ff, evsel__name(evsel));
if (ret < 0)
return ret;
/*
@@ -783,8 +784,7 @@ static int write_group_desc(struct feat_fd *ff,
return ret;
evlist__for_each_entry(evlist, evsel) {
- if (perf_evsel__is_group_leader(evsel) &&
- evsel->core.nr_members > 1) {
+ if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) {
const char *name = evsel->group_name ?: "{anon_group}";
u32 leader_idx = evsel->idx;
u32 nr_members = evsel->core.nr_members;
@@ -892,8 +892,42 @@ static int write_auxtrace(struct feat_fd *ff,
static int write_clockid(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
{
- return do_write(ff, &ff->ph->env.clockid_res_ns,
- sizeof(ff->ph->env.clockid_res_ns));
+ return do_write(ff, &ff->ph->env.clock.clockid_res_ns,
+ sizeof(ff->ph->env.clock.clockid_res_ns));
+}
+
+static int write_clock_data(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ u64 *data64;
+ u32 data32;
+ int ret;
+
+ /* version */
+ data32 = 1;
+
+ ret = do_write(ff, &data32, sizeof(data32));
+ if (ret < 0)
+ return ret;
+
+ /* clockid */
+ data32 = ff->ph->env.clock.clockid;
+
+ ret = do_write(ff, &data32, sizeof(data32));
+ if (ret < 0)
+ return ret;
+
+ /* TOD ref time */
+ data64 = &ff->ph->env.clock.tod_ns;
+
+ ret = do_write(ff, data64, sizeof(*data64));
+ if (ret < 0)
+ return ret;
+
+ /* clockid ref time */
+ data64 = &ff->ph->env.clock.clockid_ns;
+
+ return do_write(ff, data64, sizeof(*data64));
}
static int write_dir_format(struct feat_fd *ff,
@@ -1395,6 +1429,38 @@ static int write_compressed(struct feat_fd *ff __maybe_unused,
return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len));
}
+static int write_cpu_pmu_caps(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct perf_pmu *cpu_pmu = perf_pmu__find("cpu");
+ struct perf_pmu_caps *caps = NULL;
+ int nr_caps;
+ int ret;
+
+ if (!cpu_pmu)
+ return -ENOENT;
+
+ nr_caps = perf_pmu__caps_parse(cpu_pmu);
+ if (nr_caps < 0)
+ return nr_caps;
+
+ ret = do_write(ff, &nr_caps, sizeof(nr_caps));
+ if (ret < 0)
+ return ret;
+
+ list_for_each_entry(caps, &cpu_pmu->caps, list) {
+ ret = do_write_string(ff, caps->name);
+ if (ret < 0)
+ return ret;
+
+ ret = do_write_string(ff, caps->value);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+}
+
static void print_hostname(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -1515,7 +1581,50 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
static void print_clockid(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n",
- ff->ph->env.clockid_res_ns * 1000);
+ ff->ph->env.clock.clockid_res_ns * 1000);
+}
+
+static void print_clock_data(struct feat_fd *ff, FILE *fp)
+{
+ struct timespec clockid_ns;
+ char tstr[64], date[64];
+ struct timeval tod_ns;
+ clockid_t clockid;
+ struct tm ltime;
+ u64 ref;
+
+ if (!ff->ph->env.clock.enabled) {
+ fprintf(fp, "# reference time disabled\n");
+ return;
+ }
+
+ /* Compute TOD time. */
+ ref = ff->ph->env.clock.tod_ns;
+ tod_ns.tv_sec = ref / NSEC_PER_SEC;
+ ref -= tod_ns.tv_sec * NSEC_PER_SEC;
+ tod_ns.tv_usec = ref / NSEC_PER_USEC;
+
+ /* Compute clockid time. */
+ ref = ff->ph->env.clock.clockid_ns;
+ clockid_ns.tv_sec = ref / NSEC_PER_SEC;
+ ref -= clockid_ns.tv_sec * NSEC_PER_SEC;
+ clockid_ns.tv_nsec = ref;
+
+ clockid = ff->ph->env.clock.clockid;
+
+ if (localtime_r(&tod_ns.tv_sec, &ltime) == NULL)
+ snprintf(tstr, sizeof(tstr), "<error>");
+ else {
+ strftime(date, sizeof(date), "%F %T", &ltime);
+ scnprintf(tstr, sizeof(tstr), "%s.%06d",
+ date, (int) tod_ns.tv_usec);
+ }
+
+ fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid);
+ fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n",
+ tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec,
+ clockid_ns.tv_sec, clockid_ns.tv_nsec,
+ clockid_name(clockid));
}
static void print_dir_format(struct feat_fd *ff, FILE *fp)
@@ -1809,6 +1918,27 @@ static void print_compressed(struct feat_fd *ff, FILE *fp)
ff->ph->env.comp_level, ff->ph->env.comp_ratio);
}
+static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp)
+{
+ const char *delimiter = "# cpu pmu capabilities: ";
+ u32 nr_caps = ff->ph->env.nr_cpu_pmu_caps;
+ char *str;
+
+ if (!nr_caps) {
+ fprintf(fp, "# cpu pmu capabilities: not available\n");
+ return;
+ }
+
+ str = ff->ph->env.cpu_pmu_caps;
+ while (nr_caps--) {
+ fprintf(fp, "%s%s", delimiter, str);
+ delimiter = ", ";
+ str += strlen(str) + 1;
+ }
+
+ fprintf(fp, "\n");
+}
+
static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
{
const char *delimiter = "# pmu mappings: ";
@@ -1854,14 +1984,12 @@ static void print_group_desc(struct feat_fd *ff, FILE *fp)
session = container_of(ff->ph, struct perf_session, header);
evlist__for_each_entry(session->evlist, evsel) {
- if (perf_evsel__is_group_leader(evsel) &&
- evsel->core.nr_members > 1) {
- fprintf(fp, "# group: %s{%s", evsel->group_name ?: "",
- perf_evsel__name(evsel));
+ if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) {
+ fprintf(fp, "# group: %s{%s", evsel->group_name ?: "", evsel__name(evsel));
nr = evsel->core.nr_members - 1;
} else if (nr) {
- fprintf(fp, ",%s", perf_evsel__name(evsel));
+ fprintf(fp, ",%s", evsel__name(evsel));
if (--nr == 0)
fprintf(fp, "}\n");
@@ -1928,7 +2056,7 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev,
struct machine *machine;
u16 cpumode;
struct dso *dso;
- enum dso_kernel_type dso_type;
+ enum dso_space_type dso_space;
machine = perf_session__findnew_machine(session, bev->pid);
if (!machine)
@@ -1938,14 +2066,14 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev,
switch (cpumode) {
case PERF_RECORD_MISC_KERNEL:
- dso_type = DSO_TYPE_KERNEL;
+ dso_space = DSO_SPACE__KERNEL;
break;
case PERF_RECORD_MISC_GUEST_KERNEL:
- dso_type = DSO_TYPE_GUEST_KERNEL;
+ dso_space = DSO_SPACE__KERNEL_GUEST;
break;
case PERF_RECORD_MISC_USER:
case PERF_RECORD_MISC_GUEST_USER:
- dso_type = DSO_TYPE_USER;
+ dso_space = DSO_SPACE__USER;
break;
default:
goto out;
@@ -1954,24 +2082,28 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev,
dso = machine__findnew_dso(machine, filename);
if (dso != NULL) {
char sbuild_id[SBUILD_ID_SIZE];
+ struct build_id bid;
+ size_t size = BUILD_ID_SIZE;
+
+ if (bev->header.misc & PERF_RECORD_MISC_BUILD_ID_SIZE)
+ size = bev->size;
- dso__set_build_id(dso, &bev->build_id);
+ build_id__init(&bid, bev->data, size);
+ dso__set_build_id(dso, &bid);
- if (dso_type != DSO_TYPE_USER) {
+ if (dso_space != DSO_SPACE__USER) {
struct kmod_path m = { .name = NULL, };
if (!kmod_path__parse_name(&m, filename) && m.kmod)
dso__set_module_info(dso, &m, machine);
- else
- dso->kernel = dso_type;
+ dso->kernel = dso_space;
free(m.name);
}
- build_id__sprintf(dso->build_id, sizeof(dso->build_id),
- sbuild_id);
- pr_debug("build id event received for %s: %s\n",
- dso->long_name, sbuild_id);
+ build_id__sprintf(&dso->bid, sbuild_id);
+ pr_debug("build id event received for %s: %s [%zu]\n",
+ dso->long_name, sbuild_id, size);
dso__put(dso);
}
@@ -2682,12 +2814,46 @@ out:
static int process_clockid(struct feat_fd *ff,
void *data __maybe_unused)
{
- if (do_read_u64(ff, &ff->ph->env.clockid_res_ns))
+ if (do_read_u64(ff, &ff->ph->env.clock.clockid_res_ns))
return -1;
return 0;
}
+static int process_clock_data(struct feat_fd *ff,
+ void *_data __maybe_unused)
+{
+ u32 data32;
+ u64 data64;
+
+ /* version */
+ if (do_read_u32(ff, &data32))
+ return -1;
+
+ if (data32 != 1)
+ return -1;
+
+ /* clockid */
+ if (do_read_u32(ff, &data32))
+ return -1;
+
+ ff->ph->env.clock.clockid = data32;
+
+ /* TOD ref time */
+ if (do_read_u64(ff, &data64))
+ return -1;
+
+ ff->ph->env.clock.tod_ns = data64;
+
+ /* clockid ref time */
+ if (do_read_u64(ff, &data64))
+ return -1;
+
+ ff->ph->env.clock.clockid_ns = data64;
+ ff->ph->env.clock.enabled = true;
+ return 0;
+}
+
static int process_dir_format(struct feat_fd *ff,
void *_data __maybe_unused)
{
@@ -2846,6 +3012,60 @@ static int process_compressed(struct feat_fd *ff,
return 0;
}
+static int process_cpu_pmu_caps(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ char *name, *value;
+ struct strbuf sb;
+ u32 nr_caps;
+
+ if (do_read_u32(ff, &nr_caps))
+ return -1;
+
+ if (!nr_caps) {
+ pr_debug("cpu pmu capabilities not available\n");
+ return 0;
+ }
+
+ ff->ph->env.nr_cpu_pmu_caps = nr_caps;
+
+ if (strbuf_init(&sb, 128) < 0)
+ return -1;
+
+ while (nr_caps--) {
+ name = do_read_string(ff);
+ if (!name)
+ goto error;
+
+ value = do_read_string(ff);
+ if (!value)
+ goto free_name;
+
+ if (strbuf_addf(&sb, "%s=%s", name, value) < 0)
+ goto free_value;
+
+ /* include a NULL character at the end */
+ if (strbuf_add(&sb, "", 1) < 0)
+ goto free_value;
+
+ if (!strcmp(name, "branches"))
+ ff->ph->env.max_branches = atoi(value);
+
+ free(value);
+ free(name);
+ }
+ ff->ph->env.cpu_pmu_caps = strbuf_detach(&sb, NULL);
+ return 0;
+
+free_value:
+ free(value);
+free_name:
+ free(name);
+error:
+ strbuf_release(&sb);
+ return -1;
+}
+
#define FEAT_OPR(n, func, __full_only) \
[HEADER_##n] = { \
.name = __stringify(n), \
@@ -2903,6 +3123,8 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false),
FEAT_OPR(BPF_BTF, bpf_btf, false),
FEAT_OPR(COMPRESSED, compressed, false),
+ FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false),
+ FEAT_OPR(CLOCK_DATA, clock_data, false),
};
struct header_print_data {
@@ -3469,7 +3691,7 @@ static int perf_header__read_pipe(struct perf_session *session)
return -EINVAL;
}
- return 0;
+ return f_header.size == sizeof(f_header) ? 0 : -1;
}
static int read_attr(int fd, struct perf_header *ph,
@@ -3571,7 +3793,7 @@ int perf_session__read_header(struct perf_session *session)
struct perf_file_header f_header;
struct perf_file_attr f_attr;
u64 f_id;
- int nr_attrs, nr_ids, i, j;
+ int nr_attrs, nr_ids, i, j, err;
int fd = perf_data__fd(data);
session->evlist = evlist__new();
@@ -3580,8 +3802,16 @@ int perf_session__read_header(struct perf_session *session)
session->evlist->env = &header->env;
session->machines.host.env = &header->env;
- if (perf_data__is_pipe(data))
- return perf_header__read_pipe(session);
+
+ /*
+	 * We can read 'pipe' data events from a regular file, so
+	 * check for the pipe header regardless of the source.
+ */
+ err = perf_header__read_pipe(session);
+ if (!err || (err && perf_data__is_pipe(data))) {
+ data->is_pipe = true;
+ return err;
+ }
if (perf_file_header__read(&f_header, header, fd) < 0)
return -EINVAL;
@@ -3842,12 +4072,22 @@ int perf_event__process_tracing_data(struct perf_session *session,
{
ssize_t size_read, padding, size = event->tracing_data.size;
int fd = perf_data__fd(session->data);
- off_t offset = lseek(fd, 0, SEEK_CUR);
char buf[BUFSIZ];
- /* setup for reading amidst mmap */
- lseek(fd, offset + sizeof(struct perf_record_header_tracing_data),
- SEEK_SET);
+ /*
+	 * The pipe fd is already in the proper place and in any case
+	 * we can't move it; seeking would also break the case where we
+	 * read 'pipe' data from a regular file. trace_report() reads
+	 * data from 'fd', so we need to set it directly behind the
+ * event, where the tracing data starts.
+ */
+ if (!perf_data__is_pipe(session->data)) {
+ off_t offset = lseek(fd, 0, SEEK_CUR);
+
+ /* setup for reading amidst mmap */
+ lseek(fd, offset + sizeof(struct perf_record_header_tracing_data),
+ SEEK_SET);
+ }
size_read = trace_report(fd, &session->tevent,
session->repipe);
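
One detail of the CPU_PMU_CAPS reader above that is easy to miss: process_cpu_pmu_caps() flattens the name/value pairs into a single buffer of NUL-separated "name=value" strings, which is why print_cpu_pmu_caps() advances with strlen() + 1. A minimal walker over such a buffer (the helper name is ours, not part of the patch):

#include <stdio.h>
#include <string.h>

static void dump_caps(const char *caps, unsigned int nr_caps)
{
	while (nr_caps--) {
		puts(caps);			/* one "name=value" entry */
		caps += strlen(caps) + 1;	/* step over the NUL separator */
	}
}
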
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 840f95cee349..2aca71763ecf 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -43,6 +43,8 @@ enum {
HEADER_BPF_PROG_INFO,
HEADER_BPF_BTF,
HEADER_COMPRESSED,
+ HEADER_CPU_PMU_CAPS,
+ HEADER_CLOCK_DATA,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
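
For reference, the byte layout of the new HEADER_CLOCK_DATA section, reconstructed from write_clock_data() and process_clock_data() in header.c; the struct is purely illustrative, the code reads and writes the fields one by one:

struct clock_data_feature {	/* illustrative, not a struct in the code */
	u32 version;	/* currently 1; process_clock_data() rejects others */
	u32 clockid;	/* clock used for the session's timestamps */
	u64 tod_ns;	/* wall clock (TOD) reference time, in nanoseconds */
	u64 clockid_ns;	/* the same instant read on 'clockid', in nanoseconds */
};
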
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 283a69ff6a3d..8a793e4c9400 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1070,6 +1070,20 @@ iter_next_cumulative_entry(struct hist_entry_iter *iter,
return fill_callchain_info(al, node, iter->hide_unresolved);
}
+static bool
+hist_entry__fast__sym_diff(struct hist_entry *left,
+ struct hist_entry *right)
+{
+ struct symbol *sym_l = left->ms.sym;
+ struct symbol *sym_r = right->ms.sym;
+
+ if (!sym_l && !sym_r)
+ return left->ip != right->ip;
+
+ return !!_sort__sym_cmp(sym_l, sym_r);
+}
+
+
static int
iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
struct addr_location *al)
@@ -1096,6 +1110,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
};
int i;
struct callchain_cursor cursor;
+ bool fast = hists__has(he_tmp.hists, sym);
callchain_cursor_snapshot(&cursor, &callchain_cursor);
@@ -1106,6 +1121,14 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
* It's possible that it has cycles or recursive calls.
*/
for (i = 0; i < iter->curr; i++) {
+ /*
+ * For most cases, there are no duplicate entries in callchain.
+ * The symbols are usually different. Do a quick check for
+ * symbols first.
+ */
+ if (fast && hist_entry__fast__sym_diff(he_cache[i], &he_tmp))
+ continue;
+
if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
/* to avoid calling callback function */
iter->he = NULL;
@@ -1907,8 +1930,8 @@ static void output_resort(struct hists *hists, struct ui_progress *prog,
}
}
-void perf_evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
- hists__resort_cb_t cb, void *cb_arg)
+void evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
+ hists__resort_cb_t cb, void *cb_arg)
{
bool use_callchain;
@@ -1922,9 +1945,9 @@ void perf_evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
output_resort(evsel__hists(evsel), prog, use_callchain, cb, cb_arg);
}
-void perf_evsel__output_resort(struct evsel *evsel, struct ui_progress *prog)
+void evsel__output_resort(struct evsel *evsel, struct ui_progress *prog)
{
- return perf_evsel__output_resort_cb(evsel, prog, NULL, NULL);
+ return evsel__output_resort_cb(evsel, prog, NULL, NULL);
}
void hists__output_resort(struct hists *hists, struct ui_progress *prog)
@@ -2637,7 +2660,7 @@ size_t perf_evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp)
size_t ret = 0;
evlist__for_each_entry(evlist, pos) {
- ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos));
+ ret += fprintf(fp, "%s stats:\n", evsel__name(pos));
ret += events_stats__fprintf(&evsel__hists(pos)->stats, fp);
}
@@ -2661,7 +2684,7 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh
unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
u64 nr_events = hists->stats.total_period;
struct evsel *evsel = hists_to_evsel(hists);
- const char *ev_name = perf_evsel__name(evsel);
+ const char *ev_name = evsel__name(evsel);
char buf[512], sample_freq_str[64] = "";
size_t buflen = sizeof(buf);
char ref[30] = " show reference callgraph, ";
@@ -2672,10 +2695,10 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh
nr_events = hists->stats.total_non_filtered_period;
}
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
struct evsel *pos;
- perf_evsel__group_desc(evsel, buf, buflen);
+ evsel__group_desc(evsel, buf, buflen);
ev_name = buf;
for_each_group_member(pos, evsel) {
@@ -2822,9 +2845,8 @@ static int hists_evsel__init(struct evsel *evsel)
int hists__init(void)
{
- int err = perf_evsel__object_config(sizeof(struct hists_evsel),
- hists_evsel__init,
- hists_evsel__exit);
+ int err = evsel__object_config(sizeof(struct hists_evsel),
+ hists_evsel__init, hists_evsel__exit);
if (err)
fputs("FATAL ERROR: Couldn't setup hists class\n", stderr);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 4141295a66fa..96b1c13bbccc 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -173,9 +173,9 @@ void hist_entry__delete(struct hist_entry *he);
typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg);
-void perf_evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
- hists__resort_cb_t cb, void *cb_arg);
-void perf_evsel__output_resort(struct evsel *evsel, struct ui_progress *prog);
+void evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
+ hists__resort_cb_t cb, void *cb_arg);
+void evsel__output_resort(struct evsel *evsel, struct ui_progress *prog);
void hists__output_resort(struct hists *hists, struct ui_progress *prog);
void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
hists__resort_cb_t cb);
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 34cb380d19a3..af1e78d76228 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -432,7 +432,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
le64_to_cpu(branch->from),
le64_to_cpu(branch->to),
btsq->intel_pt_insn.length,
- buffer->buffer_nr + 1);
+ buffer->buffer_nr + 1, true, 0, 0);
if (filter && !(filter & btsq->sample_flags))
continue;
err = intel_bts_synth_branch_sample(btsq, branch);
@@ -728,6 +728,15 @@ static void intel_bts_free(struct perf_session *session)
free(bts);
}
+static bool intel_bts_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+
+ return evsel->core.attr.type == bts->pmu_type;
+}
+
struct intel_bts_synth {
struct perf_tool dummy_tool;
struct perf_session *session;
@@ -816,10 +825,10 @@ static int intel_bts_synth_events(struct intel_bts *bts,
bts->branches_id = id;
/*
* We only use sample types from PERF_SAMPLE_MASK so we can use
- * __perf_evsel__sample_size() here.
+ * __evsel__sample_size() here.
*/
bts->branches_event_size = sizeof(struct perf_record_sample) +
- __perf_evsel__sample_size(attr.sample_type);
+ __evsel__sample_size(attr.sample_type);
}
return 0;
@@ -883,6 +892,7 @@ int intel_bts_process_auxtrace_info(union perf_event *event,
bts->auxtrace.flush_events = intel_bts_flush;
bts->auxtrace.free_events = intel_bts_free_events;
bts->auxtrace.free = intel_bts_free;
+ bts->auxtrace.evsel_is_auxtrace = intel_bts_evsel_is_auxtrace;
session->auxtrace = &bts->auxtrace;
intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index f8ccfd6be0ee..697513f35154 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -55,6 +55,7 @@ enum intel_pt_pkt_state {
INTEL_PT_STATE_TIP_PGD,
INTEL_PT_STATE_FUP,
INTEL_PT_STATE_FUP_NO_TIP,
+ INTEL_PT_STATE_RESAMPLE,
};
static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
@@ -65,6 +66,7 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
case INTEL_PT_STATE_ERR_RESYNC:
case INTEL_PT_STATE_IN_SYNC:
case INTEL_PT_STATE_TNT_CONT:
+ case INTEL_PT_STATE_RESAMPLE:
return true;
case INTEL_PT_STATE_TNT:
case INTEL_PT_STATE_TIP:
@@ -109,6 +111,9 @@ struct intel_pt_decoder {
bool fixup_last_mtc;
bool have_last_ip;
bool in_psb;
+ bool hop;
+ bool hop_psb_fup;
+ bool leap;
enum intel_pt_param_flags flags;
uint64_t pos;
uint64_t last_ip;
@@ -235,6 +240,8 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
decoder->data = params->data;
decoder->return_compression = params->return_compression;
decoder->branch_enable = params->branch_enable;
+ decoder->hop = params->quick >= 1;
+ decoder->leap = params->quick >= 2;
decoder->flags = params->flags;
@@ -275,6 +282,9 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);
+ if (decoder->hop)
+ intel_pt_log("Hop mode: decoding FUP and TIPs, but not TNT\n");
+
return decoder;
}
@@ -1164,6 +1174,7 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
return 0;
if (err == -EAGAIN ||
intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
if (intel_pt_fup_event(decoder))
return 0;
return -EAGAIN;
@@ -1729,8 +1740,14 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
case INTEL_PT_FUP:
decoder->pge = true;
- if (decoder->packet.count)
+ if (decoder->packet.count) {
intel_pt_set_last_ip(decoder);
+ if (decoder->hop) {
+ /* Act on FUP at PSBEND */
+ decoder->ip = decoder->last_ip;
+ decoder->hop_psb_fup = true;
+ }
+ }
break;
case INTEL_PT_MODE_TSX:
@@ -1874,6 +1891,127 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
}
}
+static int intel_pt_resample(struct intel_pt_decoder *decoder)
+{
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+}
+
+#define HOP_PROCESS 0
+#define HOP_IGNORE 1
+#define HOP_RETURN 2
+#define HOP_AGAIN 3
+
+static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder);
+
+/* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */
+static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err)
+{
+ /* Leap from PSB to PSB, getting ip from FUP within PSB+ */
+ if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) {
+ *err = intel_pt_scan_for_psb(decoder);
+ if (*err)
+ return HOP_RETURN;
+ }
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TNT:
+ return HOP_IGNORE;
+
+ case INTEL_PT_TIP_PGD:
+ if (!decoder->packet.count)
+ return HOP_IGNORE;
+ intel_pt_set_ip(decoder);
+ decoder->state.type |= INTEL_PT_TRACE_END;
+ decoder->state.from_ip = 0;
+ decoder->state.to_ip = decoder->ip;
+ return HOP_RETURN;
+
+ case INTEL_PT_TIP:
+ if (!decoder->packet.count)
+ return HOP_IGNORE;
+ intel_pt_set_ip(decoder);
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return HOP_RETURN;
+
+ case INTEL_PT_FUP:
+ if (!decoder->packet.count)
+ return HOP_IGNORE;
+ intel_pt_set_ip(decoder);
+ if (intel_pt_fup_event(decoder))
+ return HOP_RETURN;
+ if (!decoder->branch_enable)
+ *no_tip = true;
+ if (*no_tip) {
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return HOP_RETURN;
+ }
+ *err = intel_pt_walk_fup_tip(decoder);
+ if (!*err)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ return HOP_RETURN;
+
+ case INTEL_PT_PSB:
+ decoder->last_ip = 0;
+ decoder->have_last_ip = true;
+ decoder->hop_psb_fup = false;
+ *err = intel_pt_walk_psbend(decoder);
+ if (*err == -EAGAIN)
+ return HOP_AGAIN;
+ if (*err)
+ return HOP_RETURN;
+ if (decoder->hop_psb_fup) {
+ decoder->hop_psb_fup = false;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return HOP_RETURN;
+ }
+ if (decoder->cbr != decoder->cbr_seen) {
+ decoder->state.type = 0;
+ return HOP_RETURN;
+ }
+ return HOP_IGNORE;
+
+ case INTEL_PT_BAD:
+ case INTEL_PT_PAD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_MTC:
+ case INTEL_PT_CYC:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_CBR:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_PIP:
+ case INTEL_PT_OVF:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ default:
+ return HOP_PROCESS;
+ }
+}
+
static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
{
bool no_tip = false;
@@ -1884,6 +2022,19 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
if (err)
return err;
next:
+ if (decoder->hop) {
+ switch (intel_pt_hop_trace(decoder, &no_tip, &err)) {
+ case HOP_IGNORE:
+ continue;
+ case HOP_RETURN:
+ return err;
+ case HOP_AGAIN:
+ goto next;
+ default:
+ break;
+ }
+ }
+
switch (decoder->packet.type) {
case INTEL_PT_TNT:
if (!decoder->packet.count)
@@ -1913,6 +2064,12 @@ next:
decoder->state.from_ip = 0;
decoder->state.to_ip = decoder->ip;
decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+ /*
+ * In hop mode, resample to get the to_ip as an
+ * "instruction" sample.
+ */
+ if (decoder->hop)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
return 0;
}
@@ -1942,17 +2099,13 @@ next:
}
if (decoder->set_fup_mwait)
no_tip = true;
+ if (no_tip)
+ decoder->pkt_state = INTEL_PT_STATE_FUP_NO_TIP;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_FUP;
err = intel_pt_walk_fup(decoder);
- if (err != -EAGAIN) {
- if (err)
- return err;
- if (no_tip)
- decoder->pkt_state =
- INTEL_PT_STATE_FUP_NO_TIP;
- else
- decoder->pkt_state = INTEL_PT_STATE_FUP;
- return 0;
- }
+ if (err != -EAGAIN)
+ return err;
if (no_tip) {
no_tip = false;
break;
@@ -1980,8 +2133,10 @@ next:
* possibility of another CBR change that gets caught up
* in the PSB+.
*/
- if (decoder->cbr != decoder->cbr_seen)
+ if (decoder->cbr != decoder->cbr_seen) {
+ decoder->state.type = 0;
return 0;
+ }
break;
case INTEL_PT_PIP:
@@ -2022,8 +2177,10 @@ next:
case INTEL_PT_CBR:
intel_pt_calc_cbr(decoder);
- if (decoder->cbr != decoder->cbr_seen)
+ if (decoder->cbr != decoder->cbr_seen) {
+ decoder->state.type = 0;
return 0;
+ }
break;
case INTEL_PT_MODE_EXEC:
@@ -2032,7 +2189,7 @@ next:
case INTEL_PT_MODE_TSX:
/* MODE_TSX need not be followed by FUP */
- if (!decoder->pge) {
+ if (!decoder->pge || decoder->in_psb) {
intel_pt_update_in_tx(decoder);
break;
}
@@ -2423,7 +2580,11 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
if (err)
return err;
- decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ /* In hop mode, resample to get the to_ip as an "instruction" sample */
+ if (decoder->hop)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
decoder->overflow = false;
decoder->state.from_ip = 0;
@@ -2531,6 +2692,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
decoder->ip = 0;
intel_pt_clear_stack(&decoder->stack);
+leap:
err = intel_pt_scan_for_psb(decoder);
if (err)
return err;
@@ -2544,7 +2706,20 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
if (decoder->ip) {
decoder->state.type = 0; /* Do not have a sample */
- decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ /*
+ * In hop mode, resample to get the PSB FUP ip as an
+ * "instruction" sample.
+ */
+ if (decoder->hop)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ } else if (decoder->leap) {
+ /*
+	 * In leap mode, only PSB+ is decoded, so keep leaping to the
+ * next PSB until there is an ip.
+ */
+ goto leap;
} else {
return intel_pt_sync_ip(decoder);
}
@@ -2599,19 +2774,18 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
err = intel_pt_walk_tip(decoder);
break;
case INTEL_PT_STATE_FUP:
- decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
err = intel_pt_walk_fup(decoder);
if (err == -EAGAIN)
err = intel_pt_walk_fup_tip(decoder);
- else if (!err)
- decoder->pkt_state = INTEL_PT_STATE_FUP;
break;
case INTEL_PT_STATE_FUP_NO_TIP:
- decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
err = intel_pt_walk_fup(decoder);
if (err == -EAGAIN)
err = intel_pt_walk_trace(decoder);
break;
+ case INTEL_PT_STATE_RESAMPLE:
+ err = intel_pt_resample(decoder);
+ break;
default:
err = intel_pt_bug(decoder);
break;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index e289e463d635..8645fc265481 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -250,6 +250,7 @@ struct intel_pt_params {
uint32_t tsc_ctc_ratio_n;
uint32_t tsc_ctc_ratio_d;
enum intel_pt_param_flags flags;
+ unsigned int quick;
};
struct intel_pt_decoder;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 0ccf10a0bf44..4ce109993e74 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -552,7 +552,7 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
break;
default:
break;
- };
+ }
if (!(byte & BIT(0))) {
if (byte == 0)
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 23c8289c2472..3a0348caec7d 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -33,6 +33,7 @@
#include "tsc.h"
#include "intel-pt.h"
#include "config.h"
+#include "util/perf_api_probe.h"
#include "util/synthetic-events.h"
#include "time-utils.h"
@@ -68,6 +69,10 @@ struct intel_pt {
bool est_tsc;
bool sync_switch;
bool mispred_all;
+ bool use_thread_stack;
+ bool callstack;
+ unsigned int br_stack_sz;
+ unsigned int br_stack_sz_plus;
int have_sched_switch;
u32 pmu_type;
u64 kernel_start;
@@ -124,6 +129,9 @@ struct intel_pt {
struct range *time_ranges;
unsigned int range_cnt;
+
+ struct ip_callchain *chain;
+ struct branch_stack *br_stack;
};
enum switch_state {
@@ -143,8 +151,6 @@ struct intel_pt_queue {
const struct intel_pt_state *state;
struct ip_callchain *chain;
struct branch_stack *last_branch;
- struct branch_stack *last_branch_rb;
- size_t last_branch_pos;
union perf_event *event_buf;
bool on_heap;
bool stop;
@@ -230,7 +236,7 @@ static void intel_pt_log_event(union perf_event *event)
if (!intel_pt_enable_logging || !f)
return;
- perf_event__fprintf(event, f);
+ perf_event__fprintf(event, NULL, f);
}
static void intel_pt_dump_sample(struct perf_session *session,
@@ -243,6 +249,24 @@ static void intel_pt_dump_sample(struct perf_session *session,
intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size);
}
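+/* Decide whether to log a perf event, honouring the log flags and any selected time ranges */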
+static bool intel_pt_log_events(struct intel_pt *pt, u64 tm)
+{
+ struct perf_time_interval *range = pt->synth_opts.ptime_range;
+ int n = pt->synth_opts.range_num;
+
+ if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
+ return true;
+
+ if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
+ return false;
+
+ /* perf_time__ranges_skip_sample does not work if time is zero */
+ if (!tm)
+ tm = 1;
+
+ return !n || !perf_time__ranges_skip_sample(range, n, tm);
+}
+
static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
struct auxtrace_buffer *b)
{
@@ -514,6 +538,17 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
}
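+/* Drop any cached instruction information for the given dso offset */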
+static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
+ u64 offset)
+{
+ struct auxtrace_cache *c = intel_pt_cache(dso, machine);
+
+ if (!c)
+ return;
+
+ auxtrace_cache__remove(dso->auxtrace_cache, offset);
+}
+
static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
{
return ip >= pt->kernel_start ?
@@ -868,6 +903,86 @@ static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
pt->tc.time_mult;
}
+static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt)
+{
+ size_t sz = sizeof(struct ip_callchain);
+
+ /* Add 1 to callchain_sz for callchain context */
+ sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
+ return zalloc(sz);
+}
+
+static int intel_pt_callchain_init(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN))
+ evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN;
+ }
+
+ pt->chain = intel_pt_alloc_chain(pt);
+ if (!pt->chain)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void intel_pt_add_callchain(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__findnew_thread(pt->machine,
+ sample->pid,
+ sample->tid);
+
+ thread_stack__sample_late(thread, sample->cpu, pt->chain,
+ pt->synth_opts.callchain_sz + 1, sample->ip,
+ pt->kernel_start);
+
+ sample->callchain = pt->chain;
+}
+
+static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt)
+{
+ size_t sz = sizeof(struct branch_stack);
+
+ sz += entry_cnt * sizeof(struct branch_entry);
+ return zalloc(sz);
+}
+
+static int intel_pt_br_stack_init(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
+ evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ }
+
+ pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz);
+ if (!pt->br_stack)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void intel_pt_add_br_stack(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__findnew_thread(pt->machine,
+ sample->pid,
+ sample->tid);
+
+ thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
+ pt->br_stack_sz, sample->ip,
+ pt->kernel_start);
+
+ sample->branch_stack = pt->br_stack;
+}
+
+/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
+#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U)
+
static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
unsigned int queue_nr)
{
@@ -880,26 +995,17 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
return NULL;
if (pt->synth_opts.callchain) {
- size_t sz = sizeof(struct ip_callchain);
-
- /* Add 1 to callchain_sz for callchain context */
- sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
- ptq->chain = zalloc(sz);
+ ptq->chain = intel_pt_alloc_chain(pt);
if (!ptq->chain)
goto out_free;
}
- if (pt->synth_opts.last_branch) {
- size_t sz = sizeof(struct branch_stack);
+ if (pt->synth_opts.last_branch || pt->synth_opts.other_events) {
+ unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz);
- sz += pt->synth_opts.last_branch_sz *
- sizeof(struct branch_entry);
- ptq->last_branch = zalloc(sz);
+ ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt);
if (!ptq->last_branch)
goto out_free;
- ptq->last_branch_rb = zalloc(sz);
- if (!ptq->last_branch_rb)
- goto out_free;
}
ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
@@ -924,6 +1030,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.mtc_period = intel_pt_mtc_period(pt);
params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
+ params.quick = pt->synth_opts.quick;
if (pt->filts.cnt > 0)
params.pgd_ip = intel_pt_pgd_ip;
@@ -968,7 +1075,6 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
out_free:
zfree(&ptq->event_buf);
zfree(&ptq->last_branch);
- zfree(&ptq->last_branch_rb);
zfree(&ptq->chain);
free(ptq);
return NULL;
@@ -984,7 +1090,6 @@ static void intel_pt_free_queue(void *priv)
intel_pt_decoder_free(ptq->decoder);
zfree(&ptq->event_buf);
zfree(&ptq->last_branch);
- zfree(&ptq->last_branch_rb);
zfree(&ptq->chain);
free(ptq);
}
@@ -996,6 +1101,8 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
if (queue->tid == -1 || pt->have_sched_switch) {
ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
+ if (ptq->tid == -1)
+ ptq->pid = -1;
thread__zput(ptq->thread);
}
@@ -1152,58 +1259,6 @@ static int intel_pt_setup_queues(struct intel_pt *pt)
return 0;
}
-static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
-{
- struct branch_stack *bs_src = ptq->last_branch_rb;
- struct branch_stack *bs_dst = ptq->last_branch;
- size_t nr = 0;
-
- bs_dst->nr = bs_src->nr;
-
- if (!bs_src->nr)
- return;
-
- nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
- memcpy(&bs_dst->entries[0],
- &bs_src->entries[ptq->last_branch_pos],
- sizeof(struct branch_entry) * nr);
-
- if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
- memcpy(&bs_dst->entries[nr],
- &bs_src->entries[0],
- sizeof(struct branch_entry) * ptq->last_branch_pos);
- }
-}
-
-static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
-{
- ptq->last_branch_pos = 0;
- ptq->last_branch_rb->nr = 0;
-}
-
-static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
-{
- const struct intel_pt_state *state = ptq->state;
- struct branch_stack *bs = ptq->last_branch_rb;
- struct branch_entry *be;
-
- if (!ptq->last_branch_pos)
- ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
-
- ptq->last_branch_pos -= 1;
-
- be = &bs->entries[ptq->last_branch_pos];
- be->from = state->from_ip;
- be->to = state->to_ip;
- be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
- be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
- /* No support for mispredict */
- be->flags.mispred = ptq->pt->mispred_all;
-
- if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
- bs->nr += 1;
-}
-
static inline bool intel_pt_skip_event(struct intel_pt *pt)
{
return pt->synth_opts.initial_skip &&
@@ -1271,9 +1326,9 @@ static inline int intel_pt_opt_inject(struct intel_pt *pt,
return intel_pt_inject_event(event, sample, type);
}
-static int intel_pt_deliver_synth_b_event(struct intel_pt *pt,
- union perf_event *event,
- struct perf_sample *sample, u64 type)
+static int intel_pt_deliver_synth_event(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample, u64 type)
{
int ret;
@@ -1333,8 +1388,8 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
}
- return intel_pt_deliver_synth_b_event(pt, event, &sample,
- pt->branches_sample_type);
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->branches_sample_type);
}
static void intel_pt_prep_sample(struct intel_pt *pt,
@@ -1352,27 +1407,12 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
}
if (pt->synth_opts.last_branch) {
- intel_pt_copy_last_branch_rb(ptq);
+ thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch,
+ pt->br_stack_sz);
sample->branch_stack = ptq->last_branch;
}
}
-static inline int intel_pt_deliver_synth_event(struct intel_pt *pt,
- struct intel_pt_queue *ptq,
- union perf_event *event,
- struct perf_sample *sample,
- u64 type)
-{
- int ret;
-
- ret = intel_pt_deliver_synth_b_event(pt, event, sample, type);
-
- if (pt->synth_opts.last_branch)
- intel_pt_reset_last_branch_rb(ptq);
-
- return ret;
-}
-
static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
@@ -1386,7 +1426,10 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
sample.id = ptq->pt->instructions_id;
sample.stream_id = ptq->pt->instructions_id;
- sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
+ if (pt->synth_opts.quick)
+ sample.period = 1;
+ else
+ sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
if (sample.cyc_cnt) {
@@ -1397,7 +1440,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->instructions_sample_type);
}
@@ -1415,7 +1458,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
sample.id = ptq->pt->transactions_id;
sample.stream_id = ptq->pt->transactions_id;
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->transactions_sample_type);
}
@@ -1456,7 +1499,7 @@ static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->ptwrites_sample_type);
}
@@ -1486,7 +1529,7 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->pwr_events_sample_type);
}
@@ -1511,7 +1554,7 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->pwr_events_sample_type);
}
@@ -1536,7 +1579,7 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->pwr_events_sample_type);
}
@@ -1561,7 +1604,7 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->pwr_events_sample_type);
}
@@ -1586,7 +1629,7 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->pwr_events_sample_type);
}
@@ -1680,15 +1723,14 @@ static u64 intel_pt_lbr_flags(u64 info)
union {
struct branch_flags flags;
u64 result;
- } u = {
- .flags = {
- .mispred = !!(info & LBR_INFO_MISPRED),
- .predicted = !(info & LBR_INFO_MISPRED),
- .in_tx = !!(info & LBR_INFO_IN_TX),
- .abort = !!(info & LBR_INFO_ABORT),
- .cycles = info & LBR_INFO_CYCLES,
- }
- };
+ } u;
+
+ u.result = 0;
+ u.flags.mispred = !!(info & LBR_INFO_MISPRED);
+ u.flags.predicted = !(info & LBR_INFO_MISPRED);
+ u.flags.in_tx = !!(info & LBR_INFO_IN_TX);
+ u.flags.abort = !!(info & LBR_INFO_ABORT);
+ u.flags.cycles = info & LBR_INFO_CYCLES;
return u.result;
}
@@ -1718,9 +1760,6 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack,
}
}
-/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
-#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3)
-
static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
{
const struct intel_pt_blk_items *items = &ptq->state->items;
@@ -1731,6 +1770,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
u64 sample_type = evsel->core.attr.sample_type;
u64 id = evsel->core.id[0];
u8 cpumode;
+ u64 regs[8 * sizeof(sample.intr_regs.mask)];
if (intel_pt_skip_event(pt))
return 0;
@@ -1780,8 +1820,8 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
}
if (sample_type & PERF_SAMPLE_REGS_INTR &&
- items->mask[INTEL_PT_GP_REGS_POS]) {
- u64 regs[sizeof(sample.intr_regs.mask)];
+ (items->mask[INTEL_PT_GP_REGS_POS] ||
+ items->mask[INTEL_PT_XMM_POS])) {
u64 regs_mask = evsel->core.attr.sample_regs_intr;
u64 *pos;
@@ -1796,23 +1836,18 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
- struct {
- struct branch_stack br_stack;
- struct branch_entry entries[LBRS_MAX];
- } br;
-
if (items->mask[INTEL_PT_LBR_0_POS] ||
items->mask[INTEL_PT_LBR_1_POS] ||
items->mask[INTEL_PT_LBR_2_POS]) {
- intel_pt_add_lbrs(&br.br_stack, items);
- sample.branch_stack = &br.br_stack;
+ intel_pt_add_lbrs(ptq->last_branch, items);
} else if (pt->synth_opts.last_branch) {
- intel_pt_copy_last_branch_rb(ptq);
- sample.branch_stack = ptq->last_branch;
+ thread_stack__br_sample(ptq->thread, ptq->cpu,
+ ptq->last_branch,
+ pt->br_stack_sz);
} else {
- br.br_stack.nr = 0;
- sample.branch_stack = &br.br_stack;
+ ptq->last_branch->nr = 0;
}
+ sample.branch_stack = ptq->last_branch;
}
if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
@@ -1842,7 +1877,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
sample.transaction = txn;
}
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type);
+ return intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
}
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
@@ -1852,6 +1887,15 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
char msg[MAX_AUXTRACE_ERROR_MSG];
int err;
+ if (pt->synth_opts.error_minus_flags) {
+ if (code == INTEL_PT_ERR_OVR &&
+ pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW)
+ return 0;
+ if (code == INTEL_PT_ERR_LOST &&
+ pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST)
+ return 0;
+ }
+
intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
@@ -1992,12 +2036,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
if (!(state->type & INTEL_PT_BRANCH))
return 0;
- if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
- thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
- state->to_ip, ptq->insn_len,
- state->trace_nr);
- else
+ if (pt->use_thread_stack) {
+ thread_stack__event(ptq->thread, ptq->cpu, ptq->flags,
+ state->from_ip, state->to_ip, ptq->insn_len,
+ state->trace_nr, pt->callstack,
+ pt->br_stack_sz_plus,
+ pt->mispred_all);
+ } else {
thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
+ }
if (pt->sample_branches) {
err = intel_pt_synth_branch_sample(ptq);
@@ -2005,9 +2052,6 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
return err;
}
- if (pt->synth_opts.last_branch)
- intel_pt_update_last_branch_rb(ptq);
-
if (!ptq->sync_switch)
return 0;
@@ -2484,7 +2528,7 @@ static int intel_pt_process_switch(struct intel_pt *pt,
if (evsel != pt->switch_evsel)
return 0;
- tid = perf_evsel__intval(evsel, sample, "next_pid");
+ tid = evsel__intval(evsel, sample, "next_pid");
cpu = sample->cpu;
intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
@@ -2561,14 +2605,8 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
tid = sample->tid;
}
- if (tid == -1) {
- pr_err("context_switch event has no tid\n");
- return -EINVAL;
- }
-
- intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
- cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
- &pt->tc));
+ if (tid == -1)
+ intel_pt_log("context_switch event has no tid\n");
ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
if (ret <= 0)
@@ -2594,6 +2632,67 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt,
event->itrace_start.tid);
}
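+/* Reuse the map cached in @al if @addr still falls within it, otherwise look it up */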
+static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al)
+{
+ if (!al->map || addr < al->map->start || addr >= al->map->end) {
+ if (!thread__find_map(thread, cpumode, addr, al))
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Invalidate all instruction cache entries that overlap the text poke */
+static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
+{
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ u64 addr = event->text_poke.addr + event->text_poke.new_len - 1;
+ /* Assume text poke begins in a basic block no more than 4096 bytes */
+ int cnt = 4096 + event->text_poke.new_len;
+ struct thread *thread = pt->unknown_thread;
+ struct addr_location al = { .map = NULL };
+ struct machine *machine = pt->machine;
+ struct intel_pt_cache_entry *e;
+ u64 offset;
+
+ if (!event->text_poke.new_len)
+ return 0;
+
+ for (; cnt; cnt--, addr--) {
+ if (intel_pt_find_map(thread, cpumode, addr, &al)) {
+ if (addr < event->text_poke.addr)
+ return 0;
+ continue;
+ }
+
+ if (!al.map->dso || !al.map->dso->auxtrace_cache)
+ continue;
+
+ offset = al.map->map_ip(al.map, addr);
+
+ e = intel_pt_cache_lookup(al.map->dso, machine, offset);
+ if (!e)
+ continue;
+
+ if (addr + e->byte_cnt + e->length <= event->text_poke.addr) {
+ /*
+			 * No overlap. Working backwards, there cannot be
+			 * another basic block that overlaps the text poke if
+			 * there is a branch instruction before the text poke
+			 * address.
+ */
+ if (e->branch != INTEL_PT_BR_NO_BRANCH)
+ return 0;
+ } else {
+ intel_pt_cache_invalidate(al.map->dso, machine, offset);
+ intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n",
+ al.map->dso->long_name, addr);
+ }
+ }
+
+ return 0;
+}
+
static int intel_pt_process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
@@ -2639,6 +2738,13 @@ static int intel_pt_process_event(struct perf_session *session,
if (err)
return err;
+ if (event->header.type == PERF_RECORD_SAMPLE) {
+ if (pt->synth_opts.add_callchain && !sample->callchain)
+ intel_pt_add_callchain(pt, sample);
+ if (pt->synth_opts.add_last_branch && !sample->branch_stack)
+ intel_pt_add_br_stack(pt, sample);
+ }
+
if (event->header.type == PERF_RECORD_AUX &&
(event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
pt->synth_opts.errors) {
@@ -2655,9 +2761,14 @@ static int intel_pt_process_event(struct perf_session *session,
event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
err = intel_pt_context_switch(pt, event, sample);
- intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
- event->header.type, sample->cpu, sample->time, timestamp);
- intel_pt_log_event(event);
+ if (!err && event->header.type == PERF_RECORD_TEXT_POKE)
+ err = intel_pt_text_poke(pt, event);
+
+ if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) {
+ intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
+ event->header.type, sample->cpu, sample->time, timestamp);
+ intel_pt_log_event(event);
+ }
return err;
}
@@ -2710,11 +2821,21 @@ static void intel_pt_free(struct perf_session *session)
session->auxtrace = NULL;
thread__put(pt->unknown_thread);
addr_filters__exit(&pt->filts);
+ zfree(&pt->chain);
zfree(&pt->filter);
zfree(&pt->time_ranges);
free(pt);
}
+static bool intel_pt_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+
+ return evsel->core.attr.type == pt->pmu_type;
+}
+
static int intel_pt_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
struct perf_tool *tool __maybe_unused)
@@ -2896,8 +3017,15 @@ static int intel_pt_synth_events(struct intel_pt *pt,
if (pt->synth_opts.callchain)
attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
- if (pt->synth_opts.last_branch)
+ if (pt->synth_opts.last_branch) {
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ /*
+ * We don't use the hardware index, but the sample generation
+ * code uses the new format branch_stack with this field,
+ * so the event attributes must indicate that it's present.
+ */
+ attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+ }
if (pt->synth_opts.instructions) {
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
@@ -3016,7 +3144,7 @@ static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist)
struct evsel *evsel;
evlist__for_each_entry_reverse(evlist, evsel) {
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
if (!strcmp(name, "sched:sched_switch"))
return evsel;
@@ -3310,6 +3438,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
pt->auxtrace.flush_events = intel_pt_flush;
pt->auxtrace.free_events = intel_pt_free_events;
pt->auxtrace.free = intel_pt_free;
+ pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace;
session->auxtrace = &pt->auxtrace;
if (dump_trace)
@@ -3338,6 +3467,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
!session->itrace_synth_opts->inject) {
pt->synth_opts.branches = false;
pt->synth_opts.callchain = true;
+ pt->synth_opts.add_callchain = true;
}
pt->synth_opts.thread_stack =
session->itrace_synth_opts->thread_stack;
@@ -3370,14 +3500,54 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
pt->branches_filter |= PERF_IP_FLAG_RETURN |
PERF_IP_FLAG_TRACE_BEGIN;
- if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
+ if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) &&
+ !symbol_conf.use_callchain) {
symbol_conf.use_callchain = true;
if (callchain_register_param(&callchain_param) < 0) {
symbol_conf.use_callchain = false;
pt->synth_opts.callchain = false;
+ pt->synth_opts.add_callchain = false;
}
}
+ if (pt->synth_opts.add_callchain) {
+ err = intel_pt_callchain_init(pt);
+ if (err)
+ goto err_delete_thread;
+ }
+
+ if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) {
+ pt->br_stack_sz = pt->synth_opts.last_branch_sz;
+ pt->br_stack_sz_plus = pt->br_stack_sz;
+ }
+
+ if (pt->synth_opts.add_last_branch) {
+ err = intel_pt_br_stack_init(pt);
+ if (err)
+ goto err_delete_thread;
+ /*
+ * Additional branch stack size to cater for tracing from the
+ * actual sample ip to where the sample time is recorded.
+ * Measured at about 200 branches, but generously set to 1024.
+ * If kernel space is not being traced, then add just 1 for the
+ * branch to kernel space.
+ */
+ if (intel_pt_tracing_kernel(pt))
+ pt->br_stack_sz_plus += 1024;
+ else
+ pt->br_stack_sz_plus += 1;
+ }
+
+ pt->use_thread_stack = pt->synth_opts.callchain ||
+ pt->synth_opts.add_callchain ||
+ pt->synth_opts.thread_stack ||
+ pt->synth_opts.last_branch ||
+ pt->synth_opts.add_last_branch;
+
+ pt->callstack = pt->synth_opts.callchain ||
+ pt->synth_opts.add_callchain ||
+ pt->synth_opts.thread_stack;
+
err = intel_pt_synth_events(pt, session);
if (err)
goto err_delete_thread;
@@ -3400,6 +3570,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
return 0;
err_delete_thread:
+ zfree(&pt->chain);
thread__zput(pt->unknown_thread);
err_free_queues:
intel_pt_log_disable();
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index e3ccb0ce1938..055bab7a92b3 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -26,6 +26,7 @@
#include "jit.h"
#include "jitdump.h"
#include "genelf.h"
+#include "thread.h"
#include <linux/ctype.h>
#include <linux/zalloc.h>
@@ -57,7 +58,7 @@ struct debug_line_info {
unsigned long vma;
unsigned int lineno;
/* The filename format is unspecified, absolute path, relative etc. */
- char const filename[0];
+ char const filename[];
};
struct jit_tool {
@@ -373,11 +374,15 @@ static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp)
if (!jd->use_arch_timestamp)
return timestamp;
- tc.time_shift = jd->session->time_conv.time_shift;
- tc.time_mult = jd->session->time_conv.time_mult;
- tc.time_zero = jd->session->time_conv.time_zero;
+ tc.time_shift = jd->session->time_conv.time_shift;
+ tc.time_mult = jd->session->time_conv.time_mult;
+ tc.time_zero = jd->session->time_conv.time_zero;
+ tc.time_cycles = jd->session->time_conv.time_cycles;
+ tc.time_mask = jd->session->time_conv.time_mask;
+ tc.cap_user_time_zero = jd->session->time_conv.cap_user_time_zero;
+ tc.cap_user_time_short = jd->session->time_conv.cap_user_time_short;
- if (!tc.time_mult)
+ if (!tc.cap_user_time_zero)
return 0;
return tsc_to_perf_time(timestamp, &tc);
@@ -749,6 +754,28 @@ jit_detect(char *mmap_name, pid_t pid)
return 0;
}
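+/* Record, via thread->priv, that a jitdump has been processed for this pid */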
+static void jit_add_pid(struct machine *machine, pid_t pid)
+{
+ struct thread *thread = machine__findnew_thread(machine, pid, pid);
+
+ if (!thread) {
+ pr_err("%s: thread %d not found or created\n", __func__, pid);
+ return;
+ }
+
+ thread->priv = (void *)1;
+}
+
+static bool jit_has_pid(struct machine *machine, pid_t pid)
+{
+ struct thread *thread = machine__find_thread(machine, pid, pid);
+
+ if (!thread)
+ return 0;
+
+ return (bool)thread->priv;
+}
+
int
jit_process(struct perf_session *session,
struct perf_data *output,
@@ -764,8 +791,13 @@ jit_process(struct perf_session *session,
/*
* first, detect marker mmap (i.e., the jitdump mmap)
*/
- if (jit_detect(filename, pid))
+ if (jit_detect(filename, pid)) {
+ // Strip //anon* mmaps if we processed a jitdump for this pid
+ if (jit_has_pid(machine, pid) && (strncmp(filename, "//anon", 6) == 0))
+ return 1;
+
return 0;
+ }
memset(&jd, 0, sizeof(jd));
@@ -784,6 +816,7 @@ jit_process(struct perf_session *session,
ret = jit_inject(&jd, filename);
if (!ret) {
+ jit_add_pid(machine, pid);
*nbytes = jd.bytes_written;
ret = 1;
}
diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h
index f2c3823cc81a..ab2842def83d 100644
--- a/tools/perf/util/jitdump.h
+++ b/tools/perf/util/jitdump.h
@@ -93,7 +93,7 @@ struct debug_entry {
uint64_t addr;
int lineno; /* source line number starting at 1 */
int discrim; /* column discriminator, 0 is default */
- const char name[0]; /* null terminated filename, \xff\0 if same as previous entry */
+ const char name[]; /* null terminated filename, \xff\0 if same as previous entry */
};
struct jr_code_debug_info {
@@ -101,7 +101,7 @@ struct jr_code_debug_info {
uint64_t code_addr;
uint64_t nr_entry;
- struct debug_entry entries[0];
+ struct debug_entry entries[];
};
struct jr_code_unwinding_info {
@@ -110,7 +110,7 @@ struct jr_code_unwinding_info {
uint64_t unwinding_size;
uint64_t eh_frame_hdr_size;
uint64_t mapped_size;
- const char unwinding_data[0];
+ const char unwinding_data[];
};
union jr_entry {
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 97142e9671be..7d4194ffc5b0 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -703,7 +703,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
dso__set_module_info(dso, m, machine);
dso__set_long_name(dso, strdup(filename), true);
- dso->kernel = DSO_TYPE_KERNEL;
+ dso->kernel = DSO_SPACE__KERNEL;
}
dso__get(dso);
@@ -747,7 +747,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
struct dso *dso = dso__new(event->ksymbol.name);
if (dso) {
- dso->kernel = DSO_TYPE_KERNEL;
+ dso->kernel = DSO_SPACE__KERNEL;
map = map__new2(0, dso);
}
@@ -756,9 +756,21 @@ static int machine__process_ksymbol_register(struct machine *machine,
return -ENOMEM;
}
+ if (event->ksymbol.ksym_type == PERF_RECORD_KSYMBOL_TYPE_OOL) {
+ map->dso->binary_type = DSO_BINARY_TYPE__OOL;
+ map->dso->data.file_size = event->ksymbol.len;
+ dso__set_loaded(map->dso);
+ }
+
map->start = event->ksymbol.addr;
map->end = map->start + event->ksymbol.len;
maps__insert(&machine->kmaps, map);
+ dso__set_loaded(dso);
+
+ if (is_bpf_image(event->ksymbol.name)) {
+ dso->binary_type = DSO_BINARY_TYPE__BPF_IMAGE;
+ dso__set_long_name(dso, "", false);
+ }
}
sym = symbol__new(map->map_ip(map, map->start),
@@ -796,6 +808,47 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused,
return machine__process_ksymbol_register(machine, event, sample);
}
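+/* Handle PERF_RECORD_TEXT_POKE by writing the new instruction bytes into the kernel dso's data cache */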
+int machine__process_text_poke(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct map *map = maps__find(&machine->kmaps, event->text_poke.addr);
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+
+ if (dump_trace)
+ perf_event__fprintf_text_poke(event, machine, stdout);
+
+ if (!event->text_poke.new_len)
+ return 0;
+
+ if (cpumode != PERF_RECORD_MISC_KERNEL) {
+ pr_debug("%s: unsupported cpumode - ignoring\n", __func__);
+ return 0;
+ }
+
+ if (map && map->dso) {
+ u8 *new_bytes = event->text_poke.bytes + event->text_poke.old_len;
+ int ret;
+
+ /*
+ * Kernel maps might be changed when loading symbols so loading
+ * must be done prior to using kernel maps.
+ */
+ map__load(map);
+ ret = dso__data_write_cache_addr(map->dso, map, machine,
+ event->text_poke.addr,
+ new_bytes,
+ event->text_poke.new_len);
+ if (ret != event->text_poke.new_len)
+ pr_debug("Failed to write kernel text poke at %#" PRI_lx64 "\n",
+ event->text_poke.addr);
+ } else {
+ pr_debug("Failed to find kernel text poke address map for %#" PRI_lx64 "\n",
+ event->text_poke.addr);
+ }
+
+ return 0;
+}
+
static struct map *machine__addnew_module_map(struct machine *machine, u64 start,
const char *filename)
{
@@ -912,14 +965,14 @@ static struct dso *machine__get_kernel(struct machine *machine)
vmlinux_name = symbol_conf.vmlinux_name;
kernel = machine__findnew_kernel(machine, vmlinux_name,
- "[kernel]", DSO_TYPE_KERNEL);
+ "[kernel]", DSO_SPACE__KERNEL);
} else {
if (symbol_conf.default_guest_vmlinux_name)
vmlinux_name = symbol_conf.default_guest_vmlinux_name;
kernel = machine__findnew_kernel(machine, vmlinux_name,
"[guest.kernel]",
- DSO_TYPE_GUEST_KERNEL);
+ DSO_SPACE__KERNEL_GUEST);
}
if (kernel != NULL && (!kernel->has_build_id))
@@ -1547,7 +1600,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
union perf_event *event)
{
struct map *map;
- enum dso_kernel_type kernel_type;
+ enum dso_space_type dso_space;
bool is_kernel_mmap;
/* If we have maps from kcore then we do not need or want any others */
@@ -1555,9 +1608,9 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
return 0;
if (machine__is_host(machine))
- kernel_type = DSO_TYPE_KERNEL;
+ dso_space = DSO_SPACE__KERNEL;
else
- kernel_type = DSO_TYPE_GUEST_KERNEL;
+ dso_space = DSO_SPACE__KERNEL_GUEST;
is_kernel_mmap = memcmp(event->mmap.filename,
machine->mmap_name,
@@ -1617,7 +1670,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
if (kernel == NULL)
goto out_problem;
- kernel->kernel = kernel_type;
+ kernel->kernel = dso_space;
if (__machine__create_kernel_maps(machine, kernel) < 0) {
dso__put(kernel);
goto out_problem;
@@ -1918,6 +1971,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
ret = machine__process_ksymbol(machine, event, sample); break;
case PERF_RECORD_BPF_EVENT:
ret = machine__process_bpf(machine, event, sample); break;
+ case PERF_RECORD_TEXT_POKE:
+ ret = machine__process_text_poke(machine, event, sample); break;
default:
ret = -1;
break;
@@ -2178,6 +2233,303 @@ static int remove_loops(struct branch_entry *l, int nr,
return nr;
}
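+/* Append the kernel part of the sample's callchain, in callee or caller order */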
+static int lbr_callchain_add_kernel_ip(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u64 branch_from,
+ bool callee, int end)
+{
+ struct ip_callchain *chain = sample->callchain;
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ int err, i;
+
+ if (callee) {
+ for (i = 0; i < end + 1; i++) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, chain->ips[i],
+ false, NULL, NULL, branch_from);
+ if (err)
+ return err;
+ }
+ return 0;
+ }
+
+ for (i = end; i >= 0; i--) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, chain->ips[i],
+ false, NULL, NULL, branch_from);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void save_lbr_cursor_node(struct thread *thread,
+ struct callchain_cursor *cursor,
+ int idx)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+
+ if (!lbr_stitch)
+ return;
+
+ if (cursor->pos == cursor->nr) {
+ lbr_stitch->prev_lbr_cursor[idx].valid = false;
+ return;
+ }
+
+ if (!cursor->curr)
+ cursor->curr = cursor->first;
+ else
+ cursor->curr = cursor->curr->next;
+ memcpy(&lbr_stitch->prev_lbr_cursor[idx], cursor->curr,
+ sizeof(struct callchain_cursor_node));
+
+ lbr_stitch->prev_lbr_cursor[idx].valid = true;
+ cursor->pos++;
+}
+
+static int lbr_callchain_add_lbr_ip(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u64 *branch_from,
+ bool callee)
+{
+ struct branch_stack *lbr_stack = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ int lbr_nr = lbr_stack->nr;
+ struct branch_flags *flags;
+ int err, i;
+ u64 ip;
+
+ /*
+	 * The curr and pos fields are not used in a writing session. They are
+	 * cleared in callchain_cursor_commit() when the writing session is
+	 * closed. Use curr and pos here to track the current cursor node.
+ */
+ if (thread->lbr_stitch) {
+ cursor->curr = NULL;
+ cursor->pos = cursor->nr;
+ if (cursor->nr) {
+ cursor->curr = cursor->first;
+ for (i = 0; i < (int)(cursor->nr - 1); i++)
+ cursor->curr = cursor->curr->next;
+ }
+ }
+
+ if (callee) {
+ /* Add LBR ip from first entries.to */
+ ip = entries[0].to;
+ flags = &entries[0].flags;
+ *branch_from = entries[0].from;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+
+ /*
+		 * The number of cursor nodes increases, so move the current
+		 * cursor node. There is no need to save the cursor node for
+		 * entry 0, because it is impossible to stitch the whole LBRs
+		 * of the previous sample.
+ */
+ if (thread->lbr_stitch && (cursor->pos != cursor->nr)) {
+ if (!cursor->curr)
+ cursor->curr = cursor->first;
+ else
+ cursor->curr = cursor->curr->next;
+ cursor->pos++;
+ }
+
+ /* Add LBR ip from entries.from one by one. */
+ for (i = 0; i < lbr_nr; i++) {
+ ip = entries[i].from;
+ flags = &entries[i].flags;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+ save_lbr_cursor_node(thread, cursor, i);
+ }
+ return 0;
+ }
+
+ /* Add LBR ip from entries.from one by one. */
+ for (i = lbr_nr - 1; i >= 0; i--) {
+ ip = entries[i].from;
+ flags = &entries[i].flags;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+ save_lbr_cursor_node(thread, cursor, i);
+ }
+
+ /* Add LBR ip from first entries.to */
+ ip = entries[0].to;
+ flags = &entries[0].flags;
+ *branch_from = entries[0].from;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+
+ return 0;
+}
+
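+/* Append the LBR entries saved for stitching from the previous sample to the callchain cursor */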
+static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread,
+ struct callchain_cursor *cursor)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct callchain_cursor_node *cnode;
+ struct stitch_list *stitch_node;
+ int err;
+
+ list_for_each_entry(stitch_node, &lbr_stitch->lists, node) {
+ cnode = &stitch_node->cursor;
+
+ err = callchain_cursor_append(cursor, cnode->ip,
+ &cnode->ms,
+ cnode->branch,
+ &cnode->branch_flags,
+ cnode->nr_loop_iter,
+ cnode->iter_cycles,
+ cnode->branch_from,
+ cnode->srcline);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static struct stitch_list *get_stitch_node(struct thread *thread)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct stitch_list *stitch_node;
+
+ if (!list_empty(&lbr_stitch->free_lists)) {
+ stitch_node = list_first_entry(&lbr_stitch->free_lists,
+ struct stitch_list, node);
+ list_del(&stitch_node->node);
+
+ return stitch_node;
+ }
+
+ return malloc(sizeof(struct stitch_list));
+}
+
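+/*
+ * Check whether the current sample's LBRs overlap the previous sample's,
+ * and save the previous entries beyond the overlap for later stitching.
+ */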
+static bool has_stitched_lbr(struct thread *thread,
+ struct perf_sample *cur,
+ struct perf_sample *prev,
+ unsigned int max_lbr,
+ bool callee)
+{
+ struct branch_stack *cur_stack = cur->branch_stack;
+ struct branch_entry *cur_entries = perf_sample__branch_entries(cur);
+ struct branch_stack *prev_stack = prev->branch_stack;
+ struct branch_entry *prev_entries = perf_sample__branch_entries(prev);
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ int i, j, nr_identical_branches = 0;
+ struct stitch_list *stitch_node;
+ u64 cur_base, distance;
+
+ if (!cur_stack || !prev_stack)
+ return false;
+
+ /* Find the physical index of the base-of-stack for current sample. */
+ cur_base = max_lbr - cur_stack->nr + cur_stack->hw_idx + 1;
+
+ distance = (prev_stack->hw_idx > cur_base) ? (prev_stack->hw_idx - cur_base) :
+ (max_lbr + prev_stack->hw_idx - cur_base);
+	/* The previous sample has a shorter stack. Nothing can be stitched. */
+ if (distance + 1 > prev_stack->nr)
+ return false;
+
+ /*
+ * Check if there are identical LBRs between two samples.
+	 * Identical LBRs must have the same from, to and flags values. Also,
+	 * they have to be saved in the same LBR registers (same physical
+	 * index).
+	 *
+	 * Start from the base-of-stack of the current sample.
+ */
+ for (i = distance, j = cur_stack->nr - 1; (i >= 0) && (j >= 0); i--, j--) {
+ if ((prev_entries[i].from != cur_entries[j].from) ||
+ (prev_entries[i].to != cur_entries[j].to) ||
+ (prev_entries[i].flags.value != cur_entries[j].flags.value))
+ break;
+ nr_identical_branches++;
+ }
+
+ if (!nr_identical_branches)
+ return false;
+
+ /*
+ * Save the LBRs between the base-of-stack of previous sample
+ * and the base-of-stack of current sample into lbr_stitch->lists.
+ * These LBRs will be stitched later.
+ */
+ for (i = prev_stack->nr - 1; i > (int)distance; i--) {
+
+ if (!lbr_stitch->prev_lbr_cursor[i].valid)
+ continue;
+
+ stitch_node = get_stitch_node(thread);
+ if (!stitch_node)
+ return false;
+
+ memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i],
+ sizeof(struct callchain_cursor_node));
+
+ if (callee)
+ list_add(&stitch_node->node, &lbr_stitch->lists);
+ else
+ list_add_tail(&stitch_node->node, &lbr_stitch->lists);
+ }
+
+ return true;
+}
+
+static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr)
+{
+ if (thread->lbr_stitch)
+ return true;
+
+ thread->lbr_stitch = zalloc(sizeof(*thread->lbr_stitch));
+ if (!thread->lbr_stitch)
+ goto err;
+
+ thread->lbr_stitch->prev_lbr_cursor = calloc(max_lbr + 1, sizeof(struct callchain_cursor_node));
+ if (!thread->lbr_stitch->prev_lbr_cursor)
+ goto free_lbr_stitch;
+
+ INIT_LIST_HEAD(&thread->lbr_stitch->lists);
+ INIT_LIST_HEAD(&thread->lbr_stitch->free_lists);
+
+ return true;
+
+free_lbr_stitch:
+ zfree(&thread->lbr_stitch);
+err:
+ pr_warning("Failed to allocate space for stitched LBRs. Disable LBR stitch\n");
+ thread->lbr_stitch_enable = false;
+ return false;
+}
+
/*
 * Resolve LBR callstack chain sample
* Return:
@@ -2190,12 +2542,16 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
struct perf_sample *sample,
struct symbol **parent,
struct addr_location *root_al,
- int max_stack)
+ int max_stack,
+ unsigned int max_lbr)
{
+ bool callee = (callchain_param.order == ORDER_CALLEE);
struct ip_callchain *chain = sample->callchain;
int chain_nr = min(max_stack, (int)chain->nr), i;
- u8 cpumode = PERF_RECORD_MISC_USER;
- u64 ip, branch_from = 0;
+ struct lbr_stitch *lbr_stitch;
+ bool stitched_lbr = false;
+ u64 branch_from = 0;
+ int err;
for (i = 0; i < chain_nr; i++) {
if (chain->ips[i] == PERF_CONTEXT_USER)
@@ -2203,71 +2559,65 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
}
/* LBR only affects the user callchain */
- if (i != chain_nr) {
- struct branch_stack *lbr_stack = sample->branch_stack;
- struct branch_entry *entries = perf_sample__branch_entries(sample);
- int lbr_nr = lbr_stack->nr, j, k;
- bool branch;
- struct branch_flags *flags;
- /*
- * LBR callstack can only get user call chain.
- * The mix_chain_nr is kernel call chain
- * number plus LBR user call chain number.
- * i is kernel call chain number,
- * 1 is PERF_CONTEXT_USER,
- * lbr_nr + 1 is the user call chain number.
- * For details, please refer to the comments
- * in callchain__printf
- */
- int mix_chain_nr = i + 1 + lbr_nr + 1;
+ if (i == chain_nr)
+ return 0;
- for (j = 0; j < mix_chain_nr; j++) {
- int err;
- branch = false;
- flags = NULL;
+ if (thread->lbr_stitch_enable && !sample->no_hw_idx &&
+ (max_lbr > 0) && alloc_lbr_stitch(thread, max_lbr)) {
+ lbr_stitch = thread->lbr_stitch;
- if (callchain_param.order == ORDER_CALLEE) {
- if (j < i + 1)
- ip = chain->ips[j];
- else if (j > i + 1) {
- k = j - i - 2;
- ip = entries[k].from;
- branch = true;
- flags = &entries[k].flags;
- } else {
- ip = entries[0].to;
- branch = true;
- flags = &entries[0].flags;
- branch_from = entries[0].from;
- }
- } else {
- if (j < lbr_nr) {
- k = lbr_nr - j - 1;
- ip = entries[k].from;
- branch = true;
- flags = &entries[k].flags;
- }
- else if (j > lbr_nr)
- ip = chain->ips[i + 1 - (j - lbr_nr)];
- else {
- ip = entries[0].to;
- branch = true;
- flags = &entries[0].flags;
- branch_from = entries[0].from;
- }
- }
+ stitched_lbr = has_stitched_lbr(thread, sample,
+ &lbr_stitch->prev_sample,
+ max_lbr, callee);
- err = add_callchain_ip(thread, cursor, parent,
- root_al, &cpumode, ip,
- branch, flags, NULL,
- branch_from);
+ if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) {
+ list_replace_init(&lbr_stitch->lists,
+ &lbr_stitch->free_lists);
+ }
+ memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample));
+ }
+
+ if (callee) {
+ /* Add kernel ip */
+ err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
+ parent, root_al, branch_from,
+ true, i);
+ if (err)
+ goto error;
+
+ err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
+ root_al, &branch_from, true);
+ if (err)
+ goto error;
+
+ if (stitched_lbr) {
+ err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
if (err)
- return (err < 0) ? err : 0;
+ goto error;
}
- return 1;
+
+ } else {
+ if (stitched_lbr) {
+ err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
+ if (err)
+ goto error;
+ }
+ err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
+ root_al, &branch_from, false);
+ if (err)
+ goto error;
+
+ /* Add kernel ip */
+ err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
+ parent, root_al, branch_from,
+ false, i);
+ if (err)
+ goto error;
}
+ return 1;
- return 0;
+error:
+ return (err < 0) ? err : 0;
}
static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
@@ -2311,9 +2661,12 @@ static int thread__resolve_callchain_sample(struct thread *thread,
if (chain)
chain_nr = chain->nr;
- if (perf_evsel__has_branch_callstack(evsel)) {
+ if (evsel__has_branch_callstack(evsel)) {
+ struct perf_env *env = evsel__env(evsel);
+
err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
- root_al, max_stack);
+ root_al, max_stack,
+ !env ? 0 : env->max_branches);
if (err)
return (err < 0) ? err : 0;
}
@@ -2747,3 +3100,15 @@ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, ch
*addrp = map->unmap_ip(map, sym->start);
return sym->name;
}
+
+int machine__for_each_dso(struct machine *machine, machine__dso_t fn, void *priv)
+{
+ struct dso *pos;
+ int err = 0;
+
+ list_for_each_entry(pos, &machine->dsos.head, node) {
+ if (fn(pos, machine, priv))
+ err = -1;
+ }
+ return err;
+}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index fa1be9ea00fa..26368d3c1754 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -138,6 +138,9 @@ int machine__process_mmap2_event(struct machine *machine, union perf_event *even
int machine__process_ksymbol(struct machine *machine,
union perf_event *event,
struct perf_sample *sample);
+int machine__process_text_poke(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample);
int machine__process_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
@@ -247,6 +250,10 @@ void machines__destroy_kernel_maps(struct machines *machines);
size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
+typedef int (*machine__dso_t)(struct dso *dso, struct machine *machine, void *priv);
+
+int machine__for_each_dso(struct machine *machine, machine__dso_t fn,
+ void *priv);
int machine__for_each_thread(struct machine *machine,
int (*fn)(struct thread *thread, void *p),
void *priv);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 53d96611e6a6..f44ede437dc7 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -27,21 +27,6 @@
static void __maps__insert(struct maps *maps, struct map *map);
-static inline int is_anon_memory(const char *filename, u32 flags)
-{
- return flags & MAP_HUGETLB ||
- !strcmp(filename, "//anon") ||
- !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) ||
- !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1);
-}
-
-static inline int is_no_dso_memory(const char *filename)
-{
- return !strncmp(filename, "[stack", 6) ||
- !strncmp(filename, "/SYSV",5) ||
- !strcmp(filename, "[heap]");
-}
-
static inline int is_android_lib(const char *filename)
{
return strstarts(filename, "/data/app-lib/") ||
@@ -158,7 +143,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
int anon, no_dso, vdso, android;
android = is_android_lib(filename);
- anon = is_anon_memory(filename, flags);
+ anon = is_anon_memory(filename) || flags & MAP_HUGETLB;
vdso = is_vdso_map(filename);
no_dso = is_no_dso_memory(filename);
map->prot = prot;
@@ -267,6 +252,27 @@ bool __map__is_bpf_prog(const struct map *map)
return name && (strstr(name, "bpf_prog_") == name);
}
+bool __map__is_bpf_image(const struct map *map)
+{
+ const char *name;
+
+ if (map->dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE)
+ return true;
+
+ /*
+	 * If PERF_RECORD_KSYMBOL is not included, the dso will not have the
+	 * DSO_BINARY_TYPE__BPF_IMAGE type. In such cases, we can guess the
+	 * type based on the name.
+ */
+ name = map->dso->short_name;
+ return name && is_bpf_image(name);
+}
+
+bool __map__is_ool(const struct map *map)
+{
+ return map->dso && map->dso->binary_type == DSO_BINARY_TYPE__OOL;
+}
+
bool map__has_symbols(const struct map *map)
{
return dso__has_symbols(map->dso);
@@ -325,9 +331,7 @@ int map__load(struct map *map)
if (map->dso->has_build_id) {
char sbuild_id[SBUILD_ID_SIZE];
- build_id__sprintf(map->dso->build_id,
- sizeof(map->dso->build_id),
- sbuild_id);
+ build_id__sprintf(&map->dso->bid, sbuild_id);
pr_debug("%s with build id %s not found", name, sbuild_id);
} else
pr_debug("Failed to open %s", name);
@@ -481,7 +485,7 @@ u64 map__rip_2objdump(struct map *map, u64 rip)
* kernel modules also have DSO_TYPE_USER in dso->kernel,
* but all kernel modules are ET_REL, so won't get here.
*/
- if (map->dso->kernel == DSO_TYPE_USER)
+ if (map->dso->kernel == DSO_SPACE__USER)
return rip + map->dso->text_offset;
return map->unmap_ip(map, rip) - map->reloc;
@@ -511,7 +515,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
* kernel modules also have DSO_TYPE_USER in dso->kernel,
* but all kernel modules are ET_REL, so won't get here.
*/
- if (map->dso->kernel == DSO_TYPE_USER)
+ if (map->dso->kernel == DSO_SPACE__USER)
return map->unmap_ip(map, ip - map->dso->text_offset);
return ip + map->reloc;
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 067036e8970c..b1c0686db1b7 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -147,11 +147,14 @@ int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,
bool __map__is_kernel(const struct map *map);
bool __map__is_extra_kernel_map(const struct map *map);
bool __map__is_bpf_prog(const struct map *map);
+bool __map__is_bpf_image(const struct map *map);
+bool __map__is_ool(const struct map *map);
static inline bool __map__is_kmodule(const struct map *map)
{
return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) &&
- !__map__is_bpf_prog(map);
+ !__map__is_bpf_prog(map) && !__map__is_ool(map) &&
+ !__map__is_bpf_image(map);
}
bool map__has_symbols(const struct map *map);
@@ -163,4 +166,23 @@ static inline bool is_entry_trampoline(const char *name)
return !strcmp(name, ENTRY_TRAMPOLINE_NAME);
}
+static inline bool is_bpf_image(const char *name)
+{
+ return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 ||
+ strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0;
+}
+
+static inline int is_anon_memory(const char *filename)
+{
+ return !strcmp(filename, "//anon") ||
+ !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) ||
+ !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1);
+}
+
+static inline int is_no_dso_memory(const char *filename)
+{
+ return !strncmp(filename, "[stack", 6) ||
+ !strncmp(filename, "/SYSV", 5) ||
+ !strcmp(filename, "[heap]");
+}
#endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index aa29589f6904..ea0af0bc4314 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -103,6 +103,21 @@ int perf_mem_events__init(void)
return found ? 0 : -ENOENT;
}
+void perf_mem_events__list(void)
+{
+ int j;
+
+ for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ struct perf_mem_event *e = &perf_mem_events[j];
+
+ fprintf(stderr, "%-13s%-*s%s\n",
+ e->tag,
+ verbose > 0 ? 25 : 0,
+ verbose > 0 ? perf_mem_events__name(j) : "",
+ e->supported ? ": available" : "");
+ }
+}
+
static const char * const tlb_access[] = {
"N/A",
"HIT",
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index f1389bdae7bf..904dad34f7f7 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -39,6 +39,8 @@ int perf_mem_events__init(void);
char *perf_mem_events__name(int i);
+void perf_mem_events__list(void);
+
struct mem_info;
int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c
index 797d86a1ab09..c84f5841c7ab 100644
--- a/tools/perf/util/mem2node.c
+++ b/tools/perf/util/mem2node.c
@@ -1,5 +1,6 @@
#include <errno.h>
#include <inttypes.h>
+#include <asm/bug.h>
#include <linux/bitmap.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
@@ -95,7 +96,7 @@ int mem2node__init(struct mem2node *map, struct perf_env *env)
/* Cut unused entries, due to merging. */
tmp_entries = realloc(entries, sizeof(*entries) * j);
- if (tmp_entries)
+ if (tmp_entries || WARN_ON_ONCE(j == 0))
entries = tmp_entries;
for (i = 0; i < j; i++) {
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 926449a7cdbf..060454a17293 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -15,7 +15,6 @@
#include "rblist.h"
#include <string.h>
#include <errno.h>
-#include "pmu-events/pmu-events.h"
#include "strlist.h"
#include <assert.h>
#include <linux/ctype.h>
@@ -24,6 +23,8 @@
#include <subcmd/parse-options.h>
#include <api/fs/fs.h>
#include "util.h"
+#include <asm/bug.h>
+#include "cgroup.h"
struct metric_event *metricgroup__lookup(struct rblist *metric_events,
struct evsel *evsel,
@@ -76,84 +77,222 @@ static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused,
return &me->nd;
}
+static void metric_event_delete(struct rblist *rblist __maybe_unused,
+ struct rb_node *rb_node)
+{
+ struct metric_event *me = container_of(rb_node, struct metric_event, nd);
+ struct metric_expr *expr, *tmp;
+
+ list_for_each_entry_safe(expr, tmp, &me->head, nd) {
+ free(expr->metric_refs);
+ free(expr->metric_events);
+ free(expr);
+ }
+
+ free(me);
+}
+
static void metricgroup__rblist_init(struct rblist *metric_events)
{
rblist__init(metric_events);
metric_events->node_cmp = metric_event_cmp;
metric_events->node_new = metric_event_new;
+ metric_events->node_delete = metric_event_delete;
}
-struct egroup {
+void metricgroup__rblist_exit(struct rblist *metric_events)
+{
+ rblist__exit(metric_events);
+}
+
+/*
+ * A node in the list of referenced metrics. metric_expr
+ * is held as a convenience to avoid a search through the
+ * metric list.
+ */
+struct metric_ref_node {
+ const char *metric_name;
+ const char *metric_expr;
+ struct list_head list;
+};
+
+struct metric {
struct list_head nd;
- int idnum;
- const char **ids;
+ struct expr_parse_ctx pctx;
const char *metric_name;
const char *metric_expr;
const char *metric_unit;
+ struct list_head metric_refs;
+ int metric_refs_cnt;
+ int runtime;
+ bool has_constraint;
+};
+
+#define RECURSION_ID_MAX 1000
+
+struct expr_ids {
+ struct expr_id id[RECURSION_ID_MAX];
+ int cnt;
};
+static struct expr_id *expr_ids__alloc(struct expr_ids *ids)
+{
+ if (ids->cnt >= RECURSION_ID_MAX)
+ return NULL;
+ return &ids->id[ids->cnt++];
+}
+
+static void expr_ids__exit(struct expr_ids *ids)
+{
+ int i;
+
+ for (i = 0; i < ids->cnt; i++)
+ free(ids->id[i].id);
+}
+
+static bool contains_event(struct evsel **metric_events, int num_events,
+ const char *event_name)
+{
+ int i;
+
+ for (i = 0; i < num_events; i++) {
+ if (!strcmp(metric_events[i]->name, event_name))
+ return true;
+ }
+ return false;
+}
+
+/**
+ * Find a group of events in perf_evlist that correspond to those from a parsed
+ * metric expression. Note, as find_evsel_group is called in the same order as
+ * perf_evlist was constructed, metric_no_merge doesn't need to test for
+ * underfilling a group.
+ * @perf_evlist: a list of events something like: {metric1 leader, metric1
+ * sibling, metric1 sibling}:W,duration_time,{metric2 leader, metric2 sibling,
+ * metric2 sibling}:W,duration_time
+ * @pctx: the parse context for the metric expression.
+ * @metric_no_merge: don't attempt to share events for the metric with other
+ * metrics.
+ * @has_constraint: is there a constraint on the group of events? If so, the
+ * events won't be grouped.
+ * @metric_events: out argument, null terminated array of evsel's associated
+ * with the metric.
+ * @evlist_used: in/out argument, bitmap tracking which evlist events are used.
+ * @return the first metric event or NULL on failure.
+ */
static struct evsel *find_evsel_group(struct evlist *perf_evlist,
- const char **ids,
- int idnum,
+ struct expr_parse_ctx *pctx,
+ bool metric_no_merge,
+ bool has_constraint,
struct evsel **metric_events,
- bool *evlist_used)
+ unsigned long *evlist_used)
{
- struct evsel *ev;
- int i = 0, j = 0;
- bool leader_found;
+ struct evsel *ev, *current_leader = NULL;
+ struct expr_id_data *val_ptr;
+ int i = 0, matched_events = 0, events_to_match;
+ const int idnum = (int)hashmap__size(&pctx->ids);
+
+ /*
+	 * duration_time is always grouped separately. When events are grouped
+	 * (i.e. has_constraint is false), ignore it in the matching loop and
+ * add it to metric_events at the end.
+ */
+ if (!has_constraint &&
+ hashmap__find(&pctx->ids, "duration_time", (void **)&val_ptr))
+ events_to_match = idnum - 1;
+ else
+ events_to_match = idnum;
evlist__for_each_entry (perf_evlist, ev) {
- if (evlist_used[j++])
+ /*
+ * Events with a constraint aren't grouped and match the first
+ * events available.
+ */
+ if (has_constraint && ev->weak_group)
continue;
- if (!strcmp(ev->name, ids[i])) {
- if (!metric_events[i])
- metric_events[i] = ev;
- i++;
- if (i == idnum)
- break;
- } else {
- /* Discard the whole match and start again */
- i = 0;
+ /* Ignore event if already used and merging is disabled. */
+ if (metric_no_merge && test_bit(ev->idx, evlist_used))
+ continue;
+ if (!has_constraint && ev->leader != current_leader) {
+ /*
+ * Start of a new group, discard the whole match and
+ * start again.
+ */
+ matched_events = 0;
memset(metric_events, 0,
sizeof(struct evsel *) * idnum);
+ current_leader = ev->leader;
+ }
+ /*
+ * Check for duplicate events with the same name. For example,
+ * uncore_imc/cas_count_read/ will turn into 6 events per socket
+ * on skylakex. Only the first such event is placed in
+ * metric_events. If events aren't grouped then this also
+		 * ensures that the same event appearing in different sibling
+		 * groups isn't added to metric_events twice.
+ */
+ if (contains_event(metric_events, matched_events, ev->name))
+ continue;
+ /* Does this event belong to the parse context? */
+ if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr))
+ metric_events[matched_events++] = ev;
- if (!strcmp(ev->name, ids[i])) {
- if (!metric_events[i])
- metric_events[i] = ev;
- i++;
- if (i == idnum)
- break;
+ if (matched_events == events_to_match)
+ break;
+ }
+
+ if (events_to_match != idnum) {
+ /* Add the first duration_time. */
+ evlist__for_each_entry(perf_evlist, ev) {
+ if (!strcmp(ev->name, "duration_time")) {
+ metric_events[matched_events++] = ev;
+ break;
}
}
}
- if (i != idnum) {
- /* Not whole match */
+ if (matched_events != idnum) {
+ /* Not a whole match */
return NULL;
}
metric_events[idnum] = NULL;
for (i = 0; i < idnum; i++) {
- leader_found = false;
- evlist__for_each_entry(perf_evlist, ev) {
- if (!leader_found && (ev == metric_events[i]))
- leader_found = true;
-
- if (leader_found &&
- !strcmp(ev->name, metric_events[i]->name)) {
+ ev = metric_events[i];
+ /* Don't free the used events. */
+ set_bit(ev->idx, evlist_used);
+ /*
+ * The metric leader points to the identically named event in
+ * metric_events.
+ */
+ ev->metric_leader = ev;
+ /*
+ * Mark two events with identical names in the same group (or
+		 * globally) as being in use, since uncore events may be duplicated
+ * for each pmu. Set the metric leader of such events to be the
+ * event that appears in metric_events.
+ */
+ evlist__for_each_entry_continue(perf_evlist, ev) {
+ /*
+ * If events are grouped then the search can terminate
+			 * when the group is left.
+ */
+ if (!has_constraint &&
+ ev->leader != metric_events[i]->leader)
+ break;
+ if (!strcmp(metric_events[i]->name, ev->name)) {
+ set_bit(ev->idx, evlist_used);
ev->metric_leader = metric_events[i];
}
- j++;
}
- ev = metric_events[i];
- evlist_used[ev->idx] = true;
}
return metric_events[0];
}
static int metricgroup__setup_events(struct list_head *groups,
+ bool metric_no_merge,
struct evlist *perf_evlist,
struct rblist *metric_events_list)
{
@@ -161,51 +300,94 @@ static int metricgroup__setup_events(struct list_head *groups,
struct metric_expr *expr;
int i = 0;
int ret = 0;
- struct egroup *eg;
- struct evsel *evsel;
- bool *evlist_used;
+ struct metric *m;
+ struct evsel *evsel, *tmp;
+ unsigned long *evlist_used;
- evlist_used = calloc(perf_evlist->core.nr_entries, sizeof(bool));
- if (!evlist_used) {
- ret = -ENOMEM;
- return ret;
- }
+ evlist_used = bitmap_alloc(perf_evlist->core.nr_entries);
+ if (!evlist_used)
+ return -ENOMEM;
- list_for_each_entry (eg, groups, nd) {
+ list_for_each_entry (m, groups, nd) {
struct evsel **metric_events;
+ struct metric_ref *metric_refs = NULL;
- metric_events = calloc(sizeof(void *), eg->idnum + 1);
+ metric_events = calloc(sizeof(void *),
+ hashmap__size(&m->pctx.ids) + 1);
if (!metric_events) {
ret = -ENOMEM;
break;
}
- evsel = find_evsel_group(perf_evlist, eg->ids, eg->idnum,
- metric_events, evlist_used);
+ evsel = find_evsel_group(perf_evlist, &m->pctx,
+ metric_no_merge,
+ m->has_constraint, metric_events,
+ evlist_used);
if (!evsel) {
pr_debug("Cannot resolve %s: %s\n",
- eg->metric_name, eg->metric_expr);
+ m->metric_name, m->metric_expr);
+ free(metric_events);
continue;
}
- for (i = 0; i < eg->idnum; i++)
+ for (i = 0; metric_events[i]; i++)
metric_events[i]->collect_stat = true;
me = metricgroup__lookup(metric_events_list, evsel, true);
if (!me) {
ret = -ENOMEM;
+ free(metric_events);
break;
}
expr = malloc(sizeof(struct metric_expr));
if (!expr) {
ret = -ENOMEM;
+ free(metric_events);
break;
}
- expr->metric_expr = eg->metric_expr;
- expr->metric_name = eg->metric_name;
- expr->metric_unit = eg->metric_unit;
+
+ /*
+		 * Collect and store nested expressions
+ * for metric processing.
+ */
+ if (m->metric_refs_cnt) {
+ struct metric_ref_node *ref;
+
+ metric_refs = zalloc(sizeof(struct metric_ref) * (m->metric_refs_cnt + 1));
+ if (!metric_refs) {
+ ret = -ENOMEM;
+ free(metric_events);
+ free(expr);
+ break;
+ }
+
+ i = 0;
+ list_for_each_entry(ref, &m->metric_refs, list) {
+ /*
+ * Intentionally passing just const char pointers,
+ * originally from 'struct pmu_event' object.
+ * We don't need to change them, so there's no
+ * need to create our own copy.
+ */
+ metric_refs[i].metric_name = ref->metric_name;
+ metric_refs[i].metric_expr = ref->metric_expr;
+ i++;
+ }
+		}
+
+ expr->metric_refs = metric_refs;
+ expr->metric_expr = m->metric_expr;
+ expr->metric_name = m->metric_name;
+ expr->metric_unit = m->metric_unit;
expr->metric_events = metric_events;
+ expr->runtime = m->runtime;
list_add(&expr->nd, &me->head);
}
- free(evlist_used);
+ evlist__for_each_entry_safe(perf_evlist, tmp, evsel) {
+ if (!test_bit(evsel->idx, evlist_used)) {
+ evlist__remove(perf_evlist, evsel);
+ evsel__delete(evsel);
+ }
+ }
+ bitmap_free(evlist_used);
return ret;
}
@@ -387,15 +569,20 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
continue;
strlist__add(me->metrics, s);
}
+
+ if (!raw)
+ free(s);
}
free(omg);
}
}
- if (metricgroups && !raw)
- printf("\nMetric Groups:\n\n");
- else if (metrics && !raw)
- printf("\nMetrics:\n\n");
+ if (!filter || !rblist__empty(&groups)) {
+ if (metricgroups && !raw)
+ printf("\nMetric Groups:\n\n");
+ else if (metrics && !raw)
+ printf("\nMetrics:\n\n");
+ }
for (node = rb_first_cached(&groups.entries); node; node = next) {
struct mep *me = container_of(node, struct mep, nd);
@@ -413,43 +600,49 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
}
static void metricgroup__add_metric_weak_group(struct strbuf *events,
- const char **ids,
- int idnum)
+ struct expr_parse_ctx *ctx)
{
- bool no_group = false;
- int i;
+ struct hashmap_entry *cur;
+ size_t bkt;
+ bool no_group = true, has_duration = false;
- for (i = 0; i < idnum; i++) {
- pr_debug("found event %s\n", ids[i]);
+ hashmap__for_each_entry((&ctx->ids), cur, bkt) {
+ pr_debug("found event %s\n", (const char *)cur->key);
/*
* Duration time maps to a software event and can make
* groups not count. Always use it outside a
* group.
*/
- if (!strcmp(ids[i], "duration_time")) {
- if (i > 0)
- strbuf_addf(events, "}:W,");
- strbuf_addf(events, "duration_time");
- no_group = true;
+ if (!strcmp(cur->key, "duration_time")) {
+ has_duration = true;
continue;
}
strbuf_addf(events, "%s%s",
- i == 0 || no_group ? "{" : ",",
- ids[i]);
+ no_group ? "{" : ",",
+ (const char *)cur->key);
no_group = false;
}
- if (!no_group)
+ if (!no_group) {
strbuf_addf(events, "}:W");
+ if (has_duration)
+ strbuf_addf(events, ",duration_time");
+ } else if (has_duration)
+ strbuf_addf(events, "duration_time");
}
static void metricgroup__add_metric_non_group(struct strbuf *events,
- const char **ids,
- int idnum)
+ struct expr_parse_ctx *ctx)
{
- int i;
-
- for (i = 0; i < idnum; i++)
- strbuf_addf(events, ",%s", ids[i]);
+ struct hashmap_entry *cur;
+ size_t bkt;
+ bool first = true;
+
+ hashmap__for_each_entry((&ctx->ids), cur, bkt) {
+ if (!first)
+ strbuf_addf(events, ",");
+ strbuf_addf(events, "%s", (const char *)cur->key);
+ first = false;
+ }
}
static void metricgroup___watchdog_constraint_hint(const char *name, bool foot)
@@ -485,61 +678,354 @@ static bool metricgroup__has_constraint(struct pmu_event *pe)
return false;
}
-static int metricgroup__add_metric(const char *metric, struct strbuf *events,
- struct list_head *group_list)
+int __weak arch_get_runtimeparam(struct pmu_event *pe __maybe_unused)
{
- struct pmu_events_map *map = perf_pmu__find_map(NULL);
- struct pmu_event *pe;
- int i, ret = -EINVAL;
+ return 1;
+}
- if (!map)
+static int __add_metric(struct list_head *metric_list,
+ struct pmu_event *pe,
+ bool metric_no_group,
+ int runtime,
+ struct metric **mp,
+ struct expr_id *parent,
+ struct expr_ids *ids)
+{
+ struct metric_ref_node *ref;
+ struct metric *m;
+
+ if (*mp == NULL) {
+ /*
+		 * We got here for the parent group;
+ * allocate it and put it on the list.
+ */
+ m = zalloc(sizeof(*m));
+ if (!m)
+ return -ENOMEM;
+
+ expr__ctx_init(&m->pctx);
+ m->metric_name = pe->metric_name;
+ m->metric_expr = pe->metric_expr;
+ m->metric_unit = pe->unit;
+ m->runtime = runtime;
+ m->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
+ INIT_LIST_HEAD(&m->metric_refs);
+ m->metric_refs_cnt = 0;
+
+ parent = expr_ids__alloc(ids);
+ if (!parent) {
+ free(m);
+ return -EINVAL;
+ }
+
+ parent->id = strdup(pe->metric_name);
+ if (!parent->id) {
+ free(m);
+ return -ENOMEM;
+ }
+ *mp = m;
+ } else {
+ /*
+ * We got here for the referenced metric, via the
+ * recursive metricgroup__add_metric call, add
+		 * recursive metricgroup__add_metric call; add
+ */
+ m = *mp;
+
+ ref = malloc(sizeof(*ref));
+ if (!ref)
+ return -ENOMEM;
+
+ /*
+ * Intentionally passing just const char pointers,
+ * from 'pe' object, so they never go away. We don't
+ * need to change them, so there's no need to create
+ * our own copy.
+ */
+ ref->metric_name = pe->metric_name;
+ ref->metric_expr = pe->metric_expr;
+
+ list_add(&ref->list, &m->metric_refs);
+ m->metric_refs_cnt++;
+ }
+
+ /* Force all found IDs in metric to have us as parent ID. */
+ WARN_ON_ONCE(!parent);
+ m->pctx.parent = parent;
+
+ /*
+ * For both the parent and referenced metrics, we parse
+	 * all the metric's IDs and add them to the parent context.
+ */
+ if (expr__find_other(pe->metric_expr, NULL, &m->pctx, runtime) < 0) {
+ if (m->metric_refs_cnt == 0) {
+ expr__ctx_clear(&m->pctx);
+ free(m);
+ *mp = NULL;
+ }
+ return -EINVAL;
+ }
+
+ /*
+	 * We only add a new group in the 'parent' call,
+	 * so bail out in the referenced metric case.
+ */
+ if (m->metric_refs_cnt)
return 0;
- for (i = 0; ; i++) {
- pe = &map->table[i];
+ if (list_empty(metric_list))
+ list_add(&m->nd, metric_list);
+ else {
+ struct list_head *pos;
- if (!pe->name && !pe->metric_group && !pe->metric_name)
- break;
- if (!pe->metric_expr)
- continue;
- if (match_metric(pe->metric_group, metric) ||
- match_metric(pe->metric_name, metric)) {
- const char **ids;
- int idnum;
- struct egroup *eg;
+ /* Place the largest groups at the front. */
+ list_for_each_prev(pos, metric_list) {
+ struct metric *old = list_entry(pos, struct metric, nd);
- pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
+ if (hashmap__size(&m->pctx.ids) <=
+ hashmap__size(&old->pctx.ids))
+ break;
+ }
+ list_add(&m->nd, pos);
+ }
- if (expr__find_other(pe->metric_expr,
- NULL, &ids, &idnum) < 0)
+ return 0;
+}
+
+#define map_for_each_event(__pe, __idx, __map) \
+ for (__idx = 0, __pe = &__map->table[__idx]; \
+ __pe->name || __pe->metric_group || __pe->metric_name; \
+ __pe = &__map->table[++__idx])
+
+#define map_for_each_metric(__pe, __idx, __map, __metric) \
+ map_for_each_event(__pe, __idx, __map) \
+ if (__pe->metric_expr && \
+ (match_metric(__pe->metric_group, __metric) || \
+ match_metric(__pe->metric_name, __metric)))
+
+static struct pmu_event *find_metric(const char *metric, struct pmu_events_map *map)
+{
+ struct pmu_event *pe;
+ int i;
+
+ map_for_each_event(pe, i, map) {
+ if (match_metric(pe->metric_name, metric))
+ return pe;
+ }
+
+ return NULL;
+}
+
+static int recursion_check(struct metric *m, const char *id, struct expr_id **parent,
+ struct expr_ids *ids)
+{
+ struct expr_id_data *data;
+ struct expr_id *p;
+ int ret;
+
+ /*
+ * We get the parent referenced by 'id' argument and
+ * traverse through all the parent object IDs to check
+	 * if we already processed 'id'. If we did, it's recursion
+ * and we fail.
+ */
+ ret = expr__get_id(&m->pctx, id, &data);
+ if (ret)
+ return ret;
+
+ p = data->parent;
+
+ while (p->parent) {
+ if (!strcmp(p->id, id)) {
+ pr_err("failed: recursion detected for %s\n", id);
+ return -1;
+ }
+ p = p->parent;
+ }
+
+ /*
+	 * If we are over the limit of static entries, the metric
+	 * is too difficult/nested to process, so fail as well.
+ */
+ p = expr_ids__alloc(ids);
+ if (!p) {
+ pr_err("failed: too many nested metrics\n");
+ return -EINVAL;
+ }
+
+ p->id = strdup(id);
+ p->parent = data->parent;
+ *parent = p;
+
+ return p->id ? 0 : -ENOMEM;
+}
+
+static int add_metric(struct list_head *metric_list,
+ struct pmu_event *pe,
+ bool metric_no_group,
+ struct metric **mp,
+ struct expr_id *parent,
+ struct expr_ids *ids);
+
+static int __resolve_metric(struct metric *m,
+ bool metric_no_group,
+ struct list_head *metric_list,
+ struct pmu_events_map *map,
+ struct expr_ids *ids)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+ bool all;
+ int ret;
+
+ /*
+	 * Iterate over all the parsed IDs and, if an ID names a metric,
+ * add it to the context.
+ */
+ do {
+ all = true;
+ hashmap__for_each_entry((&m->pctx.ids), cur, bkt) {
+ struct expr_id *parent;
+ struct pmu_event *pe;
+
+ pe = find_metric(cur->key, map);
+ if (!pe)
continue;
- if (events->len > 0)
- strbuf_addf(events, ",");
- if (metricgroup__has_constraint(pe))
- metricgroup__add_metric_non_group(events, ids, idnum);
- else
- metricgroup__add_metric_weak_group(events, ids, idnum);
+ ret = recursion_check(m, cur->key, &parent, ids);
+ if (ret)
+ return ret;
- eg = malloc(sizeof(struct egroup));
- if (!eg) {
- ret = -ENOMEM;
- break;
- }
- eg->ids = ids;
- eg->idnum = idnum;
- eg->metric_name = pe->metric_name;
- eg->metric_expr = pe->metric_expr;
- eg->metric_unit = pe->unit;
- list_add_tail(&eg->nd, group_list);
- ret = 0;
+ all = false;
+ /* The metric key itself needs to go out.. */
+ expr__del_id(&m->pctx, cur->key);
+
+ /* ... and it gets resolved to the parent context. */
+ ret = add_metric(metric_list, pe, metric_no_group, &m, parent, ids);
+ if (ret)
+ return ret;
+
+ /*
+			 * We added a new metric to the hashmap, so we need
+ * to break the iteration and start over.
+ */
+ break;
}
+ } while (!all);
+
+ return 0;
+}
+
+static int resolve_metric(bool metric_no_group,
+ struct list_head *metric_list,
+ struct pmu_events_map *map,
+ struct expr_ids *ids)
+{
+ struct metric *m;
+ int err;
+
+ list_for_each_entry(m, metric_list, nd) {
+ err = __resolve_metric(m, metric_no_group, metric_list, map, ids);
+ if (err)
+ return err;
}
+ return 0;
+}
+
+static int add_metric(struct list_head *metric_list,
+ struct pmu_event *pe,
+ bool metric_no_group,
+ struct metric **m,
+ struct expr_id *parent,
+ struct expr_ids *ids)
+{
+ struct metric *orig = *m;
+ int ret = 0;
+
+ pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
+
+ if (!strstr(pe->metric_expr, "?")) {
+ ret = __add_metric(metric_list, pe, metric_no_group, 1, m, parent, ids);
+ } else {
+ int j, count;
+
+ count = arch_get_runtimeparam(pe);
+
+		/* This loop creates multiple events
+		 * depending on the count value and adds
+		 * them to metric_list.
+ */
+
+ for (j = 0; j < count && !ret; j++, *m = orig)
+ ret = __add_metric(metric_list, pe, metric_no_group, j, m, parent, ids);
+ }
+
return ret;
}
-static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
- struct list_head *group_list)
+static int metricgroup__add_metric(const char *metric, bool metric_no_group,
+ struct strbuf *events,
+ struct list_head *metric_list,
+ struct pmu_events_map *map)
+{
+ struct expr_ids ids = { .cnt = 0, };
+ struct pmu_event *pe;
+ struct metric *m;
+ LIST_HEAD(list);
+ int i, ret;
+ bool has_match = false;
+
+ map_for_each_metric(pe, i, map, metric) {
+ has_match = true;
+ m = NULL;
+
+ ret = add_metric(&list, pe, metric_no_group, &m, NULL, &ids);
+ if (ret)
+ goto out;
+
+ /*
+ * Process any possible referenced metrics
+ * included in the expression.
+ */
+ ret = resolve_metric(metric_no_group,
+ &list, map, &ids);
+ if (ret)
+ goto out;
+ }
+
+ /* End of pmu events. */
+ if (!has_match) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ list_for_each_entry(m, &list, nd) {
+ if (events->len > 0)
+ strbuf_addf(events, ",");
+
+ if (m->has_constraint) {
+ metricgroup__add_metric_non_group(events,
+ &m->pctx);
+ } else {
+ metricgroup__add_metric_weak_group(events,
+ &m->pctx);
+ }
+ }
+
+out:
+ /*
+	 * Add to metric_list so that the metrics can be released
+	 * even on failure.
+ */
+ list_splice(&list, metric_list);
+ expr_ids__exit(&ids);
+ return ret;
+}
+
+static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
+ struct strbuf *events,
+ struct list_head *metric_list,
+ struct pmu_events_map *map)
{
char *llist, *nlist, *p;
int ret = -EINVAL;
@@ -553,7 +1039,8 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
strbuf_addf(events, "%s", "");
while ((p = strsep(&llist, ",")) != NULL) {
- ret = metricgroup__add_metric(p, events, group_list);
+ ret = metricgroup__add_metric(p, metric_no_group, events,
+ metric_list, map);
if (ret == -EINVAL) {
fprintf(stderr, "Cannot find metric or group `%s'\n",
p);
@@ -568,50 +1055,88 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
return ret;
}
-static void metricgroup__free_egroups(struct list_head *group_list)
+static void metric__free_refs(struct metric *metric)
{
- struct egroup *eg, *egtmp;
- int i;
+ struct metric_ref_node *ref, *tmp;
- list_for_each_entry_safe (eg, egtmp, group_list, nd) {
- for (i = 0; i < eg->idnum; i++)
- zfree(&eg->ids[i]);
- zfree(&eg->ids);
- list_del_init(&eg->nd);
- free(eg);
+ list_for_each_entry_safe(ref, tmp, &metric->metric_refs, list) {
+ list_del(&ref->list);
+ free(ref);
}
}
-int metricgroup__parse_groups(const struct option *opt,
- const char *str,
- struct rblist *metric_events)
+static void metricgroup__free_metrics(struct list_head *metric_list)
+{
+ struct metric *m, *tmp;
+
+ list_for_each_entry_safe (m, tmp, metric_list, nd) {
+ metric__free_refs(m);
+ expr__ctx_clear(&m->pctx);
+ list_del_init(&m->nd);
+ free(m);
+ }
+}
+
+static int parse_groups(struct evlist *perf_evlist, const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ struct perf_pmu *fake_pmu,
+ struct rblist *metric_events,
+ struct pmu_events_map *map)
{
struct parse_events_error parse_error;
- struct evlist *perf_evlist = *(struct evlist **)opt->value;
struct strbuf extra_events;
- LIST_HEAD(group_list);
+ LIST_HEAD(metric_list);
int ret;
if (metric_events->nr_entries == 0)
metricgroup__rblist_init(metric_events);
- ret = metricgroup__add_metric_list(str, &extra_events, &group_list);
+ ret = metricgroup__add_metric_list(str, metric_no_group,
+ &extra_events, &metric_list, map);
if (ret)
- return ret;
+ goto out;
pr_debug("adding %s\n", extra_events.buf);
bzero(&parse_error, sizeof(parse_error));
- ret = parse_events(perf_evlist, extra_events.buf, &parse_error);
+ ret = __parse_events(perf_evlist, extra_events.buf, &parse_error, fake_pmu);
if (ret) {
parse_events_print_error(&parse_error, extra_events.buf);
goto out;
}
- strbuf_release(&extra_events);
- ret = metricgroup__setup_events(&group_list, perf_evlist,
- metric_events);
+ ret = metricgroup__setup_events(&metric_list, metric_no_merge,
+ perf_evlist, metric_events);
out:
- metricgroup__free_egroups(&group_list);
+ metricgroup__free_metrics(&metric_list);
+ strbuf_release(&extra_events);
return ret;
}
+int metricgroup__parse_groups(const struct option *opt,
+ const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ struct rblist *metric_events)
+{
+ struct evlist *perf_evlist = *(struct evlist **)opt->value;
+ struct pmu_events_map *map = perf_pmu__find_map(NULL);
+
+ if (!map)
+ return 0;
+
+ return parse_groups(perf_evlist, str, metric_no_group,
+ metric_no_merge, NULL, metric_events, map);
+}
+
+int metricgroup__parse_groups_test(struct evlist *evlist,
+ struct pmu_events_map *map,
+ const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ struct rblist *metric_events)
+{
+ return parse_groups(evlist, str, metric_no_group,
+ metric_no_merge, &perf_pmu__fake, metric_events, map);
+}
+
bool metricgroup__has_metric(const char *metric)
{
struct pmu_events_map *map = perf_pmu__find_map(NULL);
@@ -633,3 +1158,87 @@ bool metricgroup__has_metric(const char *metric)
}
return false;
}
+
+int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
+ struct rblist *new_metric_events,
+ struct rblist *old_metric_events)
+{
+ unsigned i;
+
+ for (i = 0; i < rblist__nr_entries(old_metric_events); i++) {
+ struct rb_node *nd;
+ struct metric_event *old_me, *new_me;
+ struct metric_expr *old_expr, *new_expr;
+ struct evsel *evsel;
+ size_t alloc_size;
+ int idx, nr;
+
+ nd = rblist__entry(old_metric_events, i);
+ old_me = container_of(nd, struct metric_event, nd);
+
+ evsel = evlist__find_evsel(evlist, old_me->evsel->idx);
+ if (!evsel)
+ return -EINVAL;
+ new_me = metricgroup__lookup(new_metric_events, evsel, true);
+ if (!new_me)
+ return -ENOMEM;
+
+ pr_debug("copying metric event for cgroup '%s': %s (idx=%d)\n",
+ cgrp ? cgrp->name : "root", evsel->name, evsel->idx);
+
+ list_for_each_entry(old_expr, &old_me->head, nd) {
+ new_expr = malloc(sizeof(*new_expr));
+ if (!new_expr)
+ return -ENOMEM;
+
+ new_expr->metric_expr = old_expr->metric_expr;
+ new_expr->metric_name = old_expr->metric_name;
+ new_expr->metric_unit = old_expr->metric_unit;
+ new_expr->runtime = old_expr->runtime;
+
+ if (old_expr->metric_refs) {
+				/* calculate number of metric_refs */
+ for (nr = 0; old_expr->metric_refs[nr].metric_name; nr++)
+ continue;
+ alloc_size = sizeof(*new_expr->metric_refs);
+ new_expr->metric_refs = calloc(nr + 1, alloc_size);
+ if (!new_expr->metric_refs) {
+ free(new_expr);
+ return -ENOMEM;
+ }
+
+ memcpy(new_expr->metric_refs, old_expr->metric_refs,
+ nr * alloc_size);
+ } else {
+ new_expr->metric_refs = NULL;
+ }
+
+ /* calculate number of metric_events */
+ for (nr = 0; old_expr->metric_events[nr]; nr++)
+ continue;
+ alloc_size = sizeof(*new_expr->metric_events);
+ new_expr->metric_events = calloc(nr + 1, alloc_size);
+ if (!new_expr->metric_events) {
+ free(new_expr->metric_refs);
+ free(new_expr);
+ return -ENOMEM;
+ }
+
+ /* copy evsel in the same position */
+ for (idx = 0; idx < nr; idx++) {
+ evsel = old_expr->metric_events[idx];
+ evsel = evlist__find_evsel(evlist, evsel->idx);
+ if (evsel == NULL) {
+ free(new_expr->metric_events);
+ free(new_expr->metric_refs);
+ free(new_expr);
+ return -EINVAL;
+ }
+ new_expr->metric_events[idx] = evsel;
+ }
+
+ list_add(&new_expr->nd, &new_me->head);
+ }
+ }
+ return 0;
+}
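
For illustration only (not part of the patch): the weak-group builder above emits a metric's events as "{ev1,ev2,...}:W" and always places duration_time outside the group, while constrained metrics are emitted ungrouped. The following standalone toy sketch reproduces only that output format; it deliberately avoids perf's strbuf/hashmap APIs, so the function and variable names are made up.

#include <stdio.h>
#include <string.h>

/* Toy reproduction of the event-string format built by the weak-group path. */
static void build_weak_group(char *out, size_t sz, const char **ids, int n)
{
	int i, first = 1, has_duration = 0;

	out[0] = '\0';
	for (i = 0; i < n; i++) {
		if (!strcmp(ids[i], "duration_time")) {
			has_duration = 1;	/* always kept outside the group */
			continue;
		}
		snprintf(out + strlen(out), sz - strlen(out), "%s%s",
			 first ? "{" : ",", ids[i]);
		first = 0;
	}
	if (!first)
		snprintf(out + strlen(out), sz - strlen(out), "}:W%s",
			 has_duration ? ",duration_time" : "");
	else if (has_duration)
		snprintf(out + strlen(out), sz - strlen(out), "duration_time");
}

int main(void)
{
	const char *ids[] = { "instructions", "cycles", "duration_time" };
	char buf[256];

	build_weak_group(buf, sizeof(buf), ids, 3);
	printf("%s\n", buf);	/* prints {instructions,cycles}:W,duration_time */
	return 0;
}

The printed string matches the evlist layout described in the find_evsel_group comment, where each metric becomes one weak group followed by duration_time.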
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 475c7f912864..ed1b9392e624 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -5,10 +5,15 @@
#include <linux/list.h>
#include <linux/rbtree.h>
#include <stdbool.h>
+#include "pmu-events/pmu-events.h"
+struct evlist;
struct evsel;
+struct evlist;
struct option;
struct rblist;
+struct pmu_events_map;
+struct cgroup;
struct metric_event {
struct rb_node nd;
@@ -16,22 +21,44 @@ struct metric_event {
struct list_head head; /* list of metric_expr */
};
+struct metric_ref {
+ const char *metric_name;
+ const char *metric_expr;
+};
+
struct metric_expr {
struct list_head nd;
const char *metric_expr;
const char *metric_name;
const char *metric_unit;
struct evsel **metric_events;
+ struct metric_ref *metric_refs;
+ int runtime;
};
struct metric_event *metricgroup__lookup(struct rblist *metric_events,
struct evsel *evsel,
bool create);
int metricgroup__parse_groups(const struct option *opt,
- const char *str,
- struct rblist *metric_events);
+ const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ struct rblist *metric_events);
+
+int metricgroup__parse_groups_test(struct evlist *evlist,
+ struct pmu_events_map *map,
+ const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ struct rblist *metric_events);
void metricgroup__print(bool metrics, bool groups, char *filter,
bool raw, bool details);
bool metricgroup__has_metric(const char *metric);
+int arch_get_runtimeparam(struct pmu_event *pe __maybe_unused);
+void metricgroup__rblist_exit(struct rblist *metric_events);
+
+int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
+ struct rblist *new_metric_events,
+ struct rblist *old_metric_events);
#endif
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index 359db2b1fcef..48c8f609441b 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -314,7 +314,7 @@ static int __ordered_events__flush(struct ordered_events *oe, enum oe_flush how,
case OE_FLUSH__NONE:
default:
break;
- };
+ }
pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush PRE %s, nr_events %u\n",
str[how], oe->nr_events);
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
index 0920fb0ec6cc..75345946c4b9 100644
--- a/tools/perf/util/ordered-events.h
+++ b/tools/perf/util/ordered-events.h
@@ -29,7 +29,7 @@ typedef int (*ordered_events__deliver_t)(struct ordered_events *oe,
struct ordered_events_buffer {
struct list_head list;
- struct ordered_event event[0];
+ struct ordered_event event[];
};
struct ordered_events {
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 10107747b361..3b273580fb84 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -26,7 +26,7 @@
#include <api/fs/tracing_path.h>
#include <perf/cpumap.h>
#include "parse-events-bison.h"
-#define YY_EXTRA_TYPE int
+#define YY_EXTRA_TYPE void*
#include "parse-events-flex.h"
#include "pmu.h"
#include "thread_map.h"
@@ -36,6 +36,8 @@
#include "metricgroup.h"
#include "util/evsel_config.h"
#include "util/event.h"
+#include "util/pfm.h"
+#include "perf.h"
#define MAX_NAME_LEN 100
@@ -204,7 +206,8 @@ void parse_events__handle_error(struct parse_events_error *err, int idx,
err->help = help;
break;
default:
- WARN_ONCE(1, "WARNING: multiple event parsing errors\n");
+ pr_debug("Multiple errors dropping message: %s (%s)\n",
+ err->str, err->help);
free(err->str);
err->str = str;
free(err->help);
@@ -344,22 +347,26 @@ static char *get_config_name(struct list_head *head_terms)
static struct evsel *
__add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr,
+ bool init_attr,
char *name, struct perf_pmu *pmu,
struct list_head *config_terms, bool auto_merge_stats,
const char *cpu_list)
{
struct evsel *evsel;
- struct perf_cpu_map *cpus = pmu ? pmu->cpus :
+ struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) :
cpu_list ? perf_cpu_map__new(cpu_list) : NULL;
- event_attr_init(attr);
+ if (init_attr)
+ event_attr_init(attr);
- evsel = perf_evsel__new_idx(attr, *idx);
- if (!evsel)
+ evsel = evsel__new_idx(attr, *idx);
+ if (!evsel) {
+ perf_cpu_map__put(cpus);
return NULL;
+ }
(*idx)++;
- evsel->core.cpus = perf_cpu_map__get(cpus);
+ evsel->core.cpus = cpus;
evsel->core.own_cpus = perf_cpu_map__get(cpus);
evsel->core.system_wide = pmu ? pmu->is_uncore : false;
evsel->auto_merge_stats = auto_merge_stats;
@@ -370,15 +377,25 @@ __add_event(struct list_head *list, int *idx,
if (config_terms)
list_splice(config_terms, &evsel->config_terms);
- list_add_tail(&evsel->core.node, list);
+ if (list)
+ list_add_tail(&evsel->core.node, list);
+
return evsel;
}
+struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
+ char *name, struct perf_pmu *pmu)
+{
+ return __add_event(NULL, &idx, attr, false, name, pmu, NULL, false,
+ NULL);
+}
+
static int add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr, char *name,
struct list_head *config_terms)
{
- return __add_event(list, idx, attr, name, NULL, config_terms, false, NULL) ? 0 : -ENOMEM;
+ return __add_event(list, idx, attr, true, name, NULL, config_terms,
+ false, NULL) ? 0 : -ENOMEM;
}
static int add_event_tool(struct list_head *list, int *idx,
@@ -390,22 +407,23 @@ static int add_event_tool(struct list_head *list, int *idx,
.config = PERF_COUNT_SW_DUMMY,
};
- evsel = __add_event(list, idx, &attr, NULL, NULL, NULL, false, "0");
+ evsel = __add_event(list, idx, &attr, true, NULL, NULL, NULL, false,
+ "0");
if (!evsel)
return -ENOMEM;
evsel->tool_event = tool_event;
if (tool_event == PERF_TOOL_DURATION_TIME)
- evsel->unit = strdup("ns");
+ evsel->unit = "ns";
return 0;
}
-static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
+static int parse_aliases(char *str, const char *names[][EVSEL__MAX_ALIASES], int size)
{
int i, j;
int n, longest = -1;
for (i = 0; i < size; i++) {
- for (j = 0; j < PERF_EVSEL__MAX_ALIASES && names[i][j]; j++) {
+ for (j = 0; j < EVSEL__MAX_ALIASES && names[i][j]; j++) {
n = strlen(names[i][j]);
if (n > longest && !strncasecmp(str, names[i][j], n))
longest = n;
@@ -444,8 +462,7 @@ int parse_events_add_cache(struct list_head *list, int *idx,
* No fallback - if we cannot get a clear cache type
* then bail out:
*/
- cache_type = parse_aliases(type, perf_evsel__hw_cache,
- PERF_COUNT_HW_CACHE_MAX);
+ cache_type = parse_aliases(type, evsel__hw_cache, PERF_COUNT_HW_CACHE_MAX);
if (cache_type == -1)
return -EINVAL;
@@ -458,17 +475,17 @@ int parse_events_add_cache(struct list_head *list, int *idx,
n += snprintf(name + n, MAX_NAME_LEN - n, "-%s", str);
if (cache_op == -1) {
- cache_op = parse_aliases(str, perf_evsel__hw_cache_op,
+ cache_op = parse_aliases(str, evsel__hw_cache_op,
PERF_COUNT_HW_CACHE_OP_MAX);
if (cache_op >= 0) {
- if (!perf_evsel__is_cache_op_valid(cache_type, cache_op))
+ if (!evsel__is_cache_op_valid(cache_type, cache_op))
return -EINVAL;
continue;
}
}
if (cache_result == -1) {
- cache_result = parse_aliases(str, perf_evsel__hw_cache_result,
+ cache_result = parse_aliases(str, evsel__hw_cache_result,
PERF_COUNT_HW_CACHE_RESULT_MAX);
if (cache_result >= 0)
continue;
@@ -538,9 +555,8 @@ static int add_tracepoint(struct list_head *list, int *idx,
struct parse_events_error *err,
struct list_head *head_config)
{
- struct evsel *evsel;
+ struct evsel *evsel = evsel__newtp_idx(sys_name, evt_name, (*idx)++);
- evsel = perf_evsel__newtp_idx(sys_name, evt_name, (*idx)++);
if (IS_ERR(evsel)) {
tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name);
return PTR_ERR(evsel);
@@ -754,8 +770,8 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
return 0;
errout:
- parse_state->error->help = strdup("(add -v to see detail)");
- parse_state->error->str = strdup(errbuf);
+ parse_events__handle_error(parse_state->error, 0,
+ strdup(errbuf), strdup("(add -v to see detail)"));
return err;
}
@@ -771,36 +787,38 @@ parse_events_config_bpf(struct parse_events_state *parse_state,
return 0;
list_for_each_entry(term, head_config, list) {
- char errbuf[BUFSIZ];
int err;
if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) {
- snprintf(errbuf, sizeof(errbuf),
- "Invalid config term for BPF object");
- errbuf[BUFSIZ - 1] = '\0';
-
- parse_state->error->idx = term->err_term;
- parse_state->error->str = strdup(errbuf);
+ parse_events__handle_error(parse_state->error, term->err_term,
+ strdup("Invalid config term for BPF object"),
+ NULL);
return -EINVAL;
}
err = bpf__config_obj(obj, term, parse_state->evlist, &error_pos);
if (err) {
+ char errbuf[BUFSIZ];
+ int idx;
+
bpf__strerror_config_obj(obj, term, parse_state->evlist,
&error_pos, err, errbuf,
sizeof(errbuf));
- parse_state->error->help = strdup(
+
+ if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
+ idx = term->err_val;
+ else
+ idx = term->err_term + error_pos;
+
+ parse_events__handle_error(parse_state->error, idx,
+ strdup(errbuf),
+ strdup(
"Hint:\tValid config terms:\n"
" \tmap:[<arraymap>].value<indices>=[value]\n"
" \tmap:[<eventmap>].event<indices>=[event]\n"
"\n"
" \twhere <indices> is something like [0,3...5] or [all]\n"
-" \t(add -v to see detail)");
- parse_state->error->str = strdup(errbuf);
- if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
- parse_state->error->idx = term->err_val;
- else
- parse_state->error->idx = term->err_term + error_pos;
+" \t(add -v to see detail)"));
return err;
}
}
@@ -864,8 +882,8 @@ int parse_events_load_bpf(struct parse_events_state *parse_state,
-err, errbuf,
sizeof(errbuf));
- parse_state->error->help = strdup("(add -v to see detail)");
- parse_state->error->str = strdup(errbuf);
+ parse_events__handle_error(parse_state->error, 0,
+ strdup(errbuf), strdup("(add -v to see detail)"));
return err;
}
@@ -924,12 +942,12 @@ do { \
}
int parse_events_add_breakpoint(struct list_head *list, int *idx,
- void *ptr, char *type, u64 len)
+ u64 addr, char *type, u64 len)
{
struct perf_event_attr attr;
memset(&attr, 0, sizeof(attr));
- attr.bp_addr = (unsigned long) ptr;
+ attr.bp_addr = addr;
if (parse_breakpoint_type(type, &attr))
return -EINVAL;
@@ -1214,14 +1232,14 @@ static int get_config_terms(struct list_head *head_config,
struct list_head *head_terms __maybe_unused)
{
#define ADD_CONFIG_TERM(__type, __weak) \
- struct perf_evsel_config_term *__t; \
+ struct evsel_config_term *__t; \
\
__t = zalloc(sizeof(*__t)); \
if (!__t) \
return -ENOMEM; \
\
INIT_LIST_HEAD(&__t->list); \
- __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \
+ __t->type = EVSEL__CONFIG_TERM_ ## __type; \
__t->weak = __weak; \
list_add_tail(&__t->list, head_terms)
@@ -1312,7 +1330,7 @@ do { \
}
/*
- * Add PERF_EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for
+ * Add EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for
* each bit of attr->config that the user has changed.
*/
static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config,
@@ -1400,10 +1418,10 @@ int parse_events_add_tool(struct parse_events_state *parse_state,
static bool config_term_percore(struct list_head *config_terms)
{
- struct perf_evsel_config_term *term;
+ struct evsel_config_term *term;
list_for_each_entry(term, config_terms, list) {
- if (term->type == PERF_EVSEL__CONFIG_TERM_PERCORE)
+ if (term->type == EVSEL__CONFIG_TERM_PERCORE)
return term->val.percore;
}
@@ -1424,7 +1442,20 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
bool use_uncore_alias;
LIST_HEAD(config_terms);
- pmu = perf_pmu__find(name);
+ if (verbose > 1) {
+ fprintf(stderr, "Attempting to add event pmu '%s' with '",
+ name);
+ if (head_config) {
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, head_config, list) {
+ fprintf(stderr, "%s,", term->config);
+ }
+ }
+ fprintf(stderr, "' that may result in non-fatal errors\n");
+ }
+
+ pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
if (!pmu) {
char *err_str;
@@ -1446,8 +1477,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
if (!head_config) {
attr.type = pmu->type;
- evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL,
- auto_merge_stats, NULL);
+ evsel = __add_event(list, &parse_state->idx, &attr, true, NULL,
+ pmu, NULL, auto_merge_stats, NULL);
if (evsel) {
evsel->pmu_name = name ? strdup(name) : NULL;
evsel->use_uncore_alias = use_uncore_alias;
@@ -1457,9 +1488,22 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
}
}
- if (perf_pmu__check_alias(pmu, head_config, &info))
+ if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, &info))
return -EINVAL;
+ if (verbose > 1) {
+ fprintf(stderr, "After aliases, add event pmu '%s' with '",
+ name);
+ if (head_config) {
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, head_config, list) {
+ fprintf(stderr, "%s,", term->config);
+ }
+ }
+ fprintf(stderr, "' that may result in non-fatal errors\n");
+ }
+
/*
* Configure hardcoded terms first, no need to check
* return value when called with fail == 0 ;)
@@ -1477,32 +1521,38 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms))
return -ENOMEM;
- if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
- struct perf_evsel_config_term *pos, *tmp;
+ if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
+ struct evsel_config_term *pos, *tmp;
list_for_each_entry_safe(pos, tmp, &config_terms, list) {
list_del_init(&pos->list);
+ if (pos->free_str)
+ zfree(&pos->val.str);
free(pos);
}
return -EINVAL;
}
- evsel = __add_event(list, &parse_state->idx, &attr,
+ evsel = __add_event(list, &parse_state->idx, &attr, true,
get_config_name(head_config), pmu,
&config_terms, auto_merge_stats, NULL);
- if (evsel) {
- evsel->unit = info.unit;
- evsel->scale = info.scale;
- evsel->per_pkg = info.per_pkg;
- evsel->snapshot = info.snapshot;
- evsel->metric_expr = info.metric_expr;
- evsel->metric_name = info.metric_name;
- evsel->pmu_name = name ? strdup(name) : NULL;
- evsel->use_uncore_alias = use_uncore_alias;
- evsel->percore = config_term_percore(&evsel->config_terms);
- }
+ if (!evsel)
+ return -ENOMEM;
+
+ evsel->pmu_name = name ? strdup(name) : NULL;
+ evsel->use_uncore_alias = use_uncore_alias;
+ evsel->percore = config_term_percore(&evsel->config_terms);
+
+ if (parse_state->fake_pmu)
+ return 0;
- return evsel ? 0 : -ENOMEM;
+ evsel->unit = info.unit;
+ evsel->scale = info.scale;
+ evsel->per_pkg = info.per_pkg;
+ evsel->snapshot = info.snapshot;
+ evsel->metric_expr = info.metric_expr;
+ evsel->metric_name = info.metric_name;
+ return 0;
}
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
@@ -1629,12 +1679,11 @@ parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list,
* event. That can be used to distinguish the leader from
* other members, even they have the same event name.
*/
- if ((leader != evsel) && (leader->pmu_name == evsel->pmu_name)) {
+ if ((leader != evsel) &&
+ !strcmp(leader->pmu_name, evsel->pmu_name)) {
is_leader = false;
continue;
}
- /* The name is always alias name */
- WARN_ON(strcmp(leader->name, evsel->name));
/* Store the leader event for each PMU */
leaders[nr_pmu++] = (uintptr_t) evsel;
@@ -1726,6 +1775,7 @@ struct event_modifier {
int sample_read;
int pinned;
int weak;
+ int exclusive;
};
static int get_event_modifier(struct event_modifier *mod, char *str,
@@ -1741,6 +1791,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
int precise_max = 0;
int sample_read = 0;
int pinned = evsel ? evsel->core.attr.pinned : 0;
+ int exclusive = evsel ? evsel->core.attr.exclusive : 0;
int exclude = eu | ek | eh;
int exclude_GH = evsel ? evsel->exclude_GH : 0;
@@ -1752,6 +1803,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
if (*str == 'u') {
if (!exclude)
exclude = eu = ek = eh = 1;
+ if (!exclude_GH && !perf_guest)
+ eG = 1;
eu = 0;
} else if (*str == 'k') {
if (!exclude)
@@ -1782,6 +1835,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
sample_read = 1;
} else if (*str == 'D') {
pinned = 1;
+ } else if (*str == 'e') {
+ exclusive = 1;
} else if (*str == 'W') {
weak = 1;
} else
@@ -1815,6 +1870,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
mod->sample_read = sample_read;
mod->pinned = pinned;
mod->weak = weak;
+ mod->exclusive = exclusive;
return 0;
}
@@ -1828,7 +1884,7 @@ static int check_modifier(char *str)
char *p = str;
/* The sizeof includes 0 byte as well. */
- if (strlen(str) > (sizeof("ukhGHpppPSDIW") - 1))
+ if (strlen(str) > (sizeof("ukhGHpppPSDIWe") - 1))
return -1;
while (*p) {
@@ -1870,8 +1926,10 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add)
evsel->precise_max = mod.precise_max;
evsel->weak_group = mod.weak;
- if (perf_evsel__is_group_leader(evsel))
+ if (evsel__is_group_leader(evsel)) {
evsel->core.attr.pinned = mod.pinned;
+ evsel->core.attr.exclusive = mod.exclusive;
+ }
}
return 0;
@@ -1977,6 +2035,32 @@ err:
perf_pmu__parse_cleanup();
}
+/*
+ * This function injects a special term into
+ * perf_pmu_events_list so the test code
+ * can check this functionality.
+ */
+int perf_pmu__test_parse_init(void)
+{
+ struct perf_pmu_event_symbol *list;
+
+ list = malloc(sizeof(*list) * 1);
+ if (!list)
+ return -ENOMEM;
+
+ list->type = PMU_EVENT_SYMBOL;
+ list->symbol = strdup("read");
+
+ if (!list->symbol) {
+ free(list);
+ return -ENOMEM;
+ }
+
+ perf_pmu_events_list = list;
+ perf_pmu_events_list_num = 1;
+ return 0;
+}
+
enum perf_pmu_event_symbol_type
perf_pmu__parse_check(const char *name)
{
@@ -2001,13 +2085,14 @@ perf_pmu__parse_check(const char *name)
return r ? r->type : PMU_EVENT_SYMBOL_ERR;
}
-static int parse_events__scanner(const char *str, void *parse_state, int start_token)
+static int parse_events__scanner(const char *str,
+ struct parse_events_state *parse_state)
{
YY_BUFFER_STATE buffer;
void *scanner;
int ret;
- ret = parse_events_lex_init_extra(start_token, &scanner);
+ ret = parse_events_lex_init_extra(parse_state, &scanner);
if (ret)
return ret;
@@ -2015,6 +2100,7 @@ static int parse_events__scanner(const char *str, void *parse_state, int start_t
#ifdef PARSER_DEBUG
parse_events_debug = 1;
+ parse_events_set_debug(1, scanner);
#endif
ret = parse_events_parse(parse_state, scanner);
@@ -2030,11 +2116,14 @@ static int parse_events__scanner(const char *str, void *parse_state, int start_t
int parse_events_terms(struct list_head *terms, const char *str)
{
struct parse_events_state parse_state = {
- .terms = NULL,
+ .terms = NULL,
+ .stoken = PE_START_TERMS,
};
int ret;
- ret = parse_events__scanner(str, &parse_state, PE_START_TERMS);
+ ret = parse_events__scanner(str, &parse_state);
+ perf_pmu__parse_cleanup();
+
if (!ret) {
list_splice(parse_state.terms, terms);
zfree(&parse_state.terms);
@@ -2045,18 +2134,20 @@ int parse_events_terms(struct list_head *terms, const char *str)
return ret;
}
-int parse_events(struct evlist *evlist, const char *str,
- struct parse_events_error *err)
+int __parse_events(struct evlist *evlist, const char *str,
+ struct parse_events_error *err, struct perf_pmu *fake_pmu)
{
struct parse_events_state parse_state = {
- .list = LIST_HEAD_INIT(parse_state.list),
- .idx = evlist->core.nr_entries,
- .error = err,
- .evlist = evlist,
+ .list = LIST_HEAD_INIT(parse_state.list),
+ .idx = evlist->core.nr_entries,
+ .error = err,
+ .evlist = evlist,
+ .stoken = PE_START_EVENTS,
+ .fake_pmu = fake_pmu,
};
int ret;
- ret = parse_events__scanner(str, &parse_state, PE_START_EVENTS);
+ ret = parse_events__scanner(str, &parse_state);
perf_pmu__parse_cleanup();
if (!ret && list_empty(&parse_state.list)) {
@@ -2190,6 +2281,29 @@ int parse_events_option(const struct option *opt, const char *str,
return ret;
}
+int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset)
+{
+ struct evlist **evlistp = opt->value;
+ int ret;
+
+ if (*evlistp == NULL) {
+ *evlistp = evlist__new();
+
+ if (*evlistp == NULL) {
+ fprintf(stderr, "Not enough memory to create evlist\n");
+ return -1;
+ }
+ }
+
+ ret = parse_events_option(opt, str, unset);
+ if (ret) {
+ evlist__delete(*evlistp);
+ *evlistp = NULL;
+ }
+
+ return ret;
+}
+
static int
foreach_evsel_in_last_glob(struct evlist *evlist,
int (*func)(struct evsel *evsel,
@@ -2237,7 +2351,7 @@ static int set_filter(struct evsel *evsel, const void *arg)
}
if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) {
- if (perf_evsel__append_tp_filter(evsel, str) < 0) {
+ if (evsel__append_tp_filter(evsel, str) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
return -1;
@@ -2262,7 +2376,7 @@ static int set_filter(struct evsel *evsel, const void *arg)
return -1;
}
- if (perf_evsel__append_addr_filter(evsel, str) < 0) {
+ if (evsel__append_addr_filter(evsel, str) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
return -1;
@@ -2293,7 +2407,7 @@ static int add_exclude_perf_filter(struct evsel *evsel,
snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid());
- if (perf_evsel__append_tp_filter(evsel, new_filter) < 0) {
+ if (evsel__append_tp_filter(evsel, new_filter) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
return -1;
@@ -2603,12 +2717,11 @@ restart:
for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
/* skip invalid cache type */
- if (!perf_evsel__is_cache_op_valid(type, op))
+ if (!evsel__is_cache_op_valid(type, op))
continue;
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
- __perf_evsel__hw_cache_type_op_res_name(type, op, i,
- name, sizeof(name));
+ __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
if (event_glob != NULL && !strglobmatch(name, event_glob))
continue;
@@ -2794,6 +2907,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag,
print_sdt_events(NULL, NULL, name_only);
metricgroup__print(true, true, NULL, name_only, details_flag);
+
+ print_libpfm_events(name_only, long_desc);
}
int parse_events__is_hardcoded_term(struct parse_events_term *term)
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 27596cbd0ba0..e80c9b74f2f2 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -17,6 +17,7 @@ struct evlist;
struct parse_events_error;
struct option;
+struct perf_pmu;
struct tracepoint_path {
char *system;
@@ -31,8 +32,16 @@ bool have_tracepoints(struct list_head *evlist);
const char *event_type(int type);
int parse_events_option(const struct option *opt, const char *str, int unset);
-int parse_events(struct evlist *evlist, const char *str,
- struct parse_events_error *error);
+int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset);
+int __parse_events(struct evlist *evlist, const char *str, struct parse_events_error *error,
+ struct perf_pmu *fake_pmu);
+
+static inline int parse_events(struct evlist *evlist, const char *str,
+ struct parse_events_error *err)
+{
+ return __parse_events(evlist, str, err, NULL);
+}
+
int parse_events_terms(struct list_head *terms, const char *str);
int parse_filter(const struct option *opt, const char *str, int unset);
int exclude_perf(const struct option *opt, const char *arg, int unset);
@@ -125,8 +134,10 @@ struct parse_events_state {
int idx;
int nr_groups;
struct parse_events_error *error;
- struct evlist *evlist;
+ struct evlist *evlist;
struct list_head *terms;
+ int stoken;
+ struct perf_pmu *fake_pmu;
};
void parse_events__handle_error(struct parse_events_error *err, int idx,
@@ -179,13 +190,16 @@ int parse_events_add_cache(struct list_head *list, int *idx,
struct parse_events_error *error,
struct list_head *head_config);
int parse_events_add_breakpoint(struct list_head *list, int *idx,
- void *ptr, char *type, u64 len);
+ u64 addr, char *type, u64 len);
int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, char *name,
struct list_head *head_config,
bool auto_merge_stats,
bool use_alias);
+struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
+ char *name, struct perf_pmu *pmu);
+
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
char *str,
struct list_head **listp);
@@ -247,4 +261,6 @@ static inline bool is_sdt_event(char *str __maybe_unused)
}
#endif /* HAVE_LIBELF_SUPPORT */
+int perf_pmu__test_parse_init(void);
+
#endif /* __PERF_PARSE_EVENTS_H */
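
For illustration only (not part of this patch): with the split above, regular callers keep using parse_events() (the fake_pmu == NULL path), while tests can route through __parse_events() with &perf_pmu__fake. A hedged sketch of the common path, assuming it is compiled inside the perf tree; the helper name evlist_from_string is made up.

#include <strings.h>
#include "util/evlist.h"
#include "util/parse-events.h"

/* Hypothetical helper: build an evlist from an event string. */
static struct evlist *evlist_from_string(const char *events)
{
	struct parse_events_error err;
	struct evlist *evlist = evlist__new();

	if (!evlist)
		return NULL;

	bzero(&err, sizeof(err));
	/* fake_pmu == NULL, i.e. the normal (non-test) parsing path */
	if (parse_events(evlist, events, &err)) {
		parse_events_print_error(&err, events);
		evlist__delete(evlist);
		return NULL;
	}
	return evlist;
}

On failure the sketch prints the parse error the same way parse_groups() does above, then releases the evlist it created.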
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index baa48f28d57d..9db5097317f4 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -41,14 +41,6 @@ static int value(yyscan_t scanner, int base)
return __value(yylval, text, base, PE_VALUE);
}
-static int raw(yyscan_t scanner)
-{
- YYSTYPE *yylval = parse_events_get_lval(scanner);
- char *text = parse_events_get_text(scanner);
-
- return __value(yylval, text + 1, 16, PE_RAW);
-}
-
static int str(yyscan_t scanner, int token)
{
YYSTYPE *yylval = parse_events_get_lval(scanner);
@@ -72,6 +64,17 @@ static int str(yyscan_t scanner, int token)
return token;
}
+static int raw(yyscan_t scanner)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+
+ if (perf_pmu__parse_check(text) == PMU_EVENT_SYMBOL)
+ return str(scanner, PE_NAME);
+
+ return __value(yylval, text + 1, 16, PE_RAW);
+}
+
static bool isbpf_suffix(char *text)
{
int len = strlen(text);
@@ -129,12 +132,16 @@ do { \
yyless(0); \
} while (0)
-static int pmu_str_check(yyscan_t scanner)
+static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_state)
{
YYSTYPE *yylval = parse_events_get_lval(scanner);
char *text = parse_events_get_text(scanner);
yylval->str = strdup(text);
+
+ if (parse_state->fake_pmu)
+ return PE_PMU_EVENT_FAKE;
+
switch (perf_pmu__parse_check(text)) {
case PMU_EVENT_SYMBOL_PREFIX:
return PE_PMU_EVENT_PRE;
@@ -203,16 +210,16 @@ name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\']
name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
/* If you add a modifier you need to update check_modifier() */
-modifier_event [ukhpPGHSDIW]+
+modifier_event [ukhpPGHSDIWe]+
modifier_bp [rwx]{1,3}
%%
%{
- {
- int start_token;
+ struct parse_events_state *_parse_state = parse_events_get_extra(yyscanner);
- start_token = parse_events_get_extra(yyscanner);
+ {
+ int start_token = _parse_state->stoken;
if (start_token == PE_START_TERMS)
BEGIN(config);
@@ -220,7 +227,7 @@ modifier_bp [rwx]{1,3}
BEGIN(event);
if (start_token) {
- parse_events_set_extra(NULL, yyscanner);
+ _parse_state->stoken = 0;
/*
* The flex parser does not init locations variable
* via the scan_string interface, so we need do the
@@ -252,7 +259,9 @@ modifier_bp [rwx]{1,3}
BEGIN(INITIAL);
REWIND(0);
}
-
+, {
+ return ',';
+ }
}
<array>{
@@ -286,6 +295,8 @@ no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); }
percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
+r{num_raw_hex} { return raw(yyscanner); }
+r0x{num_raw_hex} { return raw(yyscanner); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
@@ -373,7 +384,7 @@ r{num_raw_hex} { return raw(yyscanner); }
{modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); }
{bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); }
{bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); }
-{name} { return pmu_str_check(yyscanner); }
+{name} { return pmu_str_check(yyscanner, _parse_state); }
{name_tag} { return str(yyscanner, PE_NAME); }
"/" { BEGIN(config); return '/'; }
- { return '-'; }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 94f8bcd83582..d5b6aff82f21 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -26,7 +26,7 @@ do { \
YYABORT; \
} while (0)
-static struct list_head* alloc_list()
+static struct list_head* alloc_list(void)
{
struct list_head *list;
@@ -44,7 +44,7 @@ static void free_list_evsel(struct list_head* list_evsel)
list_for_each_entry_safe(evsel, tmp, list_evsel, core.node) {
list_del_init(&evsel->core.node);
- perf_evsel__delete(evsel);
+ evsel__delete(evsel);
}
free(list_evsel);
}
@@ -69,7 +69,7 @@ static void inc_group_count(struct list_head *list,
%token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT
%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
%token PE_ERROR
-%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%token PE_ARRAY_ALL PE_ARRAY_RANGE
%token PE_DRV_CFG_TERM
%type <num> PE_VALUE
@@ -87,7 +87,7 @@ static void inc_group_count(struct list_head *list,
%type <str> PE_MODIFIER_EVENT
%type <str> PE_MODIFIER_BP
%type <str> PE_EVENT_NAME
-%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%type <str> PE_DRV_CFG_TERM
%destructor { free ($$); } <str>
%type <term> event_term
@@ -326,6 +326,7 @@ PE_NAME opt_pmu_config
}
parse_events_terms__delete($2);
parse_events_terms__delete(orig_terms);
+ free(pattern);
free($1);
$$ = list;
#undef CLEANUP_YYABORT
@@ -348,13 +349,50 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
struct list_head *list;
char pmu_name[128];
- snprintf(&pmu_name, 128, "%s-%s", $1, $3);
+ snprintf(pmu_name, sizeof(pmu_name), "%s-%s", $1, $3);
free($1);
free($3);
if (parse_events_multi_pmu_add(_parse_state, pmu_name, &list) < 0)
YYABORT;
$$ = list;
}
+|
+PE_PMU_EVENT_FAKE sep_dc
+{
+ struct list_head *list;
+ int err;
+
+ list = alloc_list();
+ if (!list)
+ YYABORT;
+
+ err = parse_events_add_pmu(_parse_state, list, $1, NULL, false, false);
+ free($1);
+ if (err < 0) {
+ free(list);
+ YYABORT;
+ }
+ $$ = list;
+}
+|
+PE_PMU_EVENT_FAKE opt_pmu_config
+{
+ struct list_head *list;
+ int err;
+
+ list = alloc_list();
+ if (!list)
+ YYABORT;
+
+ err = parse_events_add_pmu(_parse_state, list, $1, $2, false, false);
+ free($1);
+ parse_events_terms__delete($2);
+ if (err < 0) {
+ free(list);
+ YYABORT;
+ }
+ $$ = list;
+}
value_sym:
PE_VALUE_SYM_HW
@@ -473,7 +511,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc
list = alloc_list();
ABORT_ON(!list);
err = parse_events_add_breakpoint(list, &parse_state->idx,
- (void *) $2, $6, $4);
+ $2, $6, $4);
free($6);
if (err) {
free(list);
@@ -490,7 +528,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc
list = alloc_list();
ABORT_ON(!list);
if (parse_events_add_breakpoint(list, &parse_state->idx,
- (void *) $2, NULL, $4)) {
+ $2, NULL, $4)) {
free(list);
YYABORT;
}
@@ -506,7 +544,7 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
list = alloc_list();
ABORT_ON(!list);
err = parse_events_add_breakpoint(list, &parse_state->idx,
- (void *) $2, $4, 0);
+ $2, $4, 0);
free($4);
if (err) {
free(list);
@@ -523,7 +561,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc
list = alloc_list();
ABORT_ON(!list);
if (parse_events_add_breakpoint(list, &parse_state->idx,
- (void *) $2, NULL, 0)) {
+ $2, NULL, 0)) {
free(list);
YYABORT;
}
@@ -706,6 +744,15 @@ event_term
}
event_term:
+PE_RAW
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_CONFIG,
+ NULL, $1, false, &@1, NULL));
+ $$ = term;
+}
+|
PE_NAME '=' PE_NAME
{
struct parse_events_term *term;
diff --git a/tools/perf/util/parse-sublevel-options.c b/tools/perf/util/parse-sublevel-options.c
new file mode 100644
index 000000000000..a841d17ffd57
--- /dev/null
+++ b/tools/perf/util/parse-sublevel-options.c
@@ -0,0 +1,70 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "util/debug.h"
+#include "util/parse-sublevel-options.h"
+
+static int parse_one_sublevel_option(const char *str,
+ struct sublevel_option *opts)
+{
+ struct sublevel_option *opt = opts;
+ char *vstr, *s = strdup(str);
+ int v = 1;
+
+ if (!s) {
+ pr_err("no memory\n");
+ return -1;
+ }
+
+ vstr = strchr(s, '=');
+ if (vstr)
+ *vstr++ = 0;
+
+ while (opt->name) {
+ if (!strcmp(s, opt->name))
+ break;
+ opt++;
+ }
+
+ if (!opt->name) {
+ pr_err("Unknown option name '%s'\n", s);
+ free(s);
+ return -1;
+ }
+
+ if (vstr)
+ v = atoi(vstr);
+
+ *opt->value_ptr = v;
+ free(s);
+ return 0;
+}
+
+/* parse options like --foo a=<n>,b,c... */
+int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts)
+{
+ char *s = strdup(str);
+ char *p = NULL;
+ int ret;
+
+ if (!s) {
+ pr_err("no memory\n");
+ return -1;
+ }
+
+ p = strtok(s, ",");
+ while (p) {
+ ret = parse_one_sublevel_option(p, opts);
+ if (ret) {
+ free(s);
+ return ret;
+ }
+
+ p = strtok(NULL, ",");
+ }
+
+ free(s);
+ return 0;
+}
diff --git a/tools/perf/util/parse-sublevel-options.h b/tools/perf/util/parse-sublevel-options.h
new file mode 100644
index 000000000000..9b9efcc2aaad
--- /dev/null
+++ b/tools/perf/util/parse-sublevel-options.h
@@ -0,0 +1,11 @@
+#ifndef _PERF_PARSE_SUBLEVEL_OPTIONS_H
+#define _PERF_PARSE_SUBLEVEL_OPTIONS_H
+
+struct sublevel_option {
+ const char *name;
+ int *value_ptr;
+};
+
+int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts);
+
+#endif \ No newline at end of file
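
For orientation, a minimal sketch of how a caller might use the parser added above (the option names "verbose" and "stderr" and the wrapper function are illustrative assumptions; only perf_parse_sublevel_options() and struct sublevel_option come from the new files):

#include "util/parse-sublevel-options.h"

static int debug_verbose;
static int debug_to_stderr;

/* Hypothetical caller: handles strings such as "verbose=2,stderr". */
static int parse_debug_str(const char *str)
{
	struct sublevel_option opts[] = {
		{ .name = "verbose", .value_ptr = &debug_verbose   },
		{ .name = "stderr",  .value_ptr = &debug_to_stderr },
		{ .name = NULL,      .value_ptr = NULL },	/* the name-lookup loop stops here */
	};

	/* "verbose=2,stderr" -> debug_verbose = 2, debug_to_stderr = 1 (options without '=' default to 1) */
	return perf_parse_sublevel_options(str, opts);
}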
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
new file mode 100644
index 000000000000..3840d02f0f7b
--- /dev/null
+++ b/tools/perf/util/perf_api_probe.c
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "perf-sys.h"
+#include "util/cloexec.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/parse-events.h"
+#include "util/perf_api_probe.h"
+#include <perf/cpumap.h>
+#include <errno.h>
+
+typedef void (*setup_probe_fn_t)(struct evsel *evsel);
+
+static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
+{
+ struct evlist *evlist;
+ struct evsel *evsel;
+ unsigned long flags = perf_event_open_cloexec_flag();
+ int err = -EAGAIN, fd;
+ static pid_t pid = -1;
+
+ evlist = evlist__new();
+ if (!evlist)
+ return -ENOMEM;
+
+ if (parse_events(evlist, str, NULL))
+ goto out_delete;
+
+ evsel = evlist__first(evlist);
+
+ while (1) {
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
+ if (fd < 0) {
+ if (pid == -1 && errno == EACCES) {
+ pid = 0;
+ continue;
+ }
+ goto out_delete;
+ }
+ break;
+ }
+ close(fd);
+
+ fn(evsel);
+
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
+ if (fd < 0) {
+ if (errno == EINVAL)
+ err = -EINVAL;
+ goto out_delete;
+ }
+ close(fd);
+ err = 0;
+
+out_delete:
+ evlist__delete(evlist);
+ return err;
+}
+
+static bool perf_probe_api(setup_probe_fn_t fn)
+{
+ const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
+ struct perf_cpu_map *cpus;
+ int cpu, ret, i = 0;
+
+ cpus = perf_cpu_map__new(NULL);
+ if (!cpus)
+ return false;
+ cpu = cpus->map[0];
+ perf_cpu_map__put(cpus);
+
+ do {
+ ret = perf_do_probe_api(fn, cpu, try[i++]);
+ if (!ret)
+ return true;
+ } while (ret == -EAGAIN && try[i]);
+
+ return false;
+}
+
+static void perf_probe_sample_identifier(struct evsel *evsel)
+{
+ evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
+}
+
+static void perf_probe_comm_exec(struct evsel *evsel)
+{
+ evsel->core.attr.comm_exec = 1;
+}
+
+static void perf_probe_context_switch(struct evsel *evsel)
+{
+ evsel->core.attr.context_switch = 1;
+}
+
+static void perf_probe_text_poke(struct evsel *evsel)
+{
+ evsel->core.attr.text_poke = 1;
+}
+
+bool perf_can_sample_identifier(void)
+{
+ return perf_probe_api(perf_probe_sample_identifier);
+}
+
+bool perf_can_comm_exec(void)
+{
+ return perf_probe_api(perf_probe_comm_exec);
+}
+
+bool perf_can_record_switch_events(void)
+{
+ return perf_probe_api(perf_probe_context_switch);
+}
+
+bool perf_can_record_text_poke_events(void)
+{
+ return perf_probe_api(perf_probe_text_poke);
+}
+
+bool perf_can_record_cpu_wide(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_CPU_CLOCK,
+ .exclude_kernel = 1,
+ };
+ struct perf_cpu_map *cpus;
+ int cpu, fd;
+
+ cpus = perf_cpu_map__new(NULL);
+ if (!cpus)
+ return false;
+ cpu = cpus->map[0];
+ perf_cpu_map__put(cpus);
+
+ fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
+ if (fd < 0)
+ return false;
+ close(fd);
+
+ return true;
+}
+
+/*
+ * Architectures are expected to know if AUX area sampling is supported by the
+ * hardware. Here we check for kernel support.
+ */
+bool perf_can_aux_sample(void)
+{
+ struct perf_event_attr attr = {
+ .size = sizeof(struct perf_event_attr),
+ .exclude_kernel = 1,
+ /*
+ * Non-zero value causes the kernel to calculate the effective
+ * attribute size up to that byte.
+ */
+ .aux_sample_size = 1,
+ };
+ int fd;
+
+ fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
+ /*
+ * If the kernel attribute is big enough to contain aux_sample_size
+ * then we assume that it is supported. We are relying on the kernel to
+ * validate the attribute size before anything else that could be wrong.
+ */
+ if (fd < 0 && errno == E2BIG)
+ return false;
+ if (fd >= 0)
+ close(fd);
+
+ return true;
+}
diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h
new file mode 100644
index 000000000000..d5506a983a94
--- /dev/null
+++ b/tools/perf/util/perf_api_probe.h
@@ -0,0 +1,15 @@
+
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_API_PROBE_H
+#define __PERF_API_PROBE_H
+
+#include <stdbool.h>
+
+bool perf_can_aux_sample(void);
+bool perf_can_comm_exec(void);
+bool perf_can_record_cpu_wide(void);
+bool perf_can_record_switch_events(void);
+bool perf_can_record_text_poke_events(void);
+bool perf_can_sample_identifier(void);
+
+#endif // __PERF_API_PROBE_H
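
A hedged usage sketch for the probe helpers declared above (the wrapper function is an illustration, not part of this patch; the attribute bits mirror what the probe callbacks in perf_api_probe.c toggle):

#include <linux/perf_event.h>
#include "util/perf_api_probe.h"

/* Illustrative: only request features the running kernel has been probed to support. */
static void apply_probed_features(struct perf_event_attr *attr)
{
	if (perf_can_sample_identifier())
		attr->sample_type |= PERF_SAMPLE_IDENTIFIER;
	if (perf_can_comm_exec())
		attr->comm_exec = 1;
	if (perf_can_record_switch_events())
		attr->context_switch = 1;
}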
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index b94fa07f5d32..e67a227c0ce7 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -147,6 +147,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(aux_watermark, p_unsigned);
PRINT_ATTRf(sample_max_stack, p_unsigned);
PRINT_ATTRf(aux_sample_size, p_unsigned);
+ PRINT_ATTRf(text_poke, p_unsigned);
return ret;
}
diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
new file mode 100644
index 000000000000..d735acb6c29c
--- /dev/null
+++ b/tools/perf/util/pfm.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for libpfm4 event encoding.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include "util/cpumap.h"
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/parse-events.h"
+#include "util/pmu.h"
+#include "util/pfm.h"
+
+#include <string.h>
+#include <linux/kernel.h>
+#include <perfmon/pfmlib_perf_event.h>
+
+static void libpfm_initialize(void)
+{
+ int ret;
+
+ ret = pfm_initialize();
+ if (ret != PFM_SUCCESS) {
+ ui__warning("libpfm failed to initialize: %s\n",
+ pfm_strerror(ret));
+ }
+}
+
+int parse_libpfm_events_option(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ struct evlist *evlist = *(struct evlist **)opt->value;
+ struct perf_event_attr attr;
+ struct perf_pmu *pmu;
+ struct evsel *evsel, *grp_leader = NULL;
+ char *p, *q, *p_orig;
+ const char *sep;
+ int grp_evt = -1;
+ int ret;
+
+ libpfm_initialize();
+
+ p_orig = p = strdup(str);
+ if (!p)
+ return -1;
+ /*
+ * force loading of the PMU list
+ */
+ perf_pmu__scan(NULL);
+
+ for (q = p; strsep(&p, ",{}"); q = p) {
+ sep = p ? str + (p - p_orig - 1) : "";
+ if (*sep == '{') {
+ if (grp_evt > -1) {
+ ui__error(
+ "nested event groups not supported\n");
+ goto error;
+ }
+ grp_evt++;
+ }
+
+ /* no event */
+ if (*q == '\0')
+ continue;
+
+ memset(&attr, 0, sizeof(attr));
+ event_attr_init(&attr);
+
+ ret = pfm_get_perf_event_encoding(q, PFM_PLM0|PFM_PLM3,
+ &attr, NULL, NULL);
+
+ if (ret != PFM_SUCCESS) {
+ ui__error("failed to parse event %s : %s\n", str,
+ pfm_strerror(ret));
+ goto error;
+ }
+
+ pmu = perf_pmu__find_by_type((unsigned int)attr.type);
+ evsel = parse_events__add_event(evlist->core.nr_entries,
+ &attr, q, pmu);
+ if (evsel == NULL)
+ goto error;
+
+ evsel->is_libpfm_event = true;
+
+ evlist__add(evlist, evsel);
+
+ if (grp_evt == 0)
+ grp_leader = evsel;
+
+ if (grp_evt > -1) {
+ evsel->leader = grp_leader;
+ grp_leader->core.nr_members++;
+ grp_evt++;
+ }
+
+ if (*sep == '}') {
+ if (grp_evt < 0) {
+ ui__error(
+ "cannot close a non-existing event group\n");
+ goto error;
+ }
+ evlist->nr_groups++;
+ grp_leader = NULL;
+ grp_evt = -1;
+ }
+ }
+ return 0;
+error:
+ free(p_orig);
+ return -1;
+}
+
+static const char *srcs[PFM_ATTR_CTRL_MAX] = {
+ [PFM_ATTR_CTRL_UNKNOWN] = "???",
+ [PFM_ATTR_CTRL_PMU] = "PMU",
+ [PFM_ATTR_CTRL_PERF_EVENT] = "perf_event",
+};
+
+static void
+print_attr_flags(pfm_event_attr_info_t *info)
+{
+ int n = 0;
+
+ if (info->is_dfl) {
+ printf("[default] ");
+ n++;
+ }
+
+ if (info->is_precise) {
+ printf("[precise] ");
+ n++;
+ }
+
+ if (!n)
+ printf("- ");
+}
+
+static void
+print_libpfm_events_detailed(pfm_event_info_t *info, bool long_desc)
+{
+ pfm_event_attr_info_t ainfo;
+ const char *src;
+ int j, ret;
+
+ ainfo.size = sizeof(ainfo);
+
+ printf(" %s\n", info->name);
+ printf(" [%s]\n", info->desc);
+ if (long_desc) {
+ if (info->equiv)
+ printf(" Equiv: %s\n", info->equiv);
+
+ printf(" Code : 0x%"PRIx64"\n", info->code);
+ }
+ pfm_for_each_event_attr(j, info) {
+ ret = pfm_get_event_attr_info(info->idx, j,
+ PFM_OS_PERF_EVENT_EXT, &ainfo);
+ if (ret != PFM_SUCCESS)
+ continue;
+
+ if (ainfo.type == PFM_ATTR_UMASK) {
+ printf(" %s:%s\n", info->name, ainfo.name);
+ printf(" [%s]\n", ainfo.desc);
+ }
+
+ if (!long_desc)
+ continue;
+
+ if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX)
+ ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN;
+
+ src = srcs[ainfo.ctrl];
+ switch (ainfo.type) {
+ case PFM_ATTR_UMASK:
+ printf(" Umask : 0x%02"PRIx64" : %s: ",
+ ainfo.code, src);
+ print_attr_flags(&ainfo);
+ putchar('\n');
+ break;
+ case PFM_ATTR_MOD_BOOL:
+ printf(" Modif : %s: [%s] : %s (boolean)\n", src,
+ ainfo.name, ainfo.desc);
+ break;
+ case PFM_ATTR_MOD_INTEGER:
+ printf(" Modif : %s: [%s] : %s (integer)\n", src,
+ ainfo.name, ainfo.desc);
+ break;
+ case PFM_ATTR_NONE:
+ case PFM_ATTR_RAW_UMASK:
+ case PFM_ATTR_MAX:
+ default:
+ printf(" Attr : %s: [%s] : %s\n", src,
+ ainfo.name, ainfo.desc);
+ }
+ }
+}
+
+/*
+ * list all pmu::event:umask, pmu::event
+ * the printed events may not all be valid umask combinations for an event
+ */
+static void
+print_libpfm_events_raw(pfm_pmu_info_t *pinfo, pfm_event_info_t *info)
+{
+ pfm_event_attr_info_t ainfo;
+ int j, ret;
+ bool has_umask = false;
+
+ ainfo.size = sizeof(ainfo);
+
+ pfm_for_each_event_attr(j, info) {
+ ret = pfm_get_event_attr_info(info->idx, j,
+ PFM_OS_PERF_EVENT_EXT, &ainfo);
+ if (ret != PFM_SUCCESS)
+ continue;
+
+ if (ainfo.type != PFM_ATTR_UMASK)
+ continue;
+
+ printf("%s::%s:%s\n", pinfo->name, info->name, ainfo.name);
+ has_umask = true;
+ }
+ if (!has_umask)
+ printf("%s::%s\n", pinfo->name, info->name);
+}
+
+void print_libpfm_events(bool name_only, bool long_desc)
+{
+ pfm_event_info_t info;
+ pfm_pmu_info_t pinfo;
+ int i, p, ret;
+
+ libpfm_initialize();
+
+ /* initialize to zero to indicate ABI version */
+ info.size = sizeof(info);
+ pinfo.size = sizeof(pinfo);
+
+ if (!name_only)
+ puts("\nList of pre-defined events (to be used in --pfm-events):\n");
+
+ pfm_for_all_pmus(p) {
+ bool printed_pmu = false;
+
+ ret = pfm_get_pmu_info(p, &pinfo);
+ if (ret != PFM_SUCCESS)
+ continue;
+
+ /* only print events that are supported by host HW */
+ if (!pinfo.is_present)
+ continue;
+
+ /* handled by perf directly */
+ if (pinfo.pmu == PFM_PMU_PERF_EVENT)
+ continue;
+
+ for (i = pinfo.first_event; i != -1;
+ i = pfm_get_event_next(i)) {
+
+ ret = pfm_get_event_info(i, PFM_OS_PERF_EVENT_EXT,
+ &info);
+ if (ret != PFM_SUCCESS)
+ continue;
+
+ if (!name_only && !printed_pmu) {
+ printf("%s:\n", pinfo.name);
+ printed_pmu = true;
+ }
+
+ if (!name_only)
+ print_libpfm_events_detailed(&info, long_desc);
+ else
+ print_libpfm_events_raw(&pinfo, &info);
+ }
+ if (!name_only && printed_pmu)
+ putchar('\n');
+ }
+}
diff --git a/tools/perf/util/pfm.h b/tools/perf/util/pfm.h
new file mode 100644
index 000000000000..7d70dda87012
--- /dev/null
+++ b/tools/perf/util/pfm.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for libpfm4 event encoding.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#ifndef __PERF_PFM_H
+#define __PERF_PFM_H
+
+#include <subcmd/parse-options.h>
+
+#ifdef HAVE_LIBPFM
+int parse_libpfm_events_option(const struct option *opt, const char *str,
+ int unset);
+
+void print_libpfm_events(bool name_only, bool long_desc);
+
+#else
+#include <linux/compiler.h>
+
+static inline int parse_libpfm_events_option(
+ const struct option *opt __maybe_unused,
+ const char *str __maybe_unused,
+ int unset __maybe_unused)
+{
+ return 0;
+}
+
+static inline void print_libpfm_events(bool name_only __maybe_unused,
+ bool long_desc __maybe_unused)
+{
+}
+
+#endif
+
+
+#endif /* __PERF_PFM_H */
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index ef6a63f3d386..d41caeb35cf6 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -18,6 +18,7 @@
#include <regex.h>
#include <perf/cpumap.h>
#include "debug.h"
+#include "evsel.h"
#include "pmu.h"
#include "parse-events.h"
#include "header.h"
@@ -25,6 +26,8 @@
#include "strbuf.h"
#include "fncache.h"
+struct perf_pmu perf_pmu__fake;
+
struct perf_pmu_format {
char *name;
int value;
@@ -271,7 +274,7 @@ static void perf_pmu_update_alias(struct perf_pmu_alias *old,
}
/* Delete an alias entry. */
-static void perf_pmu_free_alias(struct perf_pmu_alias *newalias)
+void perf_pmu_free_alias(struct perf_pmu_alias *newalias)
{
zfree(&newalias->name);
zfree(&newalias->desc);
@@ -849,6 +852,7 @@ static struct perf_pmu *pmu_lookup(const char *name)
INIT_LIST_HEAD(&pmu->format);
INIT_LIST_HEAD(&pmu->aliases);
+ INIT_LIST_HEAD(&pmu->caps);
list_splice(&format, &pmu->format);
list_splice(&aliases, &pmu->aliases);
list_add_tail(&pmu->list, &pmus);
@@ -869,6 +873,17 @@ static struct perf_pmu *pmu_find(const char *name)
return NULL;
}
+struct perf_pmu *perf_pmu__find_by_type(unsigned int type)
+{
+ struct perf_pmu *pmu;
+
+ list_for_each_entry(pmu, &pmus, list)
+ if (pmu->type == type)
+ return pmu;
+
+ return NULL;
+}
+
struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu)
{
/*
@@ -884,6 +899,25 @@ struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu)
return NULL;
}
+struct perf_pmu *evsel__find_pmu(struct evsel *evsel)
+{
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ if (pmu->type == evsel->core.attr.type)
+ break;
+ }
+
+ return pmu;
+}
+
+bool evsel__is_aux_event(struct evsel *evsel)
+{
+ struct perf_pmu *pmu = evsel__find_pmu(evsel);
+
+ return pmu && pmu->auxtrace;
+}
+
struct perf_pmu *perf_pmu__find(const char *name)
{
struct perf_pmu *pmu;
@@ -1024,7 +1058,8 @@ error:
* Setup one of config[12] attr members based on the
* user input data - term parameter.
*/
-static int pmu_config_term(struct list_head *formats,
+static int pmu_config_term(const char *pmu_name,
+ struct list_head *formats,
struct perf_event_attr *attr,
struct parse_events_term *term,
struct list_head *head_terms,
@@ -1050,16 +1085,24 @@ static int pmu_config_term(struct list_head *formats,
format = pmu_find_format(formats, term->config);
if (!format) {
- if (verbose > 0)
- printf("Invalid event/parameter '%s'\n", term->config);
+ char *pmu_term = pmu_formats_string(formats);
+ char *unknown_term;
+ char *help_msg;
+
+ if (asprintf(&unknown_term,
+ "unknown term '%s' for pmu '%s'",
+ term->config, pmu_name) < 0)
+ unknown_term = NULL;
+ help_msg = parse_events_formats_error_string(pmu_term);
if (err) {
- char *pmu_term = pmu_formats_string(formats);
-
parse_events__handle_error(err, term->err_term,
- strdup("unknown term"),
- parse_events_formats_error_string(pmu_term));
- free(pmu_term);
+ unknown_term,
+ help_msg);
+ } else {
+ pr_debug("%s (%s)\n", unknown_term, help_msg);
+ free(unknown_term);
}
+ free(pmu_term);
return -EINVAL;
}
@@ -1136,7 +1179,7 @@ static int pmu_config_term(struct list_head *formats,
return 0;
}
-int perf_pmu__config_terms(struct list_head *formats,
+int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats,
struct perf_event_attr *attr,
struct list_head *head_terms,
bool zero, struct parse_events_error *err)
@@ -1144,7 +1187,7 @@ int perf_pmu__config_terms(struct list_head *formats,
struct parse_events_term *term;
list_for_each_entry(term, head_terms, list) {
- if (pmu_config_term(formats, attr, term, head_terms,
+ if (pmu_config_term(pmu_name, formats, attr, term, head_terms,
zero, err))
return -EINVAL;
}
@@ -1164,8 +1207,8 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
bool zero = !!pmu->default_config;
attr->type = pmu->type;
- return perf_pmu__config_terms(&pmu->format, attr, head_terms,
- zero, err);
+ return perf_pmu__config_terms(pmu->name, &pmu->format, attr,
+ head_terms, zero, err);
}
static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
@@ -1311,6 +1354,17 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to)
set_bit(b, bits);
}
+void perf_pmu__del_formats(struct list_head *formats)
+{
+ struct perf_pmu_format *fmt, *tmp;
+
+ list_for_each_entry_safe(fmt, tmp, formats, list) {
+ list_del(&fmt->list);
+ free(fmt->name);
+ free(fmt);
+ }
+}
+
static int sub_non_neg(int a, int b)
{
if (b > a)
@@ -1359,6 +1413,7 @@ struct sevent {
char *pmu;
char *metric_expr;
char *metric_name;
+ int is_cpu;
};
static int cmp_sevent(const void *a, const void *b)
@@ -1375,6 +1430,11 @@ static int cmp_sevent(const void *a, const void *b)
if (n)
return n;
}
+
+ /* Order CPU core events to be first */
+ if (as->is_cpu != bs->is_cpu)
+ return bs->is_cpu - as->is_cpu;
+
return strcmp(as->name, bs->name);
}
@@ -1434,7 +1494,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
list_for_each_entry(alias, &pmu->aliases, list) {
char *name = alias->desc ? alias->name :
format_alias(buf, sizeof(buf), pmu, alias);
- bool is_cpu = !strcmp(pmu->name, "cpu");
+ bool is_cpu = is_pmu_core(pmu->name);
if (alias->deprecated && !deprecated)
continue;
@@ -1466,6 +1526,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
aliases[j].pmu = pmu->name;
aliases[j].metric_expr = alias->metric_expr;
aliases[j].metric_name = alias->metric_name;
+ aliases[j].is_cpu = is_cpu;
j++;
}
if (pmu->selectable &&
@@ -1574,3 +1635,84 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
va_end(args);
return ret;
}
+
+static int perf_pmu__new_caps(struct list_head *list, char *name, char *value)
+{
+ struct perf_pmu_caps *caps = zalloc(sizeof(*caps));
+
+ if (!caps)
+ return -ENOMEM;
+
+ caps->name = strdup(name);
+ if (!caps->name)
+ goto free_caps;
+ caps->value = strndup(value, strlen(value) - 1);
+ if (!caps->value)
+ goto free_name;
+ list_add_tail(&caps->list, list);
+ return 0;
+
+free_name:
+	zfree(&caps->name);
+free_caps:
+ free(caps);
+
+ return -ENOMEM;
+}
+
+/*
+ * Reading/parsing the given pmu capabilities, which should be located at:
+ * /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes.
+ * Return the number of capabilities
+ */
+int perf_pmu__caps_parse(struct perf_pmu *pmu)
+{
+ struct stat st;
+ char caps_path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+ DIR *caps_dir;
+ struct dirent *evt_ent;
+ int nr_caps = 0;
+
+ if (!sysfs)
+ return -1;
+
+ snprintf(caps_path, PATH_MAX,
+ "%s" EVENT_SOURCE_DEVICE_PATH "%s/caps", sysfs, pmu->name);
+
+ if (stat(caps_path, &st) < 0)
+ return 0; /* no error if caps does not exist */
+
+ caps_dir = opendir(caps_path);
+ if (!caps_dir)
+ return -EINVAL;
+
+ while ((evt_ent = readdir(caps_dir)) != NULL) {
+ char path[PATH_MAX + NAME_MAX + 1];
+ char *name = evt_ent->d_name;
+ char value[128];
+ FILE *file;
+
+ if (!strcmp(name, ".") || !strcmp(name, ".."))
+ continue;
+
+ snprintf(path, sizeof(path), "%s/%s", caps_path, name);
+
+ file = fopen(path, "r");
+ if (!file)
+ continue;
+
+ if (!fgets(value, sizeof(value), file) ||
+ (perf_pmu__new_caps(&pmu->caps, name, value) < 0)) {
+ fclose(file);
+ continue;
+ }
+
+ nr_caps++;
+ fclose(file);
+ }
+
+ closedir(caps_dir);
+
+ return nr_caps;
+}
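
To make the sysfs caps parsing concrete, a sketch of a hypothetical consumer (print_pmu_caps() is not part of this patch; struct perf_pmu_caps, pmu->caps and perf_pmu__caps_parse() are introduced above):

#include <stdio.h>
#include <linux/list.h>
#include "pmu.h"

/* Dump every /sys/bus/event_source/devices/<pmu>/caps/<file> as name=value. */
static void print_pmu_caps(struct perf_pmu *pmu)
{
	struct perf_pmu_caps *caps;
	int nr = perf_pmu__caps_parse(pmu);	/* fills pmu->caps, returns the number of caps files read */

	if (nr <= 0)
		return;

	list_for_each_entry(caps, &pmu->caps, list)
		printf("%s/caps/%s=%s\n", pmu->name, caps->name, caps->value);
}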
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 5fb3f16828df..a64e9c9ce731 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -9,7 +9,7 @@
#include "parse-events.h"
#include "pmu-events/pmu-events.h"
-struct perf_evsel_config_term;
+struct evsel_config_term;
enum {
PERF_PMU_FORMAT_VALUE_CONFIG,
@@ -22,6 +22,12 @@ enum {
struct perf_event_attr;
+struct perf_pmu_caps {
+ char *name;
+ char *value;
+ struct list_head list;
+};
+
struct perf_pmu {
char *name;
__u32 type;
@@ -33,9 +39,12 @@ struct perf_pmu {
struct perf_cpu_map *cpus;
struct list_head format; /* HEAD struct perf_pmu_format -> list */
struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */
+ struct list_head caps; /* HEAD struct perf_pmu_caps -> list */
struct list_head list; /* ELEM */
};
+extern struct perf_pmu perf_pmu__fake;
+
struct perf_pmu_info {
const char *unit;
const char *metric_expr;
@@ -65,10 +74,11 @@ struct perf_pmu_alias {
};
struct perf_pmu *perf_pmu__find(const char *name);
+struct perf_pmu *perf_pmu__find_by_type(unsigned int type);
int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
struct list_head *head_terms,
struct parse_events_error *error);
-int perf_pmu__config_terms(struct list_head *formats,
+int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats,
struct perf_event_attr *attr,
struct list_head *head_terms,
bool zero, struct parse_events_error *error);
@@ -78,13 +88,13 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
struct perf_pmu_info *info);
struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
struct list_head *head_terms);
-int perf_pmu_wrap(void);
void perf_pmu_error(struct list_head *list, char *name, char const *msg);
int perf_pmu__new_format(struct list_head *list, char *name,
int config, unsigned long *bits);
void perf_pmu__set_format(unsigned long *bits, long from, long to);
int perf_pmu__format_parse(char *dir, struct list_head *head);
+void perf_pmu__del_formats(struct list_head *formats);
struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
@@ -104,7 +114,10 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu,
struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu);
bool pmu_uncore_alias_match(const char *pmu_name, const char *name);
+void perf_pmu_free_alias(struct perf_pmu_alias *alias);
int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
+int perf_pmu__caps_parse(struct perf_pmu *pmu);
+
#endif /* __PMU_H */
diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c
index 599a1543871d..13fdc51c61d9 100644
--- a/tools/perf/util/print_binary.c
+++ b/tools/perf/util/print_binary.c
@@ -50,7 +50,7 @@ int is_printable_array(char *p, unsigned int len)
len--;
- for (i = 0; i < len; i++) {
+ for (i = 0; i < len && p[i]; i++) {
if (!isprint(p[i]) && !isspace(p[i]))
return 0;
}
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index eea132f512b0..8eae2afff71a 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -43,6 +43,10 @@
#include <linux/ctype.h>
#include <linux/zalloc.h>
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+#include <elfutils/debuginfod.h>
+#endif
+
#define PERFPROBE_GROUP "probe"
bool probe_event_dry_run; /* Dry run flag */
@@ -102,7 +106,7 @@ void exit_probe_symbol_maps(void)
symbol__exit();
}
-static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void)
+static struct ref_reloc_sym *kernel_get_ref_reloc_sym(struct map **pmap)
{
/* kmap->ref_reloc_sym should be set if host_machine is initialized */
struct kmap *kmap;
@@ -114,6 +118,10 @@ static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void)
kmap = map__kmap(map);
if (!kmap)
return NULL;
+
+ if (pmap)
+ *pmap = map;
+
return kmap->ref_reloc_sym;
}
@@ -125,9 +133,10 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr,
struct map *map;
/* ref_reloc_sym is just a label. Need a special fix */
- reloc_sym = kernel_get_ref_reloc_sym();
+ reloc_sym = kernel_get_ref_reloc_sym(&map);
if (reloc_sym && strcmp(name, reloc_sym->name) == 0)
- *addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr;
+ *addr = (!map->reloc || reloc) ? reloc_sym->addr :
+ reloc_sym->unrelocated_addr;
else {
sym = machine__find_kernel_symbol_by_name(host_machine, name, &map);
if (!sym)
@@ -232,21 +241,22 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs)
static bool kprobe_blacklist__listed(unsigned long address);
static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
{
- u64 etext_addr = 0;
- int ret;
-
- /* Get the address of _etext for checking non-probable text symbol */
- ret = kernel_get_symbol_address_by_name("_etext", &etext_addr,
- false, false);
+ struct map *map;
+ bool ret = false;
- if (ret == 0 && etext_addr < address)
- pr_warning("%s is out of .text, skip it.\n", symbol);
- else if (kprobe_blacklist__listed(address))
+ map = kernel_get_module_map(NULL);
+ if (map) {
+ ret = address <= map->start || map->end < address;
+ if (ret)
+ pr_warning("%s is out of .text, skip it.\n", symbol);
+ map__put(map);
+ }
+ if (!ret && kprobe_blacklist__listed(address)) {
pr_warning("%s is blacklisted function, skip it.\n", symbol);
- else
- return false;
+ ret = true;
+ }
- return true;
+ return ret;
}
/*
@@ -332,6 +342,8 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso)
map = machine__kernel_map(host_machine);
dso = map->dso;
+ if (!dso->has_build_id)
+ dso__read_running_kernel_build_id(dso, host_machine);
vmlinux_name = symbol_conf.vmlinux_name;
dso->load_errno = 0;
@@ -370,9 +382,13 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
/* Find the address of given function */
map__for_each_symbol_by_name(map, pp->function, sym) {
- if (uprobes)
+ if (uprobes) {
address = sym->start;
- else
+ if (sym->type == STT_GNU_IFUNC)
+ pr_warning("Warning: The probe function (%s) is a GNU indirect function.\n"
+					   "Consider identifying the final function used at run time and setting the probe directly on that.\n",
+ pp->function);
+ } else
address = map->unmap_ip(map, sym->start) - map->reloc;
break;
}
@@ -443,6 +459,49 @@ static int get_alternative_line_range(struct debuginfo *dinfo,
return ret;
}
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+static struct debuginfo *open_from_debuginfod(struct dso *dso, struct nsinfo *nsi,
+ bool silent)
+{
+ debuginfod_client *c = debuginfod_begin();
+ char sbuild_id[SBUILD_ID_SIZE + 1];
+ struct debuginfo *ret = NULL;
+ struct nscookie nsc;
+ char *path;
+ int fd;
+
+ if (!c)
+ return NULL;
+
+ build_id__sprintf(&dso->bid, sbuild_id);
+ fd = debuginfod_find_debuginfo(c, (const unsigned char *)sbuild_id,
+ 0, &path);
+ if (fd >= 0)
+ close(fd);
+ debuginfod_end(c);
+ if (fd < 0) {
+ if (!silent)
+ pr_debug("Failed to find debuginfo in debuginfod.\n");
+ return NULL;
+ }
+ if (!silent)
+ pr_debug("Load debuginfo from debuginfod (%s)\n", path);
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = debuginfo__new((const char *)path);
+ nsinfo__mountns_exit(&nsc);
+ return ret;
+}
+#else
+static inline
+struct debuginfo *open_from_debuginfod(struct dso *dso __maybe_unused,
+ struct nsinfo *nsi __maybe_unused,
+ bool silent __maybe_unused)
+{
+ return NULL;
+}
+#endif
+
/* Open new debuginfo of given module */
static struct debuginfo *open_debuginfo(const char *module, struct nsinfo *nsi,
bool silent)
@@ -462,6 +521,10 @@ static struct debuginfo *open_debuginfo(const char *module, struct nsinfo *nsi,
strcpy(reason, "(unknown)");
} else
dso__strerror_load(dso, reason, STRERR_BUFSIZE);
+ if (dso)
+ ret = open_from_debuginfod(dso, nsi, silent);
+ if (ret)
+ return ret;
if (!silent) {
if (module)
pr_err("Module %s is not loaded, please specify its full path name.\n", module);
@@ -745,6 +808,7 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
int ntevs)
{
struct ref_reloc_sym *reloc_sym;
+ struct map *map;
char *tmp;
int i, skipped = 0;
@@ -753,7 +817,7 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
return post_process_offline_probe_trace_events(tevs, ntevs,
symbol_conf.vmlinux_name);
- reloc_sym = kernel_get_ref_reloc_sym();
+ reloc_sym = kernel_get_ref_reloc_sym(&map);
if (!reloc_sym) {
pr_warning("Relocated base symbol is not found!\n");
return -EINVAL;
@@ -764,9 +828,13 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
continue;
if (tevs[i].point.retprobe && !kretprobe_offset_is_supported())
continue;
- /* If we found a wrong one, mark it by NULL symbol */
+ /*
+	 * If we found a wrong one, mark it with a NULL symbol.
+	 * Since addresses in debuginfo are the same as in objdump, we need
+	 * to convert them to addresses in memory.
+ */
if (kprobe_warn_out_range(tevs[i].point.symbol,
- tevs[i].point.address)) {
+ map__objdump_2mem(map, tevs[i].point.address))) {
tmp = NULL;
skipped++;
} else {
@@ -781,7 +849,8 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
free(tevs[i].point.symbol);
tevs[i].point.symbol = tmp;
tevs[i].point.offset = tevs[i].point.address -
- reloc_sym->unrelocated_addr;
+ (map->reloc ? reloc_sym->unrelocated_addr :
+ reloc_sym->addr);
}
return skipped;
}
@@ -936,6 +1005,7 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num)
static int __show_line_range(struct line_range *lr, const char *module,
bool user)
{
+ struct build_id bid;
int l = 1;
struct int_node *ln;
struct debuginfo *dinfo;
@@ -943,6 +1013,7 @@ static int __show_line_range(struct line_range *lr, const char *module,
int ret;
char *tmp;
char sbuf[STRERR_BUFSIZE];
+ char sbuild_id[SBUILD_ID_SIZE] = "";
/* Search a line range */
dinfo = open_debuginfo(module, NULL, false);
@@ -955,6 +1026,10 @@ static int __show_line_range(struct line_range *lr, const char *module,
if (!ret)
ret = debuginfo__find_line_range(dinfo, lr);
}
+ if (dinfo->build_id) {
+ build_id__init(&bid, dinfo->build_id, BUILD_ID_SIZE);
+ build_id__sprintf(&bid, sbuild_id);
+ }
debuginfo__delete(dinfo);
if (ret == 0 || ret == -ENOENT) {
pr_warning("Specified source line is not found.\n");
@@ -966,7 +1041,7 @@ static int __show_line_range(struct line_range *lr, const char *module,
/* Convert source file path */
tmp = lr->path;
- ret = get_real_path(tmp, lr->comp_dir, &lr->path);
+ ret = find_source_path(tmp, sbuild_id, lr->comp_dir, &lr->path);
/* Free old path when new path is assigned */
if (tmp != lr->path)
@@ -1565,7 +1640,7 @@ static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
}
tmp = strchr(str, '@');
- if (tmp && tmp != str && strcmp(tmp + 1, "user")) { /* user attr */
+ if (tmp && tmp != str && !strcmp(tmp + 1, "user")) { /* user attr */
if (!user_access_is_supported()) {
semantic_error("ftrace does not support user access\n");
return -EINVAL;
@@ -1765,8 +1840,7 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev)
fmt1_str = strtok_r(argv0_str, ":", &fmt);
fmt2_str = strtok_r(NULL, "/", &fmt);
fmt3_str = strtok_r(NULL, " \t", &fmt);
- if (fmt1_str == NULL || strlen(fmt1_str) != 1 || fmt2_str == NULL
- || fmt3_str == NULL) {
+ if (fmt1_str == NULL || fmt2_str == NULL || fmt3_str == NULL) {
semantic_error("Failed to parse event name: %s\n", argv[0]);
ret = -EINVAL;
goto out;
@@ -1986,7 +2060,10 @@ static int __synthesize_probe_trace_arg_ref(struct probe_trace_arg_ref *ref,
if (depth < 0)
return depth;
}
- err = strbuf_addf(buf, "%+ld(", ref->offset);
+ if (ref->user_access)
+ err = strbuf_addf(buf, "%s%ld(", "+u", ref->offset);
+ else
+ err = strbuf_addf(buf, "%+ld(", ref->offset);
return (err < 0) ? err : depth;
}
@@ -2936,7 +3013,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
/* Note that the symbols in the kmodule are not relocated */
if (!pev->uprobes && !pev->target &&
(!pp->retprobe || kretprobe_offset_is_supported())) {
- reloc_sym = kernel_get_ref_reloc_sym();
+ reloc_sym = kernel_get_ref_reloc_sym(NULL);
if (!reloc_sym) {
pr_warning("Relocated base symbol is not found!\n");
ret = -EINVAL;
@@ -2956,6 +3033,16 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
for (j = 0; j < num_matched_functions; j++) {
sym = syms[j];
+ /* There can be duplicated symbols in the map */
+ for (i = 0; i < j; i++)
+ if (sym->start == syms[i]->start) {
+ pr_debug("Found duplicated symbol %s @ %" PRIx64 "\n",
+ sym->name, sym->start);
+ break;
+ }
+ if (i != j)
+ continue;
+
tev = (*tevs) + ret;
tp = &tev->point;
if (ret == num_matched_functions) {
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 8c852948513e..064b63a6a3f3 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -1044,7 +1044,7 @@ static struct {
DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"),
DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"),
DEFINE_TYPE(FTRACE_README_UPROBE_REF_CTR, "*ref_ctr_offset*"),
- DEFINE_TYPE(FTRACE_README_USER_ACCESS, "*[u]<offset>*"),
+ DEFINE_TYPE(FTRACE_README_USER_ACCESS, "*u]<offset>*"),
DEFINE_TYPE(FTRACE_README_MULTIPROBE_EVENT, "*Create/append/*"),
DEFINE_TYPE(FTRACE_README_IMMEDIATE_VALUE, "*\\imm-value,*"),
};
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index e4cff49384f4..2c4061035f77 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -31,6 +31,10 @@
#include "probe-file.h"
#include "string2.h"
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+#include <elfutils/debuginfod.h>
+#endif
+
/* Kprobe tracer basic type is up to u64 */
#define MAX_BASIC_TYPE_BITS 64
@@ -51,6 +55,7 @@ static const Dwfl_Callbacks offline_callbacks = {
static int debuginfo__init_offline_dwarf(struct debuginfo *dbg,
const char *path)
{
+ GElf_Addr dummy;
int fd;
fd = open(path, O_RDONLY);
@@ -70,6 +75,8 @@ static int debuginfo__init_offline_dwarf(struct debuginfo *dbg,
if (!dbg->dbg)
goto error;
+ dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy);
+
dwfl_report_end(dbg->dwfl, NULL, NULL);
return 0;
@@ -101,6 +108,7 @@ enum dso_binary_type distro_dwarf_types[] = {
DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+ DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
DSO_BINARY_TYPE__NOT_FOUND,
};
@@ -941,6 +949,8 @@ static int probe_point_lazy_walker(const char *fname, int lineno,
/* Find probe points from lazy pattern */
static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
{
+ struct build_id bid;
+ char sbuild_id[SBUILD_ID_SIZE] = "";
int ret = 0;
char *fpath;
@@ -948,7 +958,11 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
const char *comp_dir;
comp_dir = cu_get_comp_dir(&pf->cu_die);
- ret = get_real_path(pf->fname, comp_dir, &fpath);
+ if (pf->dbg->build_id) {
+ build_id__init(&bid, pf->dbg->build_id, BUILD_ID_SIZE);
+ build_id__sprintf(&bid, sbuild_id);
+ }
+ ret = find_source_path(pf->fname, sbuild_id, comp_dir, &fpath);
if (ret < 0) {
pr_warning("Failed to find source file path.\n");
return ret;
@@ -1407,6 +1421,9 @@ static int fill_empty_trace_arg(struct perf_probe_event *pev,
char *type;
int i, j, ret;
+ if (!ntevs)
+ return -ENOENT;
+
for (i = 0; i < pev->nargs; i++) {
type = NULL;
for (j = 0; j < ntevs; j++) {
@@ -1444,7 +1461,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
struct probe_trace_event **tevs)
{
struct trace_event_finder tf = {
- .pf = {.pev = pev, .callback = add_probe_trace_event},
+ .pf = {.pev = pev, .dbg = dbg, .callback = add_probe_trace_event},
.max_tevs = probe_conf.max_probes, .mod = dbg->mod};
int ret, i;
@@ -1463,7 +1480,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
if (ret >= 0 && tf.pf.skip_empty_arg)
ret = fill_empty_trace_arg(pev, tf.tevs, tf.ntevs);
- if (ret < 0) {
+ if (ret < 0 || tf.ntevs == 0) {
for (i = 0; i < tf.ntevs; i++)
clear_probe_trace_event(&tf.tevs[i]);
zfree(tevs);
@@ -1614,7 +1631,7 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg,
struct variable_list **vls)
{
struct available_var_finder af = {
- .pf = {.pev = pev, .callback = add_available_vars},
+ .pf = {.pev = pev, .dbg = dbg, .callback = add_available_vars},
.mod = dbg->mod,
.max_vls = probe_conf.max_probes};
int ret;
@@ -1969,17 +1986,57 @@ found:
return (ret < 0) ? ret : lf.found;
}
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+/* debuginfod doesn't require the comp_dir but buildid is required */
+static int get_source_from_debuginfod(const char *raw_path,
+ const char *sbuild_id, char **new_path)
+{
+ debuginfod_client *c = debuginfod_begin();
+ const char *p = raw_path;
+ int fd;
+
+ if (!c)
+ return -ENOMEM;
+
+ fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id,
+ 0, p, new_path);
+ pr_debug("Search %s from debuginfod -> %d\n", p, fd);
+ if (fd >= 0)
+ close(fd);
+ debuginfod_end(c);
+ if (fd < 0) {
+ pr_debug("Failed to find %s in debuginfod (%s)\n",
+ raw_path, sbuild_id);
+ return -ENOENT;
+ }
+ pr_debug("Got a source %s\n", *new_path);
+
+ return 0;
+}
+#else
+static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused,
+ const char *sbuild_id __maybe_unused,
+ char **new_path __maybe_unused)
+{
+ return -ENOTSUP;
+}
+#endif
/*
* Find a src file from a DWARF tag path. Prepend optional source path prefix
* and chop off leading directories that do not exist. Result is passed back as
* a newly allocated path on success.
* Return 0 if file was found and readable, -errno otherwise.
*/
-int get_real_path(const char *raw_path, const char *comp_dir,
- char **new_path)
+int find_source_path(const char *raw_path, const char *sbuild_id,
+ const char *comp_dir, char **new_path)
{
const char *prefix = symbol_conf.source_prefix;
+ if (sbuild_id && !prefix) {
+ if (!get_source_from_debuginfod(raw_path, sbuild_id, new_path))
+ return 0;
+ }
+
if (!prefix) {
if (raw_path[0] != '/' && comp_dir)
/* If not an absolute path, try to use comp_dir */
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 11be10080613..2febb5875678 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -4,6 +4,7 @@
#include <stdbool.h>
#include "intlist.h"
+#include "build-id.h"
#include "probe-event.h"
#include <linux/ctype.h>
@@ -32,6 +33,7 @@ struct debuginfo {
Dwfl_Module *mod;
Dwfl *dwfl;
Dwarf_Addr bias;
+ const unsigned char *build_id;
};
/* This also tries to open distro debuginfo */
@@ -59,11 +61,12 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg,
struct variable_list **vls);
/* Find a src file from a DWARF tag path */
-int get_real_path(const char *raw_path, const char *comp_dir,
- char **new_path);
+int find_source_path(const char *raw_path, const char *sbuild_id,
+ const char *comp_dir, char **new_path);
struct probe_finder {
struct perf_probe_event *pev; /* Target probe event */
+ struct debuginfo *dbg;
/* Callback when a probe point is found */
int (*callback)(Dwarf_Die *sc_die, struct probe_finder *pf);
diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c
index 80ff41fc45be..a1d1e4ef6257 100644
--- a/tools/perf/util/pstack.c
+++ b/tools/perf/util/pstack.c
@@ -15,7 +15,7 @@
struct pstack {
unsigned short top;
unsigned short max_nr_entries;
- void *entries[0];
+ void *entries[];
};
struct pstack *pstack__new(unsigned short max_nr_entries)
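
The pstack hunk above replaces the old zero-length-array extension with a C99 flexible array member; the allocation pattern it enables looks like the following sketch (a stand-in, not the exact body of pstack__new()):

#include <stdlib.h>

struct pstack_example {
	unsigned short top;
	unsigned short max_nr_entries;
	void *entries[];	/* flexible array member, sized at allocation time */
};

static struct pstack_example *pstack_example__new(unsigned short max_nr_entries)
{
	/* One allocation covers the fixed header plus max_nr_entries pointer slots. */
	struct pstack_example *pstack = calloc(1, sizeof(*pstack) +
						  max_nr_entries * sizeof(void *));

	if (pstack)
		pstack->max_nr_entries = max_nr_entries;
	return pstack;
}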
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 83212c65848b..ae8edde7c50e 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -15,9 +15,11 @@
#include "thread_map.h"
#include "trace-event.h"
#include "mmap.h"
+#include "stat.h"
+#include "metricgroup.h"
#include "util/env.h"
#include <internal/lib.h>
-#include "../perf-sys.h"
+#include "util.h"
#if PY_MAJOR_VERSION < 3
#define _PyUnicode_FromString(arg) \
@@ -61,6 +63,23 @@ int parse_callchain_record(const char *arg __maybe_unused,
struct perf_env perf_env;
/*
+ * Add this one here not to drag util/stat-shadow.c
+ */
+void perf_stat__collect_metric_expr(struct evlist *evsel_list)
+{
+}
+
+/*
+ * Add this one here not to drag util/metricgroup.c
+ */
+int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
+ struct rblist *new_metric_events,
+ struct rblist *old_metric_events)
+{
+ return 0;
+}
+
+/*
* Support debug printing even though util/debug.c is not linked. That means
* implementing 'verbose' and 'eprintf'.
*/
@@ -801,7 +820,7 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel,
static void pyrf_evsel__delete(struct pyrf_evsel *pevsel)
{
- perf_evsel__exit(&pevsel->evsel);
+ evsel__exit(&pevsel->evsel);
Py_TYPE(pevsel)->tp_free((PyObject*)pevsel);
}
@@ -1044,7 +1063,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
pevent->evsel = evsel;
- err = perf_evsel__parse_sample(evsel, event, &pevent->sample);
+ err = evsel__parse_sample(evsel, event, &pevent->sample);
/* Consume the even only after we parsed it out. */
perf_mmap__consume(&md->core);
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 7def66168503..07e4b96a6625 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -2,6 +2,7 @@
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
+#include "evsel_config.h"
#include "parse-events.h"
#include <errno.h>
#include <limits.h>
@@ -10,161 +11,82 @@
#include <subcmd/parse-options.h>
#include <perf/cpumap.h>
#include "cloexec.h"
+#include "util/perf_api_probe.h"
#include "record.h"
#include "../perf-sys.h"
+#include "topdown.h"
-typedef void (*setup_probe_fn_t)(struct evsel *evsel);
-
-static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
+/*
+ * evsel__config_leader_sampling() uses special rules for leader sampling.
+ * However, if the leader is an AUX area event, then assume the event to sample
+ * is the next event.
+ */
+static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evlist)
{
- struct evlist *evlist;
- struct evsel *evsel;
- unsigned long flags = perf_event_open_cloexec_flag();
- int err = -EAGAIN, fd;
- static pid_t pid = -1;
-
- evlist = evlist__new();
- if (!evlist)
- return -ENOMEM;
-
- if (parse_events(evlist, str, NULL))
- goto out_delete;
-
- evsel = evlist__first(evlist);
+ struct evsel *leader = evsel->leader;
- while (1) {
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
- if (fd < 0) {
- if (pid == -1 && errno == EACCES) {
- pid = 0;
- continue;
- }
- goto out_delete;
+ if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader)) {
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->leader == leader && evsel != evsel->leader)
+ return evsel;
}
- break;
- }
- close(fd);
-
- fn(evsel);
-
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
- if (fd < 0) {
- if (errno == EINVAL)
- err = -EINVAL;
- goto out_delete;
}
- close(fd);
- err = 0;
-
-out_delete:
- evlist__delete(evlist);
- return err;
-}
-
-static bool perf_probe_api(setup_probe_fn_t fn)
-{
- const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
- struct perf_cpu_map *cpus;
- int cpu, ret, i = 0;
-
- cpus = perf_cpu_map__new(NULL);
- if (!cpus)
- return false;
- cpu = cpus->map[0];
- perf_cpu_map__put(cpus);
- do {
- ret = perf_do_probe_api(fn, cpu, try[i++]);
- if (!ret)
- return true;
- } while (ret == -EAGAIN && try[i]);
-
- return false;
-}
-
-static void perf_probe_sample_identifier(struct evsel *evsel)
-{
- evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
+ return leader;
}
-static void perf_probe_comm_exec(struct evsel *evsel)
+static u64 evsel__config_term_mask(struct evsel *evsel)
{
- evsel->core.attr.comm_exec = 1;
-}
-
-static void perf_probe_context_switch(struct evsel *evsel)
-{
- evsel->core.attr.context_switch = 1;
-}
+ struct evsel_config_term *term;
+ struct list_head *config_terms = &evsel->config_terms;
+ u64 term_types = 0;
-bool perf_can_sample_identifier(void)
-{
- return perf_probe_api(perf_probe_sample_identifier);
-}
-
-static bool perf_can_comm_exec(void)
-{
- return perf_probe_api(perf_probe_comm_exec);
-}
-
-bool perf_can_record_switch_events(void)
-{
- return perf_probe_api(perf_probe_context_switch);
+ list_for_each_entry(term, config_terms, list) {
+ term_types |= 1 << term->type;
+ }
+ return term_types;
}
-bool perf_can_record_cpu_wide(void)
+static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist)
{
- struct perf_event_attr attr = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_CPU_CLOCK,
- .exclude_kernel = 1,
- };
- struct perf_cpu_map *cpus;
- int cpu, fd;
-
- cpus = perf_cpu_map__new(NULL);
- if (!cpus)
- return false;
- cpu = cpus->map[0];
- perf_cpu_map__put(cpus);
+ struct perf_event_attr *attr = &evsel->core.attr;
+ struct evsel *leader = evsel->leader;
+ struct evsel *read_sampler;
+ u64 term_types, freq_mask;
- fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
- if (fd < 0)
- return false;
- close(fd);
+ if (!leader->sample_read)
+ return;
- return true;
-}
+ read_sampler = evsel__read_sampler(evsel, evlist);
-/*
- * Architectures are expected to know if AUX area sampling is supported by the
- * hardware. Here we check for kernel support.
- */
-bool perf_can_aux_sample(void)
-{
- struct perf_event_attr attr = {
- .size = sizeof(struct perf_event_attr),
- .exclude_kernel = 1,
- /*
- * Non-zero value causes the kernel to calculate the effective
- * attribute size up to that byte.
- */
- .aux_sample_size = 1,
- };
- int fd;
+ if (evsel == read_sampler)
+ return;
- fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
+ term_types = evsel__config_term_mask(evsel);
/*
- * If the kernel attribute is big enough to contain aux_sample_size
- * then we assume that it is supported. We are relying on the kernel to
- * validate the attribute size before anything else that could be wrong.
+ * Disable sampling for all group members except those with explicit
+ * config terms or the leader. In the case of an AUX area event, the 2nd
+ * event in the group is the one that 'leads' the sampling.
*/
- if (fd < 0 && errno == E2BIG)
- return false;
- if (fd >= 0)
- close(fd);
+ freq_mask = (1 << EVSEL__CONFIG_TERM_FREQ) | (1 << EVSEL__CONFIG_TERM_PERIOD);
+ if ((term_types & freq_mask) == 0) {
+ attr->freq = 0;
+ attr->sample_freq = 0;
+ attr->sample_period = 0;
+ }
+ if ((term_types & (1 << EVSEL__CONFIG_TERM_OVERWRITE)) == 0)
+ attr->write_backward = 0;
- return true;
+ /*
+	 * We don't get a sample for slave events; we make them when delivering
+	 * the group leader sample. Set the slave event to follow the master
+	 * sample_type to ease reporting.
+ * An AUX area event also has sample_type requirements, so also include
+ * the sample type bits from the leader's sample_type to cover that
+ * case.
+ */
+ attr->sample_type = read_sampler->core.attr.sample_type |
+ leader->core.attr.sample_type;
}
void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
@@ -188,11 +110,15 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
use_comm_exec = perf_can_comm_exec();
evlist__for_each_entry(evlist, evsel) {
- perf_evsel__config(evsel, opts, callchain);
+ evsel__config(evsel, opts, callchain);
if (evsel->tracking && use_comm_exec)
evsel->core.attr.comm_exec = 1;
}
+ /* Configure leader sampling here now that the sample type is known */
+ evlist__for_each_entry(evlist, evsel)
+ evsel__config_leader_sampling(evsel, evlist);
+
if (opts->full_auxtrace) {
/*
* Need to be able to synthesize and parse selected events with
@@ -215,7 +141,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
if (sample_id) {
evlist__for_each_entry(evlist, evsel)
- perf_evsel__set_sample_id(evsel, use_sample_identifier);
+ evsel__set_sample_id(evsel, use_sample_identifier);
}
perf_evlist__set_id_pos(evlist);
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 24316458be20..266760ac9143 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -36,6 +36,7 @@ struct record_opts {
bool record_namespaces;
bool record_cgroup;
bool record_switch_events;
+ bool record_switch_events_set;
bool all_kernel;
bool all_user;
bool kernel_callchains;
@@ -47,6 +48,7 @@ struct record_opts {
bool sample_id;
bool no_bpf_event;
bool kcore;
+ bool text_poke;
unsigned int freq;
unsigned int mmap_pages;
unsigned int auxtrace_mmap_pages;
@@ -60,7 +62,7 @@ struct record_opts {
const char *auxtrace_snapshot_opts;
const char *auxtrace_sample_opts;
bool sample_transaction;
- unsigned initial_delay;
+ int initial_delay;
bool use_clockid;
clockid_t clockid;
u64 clockid_res_ns;
@@ -68,6 +70,10 @@ struct record_opts {
int affinity;
int mmap_flush;
unsigned int comp_level;
+ unsigned int nr_threads_synthesize;
+ int ctl_fd;
+ int ctl_fd_ack;
+ bool ctl_fd_close;
};
extern const char * const *record_usage;
@@ -75,4 +81,9 @@ extern struct option *record_options;
int record__parse_freq(const struct option *opt, const char *str, int unset);
+static inline bool record_opts__no_switch_events(const struct record_opts *opts)
+{
+ return opts->record_switch_events_set && !opts->record_switch_events;
+}
+
#endif // _PERF_RECORD_H
diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h
index d4356030b504..f55ca07f3ca1 100644
--- a/tools/perf/util/s390-cpumcf-kernel.h
+++ b/tools/perf/util/s390-cpumcf-kernel.h
@@ -11,6 +11,7 @@
#define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */
#define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */
+#define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */
struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */
unsigned int def:16; /* 0-15 Data Entry Format */
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index 6785cd87aa4d..f8861998e5bd 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -1047,6 +1047,14 @@ static void s390_cpumsf_free(struct perf_session *session)
free(sf);
}
+static bool
+s390_cpumsf_evsel_is_auxtrace(struct perf_session *session __maybe_unused,
+ struct evsel *evsel)
+{
+ return evsel->core.attr.type == PERF_TYPE_RAW &&
+ evsel->core.attr.config == PERF_EVENT_CPUM_SF_DIAG;
+}
+
static int s390_cpumsf_get_type(const char *cpuid)
{
int ret, family = 0;
@@ -1071,7 +1079,8 @@ static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
itops->pwr_events || itops->errors ||
itops->dont_decode || itops->calls || itops->returns ||
itops->callchain || itops->thread_stack ||
- itops->last_branch;
+ itops->last_branch || itops->add_callchain ||
+ itops->add_last_branch;
if (!ison)
return true;
pr_err("Unsupported --itrace options specified\n");
@@ -1142,6 +1151,7 @@ int s390_cpumsf_process_auxtrace_info(union perf_event *event,
sf->auxtrace.flush_events = s390_cpumsf_flush;
sf->auxtrace.free_events = s390_cpumsf_free_events;
sf->auxtrace.free = s390_cpumsf_free;
+ sf->auxtrace.evsel_is_auxtrace = s390_cpumsf_evsel_is_auxtrace;
session->auxtrace = &sf->auxtrace;
if (dump_trace)
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 2c372cf5495e..7cbd024e3e63 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -741,7 +741,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
if (!dict_sample)
Py_FatalError("couldn't create Python dictionary");
- pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel)));
+ pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(evsel__name(evsel)));
pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->core.attr, sizeof(evsel->core.attr)));
pydict_set_item_string_decref(dict_sample, "pid",
@@ -968,7 +968,7 @@ static int python_export_evsel(struct db_export *dbe, struct evsel *evsel)
t = tuple_new(2);
tuple_set_u64(t, 0, evsel->db_id);
- tuple_set_string(t, 1, perf_evsel__name(evsel));
+ tuple_set_string(t, 1, evsel__name(evsel));
call_object(tables->evsel_handler, t, "evsel_table");
@@ -1064,7 +1064,7 @@ static int python_export_dso(struct db_export *dbe, struct dso *dso,
char sbuild_id[SBUILD_ID_SIZE];
PyObject *t;
- build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+ build_id__sprintf(&dso->bid, sbuild_id);
t = tuple_new(5);
@@ -1349,7 +1349,7 @@ static void get_handler_name(char *str, size_t size,
{
char *p = str;
- scnprintf(str, size, "stat__%s", perf_evsel__name(evsel));
+ scnprintf(str, size, "stat__%s", evsel__name(evsel));
while ((p = strchr(p, ':'))) {
*p = '_';
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 0b0bfe5bef17..7a5f03764702 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -33,7 +33,6 @@
#include "../perf.h"
#include "arch/common.h"
#include <internal/lib.h>
-#include <linux/err.h>
#ifdef HAVE_ZSTD_SUPPORT
static int perf_session__process_compressed_event(struct perf_session *session,
@@ -88,7 +87,7 @@ static int perf_session__process_compressed_event(struct perf_session *session,
session->decomp_last = decomp;
}
- pr_debug("decomp (B): %ld to %ld\n", src_size, decomp_size);
+ pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size);
return 0;
}
@@ -116,12 +115,12 @@ static int perf_session__open(struct perf_session *session)
if (perf_header__has_feat(&session->header, HEADER_STAT))
return 0;
- if (!perf_evlist__valid_sample_type(session->evlist)) {
+ if (!evlist__valid_sample_type(session->evlist)) {
pr_err("non matching sample_type\n");
return -1;
}
- if (!perf_evlist__valid_sample_id_all(session->evlist)) {
+ if (!evlist__valid_sample_id_all(session->evlist)) {
pr_err("non matching sample_id_all\n");
return -1;
}
@@ -253,10 +252,10 @@ struct perf_session *perf_session__new(struct perf_data *data,
/*
* In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is
- * processed, so perf_evlist__sample_id_all is not meaningful here.
+ * processed, so evlist__sample_id_all is not meaningful here.
*/
if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps &&
- tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) {
+ tool->ordered_events && !evlist__sample_id_all(session->evlist)) {
dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
tool->ordered_events = false;
}
@@ -491,6 +490,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->ksymbol = perf_event__process_ksymbol;
if (tool->bpf == NULL)
tool->bpf = perf_event__process_bpf;
+ if (tool->text_poke == NULL)
+ tool->text_poke = perf_event__process_text_poke;
if (tool->read == NULL)
tool->read = process_event_sample_stub;
if (tool->throttle == NULL)
@@ -660,6 +661,24 @@ static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
swap_sample_id_all(event, &event->context_switch + 1);
}
+static void perf_event__text_poke_swap(union perf_event *event, bool sample_id_all)
+{
+ event->text_poke.addr = bswap_64(event->text_poke.addr);
+ event->text_poke.old_len = bswap_16(event->text_poke.old_len);
+ event->text_poke.new_len = bswap_16(event->text_poke.new_len);
+
+ if (sample_id_all) {
+ size_t len = sizeof(event->text_poke.old_len) +
+ sizeof(event->text_poke.new_len) +
+ event->text_poke.old_len +
+ event->text_poke.new_len;
+ void *data = &event->text_poke.old_len;
+
+ data += PERF_ALIGN(len, sizeof(u64));
+ swap_sample_id_all(event, data);
+ }
+}
+
static void perf_event__throttle_swap(union perf_event *event,
bool sample_id_all)
{
@@ -933,6 +952,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
[PERF_RECORD_SWITCH] = perf_event__switch_swap,
[PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap,
[PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap,
+ [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap,
[PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
[PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
@@ -1059,7 +1079,7 @@ static void callchain__printf(struct evsel *evsel,
unsigned int i;
struct ip_callchain *callchain = sample->callchain;
- if (perf_evsel__has_branch_callstack(evsel))
+ if (evsel__has_branch_callstack(evsel))
callchain__lbr_callstack_printf(sample);
printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr);
@@ -1104,7 +1124,7 @@ static void regs_dump__printf(u64 mask, u64 *regs)
for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) {
u64 val = regs[i++];
- printf(".... %-5s 0x%" PRIx64 "\n",
+ printf(".... %-5s 0x%016" PRIx64 "\n",
perf_reg_name(rid), val);
}
}
@@ -1161,10 +1181,10 @@ static void perf_evlist__print_tstamp(struct evlist *evlist,
union perf_event *event,
struct perf_sample *sample)
{
- u64 sample_type = __perf_evlist__combined_sample_type(evlist);
+ u64 sample_type = __evlist__combined_sample_type(evlist);
if (event->header.type != PERF_RECORD_SAMPLE &&
- !perf_evlist__sample_id_all(evlist)) {
+ !evlist__sample_id_all(evlist)) {
fputs("-1 -1 ", stdout);
return;
}
@@ -1243,8 +1263,8 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
if (evsel__has_callchain(evsel))
callchain__printf(evsel, sample);
- if (sample_type & PERF_SAMPLE_BRANCH_STACK)
- branch_stack__printf(sample, perf_evsel__has_branch_callstack(evsel));
+ if (evsel__has_br_stack(evsel))
+ branch_stack__printf(sample, evsel__has_branch_callstack(evsel));
if (sample_type & PERF_SAMPLE_REGS_USER)
regs_user__printf(sample);
@@ -1280,8 +1300,7 @@ static void dump_read(struct evsel *evsel, union perf_event *event)
return;
printf(": %d %d %s %" PRI_lu64 "\n", event->read.pid, event->read.tid,
- perf_evsel__name(evsel),
- event->read.value);
+ evsel__name(evsel), event->read.value);
if (!evsel)
return;
@@ -1476,6 +1495,8 @@ static int machines__deliver_event(struct machines *machines,
return tool->ksymbol(tool, event, sample, machine);
case PERF_RECORD_BPF_EVENT:
return tool->bpf(tool, event, sample, machine);
+ case PERF_RECORD_TEXT_POKE:
+ return tool->text_poke(tool, event, sample, machine);
default:
++evlist->stats.nr_unknown_events;
return -1;
@@ -1543,8 +1564,13 @@ static s64 perf_session__process_user_event(struct perf_session *session,
*/
return 0;
case PERF_RECORD_HEADER_TRACING_DATA:
- /* setup for reading amidst mmap */
- lseek(fd, file_offset, SEEK_SET);
+ /*
+ * Set up for reading amidst mmap, but only when we
+ * are in 'file' mode. The 'pipe' fd is already at
+ * the proper position.
+ */
+ if (!perf_data__is_pipe(session->data))
+ lseek(fd, file_offset, SEEK_SET);
return tool->tracing_data(session, event);
case PERF_RECORD_HEADER_BUILD_ID:
return tool->build_id(session, event);
@@ -1652,7 +1678,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
return -1;
if (session->header.needs_swap)
- event_swap(event, perf_evlist__sample_id_all(session->evlist));
+ event_swap(event, evlist__sample_id_all(session->evlist));
out_parse_sample:
@@ -1701,7 +1727,7 @@ static s64 perf_session__process_event(struct perf_session *session,
int ret;
if (session->header.needs_swap)
- event_swap(event, perf_evlist__sample_id_all(evlist));
+ event_swap(event, evlist__sample_id_all(evlist));
if (event->header.type >= PERF_RECORD_HEADER_MAX)
return -EINVAL;
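The swap handler above has to locate the sample_id trailer behind a variable-length payload: the trailer starts after the two u16 length fields plus the old and new instruction byte images, rounded up to an 8-byte boundary by PERF_ALIGN(). A minimal, self-contained sketch of that arithmetic (the ALIGN8 macro and the sample lengths are illustrative stand-ins, not perf API):

#include <stdint.h>
#include <stdio.h>

#define ALIGN8(x) (((x) + 7) & ~(size_t)7)	/* stand-in for PERF_ALIGN(x, sizeof(u64)) */

int main(void)
{
	uint16_t old_len = 5, new_len = 5;	/* e.g. a 5-byte call rewritten in place */
	size_t len = sizeof(old_len) + sizeof(new_len) + old_len + new_len;

	/* offset of the sample_id data, relative to &event->text_poke.old_len */
	printf("trailer offset: %zu\n", ALIGN8(len));	/* 14 rounds up to 16 */
	return 0;
}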
diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c
new file mode 100644
index 000000000000..ded9ced02797
--- /dev/null
+++ b/tools/perf/util/sideband_evlist.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/mmap.h"
+#include "util/perf_api_probe.h"
+#include <perf/mmap.h>
+#include <linux/perf_event.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdbool.h>
+
+int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
+ evsel__sb_cb_t cb, void *data)
+{
+ struct evsel *evsel;
+
+ if (!attr->sample_id_all) {
+ pr_warning("enabling sample_id_all for all side band events\n");
+ attr->sample_id_all = 1;
+ }
+
+ evsel = evsel__new_idx(attr, evlist->core.nr_entries);
+ if (!evsel)
+ return -1;
+
+ evsel->side_band.cb = cb;
+ evsel->side_band.data = data;
+ evlist__add(evlist, evsel);
+ return 0;
+}
+
+static void *perf_evlist__poll_thread(void *arg)
+{
+ struct evlist *evlist = arg;
+ bool draining = false;
+ int i, done = 0;
+ /*
+ * In order to read symbols from other namespaces, perf needs to call
+ * setns(2). This isn't permitted if the struct_fs has multiple users.
+ * unshare(2) the fs so that we may continue to setns into namespaces
+ * that we're observing when, for instance, reading the build-ids at
+ * the end of a 'perf record' session.
+ */
+ unshare(CLONE_FS);
+
+ while (!done) {
+ bool got_data = false;
+
+ if (evlist->thread.done)
+ draining = true;
+
+ if (!draining)
+ evlist__poll(evlist, 1000);
+
+ for (i = 0; i < evlist->core.nr_mmaps; i++) {
+ struct mmap *map = &evlist->mmap[i];
+ union perf_event *event;
+
+ if (perf_mmap__read_init(&map->core))
+ continue;
+ while ((event = perf_mmap__read_event(&map->core)) != NULL) {
+ struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
+
+ if (evsel && evsel->side_band.cb)
+ evsel->side_band.cb(event, evsel->side_band.data);
+ else
+ pr_warning("cannot locate proper evsel for the side band event\n");
+
+ perf_mmap__consume(&map->core);
+ got_data = true;
+ }
+ perf_mmap__read_done(&map->core);
+ }
+
+ if (draining && !got_data)
+ break;
+ }
+ return NULL;
+}
+
+void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ evsel->core.attr.sample_id_all = 1;
+ evsel->core.attr.watermark = 1;
+ evsel->core.attr.wakeup_watermark = 1;
+ evsel->side_band.cb = cb;
+ evsel->side_band.data = data;
+ }
+}
+
+int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target)
+{
+ struct evsel *counter;
+
+ if (!evlist)
+ return 0;
+
+ if (perf_evlist__create_maps(evlist, target))
+ goto out_delete_evlist;
+
+ if (evlist->core.nr_entries > 1) {
+ bool can_sample_identifier = perf_can_sample_identifier();
+
+ evlist__for_each_entry(evlist, counter)
+ evsel__set_sample_id(counter, can_sample_identifier);
+
+ perf_evlist__set_id_pos(evlist);
+ }
+
+ evlist__for_each_entry(evlist, counter) {
+ if (evsel__open(counter, evlist->core.cpus, evlist->core.threads) < 0)
+ goto out_delete_evlist;
+ }
+
+ if (evlist__mmap(evlist, UINT_MAX))
+ goto out_delete_evlist;
+
+ evlist__for_each_entry(evlist, counter) {
+ if (evsel__enable(counter))
+ goto out_delete_evlist;
+ }
+
+ evlist->thread.done = 0;
+ if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
+ goto out_delete_evlist;
+
+ return 0;
+
+out_delete_evlist:
+ evlist__delete(evlist);
+ evlist = NULL;
+ return -1;
+}
+
+void perf_evlist__stop_sb_thread(struct evlist *evlist)
+{
+ if (!evlist)
+ return;
+ evlist->thread.done = 1;
+ pthread_join(evlist->thread.th, NULL);
+ evlist__delete(evlist);
+}
diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
index 3b791ef2cd50..20bacd5972ad 100644
--- a/tools/perf/util/smt.c
+++ b/tools/perf/util/smt.c
@@ -15,6 +15,9 @@ int smt_on(void)
if (cached)
return cached_result;
+ if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) > 0)
+ goto done;
+
ncpu = sysconf(_SC_NPROCESSORS_CONF);
for (cpu = 0; cpu < ncpu; cpu++) {
unsigned long long siblings;
@@ -24,13 +27,13 @@ int smt_on(void)
snprintf(fn, sizeof fn,
"devices/system/cpu/cpu%d/topology/core_cpus", cpu);
- if (access(fn, F_OK) == -1) {
+ if (sysfs__read_str(fn, &str, &strlen) < 0) {
snprintf(fn, sizeof fn,
"devices/system/cpu/cpu%d/topology/thread_siblings",
cpu);
+ if (sysfs__read_str(fn, &str, &strlen) < 0)
+ continue;
}
- if (sysfs__read_str(fn, &str, &strlen) < 0)
- continue;
/* Entry is hex, but does not have 0x, so need custom parser */
siblings = strtoull(str, NULL, 16);
free(str);
@@ -42,6 +45,7 @@ int smt_on(void)
}
if (!cached) {
cached_result = 0;
+done:
cached = true;
}
return cached_result;
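The early exit added above turns SMT detection into a single sysfs read on kernels that expose the SMT control interface, with the per-CPU siblings walk kept only as a fallback. Outside perf's sysfs helpers, the same fast path is a few lines of plain C (a sketch with minimal error handling):

#include <stdio.h>

/* Returns 1 if SMT is active, 0 if not, -1 if the file is unavailable. */
static int smt_active(void)
{
	int active;
	FILE *f = fopen("/sys/devices/system/cpu/smt/active", "r");

	if (!f)
		return -1;
	if (fscanf(f, "%d", &active) != 1)
		active = -1;
	fclose(f);
	return active;
}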
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index f14cc728c358..d42339df20f8 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -237,7 +237,7 @@ static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
return (int64_t)(right_ip - left_ip);
}
-static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
+int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
{
if (!sym_l || !sym_r)
return cmp_null(sym_l, sym_r);
@@ -300,8 +300,14 @@ static int _hist_entry__sym_snprintf(struct map_symbol *ms,
if (verbose > 0) {
char o = map ? dso__symtab_origin(map->dso) : '!';
+ u64 rip = ip;
+
+ if (map && map->dso && map->dso->kernel
+ && map->dso->adjust_symbols)
+ rip = map->unmap_ip(map, ip);
+
ret += repsep_snprintf(bf, size, "%-#*llx %c ",
- BITS_PER_LONG / 4 + 2, ip, o);
+ BITS_PER_LONG / 4 + 2, rip, o);
}
ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
@@ -2354,7 +2360,7 @@ static struct evsel *find_evsel(struct evlist *evlist, char *event_name)
evsel = evlist__first(evlist);
while (--nr > 0)
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
return evsel;
}
@@ -2811,7 +2817,7 @@ static char *prefix_if_not_in(const char *pre, char *str)
return str;
if (asprintf(&n, "%s,%s", pre, str) < 0)
- return NULL;
+ n = NULL;
free(str);
return n;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index cfa6ac6f7d06..66d39c4cfe2b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -311,5 +311,7 @@ int64_t
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
int64_t
sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+_sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r);
char *hist_entry__srcline(struct hist_entry *he);
#endif /* __PERF_SORT_H */
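The rip change above prints the original virtual address for kernel dsos whose symbols have been adjusted: perf stores map-relative addresses once adjust_symbols is set, and map->unmap_ip() undoes that translation for display. A toy round-trip under the usual ip = rip - start + pgoff convention (the struct and helpers are illustrative, not perf's struct map):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct toy_map {
	uint64_t start;	/* mapping start (virtual address) */
	uint64_t pgoff;	/* file/section offset */
};

static uint64_t toy_map_ip(struct toy_map *m, uint64_t rip)
{
	return rip - m->start + m->pgoff;
}

static uint64_t toy_unmap_ip(struct toy_map *m, uint64_t ip)
{
	return ip - m->pgoff + m->start;
}

int main(void)
{
	struct toy_map m = { .start = 0xffffffff81000000ULL, .pgoff = 0x1000 };
	uint64_t rip = 0xffffffff81234567ULL;
	uint64_t ip = toy_map_ip(&m, rip);

	printf("ip 0x%" PRIx64 " unmaps back to rip 0x%" PRIx64 "\n",
	       ip, toy_unmap_ip(&m, ip));
	return 0;
}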
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 9e757d18d713..4b57c0c07632 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -117,7 +117,7 @@ static void aggr_printout(struct perf_stat_config *config,
cpu_map__id_to_die(id),
config->csv_output ? 0 : -3,
cpu_map__id_to_cpu(id), config->csv_sep);
- } else {
+ } else if (id > -1) {
fprintf(config->output, "CPU%*d%s",
config->csv_output ? 0 : -7,
evsel__cpus(evsel)->map[id],
@@ -237,8 +237,6 @@ static bool valid_only_metric(const char *unit)
if (!unit)
return false;
if (strstr(unit, "/sec") ||
- strstr(unit, "hz") ||
- strstr(unit, "Hz") ||
strstr(unit, "CPUs utilized"))
return false;
return true;
@@ -248,7 +246,7 @@ static const char *fixunit(char *buf, struct evsel *evsel,
const char *unit)
{
if (!strncmp(unit, "of all", 6)) {
- snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
+ snprintf(buf, 1024, "%s %s", evsel__name(evsel),
unit);
return buf;
}
@@ -335,7 +333,7 @@ static int first_shadow_cpu(struct perf_stat_config *config,
if (config->aggr_mode == AGGR_GLOBAL)
return 0;
- for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+ for (i = 0; i < evsel__nr_cpus(evsel); i++) {
int cpu2 = evsel__cpus(evsel)->map[i];
if (config->aggr_get_id(config, evlist->core.cpus, cpu2) == id)
@@ -369,7 +367,7 @@ static void abs_printout(struct perf_stat_config *config,
config->csv_output ? 0 : config->unit_width,
evsel->unit, config->csv_sep);
- fprintf(output, "%-*s", config->csv_output ? 0 : 25, perf_evsel__name(evsel));
+ fprintf(output, "%-*s", config->csv_output ? 0 : 25, evsel__name(evsel));
print_cgroup(config, evsel);
}
@@ -463,8 +461,7 @@ static void printout(struct perf_stat_config *config, int id, int nr,
counter->unit, config->csv_sep);
fprintf(config->output, "%*s",
- config->csv_output ? 0 : -25,
- perf_evsel__name(counter));
+ config->csv_output ? 0 : -25, evsel__name(counter));
print_cgroup(config, counter);
@@ -510,7 +507,7 @@ static void aggr_update_shadow(struct perf_stat_config *config,
id = config->aggr_map->map[s];
evlist__for_each_entry(evlist, counter) {
val = 0;
- for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+ for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
s2 = config->aggr_get_id(config, evlist->core.cpus, cpu);
if (s2 != id)
continue;
@@ -561,11 +558,11 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
alias = list_prepare_entry(counter, &(evlist->core.entries), core.node);
list_for_each_entry_continue (alias, &evlist->core.entries, core.node) {
- if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
+ if (strcmp(evsel__name(alias), evsel__name(counter)) ||
alias->scale != counter->scale ||
alias->cgrp != counter->cgrp ||
strcmp(alias->unit, counter->unit) ||
- perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter) ||
+ evsel__is_clock(alias) != evsel__is_clock(counter) ||
!strcmp(alias->pmu_name, counter->pmu_name))
break;
alias->merged_stat = true;
@@ -601,7 +598,7 @@ static void aggr_cb(struct perf_stat_config *config,
struct aggr_data *ad = data;
int cpu, s2;
- for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+ for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
struct perf_counts_values *counts;
s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
@@ -671,7 +668,7 @@ static void print_aggr(struct perf_stat_config *config,
int s;
bool first;
- if (!(config->aggr_map || config->aggr_get_id))
+ if (!config->aggr_map || !config->aggr_get_id)
return;
aggr_update_shadow(config, evlist);
@@ -849,7 +846,7 @@ static void print_counter(struct perf_stat_config *config,
double uval;
int cpu;
- for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+ for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
struct aggr_data ad = { .cpu = cpu };
if (!collect_data(config, counter, counter_cb, &ad))
@@ -949,7 +946,6 @@ static void print_metric_headers(struct perf_stat_config *config,
out.print_metric = print_metric_header;
out.new_line = new_line_metric;
out.force_header = true;
- os.evsel = counter;
perf_stat__print_shadow_stats(config, counter, 0,
0,
&out,
@@ -1150,7 +1146,7 @@ static void print_percore_thread(struct perf_stat_config *config,
int s, s2, id;
bool first = true;
- for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) {
+ for (int i = 0; i < evsel__nr_cpus(counter); i++) {
s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
for (s = 0; s < config->aggr_map->nr; s++) {
id = config->aggr_map->map[s];
@@ -1172,7 +1168,7 @@ static void print_percore(struct perf_stat_config *config,
int s;
bool first = true;
- if (!(config->aggr_map || config->aggr_get_id))
+ if (!config->aggr_map || !config->aggr_get_id)
return;
if (config->percore_show_thread)
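Both aggr guards fixed in this file are the same De Morgan slip: !(a || b) only fires when both pointers are NULL, while the intent is to bail out when either one is missing. A two-line demonstration:

#include <stdio.h>

int main(void)
{
	void *map = (void *)1, *get_id = NULL;	/* exactly one is missing */

	printf("buggy  !(a || b) -> %d (does not bail)\n", !(map || get_id));
	printf("fixed  !a || !b  -> %d (bails out)\n", !map || !get_id);
	return 0;
}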
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 03ecb8cd0eec..901265127e36 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -216,9 +216,9 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
count *= counter->scale;
- if (perf_evsel__is_clock(counter))
+ if (evsel__is_clock(counter))
update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
- else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+ else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
@@ -241,25 +241,37 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
ctx, cpu, count);
- else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+ else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
+ update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
+ ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
+ update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
+ ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
+ update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
+ ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
+ update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
+ ctx, cpu, count);
+ else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
ctx, cpu, count);
- else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
+ else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
ctx, cpu, count);
- else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+ else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
- else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
+ else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
else if (perf_stat_evsel__is(counter, SMI_NUM))
update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
@@ -323,35 +335,46 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
{
struct evsel *counter, *leader, **metric_events, *oc;
bool found;
- const char **metric_names;
+ struct expr_parse_ctx ctx;
+ struct hashmap_entry *cur;
+ size_t bkt;
int i;
- int num_metric_names;
+ expr__ctx_init(&ctx);
evlist__for_each_entry(evsel_list, counter) {
bool invalid = false;
leader = counter->leader;
if (!counter->metric_expr)
continue;
+
+ expr__ctx_clear(&ctx);
metric_events = counter->metric_events;
if (!metric_events) {
- if (expr__find_other(counter->metric_expr, counter->name,
- &metric_names, &num_metric_names) < 0)
+ if (expr__find_other(counter->metric_expr,
+ counter->name,
+ &ctx, 1) < 0)
continue;
metric_events = calloc(sizeof(struct evsel *),
- num_metric_names + 1);
- if (!metric_events)
+ hashmap__size(&ctx.ids) + 1);
+ if (!metric_events) {
+ expr__ctx_clear(&ctx);
return;
+ }
counter->metric_events = metric_events;
}
- for (i = 0; i < num_metric_names; i++) {
+ i = 0;
+ hashmap__for_each_entry((&ctx.ids), cur, bkt) {
+ const char *metric_name = (const char *)cur->key;
+
found = false;
if (leader) {
/* Search in group */
for_each_group_member (oc, leader) {
- if (!strcasecmp(oc->name, metric_names[i]) &&
+ if (!strcasecmp(oc->name,
+ metric_name) &&
!oc->collect_stat) {
found = true;
break;
@@ -360,7 +383,8 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
}
if (!found) {
/* Search ignoring groups */
- oc = perf_stat__find_event(evsel_list, metric_names[i]);
+ oc = perf_stat__find_event(evsel_list,
+ metric_name);
}
if (!oc) {
/* Deduping one is good enough to handle duplicated PMUs. */
@@ -373,27 +397,28 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
* of events. So we ask the user instead to add the missing
* events.
*/
- if (!printed || strcasecmp(printed, metric_names[i])) {
+ if (!printed ||
+ strcasecmp(printed, metric_name)) {
fprintf(stderr,
"Add %s event to groups to get metric expression for %s\n",
- metric_names[i],
+ metric_name,
counter->name);
- printed = strdup(metric_names[i]);
+ printed = strdup(metric_name);
}
invalid = true;
continue;
}
- metric_events[i] = oc;
+ metric_events[i++] = oc;
oc->collect_stat = true;
}
metric_events[i] = NULL;
- free(metric_names);
if (invalid) {
free(metric_events);
counter->metric_events = NULL;
counter->metric_expr = NULL;
}
}
+ expr__ctx_clear(&ctx);
}
static double runtime_stat_avg(struct runtime_stat *st,
@@ -504,7 +529,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
- out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache accesses", ratio);
}
static void print_l1_icache_misses(struct perf_stat_config *config,
@@ -525,7 +550,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
- out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache accesses", ratio);
}
static void print_dtlb_cache_misses(struct perf_stat_config *config,
@@ -545,7 +570,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
- out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache accesses", ratio);
}
static void print_itlb_cache_misses(struct perf_stat_config *config,
@@ -565,7 +590,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
- out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache accesses", ratio);
}
static void print_ll_cache_misses(struct perf_stat_config *config,
@@ -585,7 +610,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
- out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache accesses", ratio);
}
/*
@@ -692,6 +717,47 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
return sanitize_val(1.0 - sum);
}
+/*
+ * The kernel reports metrics multiplied by slots. To get
+ * the ratios back we need to recreate the sum.
+ */
+
+static double td_metric_ratio(int ctx, int cpu,
+ enum stat_type type,
+ struct runtime_stat *stat)
+{
+ double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) +
+ runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu);
+ double d = runtime_stat_avg(stat, type, ctx, cpu);
+
+ if (sum)
+ return d / sum;
+ return 0;
+}
+
+/*
+ * ... but only if most of the values are actually available.
+ * We allow two missing.
+ */
+
+static bool full_td(int ctx, int cpu,
+ struct runtime_stat *stat)
+{
+ int c = 0;
+
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0)
+ c++;
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0)
+ c++;
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0)
+ c++;
+ if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0)
+ c++;
+ return c >= 2;
+}
+
static void print_smi_cost(struct perf_stat_config *config,
int cpu, struct evsel *evsel,
struct perf_stat_output_ctx *out,
@@ -717,27 +783,17 @@ static void print_smi_cost(struct perf_stat_config *config,
out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
-static void generic_metric(struct perf_stat_config *config,
- const char *metric_expr,
- struct evsel **metric_events,
- char *name,
- const char *metric_name,
- const char *metric_unit,
- double avg,
- int cpu,
- struct perf_stat_output_ctx *out,
- struct runtime_stat *st)
+static int prepare_metric(struct evsel **metric_events,
+ struct metric_ref *metric_refs,
+ struct expr_parse_ctx *pctx,
+ int cpu,
+ struct runtime_stat *st)
{
- print_metric_t print_metric = out->print_metric;
- struct parse_ctx pctx;
- double ratio, scale;
- int i;
- void *ctxp = out->ctx;
+ double scale;
char *n, *pn;
+ int i, j, ret;
- expr__ctx_init(&pctx);
- /* Must be first id entry */
- expr__add_id(&pctx, name, avg);
+ expr__ctx_init(pctx);
for (i = 0; metric_events[i]; i++) {
struct saved_value *v;
struct stats *stats;
@@ -760,7 +816,7 @@ static void generic_metric(struct perf_stat_config *config,
n = strdup(metric_events[i]->name);
if (!n)
- return;
+ return -ENOMEM;
/*
* This display code with --no-merge adds [cpu] postfixes.
* These are not supported by the parser. Remove everything
@@ -771,13 +827,44 @@ static void generic_metric(struct perf_stat_config *config,
*pn = 0;
if (metric_total)
- expr__add_id(&pctx, n, metric_total);
+ expr__add_id_val(pctx, n, metric_total);
else
- expr__add_id(&pctx, n, avg_stats(stats)*scale);
+ expr__add_id_val(pctx, n, avg_stats(stats)*scale);
}
+ for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
+ ret = expr__add_ref(pctx, &metric_refs[j]);
+ if (ret)
+ return ret;
+ }
+
+ return i;
+}
+
+static void generic_metric(struct perf_stat_config *config,
+ const char *metric_expr,
+ struct evsel **metric_events,
+ struct metric_ref *metric_refs,
+ char *name,
+ const char *metric_name,
+ const char *metric_unit,
+ int runtime,
+ int cpu,
+ struct perf_stat_output_ctx *out,
+ struct runtime_stat *st)
+{
+ print_metric_t print_metric = out->print_metric;
+ struct expr_parse_ctx pctx;
+ double ratio, scale;
+ int i;
+ void *ctxp = out->ctx;
+
+ i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st);
+ if (i < 0)
+ return;
+
if (!metric_events[i]) {
- if (expr__parse(&ratio, &pctx, metric_expr) == 0) {
+ if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) {
char *unit;
char metric_bf[64];
@@ -786,13 +873,17 @@ static void generic_metric(struct perf_stat_config *config,
&unit, &scale) >= 0) {
ratio *= scale;
}
-
- scnprintf(metric_bf, sizeof(metric_bf),
+ if (strstr(metric_expr, "?"))
+ scnprintf(metric_bf, sizeof(metric_bf),
+ "%s %s_%d", unit, metric_name, runtime);
+ else
+ scnprintf(metric_bf, sizeof(metric_bf),
"%s %s", unit, metric_name);
+
print_metric(config, ctxp, NULL, "%8.1f",
metric_bf, ratio);
} else {
- print_metric(config, ctxp, NULL, "%8.1f",
+ print_metric(config, ctxp, NULL, "%8.2f",
metric_name ?
metric_name :
out->force_header ? name : "",
@@ -809,8 +900,23 @@ static void generic_metric(struct perf_stat_config *config,
(metric_name ? metric_name : name) : "", 0);
}
- for (i = 1; i < pctx.num_ids; i++)
- zfree(&pctx.ids[i].name);
+ expr__ctx_clear(&pctx);
+}
+
+double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st)
+{
+ struct expr_parse_ctx pctx;
+ double ratio = 0.0;
+
+ if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0)
+ goto out;
+
+ if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1))
+ ratio = 0.0;
+
+out:
+ expr__ctx_clear(&pctx);
+ return ratio;
}
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
@@ -828,7 +934,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct metric_event *me;
int num = 1;
- if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
+ if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
if (total) {
@@ -853,7 +959,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
"stalled cycles per insn",
ratio);
}
- } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
+ } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
print_branch_misses(config, cpu, evsel, avg, out, st);
else
@@ -867,7 +973,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
else
- print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
+ print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
} else if (
evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1I |
@@ -877,7 +983,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
print_l1_icache_misses(config, cpu, evsel, avg, out, st);
else
- print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
+ print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
} else if (
evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
@@ -887,7 +993,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
else
- print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
+ print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
} else if (
evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
@@ -897,7 +1003,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
else
- print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
+ print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
} else if (
evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_LL |
@@ -907,8 +1013,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
print_ll_cache_misses(config, cpu, evsel, avg, out, st);
else
- print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
+ print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
+ } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
if (total)
@@ -919,11 +1025,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
"of all cache refs", ratio);
else
print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
+ } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
+ } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
+ } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
if (total) {
@@ -974,7 +1080,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
ratio = total / avg;
print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
- } else if (perf_evsel__is_clock(evsel)) {
+ } else if (evsel__is_clock(evsel)) {
if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
avg / (ratio * evsel->scale));
@@ -1020,9 +1126,45 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
be_bound * 100.);
else
print_metric(config, ctxp, NULL, NULL, name, 0);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
+ full_td(ctx, cpu, st)) {
+ double retiring = td_metric_ratio(ctx, cpu,
+ STAT_TOPDOWN_RETIRING, st);
+
+ if (retiring > 0.7)
+ color = PERF_COLOR_GREEN;
+ print_metric(config, ctxp, color, "%8.1f%%", "retiring",
+ retiring * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
+ full_td(ctx, cpu, st)) {
+ double fe_bound = td_metric_ratio(ctx, cpu,
+ STAT_TOPDOWN_FE_BOUND, st);
+
+ if (fe_bound > 0.2)
+ color = PERF_COLOR_RED;
+ print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
+ fe_bound * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
+ full_td(ctx, cpu, st)) {
+ double be_bound = td_metric_ratio(ctx, cpu,
+ STAT_TOPDOWN_BE_BOUND, st);
+
+ if (be_bound > 0.2)
+ color = PERF_COLOR_RED;
+ print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
+ be_bound * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
+ full_td(ctx, cpu, st)) {
+ double bad_spec = td_metric_ratio(ctx, cpu,
+ STAT_TOPDOWN_BAD_SPEC, st);
+
+ if (bad_spec > 0.1)
+ color = PERF_COLOR_RED;
+ print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
+ bad_spec * 100.);
} else if (evsel->metric_expr) {
- generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
- evsel->metric_name, NULL, avg, cpu, out, st);
+ generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
+ evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
char unit = 'M';
char unit_buf[10];
@@ -1050,8 +1192,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (num++ > 0)
out->new_line(config, ctxp);
generic_metric(config, mexp->metric_expr, mexp->metric_events,
- evsel->name, mexp->metric_name,
- mexp->metric_unit, avg, cpu, out, st);
+ mexp->metric_refs, evsel->name, mexp->metric_name,
+ mexp->metric_unit, mexp->runtime, cpu, out, st);
}
}
if (num == 0)
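Worked numbers for td_metric_ratio() above: the kernel reports the four topdown events scaled by slots, so each ratio is simply that event's count over the sum of all four, and full_td() tolerates up to two of them being absent. With made-up counts:

#include <stdio.h>

int main(void)
{
	double retiring = 400.0, bad_spec = 50.0;
	double fe_bound = 250.0, be_bound = 300.0;
	double sum = retiring + bad_spec + fe_bound + be_bound;	/* 1000 */

	printf("retiring:        %4.1f%%\n", retiring / sum * 100.0);	/* 40.0 */
	printf("bad speculation: %4.1f%%\n", bad_spec / sum * 100.0);	/*  5.0 */
	printf("frontend bound:  %4.1f%%\n", fe_bound / sum * 100.0);	/* 25.0 */
	printf("backend bound:   %4.1f%%\n", be_bound / sum * 100.0);	/* 30.0 */
	return 0;
}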
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 5f26137b8d60..bd0decd6d753 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -95,6 +95,10 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+ ID(TOPDOWN_RETIRING, topdown-retiring),
+ ID(TOPDOWN_BAD_SPEC, topdown-bad-spec),
+ ID(TOPDOWN_FE_BOUND, topdown-fe-bound),
+ ID(TOPDOWN_BE_BOUND, topdown-be-bound),
ID(SMI_NUM, msr/smi/),
ID(APERF, msr/aperf/),
};
@@ -108,14 +112,14 @@ static void perf_stat_evsel_id_init(struct evsel *evsel)
/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
- if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
+ if (!strcmp(evsel__name(evsel), id_str[i])) {
ps->id = i;
break;
}
}
}
-static void perf_evsel__reset_stat_priv(struct evsel *evsel)
+static void evsel__reset_stat_priv(struct evsel *evsel)
{
int i;
struct perf_stat_evsel *ps = evsel->stats;
@@ -126,16 +130,16 @@ static void perf_evsel__reset_stat_priv(struct evsel *evsel)
perf_stat_evsel_id_init(evsel);
}
-static int perf_evsel__alloc_stat_priv(struct evsel *evsel)
+static int evsel__alloc_stat_priv(struct evsel *evsel)
{
evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
if (evsel->stats == NULL)
return -ENOMEM;
- perf_evsel__reset_stat_priv(evsel);
+ evsel__reset_stat_priv(evsel);
return 0;
}
-static void perf_evsel__free_stat_priv(struct evsel *evsel)
+static void evsel__free_stat_priv(struct evsel *evsel)
{
struct perf_stat_evsel *ps = evsel->stats;
@@ -144,8 +148,7 @@ static void perf_evsel__free_stat_priv(struct evsel *evsel)
zfree(&evsel->stats);
}
-static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel,
- int ncpus, int nthreads)
+static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads)
{
struct perf_counts *counts;
@@ -156,29 +159,26 @@ static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel,
return counts ? 0 : -ENOMEM;
}
-static void perf_evsel__free_prev_raw_counts(struct evsel *evsel)
+static void evsel__free_prev_raw_counts(struct evsel *evsel)
{
perf_counts__delete(evsel->prev_raw_counts);
evsel->prev_raw_counts = NULL;
}
-static void perf_evsel__reset_prev_raw_counts(struct evsel *evsel)
+static void evsel__reset_prev_raw_counts(struct evsel *evsel)
{
- if (evsel->prev_raw_counts) {
- evsel->prev_raw_counts->aggr.val = 0;
- evsel->prev_raw_counts->aggr.ena = 0;
- evsel->prev_raw_counts->aggr.run = 0;
- }
+ if (evsel->prev_raw_counts)
+ perf_counts__reset(evsel->prev_raw_counts);
}
-static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
+static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
{
- int ncpus = perf_evsel__nr_cpus(evsel);
+ int ncpus = evsel__nr_cpus(evsel);
int nthreads = perf_thread_map__nr(evsel->core.threads);
- if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
- perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
- (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
+ if (evsel__alloc_stat_priv(evsel) < 0 ||
+ evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
+ (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
return -ENOMEM;
return 0;
@@ -189,7 +189,7 @@ int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel) {
- if (perf_evsel__alloc_stats(evsel, alloc_raw))
+ if (evsel__alloc_stats(evsel, alloc_raw))
goto out_free;
}
@@ -205,9 +205,9 @@ void perf_evlist__free_stats(struct evlist *evlist)
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel) {
- perf_evsel__free_stat_priv(evsel);
- perf_evsel__free_counts(evsel);
- perf_evsel__free_prev_raw_counts(evsel);
+ evsel__free_stat_priv(evsel);
+ evsel__free_counts(evsel);
+ evsel__free_prev_raw_counts(evsel);
}
}
@@ -216,8 +216,8 @@ void perf_evlist__reset_stats(struct evlist *evlist)
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel) {
- perf_evsel__reset_stat_priv(evsel);
- perf_evsel__reset_counts(evsel);
+ evsel__reset_stat_priv(evsel);
+ evsel__reset_counts(evsel);
}
}
@@ -226,7 +226,51 @@ void perf_evlist__reset_prev_raw_counts(struct evlist *evlist)
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel)
- perf_evsel__reset_prev_raw_counts(evsel);
+ evsel__reset_prev_raw_counts(evsel);
+}
+
+static void perf_evsel__copy_prev_raw_counts(struct evsel *evsel)
+{
+ int ncpus = evsel__nr_cpus(evsel);
+ int nthreads = perf_thread_map__nr(evsel->core.threads);
+
+ for (int thread = 0; thread < nthreads; thread++) {
+ for (int cpu = 0; cpu < ncpus; cpu++) {
+ *perf_counts(evsel->counts, cpu, thread) =
+ *perf_counts(evsel->prev_raw_counts, cpu,
+ thread);
+ }
+ }
+
+ evsel->counts->aggr = evsel->prev_raw_counts->aggr;
+}
+
+void perf_evlist__copy_prev_raw_counts(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ perf_evsel__copy_prev_raw_counts(evsel);
+}
+
+void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ /*
+ * To collect the overall statistics for interval mode,
+ * we copy the counts from evsel->prev_raw_counts to
+ * evsel->counts. perf_stat_process_counter() creates
+ * aggr values from per-cpu values, but the per-cpu values
+ * are 0 for AGGR_GLOBAL. So we use a trick that saves the
+ * previous aggr value to the first member of perf_counts,
+ * so that the aggr calculation in process_counter_values()
+ * works correctly.
+ */
+ evlist__for_each_entry(evlist, evsel) {
+ *perf_counts(evsel->prev_raw_counts, 0, 0) =
+ evsel->prev_raw_counts->aggr;
+ }
}
static void zero_per_pkg(struct evsel *counter)
@@ -302,7 +346,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
case AGGR_NODE:
case AGGR_NONE:
if (!evsel->snapshot)
- perf_evsel__compute_deltas(evsel, cpu, thread, count);
+ evsel__compute_deltas(evsel, cpu, thread, count);
perf_counts_values__scale(count, config->scale, NULL);
if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
perf_stat__update_shadow_stats(evsel, count->val,
@@ -334,7 +378,7 @@ static int process_counter_maps(struct perf_stat_config *config,
struct evsel *counter)
{
int nthreads = perf_thread_map__nr(counter->core.threads);
- int ncpus = perf_evsel__nr_cpus(counter);
+ int ncpus = evsel__nr_cpus(counter);
int cpu, thread;
if (counter->core.system_wide)
@@ -368,8 +412,10 @@ int perf_stat_process_counter(struct perf_stat_config *config,
* interval mode, otherwise overall avg running
* averages will be shown for each interval.
*/
- if (config->interval)
- init_stats(ps->res_stats);
+ if (config->interval || config->summary) {
+ for (i = 0; i < 3; i++)
+ init_stats(&ps->res_stats[i]);
+ }
if (counter->per_pkg)
zero_per_pkg(counter);
@@ -382,7 +428,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
return 0;
if (!counter->snapshot)
- perf_evsel__compute_deltas(counter, -1, -1, aggr);
+ evsel__compute_deltas(counter, -1, -1, aggr);
perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
for (i = 0; i < 3; i++)
@@ -390,7 +436,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
if (verbose > 0) {
fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
- perf_evsel__name(counter), count[0], count[1], count[2]);
+ evsel__name(counter), count[0], count[1], count[2]);
}
/*
@@ -507,7 +553,7 @@ int create_perf_stat_counter(struct evsel *evsel,
* either manually by us or by kernel via enable_on_exec
* set later.
*/
- if (perf_evsel__is_group_leader(evsel)) {
+ if (evsel__is_group_leader(evsel)) {
attr->disabled = 1;
/*
@@ -519,7 +565,7 @@ int create_perf_stat_counter(struct evsel *evsel,
}
if (target__has_cpu(target) && !target__has_per_thread(target))
- return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);
+ return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);
- return perf_evsel__open_per_thread(evsel, evsel->core.threads);
+ return evsel__open_per_thread(evsel, evsel->core.threads);
}
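The two helpers added above exist to let interval mode print an overall summary: counts accumulated in prev_raw_counts are copied back into counts and, for AGGR_GLOBAL, the saved aggr value is stashed into the (0, 0) slot first so the usual per-cpu aggregation still works. A hedged sketch of the call order a 'perf stat -I N --summary' style caller would use (the wrapper function itself is an assumption):

static void print_overall_summary(struct perf_stat_config *config,
				  struct evlist *evlist)
{
	struct evsel *counter;

	if (config->aggr_mode == AGGR_GLOBAL)
		perf_evlist__save_aggr_prev_raw_counts(evlist);
	perf_evlist__copy_prev_raw_counts(evlist);	/* prev_raw -> counts */
	perf_evlist__reset_prev_raw_counts(evlist);

	evlist__for_each_entry(evlist, counter)
		perf_stat_process_counter(config, counter);
}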
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index b4fdfaa7f2c0..487010c624be 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -28,6 +28,10 @@ enum perf_stat_evsel_id {
PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
+ PERF_STAT_EVSEL_ID__TOPDOWN_RETIRING,
+ PERF_STAT_EVSEL_ID__TOPDOWN_BAD_SPEC,
+ PERF_STAT_EVSEL_ID__TOPDOWN_FE_BOUND,
+ PERF_STAT_EVSEL_ID__TOPDOWN_BE_BOUND,
PERF_STAT_EVSEL_ID__SMI_NUM,
PERF_STAT_EVSEL_ID__APERF,
PERF_STAT_EVSEL_ID__MAX,
@@ -82,6 +86,10 @@ enum stat_type {
STAT_TOPDOWN_SLOTS_RETIRED,
STAT_TOPDOWN_FETCH_BUBBLES,
STAT_TOPDOWN_RECOVERY_BUBBLES,
+ STAT_TOPDOWN_RETIRING,
+ STAT_TOPDOWN_BAD_SPEC,
+ STAT_TOPDOWN_FE_BOUND,
+ STAT_TOPDOWN_BE_BOUND,
STAT_SMI_NUM,
STAT_APERF,
STAT_MAX
@@ -110,10 +118,14 @@ struct perf_stat_config {
bool all_kernel;
bool all_user;
bool percore_show_thread;
+ bool summary;
+ bool metric_no_group;
+ bool metric_no_merge;
+ bool stop_read_counter;
FILE *output;
unsigned int interval;
unsigned int timeout;
- unsigned int initial_delay;
+ int initial_delay;
unsigned int unit_width;
unsigned int metric_only_len;
int times;
@@ -130,8 +142,14 @@ struct perf_stat_config {
struct perf_cpu_map *cpus_aggr_map;
u64 *walltime_run;
struct rblist metric_events;
+ int ctl_fd;
+ int ctl_fd_ack;
+ bool ctl_fd_close;
+ const char *cgroup_list;
};
+void perf_stat__set_big_num(int set);
+
void update_stats(struct stats *stats, u64 val);
double avg_stats(struct stats *stats);
double stddev_stats(struct stats *stats);
@@ -198,6 +216,8 @@ int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw);
void perf_evlist__free_stats(struct evlist *evlist);
void perf_evlist__reset_stats(struct evlist *evlist);
void perf_evlist__reset_prev_raw_counts(struct evlist *evlist);
+void perf_evlist__copy_prev_raw_counts(struct evlist *evlist);
+void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
int perf_stat_process_counter(struct perf_stat_config *config,
struct evsel *counter);
@@ -223,4 +243,7 @@ perf_evlist__print_counters(struct evlist *evlist,
struct target *_target,
struct timespec *ts,
int argc, const char **argv);
+
+struct metric_expr;
+double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st);
#endif
diff --git a/tools/perf/util/stream.c b/tools/perf/util/stream.c
new file mode 100644
index 000000000000..4bd5e5a00aa5
--- /dev/null
+++ b/tools/perf/util/stream.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Compare and figure out the top N hottest streams
+ * Copyright (c) 2020, Intel Corporation.
+ * Author: Jin Yao
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <linux/zalloc.h>
+#include "debug.h"
+#include "hist.h"
+#include "sort.h"
+#include "stream.h"
+#include "evlist.h"
+
+static void evsel_streams__delete(struct evsel_streams *es, int nr_evsel)
+{
+ for (int i = 0; i < nr_evsel; i++)
+ zfree(&es[i].streams);
+
+ free(es);
+}
+
+void evlist_streams__delete(struct evlist_streams *els)
+{
+ evsel_streams__delete(els->ev_streams, els->nr_evsel);
+ free(els);
+}
+
+static struct evlist_streams *evlist_streams__new(int nr_evsel,
+ int nr_streams_max)
+{
+ struct evlist_streams *els;
+ struct evsel_streams *es;
+
+ els = zalloc(sizeof(*els));
+ if (!els)
+ return NULL;
+
+ es = calloc(nr_evsel, sizeof(struct evsel_streams));
+ if (!es) {
+ free(els);
+ return NULL;
+ }
+
+ for (int i = 0; i < nr_evsel; i++) {
+ struct evsel_streams *s = &es[i];
+
+ s->streams = calloc(nr_streams_max, sizeof(struct stream));
+ if (!s->streams)
+ goto err;
+
+ s->nr_streams_max = nr_streams_max;
+ s->evsel_idx = -1;
+ }
+
+ els->ev_streams = es;
+ els->nr_evsel = nr_evsel;
+ return els;
+
+err:
+ evsel_streams__delete(es, nr_evsel);
+ return NULL;
+}
+
+/*
+ * The cnodes with a high hit count are the hot callchains.
+ */
+static void evsel_streams__set_hot_cnode(struct evsel_streams *es,
+ struct callchain_node *cnode)
+{
+ int i, idx = 0;
+ u64 hit;
+
+ if (es->nr_streams < es->nr_streams_max) {
+ i = es->nr_streams;
+ es->streams[i].cnode = cnode;
+ es->nr_streams++;
+ return;
+ }
+
+ /*
+ * Since only a small number of hot streams are kept, a simple
+ * linear scan finds the cnode with the smallest hit count to replace.
+ */
+ hit = (es->streams[0].cnode)->hit;
+ for (i = 1; i < es->nr_streams; i++) {
+ if ((es->streams[i].cnode)->hit < hit) {
+ hit = (es->streams[i].cnode)->hit;
+ idx = i;
+ }
+ }
+
+ if (cnode->hit > hit)
+ es->streams[idx].cnode = cnode;
+}
+
+static void update_hot_callchain(struct hist_entry *he,
+ struct evsel_streams *es)
+{
+ struct rb_root *root = &he->sorted_chain;
+ struct rb_node *rb_node = rb_first(root);
+ struct callchain_node *cnode;
+
+ while (rb_node) {
+ cnode = rb_entry(rb_node, struct callchain_node, rb_node);
+ evsel_streams__set_hot_cnode(es, cnode);
+ rb_node = rb_next(rb_node);
+ }
+}
+
+static void init_hot_callchain(struct hists *hists, struct evsel_streams *es)
+{
+ struct rb_node *next = rb_first_cached(&hists->entries);
+
+ while (next) {
+ struct hist_entry *he;
+
+ he = rb_entry(next, struct hist_entry, rb_node);
+ update_hot_callchain(he, es);
+ next = rb_next(&he->rb_node);
+ }
+
+ es->streams_hits = callchain_total_hits(hists);
+}
+
+static int evlist__init_callchain_streams(struct evlist *evlist,
+ struct evlist_streams *els)
+{
+ struct evsel_streams *es = els->ev_streams;
+ struct evsel *pos;
+ int i = 0;
+
+ BUG_ON(els->nr_evsel < evlist->core.nr_entries);
+
+ evlist__for_each_entry(evlist, pos) {
+ struct hists *hists = evsel__hists(pos);
+
+ hists__output_resort(hists, NULL);
+ init_hot_callchain(hists, &es[i]);
+ es[i].evsel_idx = pos->idx;
+ i++;
+ }
+
+ return 0;
+}
+
+struct evlist_streams *evlist__create_streams(struct evlist *evlist,
+ int nr_streams_max)
+{
+ int nr_evsel = evlist->core.nr_entries, ret = -1;
+ struct evlist_streams *els = evlist_streams__new(nr_evsel,
+ nr_streams_max);
+
+ if (!els)
+ return NULL;
+
+ ret = evlist__init_callchain_streams(evlist, els);
+ if (ret) {
+ evlist_streams__delete(els);
+ return NULL;
+ }
+
+ return els;
+}
+
+struct evsel_streams *evsel_streams__entry(struct evlist_streams *els,
+ int evsel_idx)
+{
+ struct evsel_streams *es = els->ev_streams;
+
+ for (int i = 0; i < els->nr_evsel; i++) {
+ if (es[i].evsel_idx == evsel_idx)
+ return &es[i];
+ }
+
+ return NULL;
+}
+
+static struct stream *stream__callchain_match(struct stream *base_stream,
+ struct evsel_streams *es_pair)
+{
+ for (int i = 0; i < es_pair->nr_streams; i++) {
+ struct stream *pair_stream = &es_pair->streams[i];
+
+ if (callchain_cnode_matched(base_stream->cnode,
+ pair_stream->cnode)) {
+ return pair_stream;
+ }
+ }
+
+ return NULL;
+}
+
+static struct stream *stream__match(struct stream *base_stream,
+ struct evsel_streams *es_pair)
+{
+ return stream__callchain_match(base_stream, es_pair);
+}
+
+static void stream__link(struct stream *base_stream, struct stream *pair_stream)
+{
+ base_stream->pair_cnode = pair_stream->cnode;
+ pair_stream->pair_cnode = base_stream->cnode;
+}
+
+void evsel_streams__match(struct evsel_streams *es_base,
+ struct evsel_streams *es_pair)
+{
+ for (int i = 0; i < es_base->nr_streams; i++) {
+ struct stream *base_stream = &es_base->streams[i];
+ struct stream *pair_stream;
+
+ pair_stream = stream__match(base_stream, es_pair);
+ if (pair_stream)
+ stream__link(base_stream, pair_stream);
+ }
+}
+
+static void print_callchain_pair(struct stream *base_stream, int idx,
+ struct evsel_streams *es_base,
+ struct evsel_streams *es_pair)
+{
+ struct callchain_node *base_cnode = base_stream->cnode;
+ struct callchain_node *pair_cnode = base_stream->pair_cnode;
+ struct callchain_list *base_chain, *pair_chain;
+ char buf1[512], buf2[512], cbuf1[256], cbuf2[256];
+ char *s1, *s2;
+ double pct;
+
+ printf("\nhot chain pair %d:\n", idx);
+
+ pct = (double)base_cnode->hit / (double)es_base->streams_hits;
+ scnprintf(buf1, sizeof(buf1), "cycles: %ld, hits: %.2f%%",
+ callchain_avg_cycles(base_cnode), pct * 100.0);
+
+ pct = (double)pair_cnode->hit / (double)es_pair->streams_hits;
+ scnprintf(buf2, sizeof(buf2), "cycles: %ld, hits: %.2f%%",
+ callchain_avg_cycles(pair_cnode), pct * 100.0);
+
+ printf("%35s\t%35s\n", buf1, buf2);
+
+ printf("%35s\t%35s\n",
+ "---------------------------",
+ "--------------------------");
+
+ pair_chain = list_first_entry(&pair_cnode->val,
+ struct callchain_list,
+ list);
+
+ list_for_each_entry(base_chain, &base_cnode->val, list) {
+ if (&pair_chain->list == &pair_cnode->val)
+ return;
+
+ s1 = callchain_list__sym_name(base_chain, cbuf1, sizeof(cbuf1),
+ false);
+ s2 = callchain_list__sym_name(pair_chain, cbuf2, sizeof(cbuf2),
+ false);
+
+ scnprintf(buf1, sizeof(buf1), "%35s\t%35s", s1, s2);
+ printf("%s\n", buf1);
+ pair_chain = list_next_entry(pair_chain, list);
+ }
+}
+
+static void print_stream_callchain(struct stream *stream, int idx,
+ struct evsel_streams *es, bool pair)
+{
+ struct callchain_node *cnode = stream->cnode;
+ struct callchain_list *chain;
+ char buf[512], cbuf[256], *s;
+ double pct;
+
+ printf("\nhot chain %d:\n", idx);
+
+ pct = (double)cnode->hit / (double)es->streams_hits;
+ scnprintf(buf, sizeof(buf), "cycles: %ld, hits: %.2f%%",
+ callchain_avg_cycles(cnode), pct * 100.0);
+
+ if (pair) {
+ printf("%35s\t%35s\n", "", buf);
+ printf("%35s\t%35s\n",
+ "", "--------------------------");
+ } else {
+ printf("%35s\n", buf);
+ printf("%35s\n", "--------------------------");
+ }
+
+ list_for_each_entry(chain, &cnode->val, list) {
+ s = callchain_list__sym_name(chain, cbuf, sizeof(cbuf), false);
+
+ if (pair)
+ scnprintf(buf, sizeof(buf), "%35s\t%35s", "", s);
+ else
+ scnprintf(buf, sizeof(buf), "%35s", s);
+
+ printf("%s\n", buf);
+ }
+}
+
+static void callchain_streams_report(struct evsel_streams *es_base,
+ struct evsel_streams *es_pair)
+{
+ struct stream *base_stream;
+ int i, idx = 0;
+
+ printf("[ Matched hot streams ]\n");
+ for (i = 0; i < es_base->nr_streams; i++) {
+ base_stream = &es_base->streams[i];
+ if (base_stream->pair_cnode) {
+ print_callchain_pair(base_stream, ++idx,
+ es_base, es_pair);
+ }
+ }
+
+ idx = 0;
+ printf("\n[ Hot streams in old perf data only ]\n");
+ for (i = 0; i < es_base->nr_streams; i++) {
+ base_stream = &es_base->streams[i];
+ if (!base_stream->pair_cnode) {
+ print_stream_callchain(base_stream, ++idx,
+ es_base, false);
+ }
+ }
+
+ idx = 0;
+ printf("\n[ Hot streams in new perf data only ]\n");
+ for (i = 0; i < es_pair->nr_streams; i++) {
+ base_stream = &es_pair->streams[i];
+ if (!base_stream->pair_cnode) {
+ print_stream_callchain(base_stream, ++idx,
+ es_pair, true);
+ }
+ }
+}
+
+void evsel_streams__report(struct evsel_streams *es_base,
+ struct evsel_streams *es_pair)
+{
+ return callchain_streams_report(es_base, es_pair);
+}
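The stream API above is consumed in a fixed order: build streams for both sessions, look up the per-evsel entries, match, then report. A hedged sketch of that sequence for one evsel across an old and a new perf.data session (session setup omitted; the wrapper function is an assumption):

static void compare_streams(struct evlist *old_evlist,
			    struct evlist *new_evlist, int evsel_idx)
{
	struct evlist_streams *els_old, *els_new;
	struct evsel_streams *es_old, *es_new;

	els_old = evlist__create_streams(old_evlist, 5);	/* top 5 hottest */
	els_new = evlist__create_streams(new_evlist, 5);
	if (!els_old || !els_new)
		goto out;

	es_old = evsel_streams__entry(els_old, evsel_idx);
	es_new = evsel_streams__entry(els_new, evsel_idx);
	if (es_old && es_new) {
		evsel_streams__match(es_old, es_new);
		evsel_streams__report(es_old, es_new);
	}
out:
	if (els_old)
		evlist_streams__delete(els_old);
	if (els_new)
		evlist_streams__delete(els_new);
}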
diff --git a/tools/perf/util/stream.h b/tools/perf/util/stream.h
new file mode 100644
index 000000000000..bee768874fea
--- /dev/null
+++ b/tools/perf/util/stream.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STREAM_H
+#define __PERF_STREAM_H
+
+#include "callchain.h"
+
+struct stream {
+ struct callchain_node *cnode;
+ struct callchain_node *pair_cnode;
+};
+
+struct evsel_streams {
+ struct stream *streams;
+ int nr_streams_max;
+ int nr_streams;
+ int evsel_idx;
+ u64 streams_hits;
+};
+
+struct evlist_streams {
+ struct evsel_streams *ev_streams;
+ int nr_evsel;
+};
+
+struct evlist;
+
+void evlist_streams__delete(struct evlist_streams *els);
+
+struct evlist_streams *evlist__create_streams(struct evlist *evlist,
+ int nr_streams_max);
+
+struct evsel_streams *evsel_streams__entry(struct evlist_streams *els,
+ int evsel_idx);
+
+void evsel_streams__match(struct evsel_streams *es_base,
+ struct evsel_streams *es_pair);
+
+void evsel_streams__report(struct evsel_streams *es_base,
+ struct evsel_streams *es_pair);
+
+#endif /* __PERF_STREAM_H */
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index be5b493f8284..44dd86a4f25f 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -50,6 +50,10 @@ typedef Elf64_Nhdr GElf_Nhdr;
#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
#endif
+#ifdef HAVE_LIBBFD_SUPPORT
+#define PACKAGE 'perf'
+#include <bfd.h>
+#else
#ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
extern char *cplus_demangle(const char *, int);
@@ -65,9 +69,7 @@ static inline char *bfd_demangle(void __maybe_unused *v,
{
return NULL;
}
-#else
-#define PACKAGE 'perf'
-#include <bfd.h>
+#endif
#endif
#endif
@@ -530,8 +532,40 @@ out:
return err;
}
-int filename__read_build_id(const char *filename, void *bf, size_t size)
+#ifdef HAVE_LIBBFD_BUILDID_SUPPORT
+
+int filename__read_build_id(const char *filename, struct build_id *bid)
+{
+ size_t size = sizeof(bid->data);
+ int err = -1;
+ bfd *abfd;
+
+ abfd = bfd_openr(filename, NULL);
+ if (!abfd)
+ return -1;
+
+ if (!bfd_check_format(abfd, bfd_object)) {
+ pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename);
+ goto out_close;
+ }
+
+ if (!abfd->build_id || abfd->build_id->size > size)
+ goto out_close;
+
+ memcpy(bid->data, abfd->build_id->data, abfd->build_id->size);
+ memset(bid->data + abfd->build_id->size, 0, size - abfd->build_id->size);
+ err = bid->size = abfd->build_id->size;
+
+out_close:
+ bfd_close(abfd);
+ return err;
+}
+
+#else // HAVE_LIBBFD_BUILDID_SUPPORT
+
+int filename__read_build_id(const char *filename, struct build_id *bid)
{
+ size_t size = sizeof(bid->data);
int fd, err = -1;
Elf *elf;
@@ -548,7 +582,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
goto out_close;
}
- err = elf_read_build_id(elf, bf, size);
+ err = elf_read_build_id(elf, bid->data, size);
+ if (err > 0)
+ bid->size = err;
elf_end(elf);
out_close:
@@ -557,13 +593,13 @@ out:
return err;
}
-int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
+#endif // HAVE_LIBBFD_BUILDID_SUPPORT
+
+int sysfs__read_build_id(const char *filename, struct build_id *bid)
{
+ size_t size = sizeof(bid->data);
int fd, err = -1;
- if (size < BUILD_ID_SIZE)
- goto out;
-
fd = open(filename, O_RDONLY);
if (fd < 0)
goto out;
@@ -584,8 +620,9 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
break;
if (memcmp(bf, "GNU", sizeof("GNU")) == 0) {
size_t sz = min(descsz, size);
- if (read(fd, build_id, sz) == (ssize_t)sz) {
- memset(build_id + sz, 0, size - sz);
+ if (read(fd, bid->data, sz) == (ssize_t)sz) {
+ memset(bid->data + sz, 0, size - sz);
+ bid->size = sz;
err = 0;
break;
}
@@ -608,6 +645,44 @@ out:
return err;
}
+#ifdef HAVE_LIBBFD_SUPPORT
+
+int filename__read_debuglink(const char *filename, char *debuglink,
+ size_t size)
+{
+ int err = -1;
+ asection *section;
+ bfd *abfd;
+
+ abfd = bfd_openr(filename, NULL);
+ if (!abfd)
+ return -1;
+
+ if (!bfd_check_format(abfd, bfd_object)) {
+ pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename);
+ goto out_close;
+ }
+
+ section = bfd_get_section_by_name(abfd, ".gnu_debuglink");
+ if (!section)
+ goto out_close;
+
+ if (section->size > size)
+ goto out_close;
+
+ if (!bfd_get_section_contents(abfd, section, debuglink, 0,
+ section->size))
+ goto out_close;
+
+ err = 0;
+
+out_close:
+ bfd_close(abfd);
+ return err;
+}
+
+#else
+
int filename__read_debuglink(const char *filename, char *debuglink,
size_t size)
{
@@ -660,6 +735,8 @@ out:
return err;
}
+#endif
+
static int dso__swap_init(struct dso *dso, unsigned char eidata)
{
static unsigned int const endian = 1;
@@ -757,13 +834,17 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
/* Always reject images with a mismatched build-id: */
if (dso->has_build_id && !symbol_conf.ignore_vmlinux_buildid) {
u8 build_id[BUILD_ID_SIZE];
+ struct build_id bid;
+ int size;
- if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) {
+ size = elf_read_build_id(elf, build_id, BUILD_ID_SIZE);
+ if (size <= 0) {
dso->load_errno = DSO_LOAD_ERRNO__CANNOT_READ_BUILDID;
goto out_elf_end;
}
- if (!dso__build_id_equal(dso, build_id)) {
+ build_id__init(&bid, build_id, size);
+ if (!dso__build_id_equal(dso, &bid)) {
pr_debug("%s: build id mismatch for %s.\n", __func__, name);
dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID;
goto out_elf_end;
@@ -789,7 +870,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
if (ss->opdshdr.sh_type != SHT_PROGBITS)
ss->opdsec = NULL;
- if (dso->kernel == DSO_TYPE_USER)
+ if (dso->kernel == DSO_SPACE__USER)
ss->adjust_symbols = true;
else
ss->adjust_symbols = elf__needs_adjust_symbols(ehdr);
@@ -872,7 +953,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
* kallsyms and identity maps. Overwrite it to
* map to the kernel dso.
*/
- if (*remap_kernel && dso->kernel) {
+ if (*remap_kernel && dso->kernel && !kmodule) {
*remap_kernel = false;
map->start = shdr->sh_addr + ref_reloc(kmap);
map->end = map->start + shdr->sh_size;
@@ -1068,7 +1149,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
* Initial kernel and module mappings do not map to the dso.
* Flag the fixups.
*/
- if (dso->kernel || kmodule) {
+ if (dso->kernel) {
remap_kernel = true;
adjust_kernel_syms = dso->adjust_symbols;
}
@@ -1130,7 +1211,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
(sym.st_value & 1))
--sym.st_value;
- if (dso->kernel || kmodule) {
+ if (dso->kernel) {
if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map,
section_name, adjust_kernel_syms, kmodule, &remap_kernel))
goto out_elf_end;
@@ -1458,6 +1539,7 @@ struct kcore_copy_info {
u64 first_symbol;
u64 last_symbol;
u64 first_module;
+ u64 first_module_symbol;
u64 last_module_symbol;
size_t phnum;
struct list_head phdrs;
@@ -1534,6 +1616,8 @@ static int kcore_copy__process_kallsyms(void *arg, const char *name, char type,
return 0;
if (strchr(name, '[')) {
+ if (!kci->first_module_symbol || start < kci->first_module_symbol)
+ kci->first_module_symbol = start;
if (start > kci->last_module_symbol)
kci->last_module_symbol = start;
return 0;
@@ -1731,6 +1815,10 @@ static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir,
kci->etext += page_size;
}
+ if (kci->first_module_symbol &&
+ (!kci->first_module || kci->first_module_symbol < kci->first_module))
+ kci->first_module = kci->first_module_symbol;
+
kci->first_module = round_down(kci->first_module, page_size);
if (kci->last_module_symbol) {
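
With this change the build-id length travels with the data instead of being a
separate buffer size the caller must track. A minimal sketch of the new
calling convention (the path is illustrative; bid.data and bid.size are the
struct build_id fields used in the hunks above):

    struct build_id bid;

    if (filename__read_build_id("/usr/bin/cat", &bid) > 0)
            pr_debug("read a %zu-byte build id\n", bid.size);
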
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index d6e99af263ec..f9eb0bee7f15 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -31,9 +31,10 @@ static bool check_need_swap(int file_endian)
#define NT_GNU_BUILD_ID 3
-static int read_build_id(void *note_data, size_t note_len, void *bf,
- size_t size, bool need_swap)
+static int read_build_id(void *note_data, size_t note_len, struct build_id *bid,
+ bool need_swap)
{
+ size_t size = sizeof(bid->data);
struct {
u32 n_namesz;
u32 n_descsz;
@@ -63,8 +64,9 @@ static int read_build_id(void *note_data, size_t note_len, void *bf,
nhdr->n_namesz == sizeof("GNU")) {
if (memcmp(name, "GNU", sizeof("GNU")) == 0) {
size_t sz = min(size, descsz);
- memcpy(bf, ptr, sz);
- memset(bf + sz, 0, size - sz);
+ memcpy(bid->data, ptr, sz);
+ memset(bid->data + sz, 0, size - sz);
+ bid->size = sz;
return 0;
}
}
@@ -84,7 +86,7 @@ int filename__read_debuglink(const char *filename __maybe_unused,
/*
* Just try PT_NOTE header otherwise fails
*/
-int filename__read_build_id(const char *filename, void *bf, size_t size)
+int filename__read_build_id(const char *filename, struct build_id *bid)
{
FILE *fp;
int ret = -1;
@@ -156,9 +158,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
if (fread(buf, buf_size, 1, fp) != 1)
goto out_free;
- ret = read_build_id(buf, buf_size, bf, size, need_swap);
+ ret = read_build_id(buf, buf_size, bid, need_swap);
if (ret == 0)
- ret = size;
+ ret = bid->size;
break;
}
} else {
@@ -207,9 +209,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
if (fread(buf, buf_size, 1, fp) != 1)
goto out_free;
- ret = read_build_id(buf, buf_size, bf, size, need_swap);
+ ret = read_build_id(buf, buf_size, bid, need_swap);
if (ret == 0)
- ret = size;
+ ret = bid->size;
break;
}
}
@@ -220,7 +222,7 @@ out:
return ret;
}
-int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
+int sysfs__read_build_id(const char *filename, struct build_id *bid)
{
int fd;
int ret = -1;
@@ -243,7 +245,7 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
if (read(fd, buf, buf_size) != (ssize_t) buf_size)
goto out_free;
- ret = read_build_id(buf, buf_size, build_id, size, false);
+ ret = read_build_id(buf, buf_size, bid, false);
out_free:
free(buf);
out:
@@ -339,16 +341,15 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused,
struct symsrc *runtime_ss __maybe_unused,
int kmodule __maybe_unused)
{
- unsigned char build_id[BUILD_ID_SIZE];
+ struct build_id bid;
int ret;
ret = fd__is_64_bit(ss->fd);
if (ret >= 0)
dso->is_64_bit = ret;
- if (filename__read_build_id(ss->name, build_id, BUILD_ID_SIZE) > 0) {
- dso__set_build_id(dso, build_id);
- }
+ if (filename__read_build_id(ss->name, &bid) > 0)
+ dso__set_build_id(dso, &bid);
return 0;
}
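
For reference, read_build_id() above walks standard ELF notes with this
layout (name and descriptor are padded to 4-byte boundaries):

    u32  n_namesz;  /* here: sizeof("GNU") == 4              */
    u32  n_descsz;  /* build-id length, typically 20 (SHA-1) */
    u32  n_type;    /* NT_GNU_BUILD_ID == 3                  */
    char name[];    /* "GNU\0"                               */
    u8   desc[];    /* the bytes copied into bid->data       */
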
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 26bc6a0096ce..6138866665df 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -79,6 +79,7 @@ static enum dso_binary_type binary_type_symtab[] = {
DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP,
DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+ DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
DSO_BINARY_TYPE__NOT_FOUND,
};
@@ -566,6 +567,20 @@ void dso__sort_by_name(struct dso *dso)
return symbols__sort_by_name(&dso->symbol_names, &dso->symbols);
}
+/*
+ * Parse the leading hexadecimal digits of @ptr into @long_val.
+ * Return the number of characters consumed.
+ */
+static int hex2u64(const char *ptr, u64 *long_val)
+{
+ char *p;
+
+ *long_val = strtoull(ptr, &p, 16);
+
+ return p - ptr;
+}
+
int modules__parse(const char *filename, void *arg,
int (*process_module)(void *arg, const char *name,
u64 start, u64 size))
@@ -648,9 +663,12 @@ static bool symbol__is_idle(const char *name)
"exit_idle",
"mwait_idle",
"mwait_idle_with_hints",
+ "mwait_idle_with_hints.constprop.0",
"poll_idle",
"ppc64_runlatch_off",
"pseries_dedicated_idle_sleep",
+ "psw_idle",
+ "psw_idle_exit",
NULL
};
int i;
@@ -791,7 +809,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
if (strcmp(curr_map->dso->short_name, module)) {
if (curr_map != initial_map &&
- dso->kernel == DSO_TYPE_GUEST_KERNEL &&
+ dso->kernel == DSO_SPACE__KERNEL_GUEST &&
machine__is_default_guest(machine)) {
/*
* We assume all symbols of a module are
@@ -848,7 +866,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
goto add_symbol;
}
- if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
snprintf(dso_name, sizeof(dso_name),
"[guest.kernel].%d",
kernel_range++);
@@ -892,7 +910,7 @@ discard_symbol:
}
if (curr_map != initial_map &&
- dso->kernel == DSO_TYPE_GUEST_KERNEL &&
+ dso->kernel == DSO_SPACE__KERNEL_GUEST &&
machine__is_default_guest(kmaps->machine)) {
dso__set_loaded(curr_map->dso);
}
@@ -1209,6 +1227,7 @@ int maps__merge_in(struct maps *kmaps, struct map *new_map)
m->end = old_map->start;
list_add_tail(&m->node, &merged);
+ new_map->pgoff += old_map->end - new_map->start;
new_map->start = old_map->end;
}
} else {
@@ -1229,6 +1248,7 @@ int maps__merge_in(struct maps *kmaps, struct map *new_map)
* |new......| -> |new...|
* |old....| -> |old....|
*/
+ new_map->pgoff += old_map->end - new_map->start;
new_map->start = old_map->end;
}
}
@@ -1368,7 +1388,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
* Set the data type and long name so that kcore can be read via
* dso__data_read_addr().
*/
- if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
dso->binary_type = DSO_BINARY_TYPE__GUEST_KCORE;
else
dso->binary_type = DSO_BINARY_TYPE__KCORE;
@@ -1432,7 +1452,7 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename,
symbols__fixup_end(&dso->symbols);
symbols__fixup_duplicate(&dso->symbols);
- if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
else
dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
@@ -1506,6 +1526,138 @@ out_failure:
return -1;
}
+#ifdef HAVE_LIBBFD_SUPPORT
+#define PACKAGE 'perf'
+#include <bfd.h>
+
+static int bfd_symbols__cmpvalue(const void *a, const void *b)
+{
+ const asymbol *as = *(const asymbol **)a, *bs = *(const asymbol **)b;
+
+ if (bfd_asymbol_value(as) != bfd_asymbol_value(bs))
+ return bfd_asymbol_value(as) - bfd_asymbol_value(bs);
+
+ return bfd_asymbol_name(as)[0] - bfd_asymbol_name(bs)[0];
+}
+
+static int bfd2elf_binding(asymbol *symbol)
+{
+ if (symbol->flags & BSF_WEAK)
+ return STB_WEAK;
+ if (symbol->flags & BSF_GLOBAL)
+ return STB_GLOBAL;
+ if (symbol->flags & BSF_LOCAL)
+ return STB_LOCAL;
+ return -1;
+}
+
+int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
+{
+ int err = -1;
+ long symbols_size, symbols_count;
+ asection *section;
+ asymbol **symbols, *sym;
+ struct symbol *symbol;
+ bfd *abfd;
+ u_int i;
+ u64 start, len;
+
+ abfd = bfd_openr(dso->long_name, NULL);
+ if (!abfd)
+ return -1;
+
+ if (!bfd_check_format(abfd, bfd_object)) {
+ pr_debug2("%s: cannot read %s bfd file.\n", __func__,
+ dso->long_name);
+ goto out_close;
+ }
+
+ if (bfd_get_flavour(abfd) == bfd_target_elf_flavour)
+ goto out_close;
+
+ section = bfd_get_section_by_name(abfd, ".text");
+ if (section)
+ dso->text_offset = section->vma - section->filepos;
+
+ bfd_close(abfd);
+
+ abfd = bfd_openr(debugfile, NULL);
+ if (!abfd)
+ return -1;
+
+ if (!bfd_check_format(abfd, bfd_object)) {
+ pr_debug2("%s: cannot read %s bfd file.\n", __func__,
+ debugfile);
+ goto out_close;
+ }
+
+ if (bfd_get_flavour(abfd) == bfd_target_elf_flavour)
+ goto out_close;
+
+ symbols_size = bfd_get_symtab_upper_bound(abfd);
+ if (symbols_size == 0) {
+ bfd_close(abfd);
+ return 0;
+ }
+
+ if (symbols_size < 0)
+ goto out_close;
+
+ symbols = malloc(symbols_size);
+ if (!symbols)
+ goto out_close;
+
+ symbols_count = bfd_canonicalize_symtab(abfd, symbols);
+ if (symbols_count < 0)
+ goto out_free;
+
+ qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue);
+
+#ifdef bfd_get_section
+#define bfd_asymbol_section bfd_get_section
+#endif
+ for (i = 0; i < symbols_count; ++i) {
+ sym = symbols[i];
+ section = bfd_asymbol_section(sym);
+ if (bfd2elf_binding(sym) < 0)
+ continue;
+
+ while (i + 1 < symbols_count &&
+ bfd_asymbol_section(symbols[i + 1]) == section &&
+ bfd2elf_binding(symbols[i + 1]) < 0)
+ i++;
+
+ if (i + 1 < symbols_count &&
+ bfd_asymbol_section(symbols[i + 1]) == section)
+ len = symbols[i + 1]->value - sym->value;
+ else
+ len = section->size - sym->value;
+
+ start = bfd_asymbol_value(sym) - dso->text_offset;
+ symbol = symbol__new(start, len, bfd2elf_binding(sym), STT_FUNC,
+ bfd_asymbol_name(sym));
+ if (!symbol)
+ goto out_free;
+
+ symbols__insert(&dso->symbols, symbol);
+ }
+#ifdef bfd_get_section
+#undef bfd_asymbol_section
+#endif
+
+ symbols__fixup_end(&dso->symbols);
+ symbols__fixup_duplicate(&dso->symbols);
+ dso->adjust_symbols = 1;
+
+ err = 0;
+out_free:
+ free(symbols);
+out_close:
+ bfd_close(abfd);
+ return err;
+}
+#endif
+
static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
enum dso_binary_type type)
{
@@ -1515,19 +1667,20 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
- return !kmod && dso->kernel == DSO_TYPE_USER;
+ return !kmod && dso->kernel == DSO_SPACE__USER;
case DSO_BINARY_TYPE__KALLSYMS:
case DSO_BINARY_TYPE__VMLINUX:
case DSO_BINARY_TYPE__KCORE:
- return dso->kernel == DSO_TYPE_KERNEL;
+ return dso->kernel == DSO_SPACE__KERNEL;
case DSO_BINARY_TYPE__GUEST_KALLSYMS:
case DSO_BINARY_TYPE__GUEST_VMLINUX:
case DSO_BINARY_TYPE__GUEST_KCORE:
- return dso->kernel == DSO_TYPE_GUEST_KERNEL;
+ return dso->kernel == DSO_SPACE__KERNEL_GUEST;
case DSO_BINARY_TYPE__GUEST_KMODULE:
case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
@@ -1544,6 +1697,8 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
return true;
case DSO_BINARY_TYPE__BPF_PROG_INFO:
+ case DSO_BINARY_TYPE__BPF_IMAGE:
+ case DSO_BINARY_TYPE__OOL:
case DSO_BINARY_TYPE__NOT_FOUND:
default:
return false;
@@ -1600,7 +1755,7 @@ int dso__load(struct dso *dso, struct map *map)
struct symsrc *syms_ss = NULL, *runtime_ss = NULL;
bool kmod;
bool perfmap;
- unsigned char build_id[BUILD_ID_SIZE];
+ struct build_id bid;
struct nscookie nsc;
char newmapname[PATH_MAX];
const char *map_path = dso->long_name;
@@ -1628,9 +1783,9 @@ int dso__load(struct dso *dso, struct map *map)
dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
if (dso->kernel && !kmod) {
- if (dso->kernel == DSO_TYPE_KERNEL)
+ if (dso->kernel == DSO_SPACE__KERNEL)
ret = dso__load_kernel_sym(dso, map);
- else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ else if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
ret = dso__load_guest_kernel_sym(dso, map);
machine = map__kmaps(map)->machine;
@@ -1662,8 +1817,8 @@ int dso__load(struct dso *dso, struct map *map)
if (!dso->has_build_id &&
is_regular_file(dso->long_name)) {
__symbol__join_symfs(name, PATH_MAX, dso->long_name);
- if (filename__read_build_id(name, build_id, BUILD_ID_SIZE) > 0)
- dso__set_build_id(dso, build_id);
+ if (filename__read_build_id(name, &bid) > 0)
+ dso__set_build_id(dso, &bid);
}
/*
@@ -1676,6 +1831,7 @@ int dso__load(struct dso *dso, struct map *map)
bool next_slot = false;
bool is_reg;
bool nsexit;
+ int bfdrc = -1;
int sirc = -1;
enum dso_binary_type symtab_type = binary_type_symtab[i];
@@ -1694,12 +1850,19 @@ int dso__load(struct dso *dso, struct map *map)
nsinfo__mountns_exit(&nsc);
is_reg = is_regular_file(name);
+#ifdef HAVE_LIBBFD_SUPPORT
if (is_reg)
+ bfdrc = dso__load_bfd_symbols(dso, name);
+#endif
+ if (is_reg && bfdrc < 0)
sirc = symsrc__init(ss, dso, name, symtab_type);
if (nsexit)
nsinfo__mountns_enter(dso->nsinfo, &nsc);
+ if (bfdrc == 0)
+ break;
+
if (!is_reg || sirc < 0)
continue;
@@ -1860,7 +2023,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map,
else
symbol__join_symfs(symfs_vmlinux, vmlinux);
- if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
else
symtab_type = DSO_BINARY_TYPE__VMLINUX;
@@ -1872,7 +2035,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map,
symsrc__destroy(&ss);
if (err > 0) {
- if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
dso->binary_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
else
dso->binary_type = DSO_BINARY_TYPE__VMLINUX;
@@ -1959,7 +2122,7 @@ static bool filename__readable(const char *file)
static char *dso__find_kallsyms(struct dso *dso, struct map *map)
{
- u8 host_build_id[BUILD_ID_SIZE];
+ struct build_id bid;
char sbuild_id[SBUILD_ID_SIZE];
bool is_host = false;
char path[PATH_MAX];
@@ -1972,9 +2135,8 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map)
goto proc_kallsyms;
}
- if (sysfs__read_build_id("/sys/kernel/notes", host_build_id,
- sizeof(host_build_id)) == 0)
- is_host = dso__build_id_equal(dso, host_build_id);
+ if (sysfs__read_build_id("/sys/kernel/notes", &bid) == 0)
+ is_host = dso__build_id_equal(dso, &bid);
/* Try a fast path for /proc/kallsyms if possible */
if (is_host) {
@@ -1990,7 +2152,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map)
goto proc_kallsyms;
}
- build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+ build_id__sprintf(&dso->bid, sbuild_id);
/* Find kallsyms in build-id cache with kcore */
scnprintf(path, sizeof(path), "%s/%s/%s",
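
The hex2u64() helper moved in above is a thin wrapper around strtoull(); for
example (a sketch, with an illustrative kallsyms-style line):

    u64 addr;
    int len = hex2u64("ffffffffa0001000 t some_sym", &addr);
    /* len == 16, addr == 0xffffffffa0001000; parsing stops at the space */
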
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 93fc43db1be3..f4801c488def 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -23,12 +23,13 @@ struct dso;
struct map;
struct maps;
struct option;
+struct build_id;
/*
* libelf 0.8.x and earlier do not support ELF_C_READ_MMAP;
* for newer versions we can use mmap to reduce memory usage:
*/
-#ifdef HAVE_LIBELF_MMAP_SUPPORT
+#ifdef ELF_C_READ_MMAP
# define PERF_ELF_C_READ_MMAP ELF_C_READ_MMAP
#else
# define PERF_ELF_C_READ_MMAP ELF_C_READ
@@ -55,7 +56,7 @@ struct symbol {
u8 inlined:1;
u8 arch_sym;
bool annotate2;
- char name[0];
+ char name[];
};
void symbol__delete(struct symbol *sym);
@@ -142,8 +143,8 @@ struct symbol *dso__next_symbol(struct symbol *sym);
enum dso_type dso__type_fd(int fd);
-int filename__read_build_id(const char *filename, void *bf, size_t size);
-int sysfs__read_build_id(const char *filename, void *bf, size_t size);
+int filename__read_build_id(const char *filename, struct build_id *id);
+int sysfs__read_build_id(const char *filename, struct build_id *bid);
int modules__parse(const char *filename, void *arg,
int (*process_module)(void *arg, const char *name,
u64 start, u64 size));
@@ -175,6 +176,10 @@ int symbol__config_symfs(const struct option *opt __maybe_unused,
struct symsrc;
+#ifdef HAVE_LIBBFD_SUPPORT
+int dso__load_bfd_symbols(struct dso *dso, const char *debugfile);
+#endif
+
int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
struct symsrc *runtime_ss, int kmodule);
int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index a661b122d9d8..8a23391558cf 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -37,6 +37,7 @@
#include <string.h>
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
#include <api/fs/fs.h>
+#include <api/io.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -71,7 +72,6 @@ int perf_tool__process_synth_event(struct perf_tool *tool,
static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
pid_t *tgid, pid_t *ppid)
{
- char filename[PATH_MAX];
char bf[4096];
int fd;
size_t size = 0;
@@ -81,11 +81,11 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
*tgid = -1;
*ppid = -1;
- snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
+ snprintf(bf, sizeof(bf), "/proc/%d/status", pid);
- fd = open(filename, O_RDONLY);
+ fd = open(bf, O_RDONLY);
if (fd < 0) {
- pr_debug("couldn't open %s\n", filename);
+ pr_debug("couldn't open %s\n", bf);
return -1;
}
@@ -274,6 +274,79 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
return 0;
}
+static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end,
+ u32 *prot, u32 *flags, __u64 *offset,
+ u32 *maj, u32 *min,
+ __u64 *inode,
+ ssize_t pathname_size, char *pathname)
+{
+ __u64 temp;
+ int ch;
+ char *start_pathname = pathname;
+
+ if (io__get_hex(io, start) != '-')
+ return false;
+ if (io__get_hex(io, end) != ' ')
+ return false;
+
+ /* map protection and flags bits */
+ *prot = 0;
+ ch = io__get_char(io);
+ if (ch == 'r')
+ *prot |= PROT_READ;
+ else if (ch != '-')
+ return false;
+ ch = io__get_char(io);
+ if (ch == 'w')
+ *prot |= PROT_WRITE;
+ else if (ch != '-')
+ return false;
+ ch = io__get_char(io);
+ if (ch == 'x')
+ *prot |= PROT_EXEC;
+ else if (ch != '-')
+ return false;
+ ch = io__get_char(io);
+ if (ch == 's')
+ *flags = MAP_SHARED;
+ else if (ch == 'p')
+ *flags = MAP_PRIVATE;
+ else
+ return false;
+ if (io__get_char(io) != ' ')
+ return false;
+
+ if (io__get_hex(io, offset) != ' ')
+ return false;
+
+ if (io__get_hex(io, &temp) != ':')
+ return false;
+ *maj = temp;
+ if (io__get_hex(io, &temp) != ' ')
+ return false;
+ *min = temp;
+
+ ch = io__get_dec(io, inode);
+ if (ch != ' ') {
+ *pathname = '\0';
+ return ch == '\n';
+ }
+ do {
+ ch = io__get_char(io);
+ } while (ch == ' ');
+ while (true) {
+ if (ch < 0)
+ return false;
+ if (ch == '\0' || ch == '\n' ||
+ (pathname + 1 - start_pathname) >= pathname_size) {
+ *pathname = '\0';
+ return true;
+ }
+ *pathname++ = ch;
+ ch = io__get_char(io);
+ }
+}
+
int perf_event__synthesize_mmap_events(struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid,
@@ -281,9 +354,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
struct machine *machine,
bool mmap_data)
{
- char filename[PATH_MAX];
- FILE *fp;
unsigned long long t;
+ char bf[BUFSIZ];
+ struct io io;
bool truncation = false;
unsigned long long timeout = proc_map_timeout * 1000000ULL;
int rc = 0;
@@ -293,59 +366,52 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
if (machine__is_default_guest(machine))
return 0;
- snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps",
- machine->root_dir, pid, pid);
+ snprintf(bf, sizeof(bf), "%s/proc/%d/task/%d/maps",
+ machine->root_dir, pid, pid);
- fp = fopen(filename, "r");
- if (fp == NULL) {
+ io.fd = open(bf, O_RDONLY, 0);
+ if (io.fd < 0) {
/*
* We raced with a task exiting - just return:
*/
- pr_debug("couldn't open %s\n", filename);
+ pr_debug("couldn't open %s\n", bf);
return -1;
}
+ io__init(&io, io.fd, bf, sizeof(bf));
event->header.type = PERF_RECORD_MMAP2;
t = rdclock();
- while (1) {
- char bf[BUFSIZ];
- char prot[5];
- char execname[PATH_MAX];
- char anonstr[] = "//anon";
- unsigned int ino;
+ while (!io.eof) {
+ static const char anonstr[] = "//anon";
size_t size;
- ssize_t n;
- if (fgets(bf, sizeof(bf), fp) == NULL)
- break;
+ /* ensure null termination since stack will be reused. */
+ event->mmap2.filename[0] = '\0';
+
+ /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
+ if (!read_proc_maps_line(&io,
+ &event->mmap2.start,
+ &event->mmap2.len,
+ &event->mmap2.prot,
+ &event->mmap2.flags,
+ &event->mmap2.pgoff,
+ &event->mmap2.maj,
+ &event->mmap2.min,
+ &event->mmap2.ino,
+ sizeof(event->mmap2.filename),
+ event->mmap2.filename))
+ continue;
if ((rdclock() - t) > timeout) {
- pr_warning("Reading %s time out. "
+ pr_warning("Reading %s/proc/%d/task/%d/maps time out. "
"You may want to increase "
"the time limit by --proc-map-timeout\n",
- filename);
+ machine->root_dir, pid, pid);
truncation = true;
goto out;
}
- /* ensure null termination since stack will be reused. */
- strcpy(execname, "");
-
- /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
- n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n",
- &event->mmap2.start, &event->mmap2.len, prot,
- &event->mmap2.pgoff, &event->mmap2.maj,
- &event->mmap2.min,
- &ino, execname);
-
- /*
- * Anon maps don't have the execname.
- */
- if (n < 7)
- continue;
-
- event->mmap2.ino = (u64)ino;
event->mmap2.ino_generation = 0;
/*
@@ -356,23 +422,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
else
event->header.misc = PERF_RECORD_MISC_GUEST_USER;
- /* map protection and flags bits */
- event->mmap2.prot = 0;
- event->mmap2.flags = 0;
- if (prot[0] == 'r')
- event->mmap2.prot |= PROT_READ;
- if (prot[1] == 'w')
- event->mmap2.prot |= PROT_WRITE;
- if (prot[2] == 'x')
- event->mmap2.prot |= PROT_EXEC;
-
- if (prot[3] == 's')
- event->mmap2.flags |= MAP_SHARED;
- else
- event->mmap2.flags |= MAP_PRIVATE;
-
- if (prot[2] != 'x') {
- if (!mmap_data || prot[0] != 'r')
+ if ((event->mmap2.prot & PROT_EXEC) == 0) {
+ if (!mmap_data || (event->mmap2.prot & PROT_READ) == 0)
continue;
event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
@@ -382,17 +433,17 @@ out:
if (truncation)
event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
- if (!strcmp(execname, ""))
- strcpy(execname, anonstr);
+ if (!strcmp(event->mmap2.filename, ""))
+ strcpy(event->mmap2.filename, anonstr);
if (hugetlbfs_mnt_len &&
- !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
- strcpy(execname, anonstr);
+ !strncmp(event->mmap2.filename, hugetlbfs_mnt,
+ hugetlbfs_mnt_len)) {
+ strcpy(event->mmap2.filename, anonstr);
event->mmap2.flags |= MAP_HUGETLB;
}
- size = strlen(execname) + 1;
- memcpy(event->mmap2.filename, execname, size);
+ size = strlen(event->mmap2.filename) + 1;
size = PERF_ALIGN(size, sizeof(u64));
event->mmap2.len -= event->mmap.start;
event->mmap2.header.size = (sizeof(event->mmap2) -
@@ -411,7 +462,7 @@ out:
break;
}
- fclose(fp);
+ close(io.fd);
return rc;
}
@@ -1130,7 +1181,7 @@ void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf
synthesize_mask((struct perf_record_record_cpu_map *)data->data, map, max);
default:
break;
- };
+ }
}
static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map)
@@ -1910,7 +1961,7 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16
len = pos->long_name_len + 1;
len = PERF_ALIGN(len, NAME_ALIGN);
- memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id));
+ memcpy(&ev.build_id.build_id, pos->bid.data, sizeof(pos->bid.data));
ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
ev.build_id.header.misc = misc;
ev.build_id.pid = machine->pid;
@@ -1955,14 +2006,6 @@ int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct p
return 0;
}
-int __weak perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
- struct perf_tool *tool __maybe_unused,
- perf_event__handler_t process __maybe_unused,
- struct machine *machine __maybe_unused)
-{
- return 0;
-}
-
extern const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE];
int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session,
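
For reference, each line handed to read_proc_maps_line() has the shape shown
in the /bin/cat comment above; a self-contained sketch of the same field
layout using the old sscanf() approach (illustrative only, not perf code):

    #include <stdio.h>

    int main(void)
    {
            unsigned long long start, end, off, ino;
            unsigned int maj, min;
            char prot[5], path[4096] = "";
            const char *line =
                    "00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat\n";

            /* anonymous maps look the same but have no pathname field */
            int n = sscanf(line, "%llx-%llx %4s %llx %x:%x %llu %4095[^\n]",
                           &start, &end, prot, &off, &maj, &min, &ino, path);
            printf("%d fields: %llx-%llx %s %s\n", n, start, end, prot, path);
            return 0;
    }
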
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 820fceeb19a9..03bd99d3be16 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -8,9 +8,9 @@
#include "syscalltbl.h"
#include <stdlib.h>
#include <linux/compiler.h>
+#include <linux/zalloc.h>
#ifdef HAVE_SYSCALL_TABLE_SUPPORT
-#include <linux/zalloc.h>
#include <string.h>
#include "string2.h"
@@ -142,7 +142,7 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g
struct syscalltbl *syscalltbl__new(void)
{
- struct syscalltbl *tbl = malloc(sizeof(*tbl));
+ struct syscalltbl *tbl = zalloc(sizeof(*tbl));
if (tbl)
tbl->audit_machine = audit_detect_machine();
return tbl;
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
index 9172613028d0..a41d2ca9e4ae 100644
--- a/tools/perf/util/syscalltbl.h
+++ b/tools/perf/util/syscalltbl.h
@@ -3,14 +3,12 @@
#define __PERF_SYSCALLTBL_H
struct syscalltbl {
- union {
- int audit_machine;
- struct {
- int max_id;
- int nr_entries;
- void *entries;
- } syscalls;
- };
+ int audit_machine;
+ struct {
+ int max_id;
+ int nr_entries;
+ void *entries;
+ } syscalls;
};
struct syscalltbl *syscalltbl__new(void);
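
The union removed here made audit_machine alias syscalls.max_id (both occupy
the struct's first four bytes), so a write to one silently clobbered the
other. Roughly (a sketch of the hazard, not code from this tree):

    /* old layout: the two assignments below targeted the same bytes */
    tbl->audit_machine   = audit_detect_machine();
    tbl->syscalls.max_id = id;   /* overwrote audit_machine, and vice versa */
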
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 0885967d5bc3..1b992bbba4e8 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -80,6 +80,10 @@ struct thread_stack_entry {
* @comm: current comm
* @arr_sz: size of array if this is the first element of an array
* @rstate: used to detect retpolines
+ * @br_stack_rb: branch stack (ring buffer)
+ * @br_stack_sz: maximum branch stack size
+ * @br_stack_pos: current position in @br_stack_rb
+ * @mispred_all: mark all branches as mispredicted
*/
struct thread_stack {
struct thread_stack_entry *stack;
@@ -95,6 +99,10 @@ struct thread_stack {
struct comm *comm;
unsigned int arr_sz;
enum retpoline_state_t rstate;
+ struct branch_stack *br_stack_rb;
+ unsigned int br_stack_sz;
+ unsigned int br_stack_pos;
+ bool mispred_all;
};
/*
@@ -126,13 +134,26 @@ static int thread_stack__grow(struct thread_stack *ts)
}
static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
- struct call_return_processor *crp)
+ struct call_return_processor *crp,
+ bool callstack, unsigned int br_stack_sz)
{
int err;
- err = thread_stack__grow(ts);
- if (err)
- return err;
+ if (callstack) {
+ err = thread_stack__grow(ts);
+ if (err)
+ return err;
+ }
+
+ if (br_stack_sz) {
+ size_t sz = sizeof(struct branch_stack);
+
+ sz += br_stack_sz * sizeof(struct branch_entry);
+ ts->br_stack_rb = zalloc(sz);
+ if (!ts->br_stack_rb)
+ return -ENOMEM;
+ ts->br_stack_sz = br_stack_sz;
+ }
if (thread->maps && thread->maps->machine) {
struct machine *machine = thread->maps->machine;
@@ -150,7 +171,9 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
}
static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
- struct call_return_processor *crp)
+ struct call_return_processor *crp,
+ bool callstack,
+ unsigned int br_stack_sz)
{
struct thread_stack *ts = thread->ts, *new_ts;
unsigned int old_sz = ts ? ts->arr_sz : 0;
@@ -176,7 +199,7 @@ static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
ts += cpu;
if (!ts->stack &&
- thread_stack__init(ts, thread, crp))
+ thread_stack__init(ts, thread, crp, callstack, br_stack_sz))
return NULL;
return ts;
@@ -319,6 +342,9 @@ static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
if (!crp) {
ts->cnt = 0;
+ ts->br_stack_pos = 0;
+ if (ts->br_stack_rb)
+ ts->br_stack_rb->nr = 0;
return 0;
}
@@ -353,8 +379,33 @@ int thread_stack__flush(struct thread *thread)
return err;
}
+static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags,
+ u64 from_ip, u64 to_ip)
+{
+ struct branch_stack *bs = ts->br_stack_rb;
+ struct branch_entry *be;
+
+ if (!ts->br_stack_pos)
+ ts->br_stack_pos = ts->br_stack_sz;
+
+ ts->br_stack_pos -= 1;
+
+ be = &bs->entries[ts->br_stack_pos];
+ be->from = from_ip;
+ be->to = to_ip;
+ be->flags.value = 0;
+ be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT);
+ be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX);
+ /* No support for mispredict */
+ be->flags.mispred = ts->mispred_all;
+
+ if (bs->nr < ts->br_stack_sz)
+ bs->nr += 1;
+}
+
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
- u64 to_ip, u16 insn_len, u64 trace_nr)
+ u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
+ unsigned int br_stack_sz, bool mispred_all)
{
struct thread_stack *ts = thread__stack(thread, cpu);
@@ -362,12 +413,13 @@ int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
return -EINVAL;
if (!ts) {
- ts = thread_stack__new(thread, cpu, NULL);
+ ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz);
if (!ts) {
pr_warning("Out of memory: no thread stack\n");
return -ENOMEM;
}
ts->trace_nr = trace_nr;
+ ts->mispred_all = mispred_all;
}
/*
@@ -381,8 +433,14 @@ int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
ts->trace_nr = trace_nr;
}
- /* Stop here if thread_stack__process() is in use */
- if (ts->crp)
+ if (br_stack_sz)
+ thread_stack__update_br_stack(ts, flags, from_ip, to_ip);
+
+ /*
+ * Stop here if thread_stack__process() is in use, or not recording call
+ * stack.
+ */
+ if (ts->crp || !callstack)
return 0;
if (flags & PERF_IP_FLAG_CALL) {
@@ -430,6 +488,7 @@ static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
__thread_stack__flush(thread, ts);
zfree(&ts->stack);
+ zfree(&ts->br_stack_rb);
}
static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
@@ -497,6 +556,199 @@ void thread_stack__sample(struct thread *thread, int cpu,
chain->nr = i;
}
+/*
+ * Hardware sample records, created some time after the event occurred, need to
+ * have subsequent addresses removed from the call chain.
+ */
+void thread_stack__sample_late(struct thread *thread, int cpu,
+ struct ip_callchain *chain, size_t sz,
+ u64 sample_ip, u64 kernel_start)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+ u64 sample_context = callchain_context(sample_ip, kernel_start);
+ u64 last_context, context, ip;
+ size_t nr = 0, j;
+
+ if (sz < 2) {
+ chain->nr = 0;
+ return;
+ }
+
+ if (!ts)
+ goto out;
+
+ /*
+ * When tracing kernel space, kernel addresses occur at the top of the
+ * call chain after the event occurred but before tracing stopped.
+ * Skip them.
+ */
+ for (j = 1; j <= ts->cnt; j++) {
+ ip = ts->stack[ts->cnt - j].ret_addr;
+ context = callchain_context(ip, kernel_start);
+ if (context == PERF_CONTEXT_USER ||
+ (context == sample_context && ip == sample_ip))
+ break;
+ }
+
+ last_context = sample_ip; /* Use sample_ip as an invalid context */
+
+ for (; nr < sz && j <= ts->cnt; nr++, j++) {
+ ip = ts->stack[ts->cnt - j].ret_addr;
+ context = callchain_context(ip, kernel_start);
+ if (context != last_context) {
+ if (nr >= sz - 1)
+ break;
+ chain->ips[nr++] = context;
+ last_context = context;
+ }
+ chain->ips[nr] = ip;
+ }
+out:
+ if (nr) {
+ chain->nr = nr;
+ } else {
+ chain->ips[0] = sample_context;
+ chain->ips[1] = sample_ip;
+ chain->nr = 2;
+ }
+}
+
+void thread_stack__br_sample(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+ const size_t bsz = sizeof(struct branch_entry);
+ struct branch_stack *src;
+ struct branch_entry *be;
+ unsigned int nr;
+
+ dst->nr = 0;
+
+ if (!ts)
+ return;
+
+ src = ts->br_stack_rb;
+ if (!src->nr)
+ return;
+
+ dst->nr = min((unsigned int)src->nr, sz);
+
+ be = &dst->entries[0];
+ nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr);
+ memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr);
+
+ if (src->nr >= ts->br_stack_sz) {
+ sz -= nr;
+ be = &dst->entries[nr];
+ nr = min(ts->br_stack_pos, sz);
+ memcpy(be, &src->entries[0], bsz * nr);
+ }
+}
+
+/* Start of user space branch entries */
+static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start)
+{
+ if (!*start)
+ *start = be->to && be->to < kernel_start;
+
+ return *start;
+}
+
+/*
+ * Start of branch entries after the ip fell in between 2 branches, or user
+ * space branch entries.
+ */
+static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start,
+ bool *start, struct branch_entry *nb)
+{
+ if (!*start) {
+ *start = (nb && sample_ip >= be->to && sample_ip <= nb->from) ||
+ be->from < kernel_start ||
+ (be->to && be->to < kernel_start);
+ }
+
+ return *start;
+}
+
+/*
+ * Hardware sample records, created some time after the event occurred, need to
+ * have subsequent addresses removed from the branch stack.
+ */
+void thread_stack__br_sample_late(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz,
+ u64 ip, u64 kernel_start)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+ struct branch_entry *d, *s, *spos, *ssz;
+ struct branch_stack *src;
+ unsigned int nr = 0;
+ bool start = false;
+
+ dst->nr = 0;
+
+ if (!ts)
+ return;
+
+ src = ts->br_stack_rb;
+ if (!src->nr)
+ return;
+
+ spos = &src->entries[ts->br_stack_pos];
+ ssz = &src->entries[ts->br_stack_sz];
+
+ d = &dst->entries[0];
+ s = spos;
+
+ if (ip < kernel_start) {
+ /*
+ * User space sample: start copying branch entries when the
+ * branch is in user space.
+ */
+ for (s = spos; s < ssz && nr < sz; s++) {
+ if (us_start(s, kernel_start, &start)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ }
+
+ if (src->nr >= ts->br_stack_sz) {
+ for (s = &src->entries[0]; s < spos && nr < sz; s++) {
+ if (us_start(s, kernel_start, &start)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ }
+ }
+ } else {
+ struct branch_entry *nb = NULL;
+
+ /*
+ * Kernel space sample: start copying branch entries when the ip
+ * falls in between 2 branches (or the branch is in user space
+ * because then the start must have been missed).
+ */
+ for (s = spos; s < ssz && nr < sz; s++) {
+ if (ks_start(s, ip, kernel_start, &start, nb)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ nb = s;
+ }
+
+ if (src->nr >= ts->br_stack_sz) {
+ for (s = &src->entries[0]; s < spos && nr < sz; s++) {
+ if (ks_start(s, ip, kernel_start, &start, nb)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ nb = s;
+ }
+ }
+ }
+
+ dst->nr = nr;
+}
+
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
void *data)
@@ -864,7 +1116,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
}
if (!ts) {
- ts = thread_stack__new(thread, sample->cpu, crp);
+ ts = thread_stack__new(thread, sample->cpu, crp, true, 0);
if (!ts)
return -ENOMEM;
ts->comm = comm;
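
The branch-stack ring buffer above fills downward, so br_stack_pos always
names the most recent entry. For example, with br_stack_sz == 4 after six
branches b0..b5 (a sketch of the resulting state, b5 newest):

    entries: [ b3 ][ b2 ][ b5 ][ b4 ]
                          ^
                    br_stack_pos == 2

    thread_stack__br_sample() copies entries[2..4) and then, because the
    buffer has wrapped (src->nr >= br_stack_sz), entries[0..2), yielding
    b5 b4 b3 b2 -- newest first, as branch_stack consumers expect.
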
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index e1ec5a58f1b2..3bc47a42af8e 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -81,10 +81,19 @@ struct call_return_processor {
};
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
- u64 to_ip, u16 insn_len, u64 trace_nr);
+ u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
+ unsigned int br_stack_sz, bool mispred_all);
void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
size_t sz, u64 ip, u64 kernel_start);
+void thread_stack__sample_late(struct thread *thread, int cpu,
+ struct ip_callchain *chain, size_t sz, u64 ip,
+ u64 kernel_start);
+void thread_stack__br_sample(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz);
+void thread_stack__br_sample_late(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz,
+ u64 sample_ip, u64 kernel_start);
int thread_stack__flush(struct thread *thread);
void thread_stack__free(struct thread *thread);
size_t thread_stack__depth(struct thread *thread, int cpu);
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 28b719388028..665e5c0618ed 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -47,6 +47,7 @@ struct thread *thread__new(pid_t pid, pid_t tid)
thread->tid = tid;
thread->ppid = -1;
thread->cpu = -1;
+ thread->lbr_stitch_enable = false;
INIT_LIST_HEAD(&thread->namespaces_list);
INIT_LIST_HEAD(&thread->comm_list);
init_rwsem(&thread->namespaces_lock);
@@ -110,6 +111,7 @@ void thread__delete(struct thread *thread)
exit_rwsem(&thread->namespaces_lock);
exit_rwsem(&thread->comm_lock);
+ thread__free_stitch_list(thread);
free(thread);
}
@@ -452,3 +454,25 @@ int thread__memcpy(struct thread *thread, struct machine *machine,
return dso__data_read_offset(al.map->dso, machine, offset, buf, len);
}
+
+void thread__free_stitch_list(struct thread *thread)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct stitch_list *pos, *tmp;
+
+ if (!lbr_stitch)
+ return;
+
+ list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) {
+ list_del_init(&pos->node);
+ free(pos);
+ }
+
+ list_for_each_entry_safe(pos, tmp, &lbr_stitch->free_lists, node) {
+ list_del_init(&pos->node);
+ free(pos);
+ }
+
+ zfree(&lbr_stitch->prev_lbr_cursor);
+ zfree(&thread->lbr_stitch);
+}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 20b96b5d1f15..b066fb30d203 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -13,6 +13,8 @@
#include <strlist.h>
#include <intlist.h>
#include "rwsem.h"
+#include "event.h"
+#include "callchain.h"
struct addr_location;
struct map;
@@ -20,6 +22,13 @@ struct perf_record_namespaces;
struct thread_stack;
struct unwind_libunwind_ops;
+struct lbr_stitch {
+ struct list_head lists;
+ struct list_head free_lists;
+ struct perf_sample prev_sample;
+ struct callchain_cursor_node *prev_lbr_cursor;
+};
+
struct thread {
union {
struct rb_node rb_node;
@@ -46,6 +55,10 @@ struct thread {
struct srccode_state srccode_state;
bool filter;
int filter_entry_depth;
+
+ /* LBR call stack stitch */
+ bool lbr_stitch_enable;
+ struct lbr_stitch *lbr_stitch;
};
struct machine;
@@ -142,4 +155,6 @@ static inline bool thread__is_filtered(struct thread *thread)
return false;
}
+void thread__free_stitch_list(struct thread *thread);
+
#endif /* __PERF_THREAD_H */
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 3fb67bd31e4a..bbbc0dcd461f 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -57,7 +57,8 @@ struct perf_tool {
throttle,
unthrottle,
ksymbol,
- bpf;
+ bpf,
+ text_poke;
event_attr_op attr;
event_attr_op event_update;
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 3dce2de9d005..27945eeb0cb5 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -77,7 +77,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
opts->freq ? "Hz" : "");
}
- ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel));
+ ret += SNPRINTF(bf + ret, size - ret, "%s", evsel__name(top->sym_evsel));
ret += SNPRINTF(bf + ret, size - ret, "], ");
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index f117d4f4821e..ff8391208ecd 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -18,7 +18,7 @@ struct perf_session;
struct perf_top {
struct perf_tool tool;
- struct evlist *evlist;
+ struct evlist *evlist, *sb_evlist;
struct record_opts record_opts;
struct annotation_options annotation_opts;
struct evswitch evswitch;
@@ -36,6 +36,7 @@ struct perf_top {
bool use_tui, use_stdio;
bool vmlinux_warned;
bool dump_symtab;
+ bool stitch_lbr;
struct hist_entry *sym_filter_entry;
struct evsel *sym_evsel;
struct perf_session *session;
diff --git a/tools/perf/util/topdown.c b/tools/perf/util/topdown.c
new file mode 100644
index 000000000000..1081b20f9891
--- /dev/null
+++ b/tools/perf/util/topdown.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "pmu.h"
+#include "topdown.h"
+
+int topdown_filter_events(const char **attr, char **str, bool use_group)
+{
+ int off = 0;
+ int i;
+ int len = 0;
+ char *s;
+
+ for (i = 0; attr[i]; i++) {
+ if (pmu_have_event("cpu", attr[i])) {
+ len += strlen(attr[i]) + 1;
+ attr[i - off] = attr[i];
+ } else
+ off++;
+ }
+ attr[i - off] = NULL;
+
+ *str = malloc(len + 1 + 2);
+ if (!*str)
+ return -1;
+ s = *str;
+ if (i - off == 0) {
+ *s = 0;
+ return 0;
+ }
+ if (use_group)
+ *s++ = '{';
+ for (i = 0; attr[i]; i++) {
+ strcpy(s, attr[i]);
+ s += strlen(s);
+ *s++ = ',';
+ }
+ if (use_group) {
+ s[-1] = '}';
+ *s = 0;
+ } else
+ s[-1] = 0;
+ return 0;
+}
+
+__weak bool arch_topdown_check_group(bool *warn)
+{
+ *warn = false;
+ return false;
+}
+
+__weak void arch_topdown_group_warn(void)
+{
+}
+
+__weak bool arch_topdown_sample_read(struct evsel *leader __maybe_unused)
+{
+ return false;
+}
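
A usage sketch for topdown_filter_events() above (the event names are
illustrative; pmu_have_event() decides what survives the filter):

    const char *attrs[] = {
            "topdown-total-slots",
            "topdown-slots-retired",
            NULL,
    };
    char *str = NULL;

    if (!topdown_filter_events(attrs, &str, true) && *str)
            printf("parse these: %s\n", str); /* e.g. "{topdown-total-slots,topdown-slots-retired}" */
    free(str);
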
diff --git a/tools/perf/util/topdown.h b/tools/perf/util/topdown.h
new file mode 100644
index 000000000000..2f0d0b887639
--- /dev/null
+++ b/tools/perf/util/topdown.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef TOPDOWN_H
+#define TOPDOWN_H 1
+#include "evsel.h"
+
+bool arch_topdown_check_group(bool *warn);
+void arch_topdown_group_warn(void);
+bool arch_topdown_sample_read(struct evsel *leader);
+
+int topdown_filter_events(const char **attr, char **str, bool use_group);
+
+#endif
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 086e98ff42a3..0e5c4786f296 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -428,7 +428,7 @@ try_id:
if (!ppath->next) {
error:
pr_debug("No memory to alloc tracepoints list\n");
- put_tracepoints_path(&path);
+ put_tracepoints_path(path.next);
return NULL;
}
next:
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 8593d3c200c6..f507dff713c9 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -75,7 +75,7 @@ static void skip(int size)
r = size > BUFSIZ ? BUFSIZ : size;
do_read(buf, r);
size -= r;
- };
+ }
}
static unsigned int read4(struct tep_handle *pevent)
diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c
index bfa782421cbd..62b4c75c966c 100644
--- a/tools/perf/util/tsc.c
+++ b/tools/perf/util/tsc.c
@@ -1,7 +1,16 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+
#include <linux/compiler.h>
+#include <linux/perf_event.h>
+#include <linux/stddef.h>
#include <linux/types.h>
+#include <asm/barrier.h>
+
+#include "event.h"
+#include "synthetic-events.h"
+#include "debug.h"
#include "tsc.h"
u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc)
@@ -19,12 +28,84 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
{
u64 quot, rem;
+ if (tc->cap_user_time_short)
+ cyc = tc->time_cycles +
+ ((cyc - tc->time_cycles) & tc->time_mask);
+
quot = cyc >> tc->time_shift;
rem = cyc & (((u64)1 << tc->time_shift) - 1);
return tc->time_zero + quot * tc->time_mult +
((rem * tc->time_mult) >> tc->time_shift);
}
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+ struct perf_tsc_conversion *tc)
+{
+ u32 seq;
+ int i = 0;
+
+ while (1) {
+ seq = pc->lock;
+ rmb();
+ tc->time_mult = pc->time_mult;
+ tc->time_shift = pc->time_shift;
+ tc->time_zero = pc->time_zero;
+ tc->time_cycles = pc->time_cycles;
+ tc->time_mask = pc->time_mask;
+ tc->cap_user_time_zero = pc->cap_user_time_zero;
+ tc->cap_user_time_short = pc->cap_user_time_short;
+ rmb();
+ if (pc->lock == seq && !(seq & 1))
+ break;
+ if (++i > 10000) {
+ pr_debug("failed to get perf_event_mmap_page lock\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!tc->cap_user_time_zero)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+ struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ union perf_event event = {
+ .time_conv = {
+ .header = {
+ .type = PERF_RECORD_TIME_CONV,
+ .size = sizeof(struct perf_record_time_conv),
+ },
+ },
+ };
+ struct perf_tsc_conversion tc;
+ int err;
+
+ if (!pc)
+ return 0;
+ err = perf_read_tsc_conversion(pc, &tc);
+ if (err == -EOPNOTSUPP)
+ return 0;
+ if (err)
+ return err;
+
+ pr_debug2("Synthesizing TSC conversion information\n");
+
+ event.time_conv.time_mult = tc.time_mult;
+ event.time_conv.time_shift = tc.time_shift;
+ event.time_conv.time_zero = tc.time_zero;
+ event.time_conv.time_cycles = tc.time_cycles;
+ event.time_conv.time_mask = tc.time_mask;
+ event.time_conv.cap_user_time_zero = tc.cap_user_time_zero;
+ event.time_conv.cap_user_time_short = tc.cap_user_time_short;
+
+ return process(tool, &event, NULL, machine);
+}
+
u64 __weak rdtsc(void)
{
return 0;
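
The conversion implemented above is, with the new short-format handling
folded in:

    cyc' = time_cycles + ((cyc - time_cycles) & time_mask)   /* if cap_user_time_short */
    time = time_zero + (cyc' >> time_shift) * time_mult
                     + (((cyc' & (((u64)1 << time_shift) - 1)) * time_mult) >> time_shift)

i.e. time_zero + (cyc' * time_mult) >> time_shift, split into quotient and
remainder so the 64-bit multiply cannot overflow.
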
diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h
index 3c5a632ee57c..72a15419f3b3 100644
--- a/tools/perf/util/tsc.h
+++ b/tools/perf/util/tsc.h
@@ -8,6 +8,11 @@ struct perf_tsc_conversion {
u16 time_shift;
u32 time_mult;
u64 time_zero;
+ u64 time_cycles;
+ u64 time_mask;
+
+ bool cap_user_time_zero;
+ bool cap_user_time_short;
};
struct perf_event_mmap_page;
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index b4649f5a0c2f..9aededc0bc06 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -243,7 +243,7 @@ struct eh_frame_hdr {
* encoded_t fde_addr;
* } binary_search_table[fde_count];
*/
- char data[0];
+ char data[];
} __packed;
static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index d707c9624dd9..37a9492edb3e 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -290,6 +290,7 @@ int perf_event_paranoid(void)
bool perf_event_paranoid_check(int max_level)
{
return perf_cap__capable(CAP_SYS_ADMIN) ||
+ perf_cap__capable(CAP_PERFMON) ||
perf_event_paranoid() <= max_level;
}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index f486fdd3a538..ad737052e597 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -62,4 +62,10 @@ char *perf_exe(char *buf, int len);
#endif
#endif
+extern bool test_attr__enabled;
+void test_attr__ready(void);
+void test_attr__init(void);
+struct perf_event_attr;
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+ int fd, int group_fd, unsigned long flags);
#endif /* GIT_COMPAT_UTIL_H */
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
index d2202392ffdb..48dd2b018c47 100644
--- a/tools/perf/util/zstd.c
+++ b/tools/perf/util/zstd.c
@@ -99,7 +99,7 @@ size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size
while (input.pos < input.size) {
ret = ZSTD_decompressStream(data->dstream, &output, &input);
if (ZSTD_isError(ret)) {
- pr_err("failed to decompress (B): %ld -> %ld, dst_size %ld : %s\n",
+ pr_err("failed to decompress (B): %zd -> %zd, dst_size %zd : %s\n",
src_size, output.size, dst_size, ZSTD_getErrorName(ret));
break;
}
diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile
index ebd3e1a1c28e..a249c50ebf55 100644
--- a/tools/power/acpi/Makefile
+++ b/tools/power/acpi/Makefile
@@ -7,6 +7,8 @@
include ../../scripts/Makefile.include
+.NOTPARALLEL:
+
all: acpidbg acpidump ec
clean: acpidbg_clean acpidump_clean ec_clean
install: acpidbg_install acpidump_install ec_install
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
index 5aaddc79adf7..11c5046dce16 100644
--- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -35,7 +35,7 @@ static acpi_status osl_add_table_to_list(char *signature, u32 instance);
static acpi_status
osl_read_table_from_file(char *filename,
acpi_size file_offset,
- char *signature, struct acpi_table_header **table);
+ struct acpi_table_header **table);
static acpi_status
osl_map_table(acpi_size address,
@@ -110,7 +110,7 @@ u32 gbl_table_count = 0;
*
* RETURN: Status; Converted from errno.
*
- * DESCRIPTION: Get last errno and conver it to acpi_status.
+ * DESCRIPTION: Get last errno and convert it to acpi_status.
*
*****************************************************************************/
@@ -1184,8 +1184,6 @@ osl_table_name_from_file(char *filename, char *signature, u32 *instance)
*
* PARAMETERS: filename - File that contains the desired table
* file_offset - Offset of the table in file
- * signature - Optional ACPI Signature for desired table.
- * A null terminated 4-character string.
* table - Where a pointer to the table is returned
*
* RETURN: Status; Table buffer is returned if AE_OK.
@@ -1197,7 +1195,7 @@ osl_table_name_from_file(char *filename, char *signature, u32 *instance)
static acpi_status
osl_read_table_from_file(char *filename,
acpi_size file_offset,
- char *signature, struct acpi_table_header **table)
+ struct acpi_table_header **table)
{
FILE *table_file;
struct acpi_table_header header;
@@ -1225,6 +1223,8 @@ osl_read_table_from_file(char *filename,
goto exit;
}
+#ifdef ACPI_OBSOLETE_FUNCTIONS
+
/* If signature is specified, it must match the table */
if (signature) {
@@ -1244,6 +1244,7 @@ osl_read_table_from_file(char *filename,
goto exit;
}
}
+#endif
table_length = ap_get_table_length(&header);
if (table_length == 0) {
@@ -1366,7 +1367,7 @@ osl_get_customized_table(char *pathname,
/* There is no physical address saved for customized tables, use zero */
*address = 0;
- status = osl_read_table_from_file(table_filename, 0, NULL, table);
+ status = osl_read_table_from_file(table_filename, 0, table);
return (status);
}
diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c
index 6e04304560ca..c3b56db8b921 100644
--- a/tools/power/cpupower/lib/cpufreq.c
+++ b/tools/power/cpupower/lib/cpufreq.c
@@ -285,7 +285,7 @@ struct cpufreq_available_governors *cpufreq_get_available_governors(unsigned
} else {
first = malloc(sizeof(*first));
if (!first)
- goto error_out;
+ return NULL;
current = first;
}
current->first = first;
@@ -362,7 +362,7 @@ struct cpufreq_available_frequencies
} else {
first = malloc(sizeof(*first));
if (!first)
- goto error_out;
+ return NULL;
current = first;
}
current->first = first;
@@ -418,7 +418,7 @@ struct cpufreq_available_frequencies
} else {
first = malloc(sizeof(*first));
if (!first)
- goto error_out;
+ return NULL;
current = first;
}
current->first = first;
@@ -493,7 +493,7 @@ static struct cpufreq_affected_cpus *sysfs_get_cpu_list(unsigned int cpu,
} else {
first = malloc(sizeof(*first));
if (!first)
- goto error_out;
+ return NULL;
current = first;
}
current->first = first;
@@ -726,7 +726,7 @@ struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu,
} else {
first = malloc(sizeof(*first));
if (!first)
- goto error_out;
+ return NULL;
current = first;
}
current->first = first;
diff --git a/tools/power/cpupower/man/cpupower-idle-info.1 b/tools/power/cpupower/man/cpupower-idle-info.1
index 80a1311fa747..20b6345c53ad 100644
--- a/tools/power/cpupower/man/cpupower-idle-info.1
+++ b/tools/power/cpupower/man/cpupower-idle-info.1
@@ -75,7 +75,7 @@ By default only values of core zero are displayed. How to display settings of
other cores is described in the cpupower(1) manpage in the \-\-cpu option
section.
.SH REFERENCES
-http://www.acpi.info/spec.htm
+https://uefi.org/specifications
.SH "FILES"
.nf
\fI/sys/devices/system/cpu/cpu*/cpuidle/state*\fP
diff --git a/tools/power/cpupower/man/cpupower-monitor.1 b/tools/power/cpupower/man/cpupower-monitor.1
index 70a56476f4b0..8ee737eefa5c 100644
--- a/tools/power/cpupower/man/cpupower-monitor.1
+++ b/tools/power/cpupower/man/cpupower-monitor.1
@@ -170,7 +170,7 @@ displayed.
.SH REFERENCES
"BIOS and Kernel Developer’s Guide (BKDG) for AMD Family 14h Processors"
-http://support.amd.com/us/Processor_TechDocs/43170.pdf
+https://support.amd.com/us/Processor_TechDocs/43170.pdf
"Intel® Turbo Boost Technology
in Intel® Core™ Microarchitecture (Nehalem) Based Processors"
@@ -178,7 +178,7 @@ http://download.intel.com/design/processor/applnots/320354.pdf
"Intel® 64 and IA-32 Architectures Software Developer's Manual
Volume 3B: System Programming Guide"
-http://www.intel.com/products/processor/manuals
+https://www.intel.com/products/processor/manuals
.SH FILES
.ta
diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c
index 6ed82fba5aaa..7b2164e07057 100644
--- a/tools/power/cpupower/utils/cpufreq-set.c
+++ b/tools/power/cpupower/utils/cpufreq-set.c
@@ -99,13 +99,17 @@ static unsigned long string_to_frequency(const char *str)
continue;
if (str[cp] == '.') {
- while (power > -1 && isdigit(str[cp+1]))
- cp++, power--;
+ while (power > -1 && isdigit(str[cp+1])) {
+ cp++;
+ power--;
+ }
}
- if (power >= -1) /* not enough => pad */
+ if (power >= -1) { /* not enough => pad */
pad = power + 1;
- else /* to much => strip */
- pad = 0, cp += power + 1;
+ } else { /* too much => strip */
+ pad = 0;
+ cp += power + 1;
+ }
/* check bounds */
if (cp <= 0 || cp + pad > NORM_FREQ_LEN - 1)
return 0;
diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c
index d3755ea70d4d..0ba61a2c4d81 100644
--- a/tools/power/cpupower/utils/cpupower-info.c
+++ b/tools/power/cpupower/utils/cpupower-info.c
@@ -62,7 +62,7 @@ int cmd_info(int argc, char **argv)
default:
print_wrong_arg_exit();
}
- };
+ }
if (!params.params)
params.params = 0x7;
diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c
index 3cca6f715dd9..052044d7e012 100644
--- a/tools/power/cpupower/utils/cpupower-set.c
+++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -72,7 +72,7 @@ int cmd_set(int argc, char **argv)
default:
print_wrong_arg_exit();
}
- };
+ }
if (!params.params)
print_wrong_arg_exit();
diff --git a/tools/power/cpupower/utils/helpers/bitmask.c b/tools/power/cpupower/utils/helpers/bitmask.c
index 6c7932f5bd66..649d87cb8b0f 100644
--- a/tools/power/cpupower/utils/helpers/bitmask.c
+++ b/tools/power/cpupower/utils/helpers/bitmask.c
@@ -26,11 +26,11 @@ struct bitmask *bitmask_alloc(unsigned int n)
struct bitmask *bmp;
bmp = malloc(sizeof(*bmp));
- if (bmp == 0)
+ if (!bmp)
return 0;
bmp->size = n;
bmp->maskp = calloc(longsperbits(n), sizeof(unsigned long));
- if (bmp->maskp == 0) {
+ if (!bmp->maskp) {
free(bmp);
return 0;
}
@@ -40,7 +40,7 @@ struct bitmask *bitmask_alloc(unsigned int n)
/* Free `struct bitmask` */
void bitmask_free(struct bitmask *bmp)
{
- if (bmp == 0)
+ if (!bmp)
return;
free(bmp->maskp);
bmp->maskp = (unsigned long *)0xdeadcdef; /* double free tripwire */
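
Besides switching to the idiomatic !ptr NULL tests, the code keeps its double-free tripwire: the freed buffer pointer is overwritten with an obviously bogus value so a second free or a stale dereference faults immediately instead of silently corrupting the heap. A small illustrative example of the same idea (not the cpupower bitmask code):

#include <stdlib.h>

struct mask {
	unsigned long *bits;
	unsigned int size;
};

static void mask_free(struct mask *m)
{
	if (!m)				/* idiomatic NULL test, as in the hunks */
		return;
	free(m->bits);
	m->bits = (unsigned long *)0xdeadcdef;	/* poison: a later free/use faults */
	free(m);
}
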
diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
index 20f46348271b..5edd35bd9ee9 100644
--- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
@@ -117,7 +117,7 @@ static int amd_fam14h_get_pci_info(struct cstate *state,
break;
default:
return -1;
- };
+ }
return 0;
}
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index a65f7d011513..8b42c2f0a5b0 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -53,7 +53,7 @@ static int cpuidle_start(void)
dprint("CPU %d - State: %d - Val: %llu\n",
cpu, state, previous_count[cpu][state]);
}
- };
+ }
return 0;
}
@@ -72,7 +72,7 @@ static int cpuidle_stop(void)
dprint("CPU %d - State: %d - Val: %llu\n",
cpu, state, previous_count[cpu][state]);
}
- };
+ }
return 0;
}
@@ -172,7 +172,7 @@ static struct cpuidle_monitor *cpuidle_register(void)
cpuidle_cstates[num].id = num;
cpuidle_cstates[num].get_count_percent =
cpuidle_get_count_percent;
- };
+ }
/* Free this at program termination */
previous_count = malloc(sizeof(long long *) * cpu_count);
diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
index 97ad3233a521..55e55b6b42f9 100644
--- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
@@ -79,7 +79,7 @@ static int hsw_ext_get_count(enum intel_hsw_ext_id id, unsigned long long *val,
break;
default:
return -1;
- };
+ }
if (read_msr(cpu, msr, val))
return -1;
return 0;
diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
index 114271165182..16eaf006f61f 100644
--- a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
@@ -91,7 +91,7 @@ static int nhm_get_count(enum intel_nhm_id id, unsigned long long *val,
break;
default:
return -1;
- };
+ }
if (read_msr(cpu, msr, val))
return -1;
diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
index df8b223cc096..811d63ab17a7 100644
--- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
@@ -77,7 +77,7 @@ static int snb_get_count(enum intel_snb_id id, unsigned long long *val,
break;
default:
return -1;
- };
+ }
if (read_msr(cpu, msr, val))
return -1;
return 0;
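
The recurring "};" to "}" hunks in the idle monitors drop a stray semicolon after the closing brace of a switch (or for) block; the extra token compiles as an empty statement but is flagged by checkpatch. Illustrative only:

/* a switch needs no ';' after its closing brace; the extra one is an
 * empty statement that checkpatch warns about */
static int msr_for_id(int id, unsigned long long *msr)
{
	switch (id) {
	case 0:
		*msr = 0x3f8;
		break;
	default:
		return -1;
	}			/* no trailing ';' here */
	return 0;
}
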
diff --git a/tools/power/pm-graph/README b/tools/power/pm-graph/README
index afe6beb40ad9..89d0a7dab4bc 100644
--- a/tools/power/pm-graph/README
+++ b/tools/power/pm-graph/README
@@ -6,7 +6,7 @@
|_| |___/ |_|
pm-graph: suspend/resume/boot timing analysis tools
- Version: 5.6
+ Version: 5.7
Author: Todd Brandt <todd.e.brandt@intel.com>
Home Page: https://01.org/pm-graph
diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg
index 4f80ad7d7275..962e5768681c 100644
--- a/tools/power/pm-graph/config/custom-timeline-functions.cfg
+++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg
@@ -125,7 +125,7 @@ acpi_suspend_begin:
suspend_console:
acpi_pm_prepare:
syscore_suspend:
-arch_enable_nonboot_cpus_end:
+arch_thaw_secondary_cpus_end:
syscore_resume:
acpi_pm_finish:
resume_console:
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 9b0404d10768..1bc36a1db14f 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -81,7 +81,7 @@ def ascii(text):
# store system values and test parameters
class SystemValues:
title = 'SleepGraph'
- version = '5.6'
+ version = '5.7'
ansi = False
rs = 0
display = ''
@@ -171,7 +171,7 @@ class SystemValues:
tracefuncs = {
'sys_sync': {},
'ksys_sync': {},
- '__pm_notifier_call_chain': {},
+ 'pm_notifier_call_chain_robust': {},
'pm_prepare_console': {},
'pm_notifier_call_chain': {},
'freeze_processes': {},
@@ -924,10 +924,7 @@ class SystemValues:
tp = TestProps()
tf = self.openlog(self.ftracefile, 'r')
for line in tf:
- # determine the trace data type (required for further parsing)
- m = re.match(tp.tracertypefmt, line)
- if(m):
- tp.setTracerType(m.group('t'))
+ if tp.stampInfo(line, self):
continue
# parse only valid lines, if this is not one move on
m = re.match(tp.ftrace_line_fmt, line)
@@ -1244,8 +1241,8 @@ class DevProps:
if self.xtraclass:
return ' '+self.xtraclass
if self.isasync:
- return ' async_device'
- return ' sync_device'
+ return ' (async)'
+ return ' (sync)'
# Class: DeviceNode
# Description:
@@ -1301,6 +1298,7 @@ class Data:
'FAIL' : r'(?i).*\bFAILED\b.*',
'INVALID' : r'(?i).*\bINVALID\b.*',
'CRASH' : r'(?i).*\bCRASHED\b.*',
+ 'TIMEOUT' : r'(?i).*\bTIMEOUT\b.*',
'IRQ' : r'.*\bgenirq: .*',
'TASKFAIL': r'.*Freezing of tasks *.*',
'ACPI' : r'.*\bACPI *(?P<b>[A-Za-z]*) *Error[: ].*',
@@ -1358,11 +1356,11 @@ class Data:
if self.dmesg[p]['order'] == order:
return p
return ''
- def lastPhase(self):
+ def lastPhase(self, depth=1):
plist = self.sortedPhases()
- if len(plist) < 1:
+ if len(plist) < depth:
return ''
- return plist[-1]
+ return plist[-1*depth]
def turbostatInfo(self):
tp = TestProps()
out = {'syslpi':'N/A','pkgpc10':'N/A'}
@@ -1382,9 +1380,12 @@ class Data:
if len(self.dmesgtext) < 1 and sysvals.dmesgfile:
lf = sysvals.openlog(sysvals.dmesgfile, 'r')
i = 0
+ tp = TestProps()
list = []
for line in lf:
i += 1
+ if tp.stampInfo(line, sysvals):
+ continue
m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
if not m:
continue
@@ -1400,15 +1401,15 @@ class Data:
list.append((msg, err, dir, t, i, i))
self.kerror = True
break
- msglist = []
+ tp.msglist = []
for msg, type, dir, t, idx1, idx2 in list:
- msglist.append(msg)
+ tp.msglist.append(msg)
self.errorinfo[dir].append((type, t, idx1, idx2))
if self.kerror:
sysvals.dmesglog = True
if len(self.dmesgtext) < 1 and sysvals.dmesgfile:
lf.close()
- return msglist
+ return tp
def setStart(self, time, msg=''):
self.start = time
if msg:
@@ -1623,6 +1624,8 @@ class Data:
if('src' in d):
for e in d['src']:
e.time = self.trimTimeVal(e.time, t0, dT, left)
+ e.end = self.trimTimeVal(e.end, t0, dT, left)
+ e.length = e.end - e.time
for dir in ['suspend', 'resume']:
list = []
for e in self.errorinfo[dir]:
@@ -1640,7 +1643,12 @@ class Data:
if tL > 0:
left = True if tR > tZero else False
self.trimTime(tS, tL, left)
- self.tLow.append('%.0f'%(tL*1000))
+ if 'trying' in self.dmesg[lp] and self.dmesg[lp]['trying'] >= 0.001:
+ tTry = round(self.dmesg[lp]['trying'] * 1000)
+ text = '%.0f (-%.0f waking)' % (tL * 1000, tTry)
+ else:
+ text = '%.0f' % (tL * 1000)
+ self.tLow.append(text)
lp = phase
def getMemTime(self):
if not self.hwstart or not self.hwend:
@@ -1776,7 +1784,7 @@ class Data:
length = -1.0
if(start >= 0 and end >= 0):
length = end - start
- if pid == -2:
+ if pid == -2 or name not in sysvals.tracefuncs.keys():
i = 2
origname = name
while(name in list):
@@ -1789,6 +1797,15 @@ class Data:
if color:
list[name]['color'] = color
return name
+ def findDevice(self, phase, name):
+ list = self.dmesg[phase]['list']
+ mydev = ''
+ for devname in sorted(list):
+ if name == devname or re.match('^%s\[(?P<num>[0-9]*)\]$' % name, devname):
+ mydev = devname
+ if mydev:
+ return list[mydev]
+ return False
def deviceChildren(self, devname, phase):
devlist = []
list = self.dmesg[phase]['list']
@@ -2779,6 +2796,7 @@ class TestProps:
testerrfmt = '^# enter_sleep_error (?P<e>.*)'
sysinfofmt = '^# sysinfo .*'
cmdlinefmt = '^# command \| (?P<cmd>.*)'
+ kparamsfmt = '^# kparams \| (?P<kp>.*)'
devpropfmt = '# Device Properties: .*'
pinfofmt = '# platform-(?P<val>[a-z,A-Z,0-9]*): (?P<info>.*)'
tracertypefmt = '# tracer: (?P<t>.*)'
@@ -2790,8 +2808,9 @@ class TestProps:
'[ +!#\*@$]*(?P<dur>[0-9\.]*) .*\| (?P<msg>.*)'
ftrace_line_fmt_nop = \
' *(?P<proc>.*)-(?P<pid>[0-9]*) *\[(?P<cpu>[0-9]*)\] *'+\
- '(?P<flags>.{4}) *(?P<time>[0-9\.]*): *'+\
+ '(?P<flags>\S*) *(?P<time>[0-9\.]*): *'+\
'(?P<msg>.*)'
+ machinesuspend = 'machine_suspend\[.*'
def __init__(self):
self.stamp = ''
self.sysinfo = ''
@@ -2812,16 +2831,13 @@ class TestProps:
self.ftrace_line_fmt = self.ftrace_line_fmt_nop
else:
doError('Invalid tracer format: [%s]' % tracer)
- def stampInfo(self, line):
+ def stampInfo(self, line, sv):
if re.match(self.stampfmt, line):
self.stamp = line
return True
elif re.match(self.sysinfofmt, line):
self.sysinfo = line
return True
- elif re.match(self.cmdlinefmt, line):
- self.cmdline = line
- return True
elif re.match(self.tstatfmt, line):
self.turbostat.append(line)
return True
@@ -2834,6 +2850,20 @@ class TestProps:
elif re.match(self.firmwarefmt, line):
self.fwdata.append(line)
return True
+ elif(re.match(self.devpropfmt, line)):
+ self.parseDevprops(line, sv)
+ return True
+ elif(re.match(self.pinfofmt, line)):
+ self.parsePlatformInfo(line, sv)
+ return True
+ m = re.match(self.cmdlinefmt, line)
+ if m:
+ self.cmdline = m.group('cmd')
+ return True
+ m = re.match(self.tracertypefmt, line)
+ if(m):
+ self.setTracerType(m.group('t'))
+ return True
return False
def parseStamp(self, data, sv):
# global test data
@@ -2858,9 +2888,13 @@ class TestProps:
data.stamp[key] = val
sv.hostname = data.stamp['host']
sv.suspendmode = data.stamp['mode']
+ if sv.suspendmode == 'freeze':
+ self.machinesuspend = 'timekeeping_freeze\[.*'
+ else:
+ self.machinesuspend = 'machine_suspend\[.*'
if sv.suspendmode == 'command' and sv.ftracefile != '':
modes = ['on', 'freeze', 'standby', 'mem', 'disk']
- fp = sysvals.openlog(sv.ftracefile, 'r')
+ fp = sv.openlog(sv.ftracefile, 'r')
for line in fp:
m = re.match('.* machine_suspend\[(?P<mode>.*)\]', line)
if m and m.group('mode') in ['1', '2', '3', '4']:
@@ -2868,9 +2902,7 @@ class TestProps:
data.stamp['mode'] = sv.suspendmode
break
fp.close()
- m = re.match(self.cmdlinefmt, self.cmdline)
- if m:
- sv.cmdline = m.group('cmd')
+ sv.cmdline = self.cmdline
if not sv.stamp:
sv.stamp = data.stamp
# firmware data
@@ -3052,20 +3084,7 @@ def appendIncompleteTraceLog(testruns):
for line in tf:
# remove any latent carriage returns
line = line.replace('\r\n', '')
- if tp.stampInfo(line):
- continue
- # determine the trace data type (required for further parsing)
- m = re.match(tp.tracertypefmt, line)
- if(m):
- tp.setTracerType(m.group('t'))
- continue
- # device properties line
- if(re.match(tp.devpropfmt, line)):
- tp.parseDevprops(line, sysvals)
- continue
- # platform info line
- if(re.match(tp.pinfofmt, line)):
- tp.parsePlatformInfo(line, sysvals)
+ if tp.stampInfo(line, sysvals):
continue
# parse only valid lines, if this is not one move on
m = re.match(tp.ftrace_line_fmt, line)
@@ -3166,33 +3185,19 @@ def parseTraceLog(live=False):
if sysvals.usekprobes:
tracewatch += ['sync_filesystems', 'freeze_processes', 'syscore_suspend',
'syscore_resume', 'resume_console', 'thaw_processes', 'CPU_ON',
- 'CPU_OFF', 'timekeeping_freeze', 'acpi_suspend']
+ 'CPU_OFF', 'acpi_suspend']
# extract the callgraph and traceevent data
+ s2idle_enter = hwsus = False
tp = TestProps()
- testruns = []
- testdata = []
- testrun = 0
- data, limbo = 0, True
+ testruns, testdata = [], []
+ testrun, data, limbo = 0, 0, True
tf = sysvals.openlog(sysvals.ftracefile, 'r')
phase = 'suspend_prepare'
for line in tf:
# remove any latent carriage returns
line = line.replace('\r\n', '')
- if tp.stampInfo(line):
- continue
- # tracer type line: determine the trace data type
- m = re.match(tp.tracertypefmt, line)
- if(m):
- tp.setTracerType(m.group('t'))
- continue
- # device properties line
- if(re.match(tp.devpropfmt, line)):
- tp.parseDevprops(line, sysvals)
- continue
- # platform info line
- if(re.match(tp.pinfofmt, line)):
- tp.parsePlatformInfo(line, sysvals)
+ if tp.stampInfo(line, sysvals):
continue
# ignore all other commented lines
if line[0] == '#':
@@ -3303,16 +3308,29 @@ def parseTraceLog(live=False):
phase = data.setPhase('suspend_noirq', t.time, isbegin)
continue
# suspend_machine/resume_machine
- elif(re.match('machine_suspend\[.*', t.name)):
+ elif(re.match(tp.machinesuspend, t.name)):
+ lp = data.lastPhase()
if(isbegin):
- lp = data.lastPhase()
+ hwsus = True
if lp.startswith('resume_machine'):
- data.dmesg[lp]['end'] = t.time
+ # trim out s2idle loops, track time trying to freeze
+ llp = data.lastPhase(2)
+ if llp.startswith('suspend_machine'):
+ if 'trying' not in data.dmesg[llp]:
+ data.dmesg[llp]['trying'] = 0
+ data.dmesg[llp]['trying'] += \
+ t.time - data.dmesg[lp]['start']
+ data.currphase = ''
+ del data.dmesg[lp]
+ continue
phase = data.setPhase('suspend_machine', data.dmesg[lp]['end'], True)
data.setPhase(phase, t.time, False)
if data.tSuspended == 0:
data.tSuspended = t.time
else:
+ if lp.startswith('resume_machine'):
+ data.dmesg[lp]['end'] = t.time
+ continue
phase = data.setPhase('resume_machine', t.time, True)
if(sysvals.suspendmode in ['mem', 'disk']):
susp = phase.replace('resume', 'suspend')
@@ -3343,6 +3361,19 @@ def parseTraceLog(live=False):
# global events (outside device calls) are graphed
if(name not in testrun.ttemp):
testrun.ttemp[name] = []
+ # special handling for s2idle_enter
+ if name == 'machine_suspend':
+ if hwsus:
+ s2idle_enter = hwsus = False
+ elif s2idle_enter and not isbegin:
+ if(len(testrun.ttemp[name]) > 0):
+ testrun.ttemp[name][-1]['end'] = t.time
+ testrun.ttemp[name][-1]['loop'] += 1
+ elif not s2idle_enter and isbegin:
+ s2idle_enter = True
+ testrun.ttemp[name].append({'begin': t.time,
+ 'end': t.time, 'pid': pid, 'loop': 0})
+ continue
if(isbegin):
# create a new list entry
testrun.ttemp[name].append(\
@@ -3374,9 +3405,8 @@ def parseTraceLog(live=False):
if(not m):
continue
n = m.group('d')
- list = data.dmesg[phase]['list']
- if(n in list):
- dev = list[n]
+ dev = data.findDevice(phase, n)
+ if dev:
dev['length'] = t.time - dev['start']
dev['end'] = t.time
# kprobe event processing
@@ -3479,7 +3509,12 @@ def parseTraceLog(live=False):
# add actual trace funcs
for name in sorted(test.ttemp):
for event in test.ttemp[name]:
- data.newActionGlobal(name, event['begin'], event['end'], event['pid'])
+ if event['end'] - event['begin'] <= 0:
+ continue
+ title = name
+ if name == 'machine_suspend' and 'loop' in event:
+ title = 's2idle_enter_%dx' % event['loop']
+ data.newActionGlobal(title, event['begin'], event['end'], event['pid'])
# add the kprobe based virtual tracefuncs as actual devices
for key in sorted(tp.ktemp):
name, pid = key
@@ -3548,8 +3583,9 @@ def parseTraceLog(live=False):
for p in sorted(phasedef, key=lambda k:phasedef[k]['order']):
if p not in data.dmesg:
if not terr:
- pprint('TEST%s FAILED: %s failed in %s phase' % (tn, sysvals.suspendmode, lp))
- terr = '%s%s failed in %s phase' % (sysvals.suspendmode, tn, lp)
+ ph = p if 'machine' in p else lp
+ terr = '%s%s failed in %s phase' % (sysvals.suspendmode, tn, ph)
+ pprint('TEST%s FAILED: %s' % (tn, terr))
error.append(terr)
if data.tSuspended == 0:
data.tSuspended = data.dmesg[lp]['end']
@@ -3611,7 +3647,7 @@ def loadKernelLog():
idx = line.find('[')
if idx > 1:
line = line[idx:]
- if tp.stampInfo(line):
+ if tp.stampInfo(line, sysvals):
continue
m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
if(not m):
@@ -3959,18 +3995,20 @@ def addCallgraphs(sv, hf, data):
if sv.cgphase and p != sv.cgphase:
continue
list = data.dmesg[p]['list']
- for devname in data.sortedDevices(p):
- if len(sv.cgfilter) > 0 and devname not in sv.cgfilter:
+ for d in data.sortedDevices(p):
+ if len(sv.cgfilter) > 0 and d not in sv.cgfilter:
continue
- dev = list[devname]
+ dev = list[d]
color = 'white'
if 'color' in data.dmesg[p]:
color = data.dmesg[p]['color']
if 'color' in dev:
color = dev['color']
- name = devname
- if(devname in sv.devprops):
- name = sv.devprops[devname].altName(devname)
+ name = d if '[' not in d else d.split('[')[0]
+ if(d in sv.devprops):
+ name = sv.devprops[d].altName(d)
+ if 'drv' in dev and dev['drv']:
+ name += ' {%s}' % dev['drv']
if sv.suspendmode in suspendmodename:
name += ' '+p
if('ftrace' in dev):
@@ -4517,12 +4555,9 @@ def createHTML(testruns, testfail):
# draw the devices for this phase
phaselist = data.dmesg[b]['list']
for d in sorted(data.tdevlist[b]):
- name = d
- drv = ''
- dev = phaselist[d]
- xtraclass = ''
- xtrainfo = ''
- xtrastyle = ''
+ dname = d if '[' not in d else d.split('[')[0]
+ name, dev = dname, phaselist[d]
+ drv = xtraclass = xtrainfo = xtrastyle = ''
if 'htmlclass' in dev:
xtraclass = dev['htmlclass']
if 'color' in dev:
@@ -4553,7 +4588,7 @@ def createHTML(testruns, testfail):
title += b
devtl.html += devtl.html_device.format(dev['id'], \
title, left, top, '%.3f'%rowheight, width, \
- d+drv, xtraclass, xtrastyle)
+ dname+drv, xtraclass, xtrastyle)
if('cpuexec' in dev):
for t in sorted(dev['cpuexec']):
start, end = t
@@ -4571,6 +4606,8 @@ def createHTML(testruns, testfail):
continue
# draw any trace events for this device
for e in dev['src']:
+ if e.length == 0:
+ continue
height = '%.3f' % devtl.rowH
top = '%.3f' % (rowtop + devtl.scaleH + (e.row*devtl.rowH))
left = '%f' % (((e.time-m0)*100)/mTotal)
@@ -5876,7 +5913,7 @@ def getArgFloat(name, args, min, max, main=True):
def processData(live=False, quiet=False):
if not quiet:
- pprint('PROCESSING DATA')
+ pprint('PROCESSING: %s' % sysvals.htmlfile)
sysvals.vprint('usetraceevents=%s, usetracemarkers=%s, usekprobes=%s' % \
(sysvals.usetraceevents, sysvals.usetracemarkers, sysvals.usekprobes))
error = ''
@@ -5928,7 +5965,7 @@ def processData(live=False, quiet=False):
sysvals.vprint('Creating the html timeline (%s)...' % sysvals.htmlfile)
createHTML(testruns, error)
if not quiet:
- pprint('DONE')
+ pprint('DONE: %s' % sysvals.htmlfile)
data = testruns[0]
stamp = data.stamp
stamp['suspend'], stamp['resume'] = data.getTimeValues()
@@ -5984,25 +6021,27 @@ def runTest(n=0, quiet=False):
return 0
def find_in_html(html, start, end, firstonly=True):
- n, cnt, out = 0, len(html), []
- while n < cnt:
- e = cnt if (n + 10000 > cnt or n == 0) else n + 10000
- m = re.search(start, html[n:e])
- if not m:
- break
- i = m.end()
- m = re.search(end, html[n+i:e])
+ cnt, out, list = len(html), [], []
+ if firstonly:
+ m = re.search(start, html)
+ if m:
+ list.append(m)
+ else:
+ list = re.finditer(start, html)
+ for match in list:
+ s = match.end()
+ e = cnt if (len(out) < 1 or s + 10000 > cnt) else s + 10000
+ m = re.search(end, html[s:e])
if not m:
break
- j = m.start()
- str = html[n+i:n+i+j]
+ e = s + m.start()
+ str = html[s:e]
if end == 'ms':
num = re.search(r'[-+]?\d*\.\d+|\d+', str)
str = num.group() if num else 'NaN'
if firstonly:
return str
out.append(str)
- n += i+j
if firstonly:
return ''
return out
@@ -6034,7 +6073,7 @@ def data_from_html(file, outpath, issues, fulldetail=False):
else:
result = 'pass'
# extract error info
- ilist = []
+ tp, ilist = False, []
extra = dict()
log = find_in_html(html, '<div id="dmesglog" style="display:none;">',
'</div>').strip()
@@ -6042,8 +6081,8 @@ def data_from_html(file, outpath, issues, fulldetail=False):
d = Data(0)
d.end = 999999999
d.dmesgtext = log.split('\n')
- msglist = d.extractErrorInfo()
- for msg in msglist:
+ tp = d.extractErrorInfo()
+ for msg in tp.msglist:
sysvals.errorSummary(issues, msg)
if stmp[2] == 'freeze':
extra = d.turbostatInfo()
@@ -6059,8 +6098,8 @@ def data_from_html(file, outpath, issues, fulldetail=False):
if wifi:
extra['wifi'] = wifi
low = find_in_html(html, 'freeze time: <b>', ' ms</b>')
- if low and '|' in low:
- issue = 'FREEZEx%d' % len(low.split('|'))
+ if low and 'waking' in low:
+ issue = 'FREEZEWAKE'
match = [i for i in issues if i['match'] == issue]
if len(match) > 0:
match[0]['count'] += 1
@@ -6126,6 +6165,11 @@ def data_from_html(file, outpath, issues, fulldetail=False):
data[key] = extra[key]
if fulldetail:
data['funclist'] = find_in_html(html, '<div title="', '" class="traceevent"', False)
+ if tp:
+ for arg in ['-multi ', '-info ']:
+ if arg in tp.cmdline:
+ data['target'] = tp.cmdline[tp.cmdline.find(arg):].split()[1]
+ break
return data
def genHtml(subdir, force=False):
@@ -6155,8 +6199,7 @@ def runSummary(subdir, local=True, genhtml=False):
pprint('Generating a summary of folder:\n %s' % inpath)
if genhtml:
genHtml(subdir)
- issues = []
- testruns = []
+ target, issues, testruns = '', [], []
desc = {'host':[],'mode':[],'kernel':[]}
for dirname, dirnames, filenames in os.walk(subdir):
for filename in filenames:
@@ -6165,6 +6208,8 @@ def runSummary(subdir, local=True, genhtml=False):
data = data_from_html(os.path.join(dirname, filename), outpath, issues)
if(not data):
continue
+ if 'target' in data:
+ target = data['target']
testruns.append(data)
for key in desc:
if data[key] not in desc[key]:
@@ -6172,6 +6217,8 @@ def runSummary(subdir, local=True, genhtml=False):
pprint('Summary files:')
if len(desc['host']) == len(desc['mode']) == len(desc['kernel']) == 1:
title = '%s %s %s' % (desc['host'][0], desc['kernel'][0], desc['mode'][0])
+ if target:
+ title += ' %s' % target
else:
title = inpath
createHTMLSummarySimple(testruns, os.path.join(outpath, 'summary.html'), title)
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index b73763489410..cd089a505859 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.3";
+static const char *version_str = "v1.6";
static const int supported_api_ver = 1;
static struct isst_if_platform_info isst_platform_info;
static char *progname;
@@ -25,7 +25,7 @@ static FILE *outf;
static int cpu_model;
static int cpu_stepping;
-#define MAX_CPUS_IN_ONE_REQ 64
+#define MAX_CPUS_IN_ONE_REQ 256
static short max_target_cpus;
static unsigned short target_cpus[MAX_CPUS_IN_ONE_REQ];
@@ -44,6 +44,9 @@ static int force_online_offline;
static int auto_mode;
static int fact_enable_fail;
+static int mbox_delay;
+static int mbox_retries = 3;
+
/* clos related */
static int current_clos = -1;
static int clos_epp = -1;
@@ -198,7 +201,7 @@ int out_format_is_json(void)
static int get_stored_topology_info(int cpu, int *core_id, int *pkg_id, int *die_id)
{
- const char *pathname = "/tmp/isst_cpu_topology.dat";
+ const char *pathname = "/var/run/isst_cpu_topology.dat";
struct cpu_topology cpu_top;
FILE *fp;
int ret;
@@ -230,7 +233,7 @@ err_ret:
static void store_cpu_topology(void)
{
- const char *pathname = "/tmp/isst_cpu_topology.dat";
+ const char *pathname = "/var/run/isst_cpu_topology.dat";
FILE *fp;
int i;
@@ -247,6 +250,8 @@ static void store_cpu_topology(void)
return;
}
+ fprintf(stderr, "Caching topology information\n");
+
for (i = 0; i < topo_max_cpus; ++i) {
struct cpu_topology cpu_top;
@@ -540,20 +545,23 @@ static void set_cpu_present_cpu_mask(void)
}
}
-int get_core_count(int pkg_id, int die_id)
+int get_max_punit_core_id(int pkg_id, int die_id)
{
- int cnt = 0;
+ int max_id = 0;
+ int i;
- if (pkg_id < MAX_PACKAGE_COUNT && die_id < MAX_DIE_PER_PACKAGE) {
- int i;
+ for (i = 0; i < topo_max_cpus; ++i)
+ {
+ if (!CPU_ISSET_S(i, present_cpumask_size, present_cpumask))
+ continue;
- for (i = 0; i < sizeof(long long) * 8; ++i) {
- if (core_mask[pkg_id][die_id] & (1ULL << i))
- cnt++;
- }
+ if (cpu_map[i].pkg_id == pkg_id &&
+ cpu_map[i].die_id == die_id &&
+ cpu_map[i].punit_cpu_core > max_id)
+ max_id = cpu_map[i].punit_cpu_core;
}
- return cnt;
+ return max_id;
}
int get_cpu_count(int pkg_id, int die_id)
@@ -653,7 +661,7 @@ void set_cpu_mask_from_punit_coremask(int cpu, unsigned long long core_mask,
pkg_id = get_physical_package_id(cpu);
for (i = 0; i < 64; ++i) {
- if (core_mask & BIT(i)) {
+ if (core_mask & BIT_ULL(i)) {
int j;
for (j = 0; j < topo_max_cpus; ++j) {
@@ -734,7 +742,7 @@ int isst_send_mbox_command(unsigned int cpu, unsigned char command,
unsigned int req_data, unsigned int *resp)
{
const char *pathname = "/dev/isst_interface";
- int fd;
+ int fd, retry;
struct isst_if_mbox_cmds mbox_cmds = { 0 };
debug_printf(
@@ -786,29 +794,42 @@ int isst_send_mbox_command(unsigned int cpu, unsigned char command,
mbox_cmds.mbox_cmd[0].parameter = parameter;
mbox_cmds.mbox_cmd[0].req_data = req_data;
+ if (mbox_delay)
+ usleep(mbox_delay * 1000);
+
fd = open(pathname, O_RDWR);
if (fd < 0)
err(-1, "%s open failed", pathname);
- if (ioctl(fd, ISST_IF_MBOX_COMMAND, &mbox_cmds) == -1) {
- if (errno == ENOTTY) {
- perror("ISST_IF_MBOX_COMMAND\n");
- fprintf(stderr, "Check presence of kernel modules: isst_if_mbox_pci or isst_if_mbox_msr\n");
- exit(0);
+ retry = mbox_retries;
+
+ do {
+ if (ioctl(fd, ISST_IF_MBOX_COMMAND, &mbox_cmds) == -1) {
+ if (errno == ENOTTY) {
+ perror("ISST_IF_MBOX_COMMAND\n");
+ fprintf(stderr, "Check presence of kernel modules: isst_if_mbox_pci or isst_if_mbox_msr\n");
+ exit(0);
+ }
+ debug_printf(
+ "Error: mbox_cmd cpu:%d command:%x sub_command:%x parameter:%x req_data:%x errorno:%d\n",
+ cpu, command, sub_command, parameter, req_data, errno);
+ --retry;
+ } else {
+ *resp = mbox_cmds.mbox_cmd[0].resp_data;
+ debug_printf(
+ "mbox_cmd response: cpu:%d command:%x sub_command:%x parameter:%x req_data:%x resp:%x\n",
+ cpu, command, sub_command, parameter, req_data, *resp);
+ break;
}
- debug_printf(
- "Error: mbox_cmd cpu:%d command:%x sub_command:%x parameter:%x req_data:%x errorno:%d\n",
- cpu, command, sub_command, parameter, req_data, errno);
- return -1;
- } else {
- *resp = mbox_cmds.mbox_cmd[0].resp_data;
- debug_printf(
- "mbox_cmd response: cpu:%d command:%x sub_command:%x parameter:%x req_data:%x resp:%x\n",
- cpu, command, sub_command, parameter, req_data, *resp);
- }
+ } while (retry);
close(fd);
+ if (!retry) {
+ debug_printf("Failed mbox command even after retries\n");
+ return -1;
+
+ }
return 0;
}
@@ -1169,6 +1190,7 @@ static void dump_clx_n_config_for_cpu(int cpu, void *arg1, void *arg2,
ctdp_level = &clx_n_pkg_dev.ctdp_level[0];
pbf_info = &ctdp_level->pbf_info;
+ clx_n_pkg_dev.processed = 1;
isst_ctdp_display_information(cpu, outf, tdp_level, &clx_n_pkg_dev);
free_cpu_set(ctdp_level->core_cpumask);
free_cpu_set(pbf_info->core_cpumask);
@@ -1244,7 +1266,11 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
fprintf(stderr, "Option is set to online/offline\n");
ctdp_level.core_cpumask_size =
alloc_cpu_set(&ctdp_level.core_cpumask);
- isst_get_coremask_info(cpu, tdp_level, &ctdp_level);
+ ret = isst_get_coremask_info(cpu, tdp_level, &ctdp_level);
+ if (ret) {
+ isst_display_error_info_message(1, "Can't get coremask, online/offline option is ignored", 0, 0);
+ return;
+ }
if (ctdp_level.cpu_count) {
int i, max_cpus = get_topo_max_cpus();
for (i = 0; i < max_cpus; ++i) {
@@ -1631,6 +1657,8 @@ static int set_pbf_core_power(int cpu)
static void set_pbf_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
void *arg4)
{
+ struct isst_pkg_ctdp_level_info ctdp_level;
+ struct isst_pkg_ctdp pkg_dev;
int ret;
int status = *(int *)arg4;
@@ -1646,6 +1674,24 @@ static void set_pbf_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
goto disp_result;
}
+ ret = isst_get_ctdp_levels(cpu, &pkg_dev);
+ if (ret) {
+ isst_display_error_info_message(1, "Failed to get number of levels", 0, 0);
+ goto disp_result;
+ }
+
+ ret = isst_get_ctdp_control(cpu, pkg_dev.current_level, &ctdp_level);
+ if (ret) {
+ isst_display_error_info_message(1, "Failed to get current level", 0, 0);
+ goto disp_result;
+ }
+
+ if (!ctdp_level.pbf_support) {
+ isst_display_error_info_message(1, "base-freq feature is not present at this level", 1, pkg_dev.current_level);
+ ret = -1;
+ goto disp_result;
+ }
+
if (auto_mode && status) {
ret = set_pbf_core_power(cpu);
if (ret)
@@ -1772,10 +1818,30 @@ static void dump_fact_config(int arg)
static void set_fact_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
void *arg4)
{
+ struct isst_pkg_ctdp_level_info ctdp_level;
+ struct isst_pkg_ctdp pkg_dev;
int ret;
int status = *(int *)arg4;
- if (auto_mode && status) {
+ ret = isst_get_ctdp_levels(cpu, &pkg_dev);
+ if (ret) {
+ isst_display_error_info_message(1, "Failed to get number of levels", 0, 0);
+ goto disp_results;
+ }
+
+ ret = isst_get_ctdp_control(cpu, pkg_dev.current_level, &ctdp_level);
+ if (ret) {
+ isst_display_error_info_message(1, "Failed to get current level", 0, 0);
+ goto disp_results;
+ }
+
+ if (!ctdp_level.fact_support) {
+ isst_display_error_info_message(1, "turbo-freq feature is not present at this level", 1, pkg_dev.current_level);
+ ret = -1;
+ goto disp_results;
+ }
+
+ if (status) {
ret = isst_pm_qos_config(cpu, 1, 1);
if (ret)
goto disp_results;
@@ -2552,6 +2618,8 @@ static void usage(void)
printf("\t[-i|--info] : Print platform information\n");
printf("\t[-o|--out] : Output file\n");
printf("\t\t\tDefault : stderr\n");
+ printf("\t[-p|--pause] : Delay between two mail box commands in milliseconds\n");
+ printf("\t[-r|--retry] : Retry count for mail box commands on failure, default 3\n");
printf("\t[-v|--version] : Print version\n");
printf("\nResult format\n");
@@ -2583,6 +2651,7 @@ static void print_version(void)
static void cmdline(int argc, char **argv)
{
const char *pathname = "/dev/isst_interface";
+ char *ptr;
FILE *fp;
int opt;
int option_index = 0;
@@ -2594,7 +2663,9 @@ static void cmdline(int argc, char **argv)
{ "format", required_argument, 0, 'f' },
{ "help", no_argument, 0, 'h' },
{ "info", no_argument, 0, 'i' },
+ { "pause", required_argument, 0, 'p' },
{ "out", required_argument, 0, 'o' },
+ { "retry", required_argument, 0, 'r' },
{ "version", no_argument, 0, 'v' },
{ 0, 0, 0, 0 }
};
@@ -2647,6 +2718,20 @@ static void cmdline(int argc, char **argv)
fclose(outf);
outf = fopen_or_exit(optarg, "w");
break;
+ case 'p':
+ ret = strtol(optarg, &ptr, 10);
+ if (!ret)
+ fprintf(stderr, "Invalid pause interval, ignore\n");
+ else
+ mbox_delay = ret;
+ break;
+ case 'r':
+ ret = strtol(optarg, &ptr, 10);
+ if (!ret)
+ fprintf(stderr, "Invalid retry count, ignore\n");
+ else
+ mbox_retries = ret;
+ break;
case 'v':
print_version();
break;
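
The largest functional change above wraps the ISST mailbox ioctl in a retry loop with an optional inter-command delay, both tunable through the new -p/--pause and -r/--retry options. A minimal sketch of that retry pattern, with a hypothetical do_ioctl() callback standing in for the real ISST_IF_MBOX_COMMAND request:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

static int delay_ms;		/* 0 means no pause between commands */
static int retries = 3;		/* matches the new default above     */

int send_with_retry(int fd, int (*do_ioctl)(int))
{
	int left = retries;

	if (delay_ms)
		usleep(delay_ms * 1000);	/* throttle back-to-back commands */

	do {
		if (do_ioctl(fd) == 0)
			return 0;		/* success: stop retrying */
		fprintf(stderr, "mbox attempt failed: errno %d\n", errno);
	} while (--left);

	return -1;				/* every attempt failed */
}
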
diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c
index 67c9b1139631..1d7ecb54352e 100644
--- a/tools/power/x86/intel-speed-select/isst-core.c
+++ b/tools/power/x86/intel-speed-select/isst-core.c
@@ -396,7 +396,7 @@ int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info)
{
struct isst_pkg_ctdp_level_info ctdp_level;
struct isst_pkg_ctdp pkg_dev;
- int i, ret, core_cnt, max;
+ int i, ret, max_punit_core, max_mask_index;
unsigned int req, resp;
ret = isst_get_ctdp_levels(cpu, &pkg_dev);
@@ -421,10 +421,10 @@ int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info)
pbf_info->core_cpumask_size = alloc_cpu_set(&pbf_info->core_cpumask);
- core_cnt = get_core_count(get_physical_package_id(cpu), get_physical_die_id(cpu));
- max = core_cnt > 32 ? 2 : 1;
+ max_punit_core = get_max_punit_core_id(get_physical_package_id(cpu), get_physical_die_id(cpu));
+ max_mask_index = max_punit_core > 32 ? 2 : 1;
- for (i = 0; i < max; ++i) {
+ for (i = 0; i < max_mask_index; ++i) {
unsigned long long mask;
int count;
@@ -912,16 +912,16 @@ int isst_pm_qos_config(int cpu, int enable_clos, int priority_type)
return ret;
if (ctdp_level.fact_enabled) {
- debug_printf("Turbo-freq feature must be disabled first\n");
+ isst_display_error_info_message(1, "Ignoring request, turbo-freq feature is still enabled", 0, 0);
return -EINVAL;
}
ret = isst_write_pm_config(cpu, 0);
if (ret)
- isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error\n", 0, 0);
+ isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error", 0, 0);
} else {
ret = isst_write_pm_config(cpu, 1);
if (ret)
- isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error\n", 0, 0);
+ isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error", 0, 0);
}
ret = isst_send_mbox_command(cpu, CONFIG_CLOS, CLOS_PM_QOS_CONFIG, 0, 0,
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index 51dbaa5f02ec..e105fece47b6 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -316,21 +316,31 @@ void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix,
{
char header[256];
char value[256];
+ int level = 1;
+
+ if (out_format_is_json()) {
+ snprintf(header, sizeof(header), "package-%d:die-%d:cpu-%d",
+ get_physical_package_id(cpu), get_physical_die_id(cpu),
+ cpu);
+ format_and_print(outf, level++, header, NULL);
+ } else {
+ snprintf(header, sizeof(header), "package-%d",
+ get_physical_package_id(cpu));
+ format_and_print(outf, level++, header, NULL);
+ snprintf(header, sizeof(header), "die-%d",
+ get_physical_die_id(cpu));
+ format_and_print(outf, level++, header, NULL);
+ snprintf(header, sizeof(header), "cpu-%d", cpu);
+ format_and_print(outf, level++, header, NULL);
+ }
- snprintf(header, sizeof(header), "package-%d",
- get_physical_package_id(cpu));
- format_and_print(outf, 1, header, NULL);
- snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
- format_and_print(outf, 2, header, NULL);
- snprintf(header, sizeof(header), "cpu-%d", cpu);
- format_and_print(outf, 3, header, NULL);
if (str0 && !val)
snprintf(value, sizeof(value), "%s", str0);
else if (str1 && val)
snprintf(value, sizeof(value), "%s", str1);
else
snprintf(value, sizeof(value), "%u", val);
- format_and_print(outf, 4, prefix, value);
+ format_and_print(outf, level, prefix, value);
format_and_print(outf, 1, NULL, NULL);
}
@@ -470,7 +480,7 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
_isst_pbf_display_information(cpu, outf,
tdp_level,
&ctdp_level->pbf_info,
- level + 1);
+ level + 2);
continue;
}
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 2e1afd856a78..29715e9c2e06 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -29,6 +29,7 @@
#include <sys/ioctl.h>
#define BIT(x) (1 << (x))
+#define BIT_ULL(nr) (1ULL << (nr))
#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h))))
#define GENMASK_ULL(h, l) \
(((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))
@@ -169,7 +170,7 @@ struct isst_pkg_ctdp {
extern int get_topo_max_cpus(void);
extern int get_cpu_count(int pkg_id, int die_id);
-extern int get_core_count(int pkg_id, int die_id);
+extern int get_max_punit_core_id(int pkg_id, int die_id);
/* Common interfaces */
FILE *get_output_file(void);
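
The new BIT_ULL() macro exists because the core-mask walk in isst-config.c now iterates over 64 bits; with the plain int BIT() macro, shifting 1 by 31 or more is undefined and the upper half of the punit core mask would be lost. A hedged illustration (self-contained, not the isst sources):

#include <stdio.h>

#define BIT(x)		(1 << (x))	/* int arithmetic, breaks for x >= 31 */
#define BIT_ULL(nr)	(1ULL << (nr))	/* 64-bit, valid for nr up to 63      */

int main(void)
{
	unsigned long long core_mask = 1ULL << 40;	/* punit core 40 present */
	int i, found = -1;

	for (i = 0; i < 64; ++i)
		if (core_mask & BIT_ULL(i))	/* BIT(i) would miss this bit */
			found = i;

	printf("highest set core: %d\n", found);	/* prints 40 */
	return 0;
}
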
diff --git a/tools/spi/Makefile b/tools/spi/Makefile
index 2249a1546cc1..ada881afb489 100644
--- a/tools/spi/Makefile
+++ b/tools/spi/Makefile
@@ -52,7 +52,9 @@ $(OUTPUT)spidev_fdx: $(SPIDEV_FDX_IN)
clean:
rm -f $(ALL_PROGRAMS)
rm -rf $(OUTPUT)include/
- find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
+ find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete
+ find $(if $(OUTPUT),$(OUTPUT),.) -name '\.*.o.d' -delete
+ find $(if $(OUTPUT),$(OUTPUT),.) -name '\.*.o.cmd' -delete
install: $(ALL_PROGRAMS)
install -d -m 755 $(DESTDIR)$(bindir); \
diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
index 27967dd90f8f..83844f8b862a 100644
--- a/tools/spi/spidev_test.c
+++ b/tools/spi/spidev_test.c
@@ -47,7 +47,7 @@ static int transfer_size;
static int iterations;
static int interval = 5; /* interval in seconds for showing transfer rate */
-uint8_t default_tx[] = {
+static uint8_t default_tx[] = {
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0x40, 0x00, 0x00, 0x00, 0x00, 0x95,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
@@ -56,8 +56,8 @@ uint8_t default_tx[] = {
0xF0, 0x0D,
};
-uint8_t default_rx[ARRAY_SIZE(default_tx)] = {0, };
-char *input_tx;
+static uint8_t default_rx[ARRAY_SIZE(default_tx)] = {0, };
+static char *input_tx;
static void hex_dump(const void *src, size_t length, size_t line_size,
char *prefix)
@@ -128,18 +128,22 @@ static void transfer(int fd, uint8_t const *tx, uint8_t const *rx, size_t len)
.bits_per_word = bits,
};
- if (mode & SPI_TX_QUAD)
+ if (mode & SPI_TX_OCTAL)
+ tr.tx_nbits = 8;
+ else if (mode & SPI_TX_QUAD)
tr.tx_nbits = 4;
else if (mode & SPI_TX_DUAL)
tr.tx_nbits = 2;
- if (mode & SPI_RX_QUAD)
+ if (mode & SPI_RX_OCTAL)
+ tr.rx_nbits = 8;
+ else if (mode & SPI_RX_QUAD)
tr.rx_nbits = 4;
else if (mode & SPI_RX_DUAL)
tr.rx_nbits = 2;
if (!(mode & SPI_LOOP)) {
- if (mode & (SPI_TX_QUAD | SPI_TX_DUAL))
+ if (mode & (SPI_TX_OCTAL | SPI_TX_QUAD | SPI_TX_DUAL))
tr.rx_buf = 0;
- else if (mode & (SPI_RX_QUAD | SPI_RX_DUAL))
+ else if (mode & (SPI_RX_OCTAL | SPI_RX_QUAD | SPI_RX_DUAL))
tr.tx_buf = 0;
}
@@ -187,6 +191,7 @@ static void print_usage(const char *prog)
" -R --ready slave pulls low to pause\n"
" -2 --dual dual transfer\n"
" -4 --quad quad transfer\n"
+ " -8 --octal octal transfer\n"
" -S --size transfer size\n"
" -I --iter iterations\n");
exit(1);
@@ -213,13 +218,14 @@ static void parse_opts(int argc, char *argv[])
{ "dual", 0, 0, '2' },
{ "verbose", 0, 0, 'v' },
{ "quad", 0, 0, '4' },
+ { "octal", 0, 0, '8' },
{ "size", 1, 0, 'S' },
{ "iter", 1, 0, 'I' },
{ NULL, 0, 0, 0 },
};
int c;
- c = getopt_long(argc, argv, "D:s:d:b:i:o:lHOLC3NR24p:vS:I:",
+ c = getopt_long(argc, argv, "D:s:d:b:i:o:lHOLC3NR248p:vS:I:",
lopts, NULL);
if (c == -1)
@@ -280,6 +286,9 @@ static void parse_opts(int argc, char *argv[])
case '4':
mode |= SPI_TX_QUAD;
break;
+ case '8':
+ mode |= SPI_TX_OCTAL;
+ break;
case 'S':
transfer_size = atoi(optarg);
break;
@@ -295,6 +304,8 @@ static void parse_opts(int argc, char *argv[])
mode |= SPI_RX_DUAL;
if (mode & SPI_TX_QUAD)
mode |= SPI_RX_QUAD;
+ if (mode & SPI_TX_OCTAL)
+ mode |= SPI_RX_OCTAL;
}
}
@@ -450,8 +461,8 @@ int main(int argc, char *argv[])
pabort("can't get max speed hz");
printf("spi mode: 0x%x\n", mode);
- printf("bits per word: %d\n", bits);
- printf("max speed: %d Hz (%d KHz)\n", speed, speed/1000);
+ printf("bits per word: %u\n", bits);
+ printf("max speed: %u Hz (%u kHz)\n", speed, speed/1000);
if (input_tx)
transfer_escaped_string(fd, input_tx);
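
The spidev_test changes extend the bus-width selection to octal transfers: the widest mode flag present wins, and the loopback masking now covers the octal bits as well. A sketch of that selection, assuming a uapi <linux/spi/spidev.h> recent enough to define SPI_TX_OCTAL (the tx_width() helper is illustrative only):

#include <stdio.h>
#include <linux/spi/spidev.h>

/* illustrative helper: pick the transmit word width from the mode flags */
static unsigned char tx_width(unsigned int mode)
{
	if (mode & SPI_TX_OCTAL)
		return 8;	/* eight data lines */
	if (mode & SPI_TX_QUAD)
		return 4;
	if (mode & SPI_TX_DUAL)
		return 2;
	return 1;		/* classic single-bit MOSI */
}

int main(void)
{
	printf("quad tx width: %u\n", tx_width(SPI_TX_QUAD));	/* prints 4 */
	return 0;
}
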
diff --git a/tools/testing/ktest/examples/README b/tools/testing/ktest/examples/README
index a12d295a09d8..4f048789b260 100644
--- a/tools/testing/ktest/examples/README
+++ b/tools/testing/ktest/examples/README
@@ -11,7 +11,7 @@ crosstests.conf - this config shows an example of testing a git repo against
lots of different architectures. It only does build tests, but makes
it easy to compile test different archs. You can download the arch
cross compilers from:
- http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+ https://kernel.org/pub/tools/crosstool/files/bin/x86_64/
test.conf - A generic example of a config. This is based on an actual config
used to perform real testing.
diff --git a/tools/testing/ktest/examples/crosstests.conf b/tools/testing/ktest/examples/crosstests.conf
index 6907f32590b2..3b15e85f26bd 100644
--- a/tools/testing/ktest/examples/crosstests.conf
+++ b/tools/testing/ktest/examples/crosstests.conf
@@ -3,7 +3,7 @@
#
# In this config, it is expected that the tool chains from:
#
-# http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+# https://kernel.org/pub/tools/crosstool/files/bin/x86_64/
#
# running on a x86_64 system have been downloaded and installed into:
#
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 7570e36d636d..cb16d2aac51c 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -11,6 +11,7 @@ use File::Path qw(mkpath);
use File::Copy qw(cp);
use FileHandle;
use FindBin;
+use IO::Handle;
my $VERSION = "0.2";
@@ -81,6 +82,8 @@ my %default = (
"IGNORE_UNUSED" => 0,
);
+my $test_log_start = 0;
+
my $ktest_config = "ktest.conf";
my $version;
my $have_version = 0;
@@ -98,6 +101,7 @@ my $final_post_ktest;
my $pre_ktest;
my $post_ktest;
my $pre_test;
+my $pre_test_die;
my $post_test;
my $pre_build;
my $post_build;
@@ -223,6 +227,7 @@ my $dirname = $FindBin::Bin;
my $mailto;
my $mailer;
my $mail_path;
+my $mail_max_size;
my $mail_command;
my $email_on_error;
my $email_when_finished;
@@ -259,6 +264,7 @@ my %option_map = (
"MAILTO" => \$mailto,
"MAILER" => \$mailer,
"MAIL_PATH" => \$mail_path,
+ "MAIL_MAX_SIZE" => \$mail_max_size,
"MAIL_COMMAND" => \$mail_command,
"EMAIL_ON_ERROR" => \$email_on_error,
"EMAIL_WHEN_FINISHED" => \$email_when_finished,
@@ -273,6 +279,7 @@ my %option_map = (
"PRE_KTEST" => \$pre_ktest,
"POST_KTEST" => \$post_ktest,
"PRE_TEST" => \$pre_test,
+ "PRE_TEST_DIE" => \$pre_test_die,
"POST_TEST" => \$post_test,
"BUILD_TYPE" => \$build_type,
"BUILD_OPTIONS" => \$build_options,
@@ -507,9 +514,7 @@ EOF
sub _logit {
if (defined($opt{"LOG_FILE"})) {
- open(OUT, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
- print OUT @_;
- close(OUT);
+ print LOG @_;
}
}
@@ -909,6 +914,12 @@ sub process_expression {
}
}
+ if ($val =~ s/^\s*NOT\s+(.*)//) {
+ my $express = $1;
+ my $ret = process_expression($name, $express);
+ return !$ret;
+ }
+
if ($val =~ /^\s*0\s*$/) {
return 0;
} elsif ($val =~ /^\s*\d+\s*$/) {
@@ -1485,8 +1496,32 @@ sub dodie {
if ($email_on_error) {
my $name = get_test_name;
+ my $log_file;
+
+ if (defined($opt{"LOG_FILE"})) {
+ my $whence = 0; # beginning of file
+ my $pos = $test_log_start;
+
+ if (defined($mail_max_size)) {
+ my $log_size = tell LOG;
+ $log_size -= $test_log_start;
+ if ($log_size > $mail_max_size) {
+ $whence = 2; # end of file
+ $pos = - $mail_max_size;
+ }
+ }
+ $log_file = "$tmpdir/log";
+ open (L, "$opt{LOG_FILE}") or die "Can't open $opt{LOG_FILE} to read)";
+ open (O, "> $tmpdir/log") or die "Can't open $tmpdir/log\n";
+ seek(L, $pos, $whence);
+ while (<L>) {
+ print O;
+ }
+ close O;
+ close L;
+ }
send_email("KTEST: critical failure for test $i [$name]",
- "Your test started at $script_start_time has failed with:\n@_\n");
+ "Your test started at $script_start_time has failed with:\n@_\n", $log_file);
}
if ($monitor_cnt) {
@@ -1508,7 +1543,7 @@ sub create_pty {
my $TIOCGPTN = 0x80045430;
sysopen($ptm, "/dev/ptmx", O_RDWR | O_NONBLOCK) or
- dodie "Cant open /dev/ptmx";
+ dodie "Can't open /dev/ptmx";
# unlockpt()
$tmp = pack("i", 0);
@@ -1772,8 +1807,6 @@ sub run_command {
(fail "unable to exec $command" and return 0);
if (defined($opt{"LOG_FILE"})) {
- open(LOG, ">>$opt{LOG_FILE}") or
- dodie "failed to write to log";
$dolog = 1;
}
@@ -1821,7 +1854,6 @@ sub run_command {
}
close(CMD);
- close(LOG) if ($dolog);
close(RD) if ($dord);
$end_time = time;
@@ -3188,6 +3220,8 @@ sub config_bisect_end {
doprint "***************************************\n\n";
}
+my $pass = 1;
+
sub run_config_bisect {
my ($good, $bad, $last_result) = @_;
my $reset = "";
@@ -3210,11 +3244,15 @@ sub run_config_bisect {
$ret = run_config_bisect_test $config_bisect_type;
if ($ret) {
- doprint "NEW GOOD CONFIG\n";
+ doprint "NEW GOOD CONFIG ($pass)\n";
+ system("cp $output_config $tmpdir/good_config.tmp.$pass");
+ $pass++;
# Return 3 for good config
return 3;
} else {
- doprint "NEW BAD CONFIG\n";
+ doprint "NEW BAD CONFIG ($pass)\n";
+ system("cp $output_config $tmpdir/bad_config.tmp.$pass");
+ $pass++;
# Return 4 for bad config
return 4;
}
@@ -4077,8 +4115,12 @@ if ($#new_configs >= 0) {
}
}
-if ($opt{"CLEAR_LOG"} && defined($opt{"LOG_FILE"})) {
- unlink $opt{"LOG_FILE"};
+if (defined($opt{"LOG_FILE"})) {
+ if ($opt{"CLEAR_LOG"}) {
+ unlink $opt{"LOG_FILE"};
+ }
+ open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
+ LOG->autoflush(1);
}
doprint "\n\nSTARTING AUTOMATED TESTS\n\n";
@@ -4171,7 +4213,7 @@ sub find_mailer {
}
sub do_send_mail {
- my ($subject, $message) = @_;
+ my ($subject, $message, $file) = @_;
if (!defined($mail_path)) {
# find the mailer
@@ -4181,16 +4223,30 @@ sub do_send_mail {
}
}
+ my $header_file = "$tmpdir/header";
+ open (HEAD, ">$header_file") or die "Can not create $header_file\n";
+ print HEAD "To: $mailto\n";
+ print HEAD "Subject: $subject\n\n";
+ print HEAD "$message\n";
+ close HEAD;
+
if (!defined($mail_command)) {
if ($mailer eq "mail" || $mailer eq "mailx") {
- $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'";
+ $mail_command = "cat \$HEADER_FILE \$BODY_FILE | \$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO";
} elsif ($mailer eq "sendmail" ) {
- $mail_command = "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO";
+ $mail_command = "cat \$HEADER_FILE \$BODY_FILE | \$MAIL_PATH/\$MAILER -t \$MAILTO";
} else {
die "\nYour mailer: $mailer is not supported.\n";
}
}
+ if (defined($file)) {
+ $mail_command =~ s/\$BODY_FILE/$file/g;
+ } else {
+ $mail_command =~ s/\$BODY_FILE//g;
+ }
+
+ $mail_command =~ s/\$HEADER_FILE/$header_file/g;
$mail_command =~ s/\$MAILER/$mailer/g;
$mail_command =~ s/\$MAIL_PATH/$mail_path/g;
$mail_command =~ s/\$MAILTO/$mailto/g;
@@ -4338,10 +4394,19 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
}
doprint "\n\n";
+
+ if (defined($opt{"LOG_FILE"})) {
+ $test_log_start = tell(LOG);
+ }
+
doprint "RUNNING TEST $i of $opt{NUM_TESTS}$name with option $test_type $run_type$installme\n\n";
if (defined($pre_test)) {
- run_command $pre_test;
+ my $ret = run_command $pre_test;
+ if (!$ret && defined($pre_test_die) &&
+ $pre_test_die) {
+ dodie "failed to pre_test\n";
+ }
}
unlink $dmesg;
@@ -4441,4 +4506,10 @@ if ($email_when_finished) {
send_email("KTEST: Your test has finished!",
"$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!");
}
+
+if (defined($opt{"LOG_FILE"})) {
+ print "\n See $opt{LOG_FILE} for the record of results.\n\n";
+ close LOG;
+}
+
exit 0;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index 27666b8007ed..5e7d1d729752 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -442,6 +442,19 @@
# Users can cancel the test by Ctrl^C
# (default 0)
#EMAIL_WHEN_CANCELED = 1
+#
+# If a test ends with an error and EMAIL_ON_ERROR is set as well
+# as a LOG_FILE is defined, then the log of the failing test will
+# be included in the email that is sent.
+# It is possible that the log may be very large, in which case
+# only the last portion of the log should be sent. To limit how
+# much of the log is sent, set MAIL_MAX_SIZE. This will be the
+# size in bytes of the last portion of the log of the failed
+# test file. That is, if this is set to 100000, then only the
+# last 100 thousand bytes of the log file will be included in
+# the email.
+# (default undef)
+#MAIL_MAX_SIZE = 1000000
# Start a test setup. If you leave this off, all options
# will be default and the test will run once.
@@ -557,6 +570,11 @@
# default (undefined)
#PRE_TEST = ${SSH} reboot_to_special_kernel
+# To kill the entire test if PRE_TEST is defined but fails set this
+# to 1.
+# (default 0)
+#PRE_TEST_DIE = 1
+
# If there is a command you want to run after the individual test case
# completes, then you can set this option.
#
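
The MAIL_MAX_SIZE option documented above pairs with the ktest.pl change that seeks to (end - MAIL_MAX_SIZE) in the log before copying it into the failure email. A rough C equivalent of that tail-of-log copy, with a hypothetical copy_log_tail() helper and file names chosen only for illustration:

#include <stdio.h>

/* hypothetical helper: copy at most max_size trailing bytes of src,
 * starting no earlier than the offset where the current test began */
static int copy_log_tail(const char *src, const char *dst,
			 long test_start, long max_size)
{
	FILE *in = fopen(src, "r");
	FILE *out = fopen(dst, "w");
	int c;

	if (!in || !out) {
		if (in)
			fclose(in);
		if (out)
			fclose(out);
		return -1;
	}
	fseek(in, 0, SEEK_END);
	if (ftell(in) - test_start > max_size)
		fseek(in, -max_size, SEEK_END);	/* too big: keep only the tail  */
	else
		fseek(in, test_start, SEEK_SET);/* fits: keep the whole test log */
	while ((c = fgetc(in)) != EOF)
		fputc(c, out);
	fclose(out);
	fclose(in);
	return 0;
}
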
diff --git a/tools/testing/kunit/configs/broken_on_uml.config b/tools/testing/kunit/configs/broken_on_uml.config
index 239b9f03da2c..a7f0603d33f6 100644
--- a/tools/testing/kunit/configs/broken_on_uml.config
+++ b/tools/testing/kunit/configs/broken_on_uml.config
@@ -39,3 +39,4 @@
# CONFIG_QCOM_CPR is not set
# CONFIG_RESET_BRCMSTB_RESCAL is not set
# CONFIG_RESET_INTEL_GW is not set
+# CONFIG_ADI_AXI_ADC is not set
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 7dca74774dd2..ebf5f5763dee 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -17,14 +17,24 @@ from collections import namedtuple
from enum import Enum, auto
import kunit_config
+import kunit_json
import kunit_kernel
import kunit_parser
-KunitResult = namedtuple('KunitResult', ['status','result'])
+KunitResult = namedtuple('KunitResult', ['status','result','elapsed_time'])
+KunitConfigRequest = namedtuple('KunitConfigRequest',
+ ['build_dir', 'make_options'])
+KunitBuildRequest = namedtuple('KunitBuildRequest',
+ ['jobs', 'build_dir', 'alltests',
+ 'make_options'])
+KunitExecRequest = namedtuple('KunitExecRequest',
+ ['timeout', 'build_dir', 'alltests'])
+KunitParseRequest = namedtuple('KunitParseRequest',
+ ['raw_output', 'input_data', 'build_dir', 'json'])
KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs',
- 'build_dir', 'defconfig',
- 'alltests', 'make_options'])
+ 'build_dir', 'alltests', 'json',
+ 'make_options'])
KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0]
@@ -46,14 +56,24 @@ def get_kernel_root_path():
sys.exit(1)
return parts[0]
-def run_tests(linux: kunit_kernel.LinuxSourceTree,
- request: KunitRequest) -> KunitResult:
+def config_tests(linux: kunit_kernel.LinuxSourceTree,
+ request: KunitConfigRequest) -> KunitResult:
+ kunit_parser.print_with_timestamp('Configuring KUnit Kernel ...')
+
config_start = time.time()
+ create_default_kunitconfig()
success = linux.build_reconfig(request.build_dir, request.make_options)
config_end = time.time()
if not success:
- return KunitResult(KunitStatus.CONFIG_FAILURE, 'could not configure kernel')
+ return KunitResult(KunitStatus.CONFIG_FAILURE,
+ 'could not configure kernel',
+ config_end - config_start)
+ return KunitResult(KunitStatus.SUCCESS,
+ 'configured kernel successfully',
+ config_end - config_start)
+def build_tests(linux: kunit_kernel.LinuxSourceTree,
+ request: KunitBuildRequest) -> KunitResult:
kunit_parser.print_with_timestamp('Building KUnit Kernel ...')
build_start = time.time()
@@ -63,87 +83,186 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree,
request.make_options)
build_end = time.time()
if not success:
- return KunitResult(KunitStatus.BUILD_FAILURE, 'could not build kernel')
+ return KunitResult(KunitStatus.BUILD_FAILURE,
+ 'could not build kernel',
+ build_end - build_start)
+ if not success:
+ return KunitResult(KunitStatus.BUILD_FAILURE,
+ 'could not build kernel',
+ build_end - build_start)
+ return KunitResult(KunitStatus.SUCCESS,
+ 'built kernel successfully',
+ build_end - build_start)
+def exec_tests(linux: kunit_kernel.LinuxSourceTree,
+ request: KunitExecRequest) -> KunitResult:
kunit_parser.print_with_timestamp('Starting KUnit Kernel ...')
test_start = time.time()
- kunit_output = linux.run_kernel(
+ result = linux.run_kernel(
timeout=None if request.alltests else request.timeout,
build_dir=request.build_dir)
+
+ test_end = time.time()
+
+ return KunitResult(KunitStatus.SUCCESS,
+ result,
+ test_end - test_start)
+
+def parse_tests(request: KunitParseRequest) -> KunitResult:
+ parse_start = time.time()
+
+ test_result = kunit_parser.TestResult(kunit_parser.TestStatus.SUCCESS,
+ [],
+ 'Tests not Parsed.')
+
if request.raw_output:
- raw_output = kunit_parser.raw_output(kunit_output)
- isolated = list(kunit_parser.isolate_kunit_output(raw_output))
- test_result = kunit_parser.parse_test_result(isolated)
+ kunit_parser.raw_output(request.input_data)
else:
- test_result = kunit_parser.parse_run_tests(kunit_output)
- test_end = time.time()
+ test_result = kunit_parser.parse_run_tests(request.input_data)
+ parse_end = time.time()
+
+ if request.json:
+ json_obj = kunit_json.get_json_result(
+ test_result=test_result,
+ def_config='kunit_defconfig',
+ build_dir=request.build_dir,
+ json_path=request.json)
+ if request.json == 'stdout':
+ print(json_obj)
+
+ if test_result.status != kunit_parser.TestStatus.SUCCESS:
+ return KunitResult(KunitStatus.TEST_FAILURE, test_result,
+ parse_end - parse_start)
+
+ return KunitResult(KunitStatus.SUCCESS, test_result,
+ parse_end - parse_start)
+
+
+def run_tests(linux: kunit_kernel.LinuxSourceTree,
+ request: KunitRequest) -> KunitResult:
+ run_start = time.time()
+
+ config_request = KunitConfigRequest(request.build_dir,
+ request.make_options)
+ config_result = config_tests(linux, config_request)
+ if config_result.status != KunitStatus.SUCCESS:
+ return config_result
+
+ build_request = KunitBuildRequest(request.jobs, request.build_dir,
+ request.alltests,
+ request.make_options)
+ build_result = build_tests(linux, build_request)
+ if build_result.status != KunitStatus.SUCCESS:
+ return build_result
+
+ exec_request = KunitExecRequest(request.timeout, request.build_dir,
+ request.alltests)
+ exec_result = exec_tests(linux, exec_request)
+ if exec_result.status != KunitStatus.SUCCESS:
+ return exec_result
+
+ parse_request = KunitParseRequest(request.raw_output,
+ exec_result.result,
+ request.build_dir,
+ request.json)
+ parse_result = parse_tests(parse_request)
+
+ run_end = time.time()
kunit_parser.print_with_timestamp((
'Elapsed time: %.3fs total, %.3fs configuring, %.3fs ' +
'building, %.3fs running\n') % (
- test_end - config_start,
- config_end - config_start,
- build_end - build_start,
- test_end - test_start))
+ run_end - run_start,
+ config_result.elapsed_time,
+ build_result.elapsed_time,
+ exec_result.elapsed_time))
+ return parse_result
- if test_result.status != kunit_parser.TestStatus.SUCCESS:
- return KunitResult(KunitStatus.TEST_FAILURE, test_result)
- else:
- return KunitResult(KunitStatus.SUCCESS, test_result)
+def add_common_opts(parser):
+ parser.add_argument('--build_dir',
+ help='As in the make command, it specifies the build '
+ 'directory.',
+ type=str, default='.kunit', metavar='build_dir')
+ parser.add_argument('--make_options',
+ help='X=Y make option, can be repeated.',
+ action='append')
+ parser.add_argument('--alltests',
+ help='Run all KUnit tests through allyesconfig',
+ action='store_true')
+
+def add_build_opts(parser):
+ parser.add_argument('--jobs',
+ help='As in the make command, "Specifies the number of '
+ 'jobs (commands) to run simultaneously."',
+ type=int, default=8, metavar='jobs')
+
+def add_exec_opts(parser):
+ parser.add_argument('--timeout',
+ help='maximum number of seconds to allow for all tests '
+ 'to run. This does not include time taken to build the '
+ 'tests.',
+ type=int,
+ default=300,
+ metavar='timeout')
+
+def add_parse_opts(parser):
+ parser.add_argument('--raw_output', help='don\'t format output from kernel',
+ action='store_true')
+ parser.add_argument('--json',
+ nargs='?',
+ help='Stores test results in a JSON, and either '
+ 'prints to stdout or saves to file if a '
+ 'filename is specified',
+ type=str, const='stdout', default=None)
def main(argv, linux=None):
parser = argparse.ArgumentParser(
description='Helps writing and running KUnit tests.')
subparser = parser.add_subparsers(dest='subcommand')
+ # The 'run' command will config, build, exec, and parse in one go.
run_parser = subparser.add_parser('run', help='Runs KUnit tests.')
- run_parser.add_argument('--raw_output', help='don\'t format output from kernel',
- action='store_true')
-
- run_parser.add_argument('--timeout',
- help='maximum number of seconds to allow for all tests '
- 'to run. This does not include time taken to build the '
- 'tests.',
- type=int,
- default=300,
- metavar='timeout')
-
- run_parser.add_argument('--jobs',
- help='As in the make command, "Specifies the number of '
- 'jobs (commands) to run simultaneously."',
- type=int, default=8, metavar='jobs')
-
- run_parser.add_argument('--build_dir',
- help='As in the make command, it specifies the build '
- 'directory.',
- type=str, default='', metavar='build_dir')
-
- run_parser.add_argument('--defconfig',
- help='Uses a default .kunitconfig.',
- action='store_true')
-
- run_parser.add_argument('--alltests',
- help='Run all KUnit tests through allyesconfig',
- action='store_true')
-
- run_parser.add_argument('--make_options',
- help='X=Y make option, can be repeated.',
- action='append')
+ add_common_opts(run_parser)
+ add_build_opts(run_parser)
+ add_exec_opts(run_parser)
+ add_parse_opts(run_parser)
+
+ config_parser = subparser.add_parser('config',
+ help='Ensures that .config contains all of '
+ 'the options in .kunitconfig')
+ add_common_opts(config_parser)
+
+ build_parser = subparser.add_parser('build', help='Builds a kernel with KUnit tests')
+ add_common_opts(build_parser)
+ add_build_opts(build_parser)
+
+ exec_parser = subparser.add_parser('exec', help='Run a kernel with KUnit tests')
+ add_common_opts(exec_parser)
+ add_exec_opts(exec_parser)
+ add_parse_opts(exec_parser)
+
+ # The 'parse' option is special, as it doesn't need the kernel source
+ # (therefore there is no need for a build_dir, hence no add_common_opts)
+ # and the '--file' argument is not relevant to 'run', so isn't in
+ # add_parse_opts()
+ parse_parser = subparser.add_parser('parse',
+ help='Parses KUnit results from a file, '
+ 'and parses formatted results.')
+ add_parse_opts(parse_parser)
+ parse_parser.add_argument('file',
+ help='Specifies the file to read results from.',
+ type=str, nargs='?', metavar='input_file')
cli_args = parser.parse_args(argv)
+ if get_kernel_root_path():
+ os.chdir(get_kernel_root_path())
+
if cli_args.subcommand == 'run':
- if get_kernel_root_path():
- os.chdir(get_kernel_root_path())
-
- if cli_args.build_dir:
- if not os.path.exists(cli_args.build_dir):
- os.mkdir(cli_args.build_dir)
- kunit_kernel.kunitconfig_path = os.path.join(
- cli_args.build_dir,
- kunit_kernel.kunitconfig_path)
+ if not os.path.exists(cli_args.build_dir):
+ os.mkdir(cli_args.build_dir)
- if cli_args.defconfig:
+ if not os.path.exists(kunit_kernel.kunitconfig_path):
create_default_kunitconfig()
if not linux:
@@ -153,12 +272,76 @@ def main(argv, linux=None):
cli_args.timeout,
cli_args.jobs,
cli_args.build_dir,
- cli_args.defconfig,
cli_args.alltests,
+ cli_args.json,
cli_args.make_options)
result = run_tests(linux, request)
if result.status != KunitStatus.SUCCESS:
sys.exit(1)
+ elif cli_args.subcommand == 'config':
+ if cli_args.build_dir and (
+ not os.path.exists(cli_args.build_dir)):
+ os.mkdir(cli_args.build_dir)
+
+ if not os.path.exists(kunit_kernel.kunitconfig_path):
+ create_default_kunitconfig()
+
+ if not linux:
+ linux = kunit_kernel.LinuxSourceTree()
+
+ request = KunitConfigRequest(cli_args.build_dir,
+ cli_args.make_options)
+ result = config_tests(linux, request)
+ kunit_parser.print_with_timestamp((
+ 'Elapsed time: %.3fs\n') % (
+ result.elapsed_time))
+ if result.status != KunitStatus.SUCCESS:
+ sys.exit(1)
+ elif cli_args.subcommand == 'build':
+ if not linux:
+ linux = kunit_kernel.LinuxSourceTree()
+
+ request = KunitBuildRequest(cli_args.jobs,
+ cli_args.build_dir,
+ cli_args.alltests,
+ cli_args.make_options)
+ result = build_tests(linux, request)
+ kunit_parser.print_with_timestamp((
+ 'Elapsed time: %.3fs\n') % (
+ result.elapsed_time))
+ if result.status != KunitStatus.SUCCESS:
+ sys.exit(1)
+ elif cli_args.subcommand == 'exec':
+ if not linux:
+ linux = kunit_kernel.LinuxSourceTree()
+
+ exec_request = KunitExecRequest(cli_args.timeout,
+ cli_args.build_dir,
+ cli_args.alltests)
+ exec_result = exec_tests(linux, exec_request)
+ parse_request = KunitParseRequest(cli_args.raw_output,
+ exec_result.result,
+ cli_args.build_dir,
+ cli_args.json)
+ result = parse_tests(parse_request)
+ kunit_parser.print_with_timestamp((
+ 'Elapsed time: %.3fs\n') % (
+ exec_result.elapsed_time))
+ if result.status != KunitStatus.SUCCESS:
+ sys.exit(1)
+ elif cli_args.subcommand == 'parse':
+ if cli_args.file == None:
+ kunit_output = sys.stdin
+ else:
+ with open(cli_args.file, 'r') as f:
+ kunit_output = f.read().splitlines()
+ request = KunitParseRequest(cli_args.raw_output,
+ kunit_output,
+ cli_args.build_dir,
+ cli_args.json)
+ result = parse_tests(request)
+ if result.status != KunitStatus.SUCCESS:
+ sys.exit(1)
else:
parser.print_help()
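
As a quick illustration of the restructured CLI (a sketch, not part of the patch), the new subcommands can be driven programmatically the same way the unit tests further down do; this assumes tools/testing/kunit is on sys.path and that it is run from a kernel tree:

    import kunit

    # 'config', 'build' and 'exec' mirror the stages that 'run' chains together.
    kunit.main(['config', '--build_dir=.kunit'])
    kunit.main(['build', '--jobs', '8'])
    kunit.main(['exec', '--timeout', '300', '--json=stdout'])
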
diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py
index e75063d603b5..02ffc3a3e5dc 100644
--- a/tools/testing/kunit/kunit_config.py
+++ b/tools/testing/kunit/kunit_config.py
@@ -10,7 +10,7 @@ import collections
import re
CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$'
-CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+)$'
+CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$'
KconfigEntryBase = collections.namedtuple('KconfigEntry', ['name', 'value'])
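
The widened CONFIG_PATTERN is easiest to see with a couple of sample inputs (illustrative only, not part of the patch); the second line was rejected by the old pattern because the quoted value contains a space:

    import re

    CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$'
    assert re.match(CONFIG_PATTERN, 'CONFIG_KUNIT=y')
    assert re.match(CONFIG_PATTERN, 'CONFIG_CMDLINE="console=ttyS0 rw"')
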
diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py
new file mode 100644
index 000000000000..624b31b2dbd6
--- /dev/null
+++ b/tools/testing/kunit/kunit_json.py
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generates JSON from KUnit results according to
+# KernelCI spec: https://github.com/kernelci/kernelci-doc/wiki/Test-API
+#
+# Copyright (C) 2020, Google LLC.
+# Author: Heidi Fahim <heidifahim@google.com>
+
+import json
+import os
+
+import kunit_parser
+
+from kunit_parser import TestStatus
+
+def get_json_result(test_result, def_config, build_dir, json_path):
+ sub_groups = []
+
+ # Each test suite is mapped to a KernelCI sub_group
+ for test_suite in test_result.suites:
+ sub_group = {
+ "name": test_suite.name,
+ "arch": "UM",
+ "defconfig": def_config,
+ "build_environment": build_dir,
+ "test_cases": [],
+ "lab_name": None,
+ "kernel": None,
+ "job": None,
+ "git_branch": "kselftest",
+ }
+ test_cases = []
+ # TODO: Add attachments attribute in test_case with detailed
+ # failure message, see https://api.kernelci.org/schema-test-case.html#get
+ for case in test_suite.cases:
+ test_case = {"name": case.name, "status": "FAIL"}
+ if case.status == TestStatus.SUCCESS:
+ test_case["status"] = "PASS"
+ elif case.status == TestStatus.TEST_CRASHED:
+ test_case["status"] = "ERROR"
+ test_cases.append(test_case)
+ sub_group["test_cases"] = test_cases
+ sub_groups.append(sub_group)
+ test_group = {
+ "name": "KUnit Test Group",
+ "arch": "UM",
+ "defconfig": def_config,
+ "build_environment": build_dir,
+ "sub_groups": sub_groups,
+ "lab_name": None,
+ "kernel": None,
+ "job": None,
+ "git_branch": "kselftest",
+ }
+ json_obj = json.dumps(test_group, indent=4)
+ if json_path != 'stdout':
+ with open(json_path, 'w') as result_path:
+ result_path.write(json_obj)
+ root = __file__.split('tools/testing/kunit/')[0]
+ kunit_parser.print_with_timestamp(
+ "Test results stored in %s" %
+ os.path.join(root, result_path.name))
+ return json_obj
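
A minimal usage sketch for the new helper (modelled on KUnitJsonTest._json_for() in the test changes below; the log path is an assumption, any KUnit TAP log works):

    import json
    import kunit_parser
    import kunit_json

    # Parse a KUnit TAP log and render it as a KernelCI-style report.
    with open('test_data/test_is_test_passed-all_passed.log') as f:
        test_result = kunit_parser.parse_run_tests(f)
    report = json.loads(kunit_json.get_json_result(
        test_result=test_result,
        def_config='kunit_defconfig',
        build_dir=None,
        json_path='stdout'))
    print(report['name'], len(report['sub_groups']))
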
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 63dbda2d029f..b557b1e93f98 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -34,11 +34,11 @@ class LinuxSourceTreeOperations(object):
def make_mrproper(self):
try:
- subprocess.check_output(['make', 'mrproper'])
+ subprocess.check_output(['make', 'mrproper'], stderr=subprocess.STDOUT)
except OSError as e:
- raise ConfigError('Could not call make command: ' + e)
+ raise ConfigError('Could not call make command: ' + str(e))
except subprocess.CalledProcessError as e:
- raise ConfigError(e.output)
+ raise ConfigError(e.output.decode())
def make_olddefconfig(self, build_dir, make_options):
command = ['make', 'ARCH=um', 'olddefconfig']
@@ -47,24 +47,29 @@ class LinuxSourceTreeOperations(object):
if build_dir:
command += ['O=' + build_dir]
try:
- subprocess.check_output(command, stderr=subprocess.PIPE)
+ subprocess.check_output(command, stderr=subprocess.STDOUT)
except OSError as e:
- raise ConfigError('Could not call make command: ' + e)
+ raise ConfigError('Could not call make command: ' + str(e))
except subprocess.CalledProcessError as e:
- raise ConfigError(e.output)
+ raise ConfigError(e.output.decode())
- def make_allyesconfig(self):
+ def make_allyesconfig(self, build_dir, make_options):
kunit_parser.print_with_timestamp(
'Enabling all CONFIGs for UML...')
+ command = ['make', 'ARCH=um', 'allyesconfig']
+ if make_options:
+ command.extend(make_options)
+ if build_dir:
+ command += ['O=' + build_dir]
process = subprocess.Popen(
- ['make', 'ARCH=um', 'allyesconfig'],
+ command,
stdout=subprocess.DEVNULL,
stderr=subprocess.STDOUT)
process.wait()
kunit_parser.print_with_timestamp(
'Disabling broken configs to run KUnit tests...')
with ExitStack() as es:
- config = open(KCONFIG_PATH, 'a')
+ config = open(get_kconfig_path(build_dir), 'a')
disable = open(BROKEN_ALLCONFIG_PATH, 'r').read()
config.write(disable)
kunit_parser.print_with_timestamp(
@@ -77,11 +82,11 @@ class LinuxSourceTreeOperations(object):
if build_dir:
command += ['O=' + build_dir]
try:
- subprocess.check_output(command)
+ subprocess.check_output(command, stderr=subprocess.STDOUT)
except OSError as e:
- raise BuildError('Could not call execute make: ' + e)
+ raise BuildError('Could not call execute make: ' + str(e))
except subprocess.CalledProcessError as e:
- raise BuildError(e.output)
+ raise BuildError(e.output.decode())
def linux_bin(self, params, timeout, build_dir, outfile):
"""Runs the Linux UML binary. Must be named 'linux'."""
@@ -161,9 +166,9 @@ class LinuxSourceTree(object):
return self.build_config(build_dir, make_options)
def build_um_kernel(self, alltests, jobs, build_dir, make_options):
- if alltests:
- self._ops.make_allyesconfig()
try:
+ if alltests:
+ self._ops.make_allyesconfig(build_dir, make_options)
self._ops.make_olddefconfig(build_dir, make_options)
self._ops.make(jobs, build_dir, make_options)
except (ConfigError, BuildError) as e:
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 64aac9dcd431..8019e3dd4c32 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -45,10 +45,11 @@ class TestStatus(Enum):
FAILURE = auto()
TEST_CRASHED = auto()
NO_TESTS = auto()
+ FAILURE_TO_PARSE_TESTS = auto()
kunit_start_re = re.compile(r'TAP version [0-9]+$')
kunit_end_re = re.compile('(List of all partitions:|'
- 'Kernel panic - not syncing: VFS:|reboot: System halted)')
+ 'Kernel panic - not syncing: VFS:)')
def isolate_kunit_output(kernel_output):
started = False
@@ -109,7 +110,7 @@ OkNotOkResult = namedtuple('OkNotOkResult', ['is_ok','description', 'text'])
OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$')
-OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) [0-9]+ - (.*)$')
+OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) ([0-9]+) - (.*)$')
def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool:
save_non_diagnositic(lines, test_case)
@@ -197,7 +198,9 @@ def max_status(left: TestStatus, right: TestStatus) -> TestStatus:
else:
return TestStatus.SUCCESS
-def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite) -> bool:
+def parse_ok_not_ok_test_suite(lines: List[str],
+ test_suite: TestSuite,
+ expected_suite_index: int) -> bool:
consume_non_diagnositic(lines)
if not lines:
test_suite.status = TestStatus.TEST_CRASHED
@@ -210,6 +213,12 @@ def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite) -> bool:
test_suite.status = TestStatus.SUCCESS
else:
test_suite.status = TestStatus.FAILURE
+ suite_index = int(match.group(2))
+ if suite_index != expected_suite_index:
+ print_with_timestamp(
+ red('[ERROR] ') + 'expected_suite_index ' +
+ str(expected_suite_index) + ', but got ' +
+ str(suite_index))
return True
else:
return False
@@ -222,7 +231,7 @@ def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus:
max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases)
return max_status(max_test_case_status, test_suite.status)
-def parse_test_suite(lines: List[str]) -> TestSuite:
+def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite:
if not lines:
return None
consume_non_diagnositic(lines)
@@ -241,7 +250,7 @@ def parse_test_suite(lines: List[str]) -> TestSuite:
break
test_suite.cases.append(test_case)
expected_test_case_num -= 1
- if parse_ok_not_ok_test_suite(lines, test_suite):
+ if parse_ok_not_ok_test_suite(lines, test_suite, expected_suite_index):
test_suite.status = bubble_up_test_case_errors(test_suite)
return test_suite
elif not lines:
@@ -261,27 +270,51 @@ def parse_tap_header(lines: List[str]) -> bool:
else:
return False
+TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)')
+
+def parse_test_plan(lines: List[str]) -> int:
+ consume_non_diagnositic(lines)
+ match = TEST_PLAN.match(lines[0])
+ if match:
+ lines.pop(0)
+ return int(match.group(1))
+ else:
+ return None
+
def bubble_up_suite_errors(test_suite_list: List[TestSuite]) -> TestStatus:
return bubble_up_errors(lambda x: x.status, test_suite_list)
def parse_test_result(lines: List[str]) -> TestResult:
- if not lines:
- return TestResult(TestStatus.NO_TESTS, [], lines)
consume_non_diagnositic(lines)
- if not parse_tap_header(lines):
- return None
+ if not lines or not parse_tap_header(lines):
+ return TestResult(TestStatus.NO_TESTS, [], lines)
+ expected_test_suite_num = parse_test_plan(lines)
+ if not expected_test_suite_num:
+ return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines)
test_suites = []
- test_suite = parse_test_suite(lines)
- while test_suite:
- test_suites.append(test_suite)
- test_suite = parse_test_suite(lines)
- return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines)
+ for i in range(1, expected_test_suite_num + 1):
+ test_suite = parse_test_suite(lines, i)
+ if test_suite:
+ test_suites.append(test_suite)
+ else:
+ print_with_timestamp(
+ red('[ERROR] ') + ' expected ' +
+ str(expected_test_suite_num) +
+ ' test suites, but got ' + str(i - 2))
+ break
+ test_suite = parse_test_suite(lines, -1)
+ if test_suite:
+ print_with_timestamp(red('[ERROR] ') +
+ 'got unexpected test suite: ' + test_suite.name)
+ if test_suites:
+ return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines)
+ else:
+ return TestResult(TestStatus.NO_TESTS, [], lines)
-def parse_run_tests(kernel_output) -> TestResult:
+def print_and_count_results(test_result: TestResult) -> None:
total_tests = 0
failed_tests = 0
crashed_tests = 0
- test_result = parse_test_result(list(isolate_kunit_output(kernel_output)))
for test_suite in test_result.suites:
if test_suite.status == TestStatus.SUCCESS:
print_suite_divider(green('[PASSED] ') + test_suite.name)
@@ -303,6 +336,21 @@ def parse_run_tests(kernel_output) -> TestResult:
print_with_timestamp(red('[FAILED] ') + test_case.name)
print_log(map(yellow, test_case.log))
print_with_timestamp('')
+ return total_tests, failed_tests, crashed_tests
+
+def parse_run_tests(kernel_output) -> TestResult:
+ total_tests = 0
+ failed_tests = 0
+ crashed_tests = 0
+ test_result = parse_test_result(list(isolate_kunit_output(kernel_output)))
+ if test_result.status == TestStatus.NO_TESTS:
+ print(red('[ERROR] ') + yellow('no tests run!'))
+ elif test_result.status == TestStatus.FAILURE_TO_PARSE_TESTS:
+ print(red('[ERROR] ') + yellow('could not parse test results!'))
+ else:
+ (total_tests,
+ failed_tests,
+ crashed_tests) = print_and_count_results(test_result)
print_with_timestamp(DIVIDER)
fmt = green if test_result.status == TestStatus.SUCCESS else red
print_with_timestamp(
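
The new test-plan handling keys off the TAP plan line; a tiny, self-contained check of the regex added above (illustrative only, not part of the patch):

    import re

    TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)')
    # The plan line now expected right after 'TAP version 14', e.g. '1..2'.
    match = TEST_PLAN.match('1..2')
    assert match and int(match.group(1)) == 2
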
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 984588d6ba95..99c3c5671ea4 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -11,11 +11,13 @@ from unittest import mock
import tempfile, shutil # Handling test_tmpdir
+import json
import os
import kunit_config
import kunit_parser
import kunit_kernel
+import kunit_json
import kunit
test_tmpdir = ''
@@ -170,6 +172,17 @@ class KUnitParserTest(unittest.TestCase):
result.status)
file.close()
+ def test_no_kunit_output(self):
+ crash_log = get_absolute_path(
+ 'test_data/test_insufficient_memory.log')
+ file = open(crash_log)
+ print_mock = mock.patch('builtins.print').start()
+ result = kunit_parser.parse_run_tests(
+ kunit_parser.isolate_kunit_output(file.readlines()))
+ print_mock.assert_any_call(StrContains("no kunit output detected"))
+ print_mock.stop()
+ file.close()
+
def test_crashed_test(self):
crashed_log = get_absolute_path(
'test_data/test_is_test_passed-crash.log')
@@ -219,6 +232,37 @@ class KUnitParserTest(unittest.TestCase):
result = kunit_parser.parse_run_tests(file.readlines())
self.assertEqual('kunit-resource-test', result.suites[0].name)
+class KUnitJsonTest(unittest.TestCase):
+
+ def _json_for(self, log_file):
+ with(open(get_absolute_path(log_file))) as file:
+ test_result = kunit_parser.parse_run_tests(file)
+ json_obj = kunit_json.get_json_result(
+ test_result=test_result,
+ def_config='kunit_defconfig',
+ build_dir=None,
+ json_path='stdout')
+ return json.loads(json_obj)
+
+ def test_failed_test_json(self):
+ result = self._json_for(
+ 'test_data/test_is_test_passed-failure.log')
+ self.assertEqual(
+ {'name': 'example_simple_test', 'status': 'FAIL'},
+ result["sub_groups"][1]["test_cases"][0])
+
+ def test_crashed_test_json(self):
+ result = self._json_for(
+ 'test_data/test_is_test_passed-crash.log')
+ self.assertEqual(
+ {'name': 'example_simple_test', 'status': 'ERROR'},
+ result["sub_groups"][1]["test_cases"][0])
+
+ def test_no_tests_json(self):
+ result = self._json_for(
+ 'test_data/test_is_test_passed-no_tests_run.log')
+ self.assertEqual(0, len(result['sub_groups']))
+
class StrContains(str):
def __eq__(self, other):
return self in other
@@ -239,14 +283,39 @@ class KUnitMainTest(unittest.TestCase):
self.print_patch.stop()
pass
+ def test_config_passes_args_pass(self):
+ kunit.main(['config', '--build_dir=.kunit'], self.linux_source_mock)
+ assert self.linux_source_mock.build_reconfig.call_count == 1
+ assert self.linux_source_mock.run_kernel.call_count == 0
+
+ def test_build_passes_args_pass(self):
+ kunit.main(['build'], self.linux_source_mock)
+ assert self.linux_source_mock.build_reconfig.call_count == 0
+ self.linux_source_mock.build_um_kernel.assert_called_once_with(False, 8, '.kunit', None)
+ assert self.linux_source_mock.run_kernel.call_count == 0
+
+ def test_exec_passes_args_pass(self):
+ kunit.main(['exec'], self.linux_source_mock)
+ assert self.linux_source_mock.build_reconfig.call_count == 0
+ assert self.linux_source_mock.run_kernel.call_count == 1
+ self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='.kunit', timeout=300)
+ self.print_mock.assert_any_call(StrContains('Testing complete.'))
+
def test_run_passes_args_pass(self):
kunit.main(['run'], self.linux_source_mock)
assert self.linux_source_mock.build_reconfig.call_count == 1
assert self.linux_source_mock.run_kernel.call_count == 1
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir='', timeout=300)
+ build_dir='.kunit', timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
+ def test_exec_passes_args_fail(self):
+ self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
+ with self.assertRaises(SystemExit) as e:
+ kunit.main(['exec'], self.linux_source_mock)
+ assert type(e.exception) == SystemExit
+ assert e.exception.code == 1
+
def test_run_passes_args_fail(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
with self.assertRaises(SystemExit) as e:
@@ -257,30 +326,60 @@ class KUnitMainTest(unittest.TestCase):
assert self.linux_source_mock.run_kernel.call_count == 1
self.print_mock.assert_any_call(StrContains(' 0 tests run'))
+ def test_exec_raw_output(self):
+ self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
+ kunit.main(['exec', '--raw_output'], self.linux_source_mock)
+ assert self.linux_source_mock.run_kernel.call_count == 1
+ for kall in self.print_mock.call_args_list:
+ assert kall != mock.call(StrContains('Testing complete.'))
+ assert kall != mock.call(StrContains(' 0 tests run'))
+
def test_run_raw_output(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
- with self.assertRaises(SystemExit) as e:
- kunit.main(['run', '--raw_output'], self.linux_source_mock)
- assert type(e.exception) == SystemExit
- assert e.exception.code == 1
+ kunit.main(['run', '--raw_output'], self.linux_source_mock)
assert self.linux_source_mock.build_reconfig.call_count == 1
assert self.linux_source_mock.run_kernel.call_count == 1
+ for kall in self.print_mock.call_args_list:
+ assert kall != mock.call(StrContains('Testing complete.'))
+ assert kall != mock.call(StrContains(' 0 tests run'))
+
+ def test_exec_timeout(self):
+ timeout = 3453
+ kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock)
+ self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='.kunit', timeout=timeout)
+ self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_timeout(self):
timeout = 3453
kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock)
assert self.linux_source_mock.build_reconfig.call_count == 1
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir='', timeout=timeout)
+ build_dir='.kunit', timeout=timeout)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_builddir(self):
build_dir = '.kunit'
- kunit.main(['run', '--build_dir', build_dir], self.linux_source_mock)
+ kunit.main(['run', '--build_dir=.kunit'], self.linux_source_mock)
assert self.linux_source_mock.build_reconfig.call_count == 1
self.linux_source_mock.run_kernel.assert_called_once_with(
build_dir=build_dir, timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
+ def test_config_builddir(self):
+ build_dir = '.kunit'
+ kunit.main(['config', '--build_dir', build_dir], self.linux_source_mock)
+ assert self.linux_source_mock.build_reconfig.call_count == 1
+
+ def test_build_builddir(self):
+ build_dir = '.kunit'
+ kunit.main(['build', '--build_dir', build_dir], self.linux_source_mock)
+ self.linux_source_mock.build_um_kernel.assert_called_once_with(False, 8, build_dir, None)
+
+ def test_exec_builddir(self):
+ build_dir = '.kunit'
+ kunit.main(['exec', '--build_dir', build_dir], self.linux_source_mock)
+ self.linux_source_mock.run_kernel.assert_called_once_with(build_dir=build_dir, timeout=300)
+ self.print_mock.assert_any_call(StrContains('Testing complete.'))
+
if __name__ == '__main__':
unittest.main()
diff --git a/tools/testing/kunit/test_data/test_insufficient_memory.log b/tools/testing/kunit/test_data/test_insufficient_memory.log
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_insufficient_memory.log
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log
index 62ebc0288355..bc0dc8fe35b7 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log
@@ -1,4 +1,5 @@
TAP version 14
+1..2
# Subtest: sysctl_test
1..8
# sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-crash.log b/tools/testing/kunit/test_data/test_is_test_passed-crash.log
index 0b249870c8be..4d97f6708c4a 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-crash.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-crash.log
@@ -1,6 +1,7 @@
printk: console [tty0] enabled
printk: console [mc-1] enabled
TAP version 14
+1..2
# Subtest: sysctl_test
1..8
# sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-failure.log b/tools/testing/kunit/test_data/test_is_test_passed-failure.log
index 9e89d32d5667..7a416497e3be 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-failure.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-failure.log
@@ -1,4 +1,5 @@
TAP version 14
+1..2
# Subtest: sysctl_test
1..8
# sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c
index 7e5d979e73cb..fb342a8c98d3 100644
--- a/tools/testing/nvdimm/dax-dev.c
+++ b/tools/testing/nvdimm/dax-dev.c
@@ -9,12 +9,19 @@
phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
unsigned long size)
{
- struct resource *res = &dev_dax->region->res;
- phys_addr_t addr;
+ int i;
- addr = pgoff * PAGE_SIZE + res->start;
- if (addr >= res->start && addr <= res->end) {
- if (addr + size - 1 <= res->end) {
+ for (i = 0; i < dev_dax->nr_range; i++) {
+ struct dev_dax_range *dax_range = &dev_dax->ranges[i];
+ struct range *range = &dax_range->range;
+ unsigned long long pgoff_end;
+ phys_addr_t addr;
+
+ pgoff_end = dax_range->pgoff + PHYS_PFN(range_len(range)) - 1;
+ if (pgoff < dax_range->pgoff || pgoff > pgoff_end)
+ continue;
+ addr = PFN_PHYS(pgoff - dax_range->pgoff) + range->start;
+ if (addr + size - 1 <= range->end) {
if (get_nfit_res(addr)) {
struct page *page;
@@ -23,9 +30,10 @@ phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
page = vmalloc_to_page((void *)addr);
return PFN_PHYS(page_to_pfn(page));
- } else
- return addr;
+ }
+ return addr;
}
+ break;
}
return -1;
}
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 03e40b3b0106..c62d372d426f 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -126,7 +126,7 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
{
int error;
- resource_size_t offset = pgmap->res.start;
+ resource_size_t offset = pgmap->range.start;
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (!nfit_res)
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index a8ee5c4d41eb..2ac0fff6dad8 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -23,7 +23,8 @@
#include "nfit_test.h"
#include "../watermark.h"
-#include <asm/mcsafe_test.h>
+#include <asm/copy_mc_test.h>
+#include <asm/mce.h>
/*
* Generate an NFIT table to describe the following topology:
@@ -173,6 +174,9 @@ struct nfit_test_fw {
u64 version;
u32 size_received;
u64 end_time;
+ bool armed;
+ bool missed_activate;
+ unsigned long last_activate;
};
struct nfit_test {
@@ -345,7 +349,7 @@ static int nd_intel_test_finish_fw(struct nfit_test *t,
__func__, t, nd_cmd, buf_len, idx);
if (fw->state == FW_STATE_UPDATED) {
- /* update already done, need cold boot */
+ /* update already done, need activation */
nd_cmd->status = 0x20007;
return 0;
}
@@ -430,6 +434,7 @@ static int nd_intel_test_finish_query(struct nfit_test *t,
}
dev_dbg(dev, "%s: transition out verify\n", __func__);
fw->state = FW_STATE_UPDATED;
+ fw->missed_activate = false;
/* fall through */
case FW_STATE_UPDATED:
nd_cmd->status = 0;
@@ -1178,6 +1183,134 @@ static int nd_intel_test_cmd_master_secure_erase(struct nfit_test *t,
return 0;
}
+static unsigned long last_activate;
+
+static int nvdimm_bus_intel_fw_activate_businfo(struct nfit_test *t,
+ struct nd_intel_bus_fw_activate_businfo *nd_cmd,
+ unsigned int buf_len)
+{
+ int i, armed = 0;
+ int state;
+ u64 tmo;
+
+ for (i = 0; i < NUM_DCR; i++) {
+ struct nfit_test_fw *fw = &t->fw[i];
+
+ if (fw->armed)
+ armed++;
+ }
+
+ /*
+ * Emulate 3 second activation max, and 1 second incremental
+ * quiesce time per dimm requiring multiple activates to get all
+ * DIMMs updated.
+ */
+ if (armed)
+ state = ND_INTEL_FWA_ARMED;
+ else if (!last_activate || time_after(jiffies, last_activate + 3 * HZ))
+ state = ND_INTEL_FWA_IDLE;
+ else
+ state = ND_INTEL_FWA_BUSY;
+
+ tmo = armed * USEC_PER_SEC;
+ *nd_cmd = (struct nd_intel_bus_fw_activate_businfo) {
+ .capability = ND_INTEL_BUS_FWA_CAP_FWQUIESCE
+ | ND_INTEL_BUS_FWA_CAP_OSQUIESCE
+ | ND_INTEL_BUS_FWA_CAP_RESET,
+ .state = state,
+ .activate_tmo = tmo,
+ .cpu_quiesce_tmo = tmo,
+ .io_quiesce_tmo = tmo,
+ .max_quiesce_tmo = 3 * USEC_PER_SEC,
+ };
+
+ return 0;
+}
+
+static int nvdimm_bus_intel_fw_activate(struct nfit_test *t,
+ struct nd_intel_bus_fw_activate *nd_cmd,
+ unsigned int buf_len)
+{
+ struct nd_intel_bus_fw_activate_businfo info;
+ u32 status = 0;
+ int i;
+
+ nvdimm_bus_intel_fw_activate_businfo(t, &info, sizeof(info));
+ if (info.state == ND_INTEL_FWA_BUSY)
+ status = ND_INTEL_BUS_FWA_STATUS_BUSY;
+ else if (info.activate_tmo > info.max_quiesce_tmo)
+ status = ND_INTEL_BUS_FWA_STATUS_TMO;
+ else if (info.state == ND_INTEL_FWA_IDLE)
+ status = ND_INTEL_BUS_FWA_STATUS_NOARM;
+
+ dev_dbg(&t->pdev.dev, "status: %d\n", status);
+ nd_cmd->status = status;
+ if (status && status != ND_INTEL_BUS_FWA_STATUS_TMO)
+ return 0;
+
+ last_activate = jiffies;
+ for (i = 0; i < NUM_DCR; i++) {
+ struct nfit_test_fw *fw = &t->fw[i];
+
+ if (!fw->armed)
+ continue;
+ if (fw->state != FW_STATE_UPDATED)
+ fw->missed_activate = true;
+ else
+ fw->state = FW_STATE_NEW;
+ fw->armed = false;
+ fw->last_activate = last_activate;
+ }
+
+ return 0;
+}
+
+static int nd_intel_test_cmd_fw_activate_dimminfo(struct nfit_test *t,
+ struct nd_intel_fw_activate_dimminfo *nd_cmd,
+ unsigned int buf_len, int dimm)
+{
+ struct nd_intel_bus_fw_activate_businfo info;
+ struct nfit_test_fw *fw = &t->fw[dimm];
+ u32 result, state;
+
+ nvdimm_bus_intel_fw_activate_businfo(t, &info, sizeof(info));
+
+ if (info.state == ND_INTEL_FWA_BUSY)
+ state = ND_INTEL_FWA_BUSY;
+ else if (info.state == ND_INTEL_FWA_IDLE)
+ state = ND_INTEL_FWA_IDLE;
+ else if (fw->armed)
+ state = ND_INTEL_FWA_ARMED;
+ else
+ state = ND_INTEL_FWA_IDLE;
+
+ result = ND_INTEL_DIMM_FWA_NONE;
+ if (last_activate && fw->last_activate == last_activate &&
+ state == ND_INTEL_FWA_IDLE) {
+ if (fw->missed_activate)
+ result = ND_INTEL_DIMM_FWA_NOTSTAGED;
+ else
+ result = ND_INTEL_DIMM_FWA_SUCCESS;
+ }
+
+ *nd_cmd = (struct nd_intel_fw_activate_dimminfo) {
+ .result = result,
+ .state = state,
+ };
+
+ return 0;
+}
+
+static int nd_intel_test_cmd_fw_activate_arm(struct nfit_test *t,
+ struct nd_intel_fw_activate_arm *nd_cmd,
+ unsigned int buf_len, int dimm)
+{
+ struct nfit_test_fw *fw = &t->fw[dimm];
+
+ fw->armed = nd_cmd->activate_arm == ND_INTEL_DIMM_FWA_ARM;
+ nd_cmd->status = 0;
+ return 0;
+}
static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
{
@@ -1192,6 +1325,29 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
return i;
}
+static void nfit_ctl_dbg(struct acpi_nfit_desc *acpi_desc,
+ struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+ unsigned int len)
+{
+ struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc);
+ unsigned int func = cmd;
+ unsigned int family = 0;
+
+ if (cmd == ND_CMD_CALL) {
+ struct nd_cmd_pkg *pkg = buf;
+
+ len = pkg->nd_size_in;
+ family = pkg->nd_family;
+ buf = pkg->nd_payload;
+ func = pkg->nd_command;
+ }
+ dev_dbg(&t->pdev.dev, "%s family: %d cmd: %d: func: %d input length: %d\n",
+ nvdimm ? nvdimm_name(nvdimm) : "bus", family, cmd, func,
+ len);
+ print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 16, 4,
+ buf, min(len, 256u), true);
+}
+
static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
unsigned int buf_len, int *cmd_rc)
@@ -1205,6 +1361,8 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
cmd_rc = &__cmd_rc;
*cmd_rc = 0;
+ nfit_ctl_dbg(acpi_desc, nvdimm, cmd, buf, buf_len);
+
if (nvdimm) {
struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
@@ -1224,6 +1382,11 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
i = get_dimm(nfit_mem, func);
if (i < 0)
return i;
+ if (i >= NUM_DCR) {
+ dev_WARN_ONCE(&t->pdev.dev, 1,
+ "ND_CMD_CALL only valid for nfit_test0\n");
+ return -EINVAL;
+ }
switch (func) {
case NVDIMM_INTEL_GET_SECURITY_STATE:
@@ -1252,11 +1415,11 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
break;
case NVDIMM_INTEL_OVERWRITE:
rc = nd_intel_test_cmd_overwrite(t,
- buf, buf_len, i - t->dcr_idx);
+ buf, buf_len, i);
break;
case NVDIMM_INTEL_QUERY_OVERWRITE:
rc = nd_intel_test_cmd_query_overwrite(t,
- buf, buf_len, i - t->dcr_idx);
+ buf, buf_len, i);
break;
case NVDIMM_INTEL_SET_MASTER_PASSPHRASE:
rc = nd_intel_test_cmd_master_set_pass(t,
@@ -1266,54 +1429,59 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
rc = nd_intel_test_cmd_master_secure_erase(t,
buf, buf_len, i);
break;
+ case NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO:
+ rc = nd_intel_test_cmd_fw_activate_dimminfo(
+ t, buf, buf_len, i);
+ break;
+ case NVDIMM_INTEL_FW_ACTIVATE_ARM:
+ rc = nd_intel_test_cmd_fw_activate_arm(
+ t, buf, buf_len, i);
+ break;
case ND_INTEL_ENABLE_LSS_STATUS:
rc = nd_intel_test_cmd_set_lss_status(t,
buf, buf_len);
break;
case ND_INTEL_FW_GET_INFO:
rc = nd_intel_test_get_fw_info(t, buf,
- buf_len, i - t->dcr_idx);
+ buf_len, i);
break;
case ND_INTEL_FW_START_UPDATE:
rc = nd_intel_test_start_update(t, buf,
- buf_len, i - t->dcr_idx);
+ buf_len, i);
break;
case ND_INTEL_FW_SEND_DATA:
rc = nd_intel_test_send_data(t, buf,
- buf_len, i - t->dcr_idx);
+ buf_len, i);
break;
case ND_INTEL_FW_FINISH_UPDATE:
rc = nd_intel_test_finish_fw(t, buf,
- buf_len, i - t->dcr_idx);
+ buf_len, i);
break;
case ND_INTEL_FW_FINISH_QUERY:
rc = nd_intel_test_finish_query(t, buf,
- buf_len, i - t->dcr_idx);
+ buf_len, i);
break;
case ND_INTEL_SMART:
rc = nfit_test_cmd_smart(buf, buf_len,
- &t->smart[i - t->dcr_idx]);
+ &t->smart[i]);
break;
case ND_INTEL_SMART_THRESHOLD:
rc = nfit_test_cmd_smart_threshold(buf,
buf_len,
- &t->smart_threshold[i -
- t->dcr_idx]);
+ &t->smart_threshold[i]);
break;
case ND_INTEL_SMART_SET_THRESHOLD:
rc = nfit_test_cmd_smart_set_threshold(buf,
buf_len,
- &t->smart_threshold[i -
- t->dcr_idx],
- &t->smart[i - t->dcr_idx],
+ &t->smart_threshold[i],
+ &t->smart[i],
&t->pdev.dev, t->dimm_dev[i]);
break;
case ND_INTEL_SMART_INJECT:
rc = nfit_test_cmd_smart_inject(buf,
buf_len,
- &t->smart_threshold[i -
- t->dcr_idx],
- &t->smart[i - t->dcr_idx],
+ &t->smart_threshold[i],
+ &t->smart[i],
&t->pdev.dev, t->dimm_dev[i]);
break;
default:
@@ -1353,9 +1521,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
if (!nd_desc)
return -ENOTTY;
- if (cmd == ND_CMD_CALL) {
+ if (cmd == ND_CMD_CALL && call_pkg->nd_family
+ == NVDIMM_BUS_FAMILY_NFIT) {
func = call_pkg->nd_command;
-
buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
buf = (void *) call_pkg->nd_payload;
@@ -1379,7 +1547,26 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
default:
return -ENOTTY;
}
- }
+ } else if (cmd == ND_CMD_CALL && call_pkg->nd_family
+ == NVDIMM_BUS_FAMILY_INTEL) {
+ func = call_pkg->nd_command;
+ buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
+ buf = (void *) call_pkg->nd_payload;
+
+ switch (func) {
+ case NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO:
+ rc = nvdimm_bus_intel_fw_activate_businfo(t,
+ buf, buf_len);
+ return rc;
+ case NVDIMM_BUS_INTEL_FW_ACTIVATE:
+ rc = nvdimm_bus_intel_fw_activate(t, buf,
+ buf_len);
+ return rc;
+ default:
+ return -ENOTTY;
+ }
+ } else if (cmd == ND_CMD_CALL)
+ return -ENOTTY;
if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask))
return -ENOTTY;
@@ -1805,6 +1992,7 @@ static void nfit_test0_setup(struct nfit_test *t)
struct acpi_nfit_flush_address *flush;
struct acpi_nfit_capabilities *pcap;
unsigned int offset = 0, i;
+ unsigned long *acpi_mask;
/*
* spa0 (interleave first half of dimm0 and dimm1, note storage
@@ -2507,10 +2695,10 @@ static void nfit_test0_setup(struct nfit_test *t)
set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en);
- set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en);
- set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
- set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
- set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en);
+ set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_dsm_mask);
+ set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_dsm_mask);
+ set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_dsm_mask);
+ set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_dsm_mask);
set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en);
@@ -2531,6 +2719,12 @@ static void nfit_test0_setup(struct nfit_test *t)
&acpi_desc->dimm_cmd_force_en);
set_bit(NVDIMM_INTEL_MASTER_SECURE_ERASE,
&acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO, &acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_FW_ACTIVATE_ARM, &acpi_desc->dimm_cmd_force_en);
+
+ acpi_mask = &acpi_desc->family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL];
+ set_bit(NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO, acpi_mask);
+ set_bit(NVDIMM_BUS_INTEL_FW_ACTIVATE, acpi_mask);
}
static void nfit_test1_setup(struct nfit_test *t)
@@ -2699,14 +2893,18 @@ static int nfit_ctl_test(struct device *dev)
struct acpi_nfit_desc *acpi_desc;
const u64 test_val = 0x0123456789abcdefULL;
unsigned long mask, cmd_size, offset;
- union {
- struct nd_cmd_get_config_size cfg_size;
- struct nd_cmd_clear_error clear_err;
- struct nd_cmd_ars_status ars_stat;
- struct nd_cmd_ars_cap ars_cap;
- char buf[sizeof(struct nd_cmd_ars_status)
- + sizeof(struct nd_ars_record)];
- } cmds;
+ struct nfit_ctl_test_cmd {
+ struct nd_cmd_pkg pkg;
+ union {
+ struct nd_cmd_get_config_size cfg_size;
+ struct nd_cmd_clear_error clear_err;
+ struct nd_cmd_ars_status ars_stat;
+ struct nd_cmd_ars_cap ars_cap;
+ struct nd_intel_bus_fw_activate_businfo fwa_info;
+ char buf[sizeof(struct nd_cmd_ars_status)
+ + sizeof(struct nd_ars_record)];
+ };
+ } cmd;
adev = devm_kzalloc(dev, sizeof(*adev), GFP_KERNEL);
if (!adev)
@@ -2731,11 +2929,15 @@ static int nfit_ctl_test(struct device *dev)
.module = THIS_MODULE,
.provider_name = "ACPI.NFIT",
.ndctl = acpi_nfit_ctl,
- .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA
- | 1UL << NFIT_CMD_ARS_INJECT_SET
- | 1UL << NFIT_CMD_ARS_INJECT_CLEAR
- | 1UL << NFIT_CMD_ARS_INJECT_GET,
+ .bus_family_mask = 1UL << NVDIMM_BUS_FAMILY_NFIT
+ | 1UL << NVDIMM_BUS_FAMILY_INTEL,
},
+ .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA
+ | 1UL << NFIT_CMD_ARS_INJECT_SET
+ | 1UL << NFIT_CMD_ARS_INJECT_CLEAR
+ | 1UL << NFIT_CMD_ARS_INJECT_GET,
+ .family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL] =
+ NVDIMM_BUS_INTEL_FW_ACTIVATE_CMDMASK,
.dev = &adev->dev,
};
@@ -2766,21 +2968,21 @@ static int nfit_ctl_test(struct device *dev)
/* basic checkout of a typical 'get config size' command */
- cmd_size = sizeof(cmds.cfg_size);
- cmds.cfg_size = (struct nd_cmd_get_config_size) {
+ cmd_size = sizeof(cmd.cfg_size);
+ cmd.cfg_size = (struct nd_cmd_get_config_size) {
.status = 0,
.config_size = SZ_128K,
.max_xfer = SZ_4K,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
- if (rc < 0 || cmd_rc || cmds.cfg_size.status != 0
- || cmds.cfg_size.config_size != SZ_128K
- || cmds.cfg_size.max_xfer != SZ_4K) {
+ if (rc < 0 || cmd_rc || cmd.cfg_size.status != 0
+ || cmd.cfg_size.config_size != SZ_128K
+ || cmd.cfg_size.max_xfer != SZ_4K) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
__func__, __LINE__, rc, cmd_rc);
return -EIO;
@@ -2789,14 +2991,14 @@ static int nfit_ctl_test(struct device *dev)
/* test ars_status with zero output */
cmd_size = offsetof(struct nd_cmd_ars_status, address);
- cmds.ars_stat = (struct nd_cmd_ars_status) {
+ cmd.ars_stat = (struct nd_cmd_ars_status) {
.out_length = 0,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2806,16 +3008,16 @@ static int nfit_ctl_test(struct device *dev)
/* test ars_cap with benign extended status */
- cmd_size = sizeof(cmds.ars_cap);
- cmds.ars_cap = (struct nd_cmd_ars_cap) {
+ cmd_size = sizeof(cmd.ars_cap);
+ cmd.ars_cap = (struct nd_cmd_ars_cap) {
.status = ND_ARS_PERSISTENT << 16,
};
offset = offsetof(struct nd_cmd_ars_cap, status);
- rc = setup_result(cmds.buf + offset, cmd_size - offset);
+ rc = setup_result(cmd.buf + offset, cmd_size - offset);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_CAP,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2825,19 +3027,19 @@ static int nfit_ctl_test(struct device *dev)
/* test ars_status with 'status' trimmed from 'out_length' */
- cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record);
- cmds.ars_stat = (struct nd_cmd_ars_status) {
+ cmd_size = sizeof(cmd.ars_stat) + sizeof(struct nd_ars_record);
+ cmd.ars_stat = (struct nd_cmd_ars_status) {
.out_length = cmd_size - 4,
};
- record = &cmds.ars_stat.records[0];
+ record = &cmd.ars_stat.records[0];
*record = (struct nd_ars_record) {
.length = test_val,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc || record->length != test_val) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2847,19 +3049,19 @@ static int nfit_ctl_test(struct device *dev)
/* test ars_status with 'Output (Size)' including 'status' */
- cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record);
- cmds.ars_stat = (struct nd_cmd_ars_status) {
+ cmd_size = sizeof(cmd.ars_stat) + sizeof(struct nd_ars_record);
+ cmd.ars_stat = (struct nd_cmd_ars_status) {
.out_length = cmd_size,
};
- record = &cmds.ars_stat.records[0];
+ record = &cmd.ars_stat.records[0];
*record = (struct nd_ars_record) {
.length = test_val,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc || record->length != test_val) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2869,15 +3071,15 @@ static int nfit_ctl_test(struct device *dev)
/* test extended status for get_config_size results in failure */
- cmd_size = sizeof(cmds.cfg_size);
- cmds.cfg_size = (struct nd_cmd_get_config_size) {
+ cmd_size = sizeof(cmd.cfg_size);
+ cmd.cfg_size = (struct nd_cmd_get_config_size) {
.status = 1 << 16,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc >= 0) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2886,16 +3088,46 @@ static int nfit_ctl_test(struct device *dev)
}
/* test clear error */
- cmd_size = sizeof(cmds.clear_err);
- cmds.clear_err = (struct nd_cmd_clear_error) {
+ cmd_size = sizeof(cmd.clear_err);
+ cmd.clear_err = (struct nd_cmd_clear_error) {
.length = 512,
.cleared = 512,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CLEAR_ERROR,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
+ if (rc < 0 || cmd_rc) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+ /* test firmware activate bus info */
+ cmd_size = sizeof(cmd.fwa_info);
+ cmd = (struct nfit_ctl_test_cmd) {
+ .pkg = {
+ .nd_command = NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO,
+ .nd_family = NVDIMM_BUS_FAMILY_INTEL,
+ .nd_size_out = cmd_size,
+ .nd_fw_size = cmd_size,
+ },
+ .fwa_info = {
+ .state = ND_INTEL_FWA_IDLE,
+ .capability = ND_INTEL_BUS_FWA_CAP_FWQUIESCE
+ | ND_INTEL_BUS_FWA_CAP_OSQUIESCE,
+ .activate_tmo = 1,
+ .cpu_quiesce_tmo = 1,
+ .io_quiesce_tmo = 1,
+ .max_quiesce_tmo = 1,
+ },
+ };
+ rc = setup_result(cmd.buf, cmd_size);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CALL,
+ &cmd, sizeof(cmd.pkg) + cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
__func__, __LINE__, rc, cmd_rc);
@@ -3052,7 +3284,7 @@ static struct platform_driver nfit_test_driver = {
.id_table = nfit_test_id,
};
-static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
enum INJECT {
INJECT_NONE,
@@ -3060,7 +3292,7 @@ enum INJECT {
INJECT_DST,
};
-static void mcsafe_test_init(char *dst, char *src, size_t size)
+static void copy_mc_test_init(char *dst, char *src, size_t size)
{
size_t i;
@@ -3069,7 +3301,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size)
src[i] = (char) i;
}
-static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
+static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
size_t size, unsigned long rem)
{
size_t i;
@@ -3090,12 +3322,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
return true;
}
-void mcsafe_test(void)
+void copy_mc_test(void)
{
char *inject_desc[] = { "none", "source", "destination" };
enum INJECT inj;
- if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
+ if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
pr_info("%s: run...\n", __func__);
} else {
pr_info("%s: disabled, skip.\n", __func__);
@@ -3113,31 +3345,31 @@ void mcsafe_test(void)
switch (inj) {
case INJECT_NONE:
- mcsafe_inject_src(NULL);
- mcsafe_inject_dst(NULL);
- dst = &mcsafe_buf[2048];
- src = &mcsafe_buf[1024 - i];
+ copy_mc_inject_src(NULL);
+ copy_mc_inject_dst(NULL);
+ dst = &copy_mc_buf[2048];
+ src = &copy_mc_buf[1024 - i];
expect = 0;
break;
case INJECT_SRC:
- mcsafe_inject_src(&mcsafe_buf[1024]);
- mcsafe_inject_dst(NULL);
- dst = &mcsafe_buf[2048];
- src = &mcsafe_buf[1024 - i];
+ copy_mc_inject_src(&copy_mc_buf[1024]);
+ copy_mc_inject_dst(NULL);
+ dst = &copy_mc_buf[2048];
+ src = &copy_mc_buf[1024 - i];
expect = 512 - i;
break;
case INJECT_DST:
- mcsafe_inject_src(NULL);
- mcsafe_inject_dst(&mcsafe_buf[2048]);
- dst = &mcsafe_buf[2048 - i];
- src = &mcsafe_buf[1024];
+ copy_mc_inject_src(NULL);
+ copy_mc_inject_dst(&copy_mc_buf[2048]);
+ dst = &copy_mc_buf[2048 - i];
+ src = &copy_mc_buf[1024];
expect = 512 - i;
break;
}
- mcsafe_test_init(dst, src, 512);
- rem = __memcpy_mcsafe(dst, src, 512);
- valid = mcsafe_test_validate(dst, src, 512, expect);
+ copy_mc_test_init(dst, src, 512);
+ rem = copy_mc_fragile(dst, src, 512);
+ valid = copy_mc_test_validate(dst, src, 512, expect);
if (rem == expect && valid)
continue;
pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
@@ -3149,8 +3381,8 @@ void mcsafe_test(void)
}
}
- mcsafe_inject_src(NULL);
- mcsafe_inject_dst(NULL);
+ copy_mc_inject_src(NULL);
+ copy_mc_inject_dst(NULL);
}
static __init int nfit_test_init(void)
@@ -3161,7 +3393,7 @@ static __init int nfit_test_init(void)
libnvdimm_test();
acpi_nfit_test();
device_dax_test();
- mcsafe_test();
+ copy_mc_test();
dax_pmem_test();
dax_pmem_core_test();
#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index db3c07beb9d1..b5f7a996c4d0 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -51,7 +51,7 @@ struct nd_cmd_translate_spa {
__u32 nfit_device_handle;
__u32 _reserved;
__u64 dpa;
- } __packed devices[0];
+ } __packed devices[];
} __packed;
@@ -74,7 +74,7 @@ struct nd_cmd_ars_err_inj_stat {
struct nd_error_stat_query_record {
__u64 err_inj_stat_spa_range_base;
__u64 err_inj_stat_spa_range_length;
- } __packed record[0];
+ } __packed record[];
} __packed;
#define ND_INTEL_SMART 1
@@ -180,7 +180,7 @@ struct nd_intel_fw_send_data {
__u32 context;
__u32 offset;
__u32 length;
- __u8 data[0];
+ __u8 data[];
	/* this field is not declared due to variable data from input */
/* __u32 status; */
} __packed;
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 8995092d541e..3b796dd5e577 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -523,8 +523,27 @@ static void *ida_random_fn(void *arg)
return NULL;
}
+static void *ida_leak_fn(void *arg)
+{
+ struct ida *ida = arg;
+ time_t s = time(NULL);
+ int i, ret;
+
+ rcu_register_thread();
+
+ do for (i = 0; i < 1000; i++) {
+ ret = ida_alloc_range(ida, 128, 128, GFP_KERNEL);
+ if (ret >= 0)
+ ida_free(ida, 128);
+ } while (time(NULL) < s + 2);
+
+ rcu_unregister_thread();
+ return NULL;
+}
+
void ida_thread_tests(void)
{
+ DEFINE_IDA(ida);
pthread_t threads[20];
int i;
@@ -536,6 +555,16 @@ void ida_thread_tests(void)
while (i--)
pthread_join(threads[i], NULL);
+
+ for (i = 0; i < ARRAY_SIZE(threads); i++)
+ if (pthread_create(&threads[i], NULL, ida_leak_fn, &ida)) {
+ perror("creating ida thread");
+ exit(1);
+ }
+
+ while (i--)
+ pthread_join(threads[i], NULL);
+ assert(ida_is_empty(&ida));
}
void ida_tests(void)
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index 4568248222ae..39867fd80c8f 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -22,4 +22,5 @@
#define __releases(x)
#define __must_hold(x)
+#define EXPORT_PER_CPU_SYMBOL_GPL(x)
#endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/local_lock.h b/tools/testing/radix-tree/linux/local_lock.h
new file mode 100644
index 000000000000..b3cf8b233ca4
--- /dev/null
+++ b/tools/testing/radix-tree/linux/local_lock.h
@@ -0,0 +1,8 @@
+#ifndef _LINUX_LOCAL_LOCK
+#define _LINUX_LOCAL_LOCK
+typedef struct { } local_lock_t;
+
+static inline void local_lock(local_lock_t *lock) { }
+static inline void local_unlock(local_lock_t *lock) { }
+#define INIT_LOCAL_LOCK(x) { }
+#endif
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 34dab4d18744..7ef7067e942c 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -56,8 +56,4 @@ int root_tag_get(struct radix_tree_root *root, unsigned int tag);
unsigned long node_maxindex(struct radix_tree_node *);
unsigned long shift_maxindex(unsigned int shift);
int radix_tree_cpu_dead(unsigned int cpu);
-struct radix_tree_preload {
- unsigned nr;
- struct radix_tree_node *nodes;
-};
extern struct radix_tree_preload radix_tree_preloads;
diff --git a/tools/testing/scatterlist/Makefile b/tools/testing/scatterlist/Makefile
index cbb003d9305e..c65233876622 100644
--- a/tools/testing/scatterlist/Makefile
+++ b/tools/testing/scatterlist/Makefile
@@ -14,7 +14,7 @@ targets: include $(TARGETS)
main: $(OFILES)
clean:
- $(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h linux/highmem.h linux/kmemleak.h asm/io.h
+ $(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h linux/highmem.h linux/kmemleak.h linux/slab.h asm/io.h
@rmdir asm
scatterlist.c: ../../../lib/scatterlist.c
@@ -28,4 +28,5 @@ include: ../../../include/linux/scatterlist.h
@touch asm/io.h
@touch linux/highmem.h
@touch linux/kmemleak.h
+ @touch linux/slab.h
@cp $< linux/scatterlist.h
diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h
index 6f9ac14aa800..6ae907f375d2 100644
--- a/tools/testing/scatterlist/linux/mm.h
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -114,6 +114,12 @@ static inline void *kmalloc(unsigned int size, unsigned int flags)
return malloc(size);
}
+static inline void *
+kmalloc_array(unsigned int n, unsigned int size, unsigned int flags)
+{
+ return malloc(n * size);
+}
+
#define kfree(x) free(x)
#define kmemleak_alloc(a, b, c, d)
@@ -122,4 +128,33 @@ static inline void *kmalloc(unsigned int size, unsigned int flags)
#define PageSlab(p) (0)
#define flush_kernel_dcache_page(p)
+#define MAX_ERRNO 4095
+
+#define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)
+
+static inline void * __must_check ERR_PTR(long error)
+{
+ return (void *) error;
+}
+
+static inline long __must_check PTR_ERR(__force const void *ptr)
+{
+ return (long) ptr;
+}
+
+static inline bool __must_check IS_ERR(__force const void *ptr)
+{
+ return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
+{
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+ else
+ return 0;
+}
+
+#define IS_ENABLED(x) (0)
+
#endif
diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index 0a1464181226..b2c7e9f7b8d3 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -5,6 +5,15 @@
#define MAX_PAGES (64)
+struct test {
+ int alloc_ret;
+ unsigned num_pages;
+ unsigned *pfn;
+ unsigned size;
+ unsigned int max_seg;
+ unsigned int expected_segments;
+};
+
static void set_pages(struct page **pages, const unsigned *array, unsigned num)
{
unsigned int i;
@@ -17,17 +26,32 @@ static void set_pages(struct page **pages, const unsigned *array, unsigned num)
#define pfn(...) (unsigned []){ __VA_ARGS__ }
+static void fail(struct test *test, struct sg_table *st, const char *cond)
+{
+ unsigned int i;
+
+ fprintf(stderr, "Failed on '%s'!\n\n", cond);
+
+ printf("size = %u, max segment = %u, expected nents = %u\nst->nents = %u, st->orig_nents= %u\n",
+ test->size, test->max_seg, test->expected_segments, st->nents,
+ st->orig_nents);
+
+ printf("%u input PFNs:", test->num_pages);
+ for (i = 0; i < test->num_pages; i++)
+ printf(" %x", test->pfn[i]);
+ printf("\n");
+
+ exit(1);
+}
+
+#define VALIDATE(cond, st, test) \
+ if (!(cond)) \
+ fail((test), (st), #cond);
+
int main(void)
{
const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT;
- struct test {
- int alloc_ret;
- unsigned num_pages;
- unsigned *pfn;
- unsigned size;
- unsigned int max_seg;
- unsigned int expected_segments;
- } *test, tests[] = {
+ struct test *test, tests[] = {
{ -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
{ -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 },
{ -EINVAL, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 },
@@ -55,20 +79,19 @@ int main(void)
for (i = 0, test = tests; test->expected_segments; test++, i++) {
struct page *pages[MAX_PAGES];
struct sg_table st;
- int ret;
+ struct scatterlist *sg;
set_pages(pages, test->pfn, test->num_pages);
- ret = __sg_alloc_table_from_pages(&st, pages, test->num_pages,
- 0, test->size, test->max_seg,
- GFP_KERNEL);
- assert(ret == test->alloc_ret);
+ sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
+ test->size, test->max_seg, NULL, 0, GFP_KERNEL);
+ assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
if (test->alloc_ret)
continue;
- assert(st.nents == test->expected_segments);
- assert(st.orig_nents == test->expected_segments);
+ VALIDATE(st.nents == test->expected_segments, &st, test);
+ VALIDATE(st.orig_nents == test->expected_segments, &st, test);
sg_free_table(&st);
}
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 2ff68702fd41..d9c283503159 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -6,6 +6,7 @@ TARGETS += breakpoints
TARGETS += capabilities
TARGETS += cgroup
TARGETS += clone3
+TARGETS += core
TARGETS += cpufreq
TARGETS += cpu-hotplug
TARGETS += drivers/dma-buf
@@ -15,6 +16,7 @@ TARGETS += filesystems
TARGETS += filesystems/binderfs
TARGETS += filesystems/epoll
TARGETS += firmware
+TARGETS += fpu
TARGETS += ftrace
TARGETS += futex
TARGETS += gpio
@@ -30,6 +32,7 @@ TARGETS += lkdtm
TARGETS += membarrier
TARGETS += memfd
TARGETS += memory-hotplug
+TARGETS += mincore
TARGETS += mount
TARGETS += mqueue
TARGETS += net
@@ -54,6 +57,7 @@ TARGETS += splice
TARGETS += static_keys
TARGETS += sync
TARGETS += sysctl
+TARGETS += tc-testing
TARGETS += timens
ifneq (1, $(quicktest))
TARGETS += timers
@@ -84,10 +88,10 @@ endif
# of the targets gets built.
FORCE_TARGETS ?=
-# Clear LDFLAGS and MAKEFLAGS if called from main
-# Makefile to avoid test build failures when test
-# Makefile doesn't have explicit build rules.
-ifeq (1,$(MAKELEVEL))
+# Clear LDFLAGS and MAKEFLAGS when implicit rules are missing. This provides
+# implicit rules to sub-test Makefiles which avoids build failures in test
+# Makefile that don't have explicit build rules.
+ifeq (,$(LINK.c))
override LDFLAGS =
override MAKEFLAGS =
endif
@@ -202,6 +206,7 @@ KSFT_INSTALL_PATH := $(abspath $(KSFT_INSTALL_PATH))
# Avoid changing the rest of the logic here and lib.mk.
INSTALL_PATH := $(KSFT_INSTALL_PATH)
ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
+TEST_LIST := $(INSTALL_PATH)/kselftest-list.txt
install: all
ifdef INSTALL_PATH
@@ -210,6 +215,8 @@ ifdef INSTALL_PATH
install -m 744 kselftest/module.sh $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/
+ install -m 744 run_kselftest.sh $(INSTALL_PATH)/
+ rm -f $(TEST_LIST)
@ret=1; \
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
@@ -218,41 +225,33 @@ ifdef INSTALL_PATH
ret=$$((ret * $$?)); \
done; exit $$ret;
- @# Ask all targets to emit their test scripts
- echo "#!/bin/sh" > $(ALL_SCRIPT)
- echo "BASE_DIR=\$$(realpath \$$(dirname \$$0))" >> $(ALL_SCRIPT)
- echo "cd \$$BASE_DIR" >> $(ALL_SCRIPT)
- echo ". ./kselftest/runner.sh" >> $(ALL_SCRIPT)
- echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
- echo "if [ \"\$$1\" = \"--summary\" ]; then" >> $(ALL_SCRIPT)
- echo " logfile=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT)
- echo " cat /dev/null > \$$logfile" >> $(ALL_SCRIPT)
- echo "fi" >> $(ALL_SCRIPT)
- @# While building run_kselftest.sh skip also non-existent TARGET dirs:
+ @# Ask all targets to emit their test scripts
+	@# While building kselftest-list.txt, also skip non-existent TARGET dirs:
@# they could be the result of a build failure and should NOT be
@# included in the generated runlist.
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
[ ! -d $(INSTALL_PATH)/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \
- echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
- echo "cd $$TARGET" >> $(ALL_SCRIPT); \
- echo -n "run_many" >> $(ALL_SCRIPT); \
echo -n "Emit Tests for $$TARGET\n"; \
- $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
- echo "" >> $(ALL_SCRIPT); \
- echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
+ $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \
+ -C $$TARGET emit_tests >> $(TEST_LIST); \
done;
-
- chmod u+x $(ALL_SCRIPT)
else
$(error Error: set INSTALL_PATH to use install)
endif
+FORMAT ?= .gz
+TAR_PATH = $(abspath ${INSTALL_PATH}/kselftest-packages/kselftest.tar${FORMAT})
+gen_tar: install
+ @mkdir -p ${INSTALL_PATH}/kselftest-packages/
+ @tar caf ${TAR_PATH} --exclude=kselftest-packages -C ${INSTALL_PATH} .
+ @echo "Created ${TAR_PATH}"
+
clean:
@for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
done;
-.PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean
+.PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean gen_tar
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 93b567d23c8b..2c9d012797a7 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -4,7 +4,7 @@
ARCH ?= $(shell uname -m 2>/dev/null || echo not)
ifneq (,$(filter $(ARCH),aarch64 arm64))
-ARM64_SUBTARGETS ?= tags signal
+ARM64_SUBTARGETS ?= tags signal pauth fp mte
else
ARM64_SUBTARGETS :=
endif
diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
new file mode 100644
index 000000000000..d66f76d2a650
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -0,0 +1,5 @@
+fpsimd-test
+sve-probe-vls
+sve-ptrace
+sve-test
+vlset
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
new file mode 100644
index 000000000000..a57009d3a0dc
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../../usr/include/
+TEST_GEN_PROGS := sve-ptrace sve-probe-vls
+TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress sve-test sve-stress vlset
+
+all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
+
+fpsimd-test: fpsimd-test.o
+ $(CC) -nostdlib $^ -o $@
+sve-ptrace: sve-ptrace.o sve-ptrace-asm.o
+sve-probe-vls: sve-probe-vls.o
+sve-test: sve-test.o
+ $(CC) -nostdlib $^ -o $@
+vlset: vlset.o
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/fp/README b/tools/testing/selftests/arm64/fp/README
new file mode 100644
index 000000000000..03e3dad865d8
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/README
@@ -0,0 +1,100 @@
+This directory contains a mix of tests integrated with kselftest and
+standalone stress tests.
+
+kselftest tests
+===============
+
+sve-probe-vls - Checks the SVE vector length enumeration interface
+sve-ptrace - Checks the SVE ptrace interface
+
+Running the non-kselftest tests
+===============================
+
+sve-stress performs an SVE context switch stress test, as described
+below.
+
+(The fpsimd-stress test works the same way; just substitute "fpsimd" for
+"sve" in the following commands.)
+
+
+The test runs until killed by the user.
+
+If no context switch error was detected, you will see output such as
+the following:
+
+$ ./sve-stress
+(wait for some time)
+^C
+Vector length: 512 bits
+PID: 1573
+Terminated by signal 15, no error, iterations=9467, signals=1014
+Vector length: 512 bits
+PID: 1575
+Terminated by signal 15, no error, iterations=9448, signals=1028
+Vector length: 512 bits
+PID: 1577
+Terminated by signal 15, no error, iterations=9436, signals=1039
+Vector length: 512 bits
+PID: 1579
+Terminated by signal 15, no error, iterations=9421, signals=1039
+Vector length: 512 bits
+PID: 1581
+Terminated by signal 15, no error, iterations=9403, signals=1039
+Vector length: 512 bits
+PID: 1583
+Terminated by signal 15, no error, iterations=9385, signals=1036
+Vector length: 512 bits
+PID: 1585
+Terminated by signal 15, no error, iterations=9376, signals=1039
+Vector length: 512 bits
+PID: 1587
+Terminated by signal 15, no error, iterations=9361, signals=1039
+Vector length: 512 bits
+PID: 1589
+Terminated by signal 15, no error, iterations=9350, signals=1039
+
+
+If an error was detected, details of the mismatch will be printed
+instead of "no error".
+
+Ideally, the test should be allowed to run for many minutes or hours
+to maximise test coverage.
+
+
+KVM stress testing
+==================
+
+To try to reproduce the bugs that we have been observing, sve-stress
+should be run in parallel in two KVM guests, while simultaneously
+running on the host.
+
+1) Start 2 guests, using the following command for each:
+
+$ lkvm run --console=virtio -pconsole=hvc0 --sve Image
+
+(Depending on the hardware GIC implementation, you may also need
+--irqchip=gicv3. Recent kvmtool versions select this automatically
+when appropriate, so try without the option first.)
+
+Kvmtool occupies the terminal until you kill it (Ctrl+A x),
+or until the guest terminates. It is therefore recommended to run
+each instance in a separate terminal (using screen, ssh, etc.). This
+allows multiple guests to be run in parallel while running other
+commands on the host.
+
+Within the guest, the host filesystem is accessible, mounted on /host.
+
+2) Run sve-stress on *each* guest with the vector length set to 32:
+guest$ ./vlset --inherit 32 ./sve-stress
+
+3) Run sve-stress on the host with the maximum vector length:
+host$ ./vlset --inherit --max ./sve-stress
+
+
+Again, the test should be allowed to run for many minutes or hours to
+maximise test coverage.
+
+If no error is detected, you will see output from each sve-stress
+instance similar to that illustrated above; otherwise details of the
+observed mismatches will be printed.
diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h
new file mode 100644
index 000000000000..a180851496ec
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/asm-offsets.h
@@ -0,0 +1,11 @@
+#define sa_sz 32
+#define sa_flags 8
+#define sa_handler 0
+#define sa_mask_sz 8
+#define SIGUSR1 10
+#define SIGTERM 15
+#define SIGINT 2
+#define SIGABRT 6
+#define SA_NODEFER 1073741824
+#define SA_SIGINFO 4
+#define ucontext_regs 184
diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h
new file mode 100644
index 000000000000..8944f2189206
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/assembler.h
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+
+#ifndef ASSEMBLER_H
+#define ASSEMBLER_H
+
+.macro __for from:req, to:req
+ .if (\from) == (\to)
+ _for__body %\from
+ .else
+ __for \from, %(\from) + ((\to) - (\from)) / 2
+ __for %(\from) + ((\to) - (\from)) / 2 + 1, \to
+ .endif
+.endm
+
+.macro _for var:req, from:req, to:req, insn:vararg
+ .macro _for__body \var:req
+ .noaltmacro
+ \insn
+ .altmacro
+ .endm
+
+ .altmacro
+ __for \from, \to
+ .noaltmacro
+
+ .purgem _for__body
+.endm
+
+.macro function name
+ .macro endfunction
+ .type \name, @function
+ .purgem endfunction
+ .endm
+\name:
+.endm
+
+.macro define_accessor name, num, insn
+ .macro \name\()_entry n
+ \insn \n, 1
+ ret
+ .endm
+
+function \name
+ adr x2, .L__accessor_tbl\@
+ add x2, x2, x0, lsl #3
+ br x2
+
+.L__accessor_tbl\@:
+ _for x, 0, (\num) - 1, \name\()_entry \x
+endfunction
+
+ .purgem \name\()_entry
+.endm
+
+#endif /* ! ASSEMBLER_H */
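
The define_accessor macro above expands into a branch table: the caller passes a register index in x0, and entry n runs the supplied load/store instruction on register n before returning. A rough C analogue of that dispatch, using hypothetical names purely for illustration:

#include <stddef.h>

/* One handler per register; each handler plays the role of one
 * "insn n, 1; ret" entry emitted by define_accessor. */
typedef void (*reg_io_fn)(void *buf);

/* Equivalent of the generated accessor: index the table by the register
 * number and jump to the matching entry ("adr + add + br" in assembly). */
static void dispatch(const reg_io_fn table[], size_t nregs,
		     size_t reg, void *buf)
{
	if (reg < nregs)
		table[reg](buf);
}
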
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-stress b/tools/testing/selftests/arm64/fp/fpsimd-stress
new file mode 100755
index 000000000000..781b5b022eaf
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fpsimd-stress
@@ -0,0 +1,60 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+ trap - INT TERM CHLD
+ set +e
+
+ if [ -n "$pids" ]; then
+ kill $pids
+ wait $pids
+ pids=
+ fi
+
+ if [ -n "$logs" ]; then
+ cat $logs
+ rm $logs
+ logs=
+ fi
+}
+
+interrupt () {
+ cleanup
+ exit 0
+}
+
+child_died () {
+ cleanup
+ exit 1
+}
+
+trap interrupt INT TERM EXIT
+trap child_died CHLD
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+ log=`mktemp`
+ logs=$logs\ $log
+ ./fpsimd-test >$log &
+ pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+ kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S
new file mode 100644
index 000000000000..1c5556bdd11d
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Simple FPSIMD context switch test
+// Repeatedly writes unique test patterns into each FPSIMD register
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do fpsimd-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+
+#define NVR 32
+#define MAXVL_B (128 / 8)
+
+.macro _vldr Vn:req, Xt:req
+ ld1 {v\Vn\().2d}, [x\Xt]
+.endm
+
+.macro _vstr Vn:req, Xt:req
+ st1 {v\Vn\().2d}, [x\Xt]
+.endm
+
+// Generate accessor functions to read/write programmatically selected
+// FPSIMD registers.
+// x0 is the register index to access
+// x1 is the memory address to read from (setv) or store to (getv)
+// All clobber x0-x2
+define_accessor setv, NVR, _vldr
+define_accessor getv, NVR, _vstr
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+ str x0, [sp, #-16]!
+
+ mov x0, #1 // STDOUT_FILENO
+ mov x1, sp
+ mov x2, #1
+ mov x8, #__NR_write
+ svc #0
+
+ add sp, sp, #16
+ ret
+endfunction
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+ mov x1, x0
+
+ mov x2, #0
+0: ldrb w3, [x0], #1
+ cbz w3, 1f
+ add x2, x2, #1
+ b 0b
+
+1: mov w0, #1 // STDOUT_FILENO
+ mov x8, #__NR_write
+ svc #0
+
+ ret
+endfunction
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+ .pushsection .rodata.str1.1, "aMS", 1
+.L__puts_literal\@: .string "\string"
+ .popsection
+
+ ldr x0, =.L__puts_literal\@
+ bl puts
+.endm
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+ mov x1, sp
+ str x30, [sp, #-32]! // Result can't be > 20 digits
+
+ mov x2, #0
+ strb w2, [x1, #-1]! // Write the NUL terminator
+
+ mov x2, #10
+0: udiv x3, x0, x2 // div-mod loop to generate the digits
+ msub x0, x3, x2, x0
+ add w0, w0, #'0'
+ strb w0, [x1, #-1]!
+ mov x0, x3
+ cbnz x3, 0b
+
+ ldrb w0, [x1]
+ cbnz w0, 1f
+ mov w0, #'0' // Print "0" for 0, not ""
+ strb w0, [x1, #-1]!
+
+1: mov x0, x1
+ bl puts
+
+ ldr x30, [sp], #32
+ ret
+endfunction
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+ mov x5, x30
+
+ bl putdec
+ mov x0, #'\n'
+ bl putc
+
+ ret x5
+endfunction
+
+
+// Clobbers x0-x3,x8
+function puthexb
+ str x30, [sp, #-0x10]!
+
+ mov w3, w0
+ lsr w0, w0, #4
+ bl puthexnibble
+ mov w0, w3
+
+ ldr x30, [sp], #0x10
+ // fall through to puthexnibble
+endfunction
+// Clobbers x0-x2,x8
+function puthexnibble
+ and w0, w0, #0xf
+ cmp w0, #10
+ blo 1f
+ add w0, w0, #'a' - ('9' + 1)
+1: add w0, w0, #'0'
+ b putc
+endfunction
+
+// x0=data in, x1=size in, clobbers x0-x5,x8
+function dumphex
+ str x30, [sp, #-0x10]!
+
+ mov x4, x0
+ mov x5, x1
+
+0: subs x5, x5, #1
+ b.lo 1f
+ ldrb w0, [x4], #1
+ bl puthexb
+ b 0b
+
+1: ldr x30, [sp], #0x10
+ ret
+endfunction
+
+// Declare some storage space to shadow the FPSIMD register contents:
+.pushsection .text
+.data
+.align 4
+vref:
+ .space MAXVL_B * NVR
+scratch:
+ .space MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+ cmp x2, #0
+ b.eq 1f
+0: ldrb w3, [x1], #1
+ strb w3, [x0], #1
+ subs x2, x2, #1
+ b.ne 0b
+1: ret
+endfunction
+
+// Generate a test pattern for storage in FPSIMD registers
+// x0: pid (16 bits)
+// x1: register number (6 bits)
+// x2: generation (4 bits)
+function pattern
+ orr w1, w0, w1, lsl #16
+ orr w2, w1, w2, lsl #28
+
+ ldr x0, =scratch
+ mov w1, #MAXVL_B / 4
+
+0: str w2, [x0], #4
+ add w2, w2, #(1 << 22)
+ subs w1, w1, #1
+ bne 0b
+
+ ret
+endfunction
+
+// Get the address of shadow data for FPSIMD V-register V<xn>
+.macro _adrv xd, xn, nrtmp
+ ldr \xd, =vref
+ mov x\nrtmp, #16
+ madd \xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a FPSIMD V-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_vreg
+ mov x4, x30
+
+ mov x6, x1
+ bl pattern
+ _adrv x0, x6, 2
+ mov x5, x0
+ ldr x1, =scratch
+ bl memcpy
+
+ mov x0, x6
+ mov x1, x5
+ bl setv
+
+ ret x4
+endfunction
+
+// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
+// Clobbers x1, x2.
+function memfill_ae
+ mov w2, #0xae
+ b memfill
+endfunction
+
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+ mov w2, #0
+endfunction
+ // fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+ cmp x1, #0
+ b.eq 1f
+
+0: strb w2, [x0], #1
+ subs x1, x1, #1
+ b.ne 0b
+
+1: ret
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+ cbz x2, 1f
+
+ mov x5, #0
+0: ldrb w3, [x0, x5]
+ ldrb w4, [x1, x5]
+ add x5, x5, #1
+ cmp w3, w4
+ b.ne barf
+ subs x2, x2, #1
+ b.ne 0b
+
+1: ret
+endfunction
+
+// Verify that a FPSIMD V-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x5.
+function check_vreg
+ mov x3, x30
+
+ _adrv x5, x0, 6
+ mov x4, x0
+ ldr x7, =scratch
+
+ mov x0, x7
+ mov x1, x6
+ bl memfill_ae
+
+ mov x0, x4
+ mov x1, x7
+ bl getv
+
+ mov x0, x5
+ mov x1, x7
+ mov x2, x6
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Any SVE register modified here can cause corruption in the main
+// thread -- but *only* the registers modified here.
+function irritator_handler
+ // Increment the irritation signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ // Corrupt some random V-regs
+ adr x0, .text + (irritator_handler - .text) / 16 * 16
+ movi v0.8b, #7
+ movi v9.16b, #9
+ movi v31.8b, #31
+
+ ret
+endfunction
+
+function terminate_handler
+ mov w21, w0
+ mov x20, x2
+
+ puts "Terminated by signal "
+ mov w0, w21
+ bl putdec
+ puts ", no error, iterations="
+ ldr x0, [x20, #ucontext_regs + 8 * 22]
+ bl putdec
+ puts ", signals="
+ ldr x0, [x20, #ucontext_regs + 8 * 23]
+ bl putdecn
+
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+ mov w4, w0
+ mov x5, x1
+ mov w6, w2
+
+ add x0, sp, #16
+ mov x1, #sa_sz
+ bl memclr
+
+ mov w0, w4
+ add x1, sp, #16
+ str w6, [x1, #sa_flags]
+ str x5, [x1, #sa_handler]
+ mov x2, #0
+ mov x3, #sa_mask_sz
+ mov x8, #__NR_rt_sigaction
+ svc #0
+
+ cbz w0, 1f
+
+ puts "sigaction failure\n"
+ b .Labort
+
+1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+ ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+ // Sanity-check and report the vector length
+
+ mov x19, #128
+ cmp x19, #128
+ b.lo 1f
+ cmp x19, #2048
+ b.hi 1f
+ tst x19, #(8 - 1)
+ b.eq 2f
+
+1: puts "Bad vector length: "
+ mov x0, x19
+ bl putdecn
+ b .Labort
+
+2: puts "Vector length:\t"
+ mov x0, x19
+ bl putdec
+ puts " bits\n"
+
+ // Obtain our PID, to ensure test pattern uniqueness between processes
+
+ mov x8, #__NR_getpid
+ svc #0
+ mov x20, x0
+
+ puts "PID:\t"
+ mov x0, x20
+ bl putdecn
+
+ mov x23, #0 // Irritation signal count
+
+ mov w0, #SIGINT
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, irritator_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov x22, #0 // generation number, increments per iteration
+.Ltest_loop:
+
+ mov x21, #0 // Set up V-regs & shadow with test pattern
+0: mov x0, x20
+ mov x1, x21
+ and x2, x22, #0xf
+ bl setup_vreg
+ add x21, x21, #1
+ cmp x21, #NVR
+ b.lo 0b
+
+// Unlike SVE state, FPSIMD state is preserved across SVC, so we can yield here:
+ mov x8, #__NR_sched_yield // Encourage preemption
+ svc #0
+
+ mov x21, #0
+0: mov x0, x21
+ bl check_vreg
+ add x21, x21, #1
+ cmp x21, #NVR
+ b.lo 0b
+
+ add x22, x22, #1
+ b .Ltest_loop
+
+.Labort:
+ mov x0, #0
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+endfunction
+
+function barf
+ mov x10, x0 // expected data
+ mov x11, x1 // actual data
+ mov x12, x2 // data size
+
+	puts	"Mismatch: PID="
+ mov x0, x20
+ bl putdec
+ puts ", iteration="
+ mov x0, x22
+ bl putdec
+ puts ", reg="
+ mov x0, x21
+ bl putdecn
+ puts "\tExpected ["
+ mov x0, x10
+ mov x1, x12
+ bl dumphex
+ puts "]\n\tGot ["
+ mov x0, x11
+ mov x1, x12
+ bl dumphex
+ puts "]\n"
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
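
The irritator_handler above bumps the signal counter held in x23 of the interrupted context; because the kernel restores the (possibly modified) signal frame on sigreturn, the increment survives the handler. A rough C equivalent of that trick, assuming the glibc aarch64 ucontext layout, might look like this (illustrative only):

#include <signal.h>
#include <string.h>
#include <ucontext.h>

/* Modify a general-purpose register (x23) in the saved context; sigreturn
 * then restores the modified value into the interrupted thread. aarch64 only. */
static void irritator(int sig, siginfo_t *si, void *uc_)
{
	ucontext_t *uc = uc_;

	(void)sig;
	(void)si;
	uc->uc_mcontext.regs[23]++;	/* x23: irritation signal count */
}

/* Mirrors the setsignal call for SIGUSR1 in the assembly version. */
static void install_irritator(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = irritator;
	sa.sa_flags = SA_SIGINFO | SA_NODEFER;
	sigaction(SIGUSR1, &sa, NULL);
}
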
diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
new file mode 100644
index 000000000000..b29cbc642c57
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2020 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/sigcontext.h>
+
+#include "../../kselftest.h"
+
+int main(int argc, char **argv)
+{
+ unsigned int vq;
+ int vl;
+ static unsigned int vqs[SVE_VQ_MAX];
+ unsigned int nvqs = 0;
+
+ ksft_print_header();
+ ksft_set_plan(2);
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+ ksft_exit_skip("SVE not available");
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ if (!sve_vl_valid(vl))
+ ksft_exit_fail_msg("VL %d invalid\n", vl);
+ vq = sve_vq_from_vl(vl);
+
+ if (!(nvqs < SVE_VQ_MAX))
+ ksft_exit_fail_msg("Too many VLs %u >= SVE_VQ_MAX\n",
+ nvqs);
+ vqs[nvqs++] = vq;
+ }
+ ksft_test_result_pass("Enumerated %d vector lengths\n", nvqs);
+ ksft_test_result_pass("All vector lengths valid\n");
+
+ /* Print out the vector lengths in ascending order: */
+ while (nvqs--)
+ ksft_print_msg("%u\n", 16 * vqs[nvqs]);
+
+ ksft_exit_pass();
+}
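
A note on the arithmetic the probe loop depends on: the value passed to and returned by prctl() is a vector length in bytes, and SVE vector lengths are multiples of 128 bits, so the vector quanta (VQ) count used above is simply VL divided by 16. The constants and helpers below mirror the <asm/sigcontext.h> definitions for illustration only:

/* Illustrative sketch of the VL <-> VQ arithmetic used by sve-probe-vls.c.
 * VL is the vector length in bytes; VQ counts 128-bit (16-byte) quanta. */
#define SVE_VQ_BYTES	16	/* 128 bits */
#define SVE_VQ_MIN	1
#define SVE_VQ_MAX	512

static inline unsigned int vq_from_vl(unsigned int vl)
{
	return vl / SVE_VQ_BYTES;	/* mirrors sve_vq_from_vl() */
}

static inline int vl_valid(unsigned int vl)
{
	/* mirrors sve_vl_valid(): a whole number of quanta, within range */
	return vl % SVE_VQ_BYTES == 0 &&
	       vl >= SVE_VQ_MIN * SVE_VQ_BYTES &&
	       vl <= SVE_VQ_MAX * SVE_VQ_BYTES;
}
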
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
new file mode 100644
index 000000000000..3e81f9fab574
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+#include <asm/unistd.h>
+
+.arch_extension sve
+
+.globl sve_store_patterns
+
+sve_store_patterns:
+ mov x1, x0
+
+ index z0.b, #0, #1
+ str q0, [x1]
+
+ mov w8, #__NR_getpid
+ svc #0
+ str q0, [x1, #0x10]
+
+ mov z1.d, z0.d
+ str q0, [x1, #0x20]
+
+ mov w8, #__NR_getpid
+ svc #0
+ str q0, [x1, #0x30]
+
+ mov z1.d, z0.d
+ str q0, [x1, #0x40]
+
+ ret
+
+.size sve_store_patterns, . - sve_store_patterns
+.type sve_store_patterns, @function
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
new file mode 100644
index 000000000000..b2282be6f938
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2020 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_SVE
+#define NT_ARM_SVE 0x405
+#endif
+
+/* Number of registers filled in by sve_store_patterns */
+#define NR_VREGS 5
+
+void sve_store_patterns(__uint128_t v[NR_VREGS]);
+
+static void dump(const void *buf, size_t size)
+{
+ size_t i;
+ const unsigned char *p = buf;
+
+ for (i = 0; i < size; ++i)
+ printf(" %.2x", *p++);
+}
+
+static int check_vregs(const __uint128_t vregs[NR_VREGS])
+{
+ int i;
+ int ok = 1;
+
+ for (i = 0; i < NR_VREGS; ++i) {
+ printf("# v[%d]:", i);
+ dump(&vregs[i], sizeof vregs[i]);
+ putchar('\n');
+
+ if (vregs[i] != vregs[0])
+ ok = 0;
+ }
+
+ return ok;
+}
+
+static int do_child(void)
+{
+ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+		ksft_exit_fail_msg("PTRACE_TRACEME: %s\n", strerror(errno));
+
+ if (raise(SIGSTOP))
+		ksft_exit_fail_msg("raise(SIGSTOP): %s\n", strerror(errno));
+
+ return EXIT_SUCCESS;
+}
+
+static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size)
+{
+ struct user_sve_header *sve;
+ void *p;
+ size_t sz = sizeof *sve;
+ struct iovec iov;
+
+ while (1) {
+ if (*size < sz) {
+ p = realloc(*buf, sz);
+ if (!p) {
+ errno = ENOMEM;
+ goto error;
+ }
+
+ *buf = p;
+ *size = sz;
+ }
+
+ iov.iov_base = *buf;
+ iov.iov_len = sz;
+ if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov))
+ goto error;
+
+ sve = *buf;
+ if (sve->size <= sz)
+ break;
+
+ sz = sve->size;
+ }
+
+ return sve;
+
+error:
+ return NULL;
+}
+
+static int set_sve(pid_t pid, const struct user_sve_header *sve)
+{
+ struct iovec iov;
+
+ iov.iov_base = (void *)sve;
+ iov.iov_len = sve->size;
+ return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov);
+}
+
+static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num,
+ unsigned int vlmax)
+{
+ unsigned int vq;
+ unsigned int i;
+
+ if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
+ ksft_exit_fail_msg("Dumping non-SVE register\n");
+
+ if (vlmax > sve->vl)
+ vlmax = sve->vl;
+
+ vq = sve_vq_from_vl(sve->vl);
+ for (i = 0; i < num; ++i) {
+ printf("# z%u:", i);
+ dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ vlmax);
+ printf("%s\n", vlmax == sve->vl ? "" : " ...");
+ }
+}
+
+static int do_parent(pid_t child)
+{
+ int ret = EXIT_FAILURE;
+ pid_t pid;
+ int status;
+ siginfo_t si;
+ void *svebuf = NULL, *newsvebuf;
+ size_t svebufsz = 0, newsvebufsz;
+ struct user_sve_header *sve, *new_sve;
+ struct user_fpsimd_state *fpsimd;
+ unsigned int i, j;
+ unsigned char *p;
+ unsigned int vq;
+
+ /* Attach to the child */
+ while (1) {
+ int sig;
+
+ pid = wait(&status);
+ if (pid == -1) {
+ perror("wait");
+ goto error;
+ }
+
+ /*
+ * This should never happen but it's hard to flag in
+ * the framework.
+ */
+ if (pid != child)
+ continue;
+
+ if (WIFEXITED(status) || WIFSIGNALED(status))
+ ksft_exit_fail_msg("Child died unexpectedly\n");
+
+ ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n",
+ status);
+ if (!WIFSTOPPED(status))
+ goto error;
+
+ sig = WSTOPSIG(status);
+
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ if (errno == EINVAL) {
+ sig = 0; /* bust group-stop */
+ goto cont;
+ }
+
+ ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+ strerror(errno));
+ goto error;
+ }
+
+ if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+ si.si_pid == pid)
+ break;
+
+ cont:
+ if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ ksft_test_result_fail("PTRACE_CONT: %s\n",
+ strerror(errno));
+ goto error;
+ }
+ }
+
+ sve = get_sve(pid, &svebuf, &svebufsz);
+ if (!sve) {
+ int e = errno;
+
+ ksft_test_result_fail("get_sve: %s\n", strerror(errno));
+ if (e == ESRCH)
+ goto disappeared;
+
+ goto error;
+ } else {
+ ksft_test_result_pass("get_sve\n");
+ }
+
+ ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
+ "FPSIMD registers\n");
+ if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
+ goto error;
+
+ fpsimd = (struct user_fpsimd_state *)((char *)sve +
+ SVE_PT_FPSIMD_OFFSET);
+ for (i = 0; i < 32; ++i) {
+ p = (unsigned char *)&fpsimd->vregs[i];
+
+ for (j = 0; j < sizeof fpsimd->vregs[i]; ++j)
+ p[j] = j;
+ }
+
+ if (set_sve(pid, sve)) {
+ int e = errno;
+
+ ksft_test_result_fail("set_sve(FPSIMD): %s\n",
+ strerror(errno));
+ if (e == ESRCH)
+ goto disappeared;
+
+ goto error;
+ }
+
+ vq = sve_vq_from_vl(sve->vl);
+
+ newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+ new_sve = newsvebuf = malloc(newsvebufsz);
+ if (!new_sve) {
+ errno = ENOMEM;
+ perror(NULL);
+ goto error;
+ }
+
+ *new_sve = *sve;
+ new_sve->flags &= ~SVE_PT_REGS_MASK;
+ new_sve->flags |= SVE_PT_REGS_SVE;
+ memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0),
+ 0, SVE_PT_SVE_ZREG_SIZE(vq));
+ new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+ if (set_sve(pid, new_sve)) {
+ int e = errno;
+
+ ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno));
+ if (e == ESRCH)
+ goto disappeared;
+
+ goto error;
+ }
+
+ new_sve = get_sve(pid, &newsvebuf, &newsvebufsz);
+ if (!new_sve) {
+ int e = errno;
+
+ ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno));
+ if (e == ESRCH)
+ goto disappeared;
+
+ goto error;
+ }
+
+ ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE,
+ "SVE registers\n");
+ if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
+ goto error;
+
+ dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]);
+
+ p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+ for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) {
+ unsigned char expected = i;
+
+ if (__BYTE_ORDER == __BIG_ENDIAN)
+ expected = sizeof fpsimd->vregs[0] - 1 - expected;
+
+ ksft_test_result(p[i] == expected, "p[%d] == expected\n", i);
+ if (p[i] != expected)
+ goto error;
+ }
+
+ ret = EXIT_SUCCESS;
+
+error:
+ kill(child, SIGKILL);
+
+disappeared:
+ return ret;
+}
+
+int main(void)
+{
+ int ret = EXIT_SUCCESS;
+ __uint128_t v[NR_VREGS];
+ pid_t child;
+
+ ksft_print_header();
+ ksft_set_plan(20);
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+ ksft_exit_skip("SVE not available\n");
+
+ sve_store_patterns(v);
+
+ if (!check_vregs(v))
+ ksft_exit_fail_msg("Initial check_vregs() failed\n");
+
+ child = fork();
+ if (!child)
+ return do_child();
+
+ if (do_parent(child))
+ ret = EXIT_FAILURE;
+
+ ksft_print_cnts();
+
+	return ret;
+}
diff --git a/tools/testing/selftests/arm64/fp/sve-stress b/tools/testing/selftests/arm64/fp/sve-stress
new file mode 100755
index 000000000000..24dd0922cc02
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-stress
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+ trap - INT TERM CHLD
+ set +e
+
+ if [ -n "$pids" ]; then
+ kill $pids
+ wait $pids
+ pids=
+ fi
+
+ if [ -n "$logs" ]; then
+ cat $logs
+ rm $logs
+ logs=
+ fi
+}
+
+interrupt () {
+ cleanup
+ exit 0
+}
+
+child_died () {
+ cleanup
+ exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+ log=`mktemp`
+ logs=$logs\ $log
+ ./sve-test >$log &
+ pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+ kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
new file mode 100644
index 000000000000..f95074c9b48b
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Simple Scalable Vector Extension context switch test
+// Repeatedly writes unique test patterns into each SVE register
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+
+#define NZR 32
+#define NPR 16
+#define MAXVL_B (2048 / 8)
+
+.arch_extension sve
+
+.macro _sve_ldr_v zt, xn
+ ldr z\zt, [x\xn]
+.endm
+
+.macro _sve_str_v zt, xn
+ str z\zt, [x\xn]
+.endm
+
+.macro _sve_ldr_p pt, xn
+ ldr p\pt, [x\xn]
+.endm
+
+.macro _sve_str_p pt, xn
+ str p\pt, [x\xn]
+.endm
+
+// Generate accessor functions to read/write programmatically selected
+// SVE registers.
+// x0 is the register index to access
+// x1 is the memory address to read from (setz,setp) or store to (getz,getp)
+// All clobber x0-x2
+define_accessor setz, NZR, _sve_ldr_v
+define_accessor getz, NZR, _sve_str_v
+define_accessor setp, NPR, _sve_ldr_p
+define_accessor getp, NPR, _sve_str_p
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+ str x0, [sp, #-16]!
+
+ mov x0, #1 // STDOUT_FILENO
+ mov x1, sp
+ mov x2, #1
+ mov x8, #__NR_write
+ svc #0
+
+ add sp, sp, #16
+ ret
+endfunction
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+ mov x1, x0
+
+ mov x2, #0
+0: ldrb w3, [x0], #1
+ cbz w3, 1f
+ add x2, x2, #1
+ b 0b
+
+1: mov w0, #1 // STDOUT_FILENO
+ mov x8, #__NR_write
+ svc #0
+
+ ret
+endfunction
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+ .pushsection .rodata.str1.1, "aMS", 1
+.L__puts_literal\@: .string "\string"
+ .popsection
+
+ ldr x0, =.L__puts_literal\@
+ bl puts
+.endm
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+ mov x1, sp
+ str x30, [sp, #-32]! // Result can't be > 20 digits
+
+ mov x2, #0
+ strb w2, [x1, #-1]! // Write the NUL terminator
+
+ mov x2, #10
+0: udiv x3, x0, x2 // div-mod loop to generate the digits
+ msub x0, x3, x2, x0
+ add w0, w0, #'0'
+ strb w0, [x1, #-1]!
+ mov x0, x3
+ cbnz x3, 0b
+
+ ldrb w0, [x1]
+ cbnz w0, 1f
+ mov w0, #'0' // Print "0" for 0, not ""
+ strb w0, [x1, #-1]!
+
+1: mov x0, x1
+ bl puts
+
+ ldr x30, [sp], #32
+ ret
+endfunction
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+ mov x5, x30
+
+ bl putdec
+ mov x0, #'\n'
+ bl putc
+
+ ret x5
+endfunction
+
+// Clobbers x0-x3,x8
+function puthexb
+ str x30, [sp, #-0x10]!
+
+ mov w3, w0
+ lsr w0, w0, #4
+ bl puthexnibble
+ mov w0, w3
+
+ ldr x30, [sp], #0x10
+ // fall through to puthexnibble
+endfunction
+// Clobbers x0-x2,x8
+function puthexnibble
+ and w0, w0, #0xf
+ cmp w0, #10
+ blo 1f
+ add w0, w0, #'a' - ('9' + 1)
+1: add w0, w0, #'0'
+ b putc
+endfunction
+
+// x0=data in, x1=size in, clobbers x0-x5,x8
+function dumphex
+ str x30, [sp, #-0x10]!
+
+ mov x4, x0
+ mov x5, x1
+
+0: subs x5, x5, #1
+ b.lo 1f
+ ldrb w0, [x4], #1
+ bl puthexb
+ b 0b
+
+1: ldr x30, [sp], #0x10
+ ret
+endfunction
+
+// Declare some storage space to shadow the SVE register contents:
+.pushsection .text
+.data
+.align 4
+zref:
+ .space MAXVL_B * NZR
+pref:
+ .space MAXVL_B / 8 * NPR
+ffrref:
+ .space MAXVL_B / 8
+scratch:
+ .space MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+ cmp x2, #0
+ b.eq 1f
+0: ldrb w3, [x1], #1
+ strb w3, [x0], #1
+ subs x2, x2, #1
+ b.ne 0b
+1: ret
+endfunction
+
+// Generate a test pattern for storage in SVE registers
+// x0: pid (16 bits)
+// x1: register number (6 bits)
+// x2: generation (4 bits)
+
+// These values are used to construct a 32-bit pattern that is repeated in the
+// scratch buffer as many times as will fit:
+// bits 31:28 generation number (increments once per test_loop)
+// bits 27:22 32-bit lane index
+// bits 21:16 register number
+// bits 15: 0 pid
+
+function pattern
+ orr w1, w0, w1, lsl #16
+ orr w2, w1, w2, lsl #28
+
+ ldr x0, =scratch
+ mov w1, #MAXVL_B / 4
+
+0: str w2, [x0], #4
+ add w2, w2, #(1 << 22)
+ subs w1, w1, #1
+ bne 0b
+
+ ret
+endfunction
+
+// Get the address of shadow data for SVE Z-register Z<xn>
+.macro _adrz xd, xn, nrtmp
+ ldr \xd, =zref
+ rdvl x\nrtmp, #1
+ madd \xd, x\nrtmp, \xn, \xd
+.endm
+
+// Get the address of shadow data for SVE P-register P<xn - NZR>
+.macro _adrp xd, xn, nrtmp
+ ldr \xd, =pref
+ rdvl x\nrtmp, #1
+ lsr x\nrtmp, x\nrtmp, #3
+ sub \xn, \xn, #NZR
+ madd \xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a SVE Z-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_zreg
+ mov x4, x30
+
+ mov x6, x1
+ bl pattern
+ _adrz x0, x6, 2
+ mov x5, x0
+ ldr x1, =scratch
+ bl memcpy
+
+ mov x0, x6
+ mov x1, x5
+ bl setz
+
+ ret x4
+endfunction
+
+// Set up test pattern in a SVE P-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_preg
+ mov x4, x30
+
+ mov x6, x1
+ bl pattern
+ _adrp x0, x6, 2
+ mov x5, x0
+ ldr x1, =scratch
+ bl memcpy
+
+ mov x0, x6
+ mov x1, x5
+ bl setp
+
+ ret x4
+endfunction
+
+// Set up test pattern in the FFR
+// x0: pid
+// x2: generation
+// Beware: corrupts P0.
+function setup_ffr
+ mov x4, x30
+
+ bl pattern
+ ldr x0, =ffrref
+ ldr x1, =scratch
+ rdvl x2, #1
+ lsr x2, x2, #3
+ bl memcpy
+
+ mov x0, #0
+ ldr x1, =ffrref
+ bl setp
+
+ wrffr p0.b
+
+ ret x4
+endfunction
+
+// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
+// Clobbers x1, x2.
+function memfill_ae
+ mov w2, #0xae
+ b memfill
+endfunction
+
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+ mov w2, #0
+endfunction
+ // fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+ cmp x1, #0
+ b.eq 1f
+
+0: strb w2, [x0], #1
+ subs x1, x1, #1
+ b.ne 0b
+
+1: ret
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+ cbz x2, 2f
+
+ stp x0, x1, [sp, #-0x20]!
+ str x2, [sp, #0x10]
+
+ mov x5, #0
+0: ldrb w3, [x0, x5]
+ ldrb w4, [x1, x5]
+ add x5, x5, #1
+ cmp w3, w4
+ b.ne 1f
+ subs x2, x2, #1
+ b.ne 0b
+
+1: ldr x2, [sp, #0x10]
+ ldp x0, x1, [sp], #0x20
+ b.ne barf
+
+2: ret
+endfunction
+
+// Verify that a SVE Z-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x7.
+function check_zreg
+ mov x3, x30
+
+ _adrz x5, x0, 6
+ mov x4, x0
+ ldr x7, =scratch
+
+ mov x0, x7
+ mov x1, x6
+ bl memfill_ae
+
+ mov x0, x4
+ mov x1, x7
+ bl getz
+
+ mov x0, x5
+ mov x1, x7
+ mov x2, x6
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Verify that a SVE P-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x7.
+function check_preg
+ mov x3, x30
+
+ _adrp x5, x0, 6
+ mov x4, x0
+ ldr x7, =scratch
+
+ mov x0, x7
+ mov x1, x6
+ bl memfill_ae
+
+ mov x0, x4
+ mov x1, x7
+ bl getp
+
+ mov x0, x5
+ mov x1, x7
+ mov x2, x6
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Verify that the FFR matches its shadow in memory, else abort
+// Beware -- corrupts P0.
+// Clobbers x0-x5.
+function check_ffr
+ mov x3, x30
+
+ ldr x4, =scratch
+ rdvl x5, #1
+ lsr x5, x5, #3
+
+ mov x0, x4
+ mov x1, x5
+ bl memfill_ae
+
+ rdffr p0.b
+ mov x0, #0
+ mov x1, x4
+ bl getp
+
+ ldr x0, =ffrref
+ mov x1, x4
+ mov x2, x5
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Any SVE register modified here can cause corruption in the main
+// thread -- but *only* the registers modified here.
+function irritator_handler
+ // Increment the irritation signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ // Corrupt some random Z-regs
+ adr x0, .text + (irritator_handler - .text) / 16 * 16
+ movi v0.8b, #1
+ movi v9.16b, #2
+ movi v31.8b, #3
+ // And P0
+ rdffr p0.b
+ // And FFR
+ wrffr p15.b
+
+ ret
+endfunction
+
+function terminate_handler
+ mov w21, w0
+ mov x20, x2
+
+ puts "Terminated by signal "
+ mov w0, w21
+ bl putdec
+ puts ", no error, iterations="
+ ldr x0, [x20, #ucontext_regs + 8 * 22]
+ bl putdec
+ puts ", signals="
+ ldr x0, [x20, #ucontext_regs + 8 * 23]
+ bl putdecn
+
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+ mov w4, w0
+ mov x5, x1
+ mov w6, w2
+
+ add x0, sp, #16
+ mov x1, #sa_sz
+ bl memclr
+
+ mov w0, w4
+ add x1, sp, #16
+ str w6, [x1, #sa_flags]
+ str x5, [x1, #sa_handler]
+ mov x2, #0
+ mov x3, #sa_mask_sz
+ mov x8, #__NR_rt_sigaction
+ svc #0
+
+ cbz w0, 1f
+
+ puts "sigaction failure\n"
+ b .Labort
+
+1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+ ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+ // Sanity-check and report the vector length
+
+ rdvl x19, #8
+ cmp x19, #128
+ b.lo 1f
+ cmp x19, #2048
+ b.hi 1f
+ tst x19, #(8 - 1)
+ b.eq 2f
+
+1: puts "Bad vector length: "
+ mov x0, x19
+ bl putdecn
+ b .Labort
+
+2: puts "Vector length:\t"
+ mov x0, x19
+ bl putdec
+ puts " bits\n"
+
+ // Obtain our PID, to ensure test pattern uniqueness between processes
+
+ mov x8, #__NR_getpid
+ svc #0
+ mov x20, x0
+
+ puts "PID:\t"
+ mov x0, x20
+ bl putdecn
+
+ mov x23, #0 // Irritation signal count
+
+ mov w0, #SIGINT
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, irritator_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov x22, #0 // generation number, increments per iteration
+.Ltest_loop:
+ rdvl x0, #8
+ cmp x0, x19
+ b.ne vl_barf
+
+ mov x21, #0 // Set up Z-regs & shadow with test pattern
+0: mov x0, x20
+ mov x1, x21
+ and x2, x22, #0xf
+ bl setup_zreg
+ add x21, x21, #1
+ cmp x21, #NZR
+ b.lo 0b
+
+ mov x0, x20 // Set up FFR & shadow with test pattern
+ mov x1, #NZR + NPR
+ and x2, x22, #0xf
+ bl setup_ffr
+
+0: mov x0, x20 // Set up P-regs & shadow with test pattern
+ mov x1, x21
+ and x2, x22, #0xf
+ bl setup_preg
+ add x21, x21, #1
+ cmp x21, #NZR + NPR
+ b.lo 0b
+
+// Can't do this when SVE state is volatile across SVC:
+// mov x8, #__NR_sched_yield // Encourage preemption
+// svc #0
+
+ mov x21, #0
+0: mov x0, x21
+ bl check_zreg
+ add x21, x21, #1
+ cmp x21, #NZR
+ b.lo 0b
+
+0: mov x0, x21
+ bl check_preg
+ add x21, x21, #1
+ cmp x21, #NZR + NPR
+ b.lo 0b
+
+ bl check_ffr
+
+ add x22, x22, #1
+ b .Ltest_loop
+
+.Labort:
+ mov x0, #0
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+endfunction
+
+function barf
+// fpsimd.c activity log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// end hack
+ mov x10, x0 // expected data
+ mov x11, x1 // actual data
+ mov x12, x2 // data size
+
+	puts	"Mismatch: PID="
+ mov x0, x20
+ bl putdec
+ puts ", iteration="
+ mov x0, x22
+ bl putdec
+ puts ", reg="
+ mov x0, x21
+ bl putdecn
+ puts "\tExpected ["
+ mov x0, x10
+ mov x1, x12
+ bl dumphex
+ puts "]\n\tGot ["
+ mov x0, x11
+ mov x1, x12
+ bl dumphex
+ puts "]\n"
+
+ mov x8, #__NR_getpid
+ svc #0
+// fpsimd.c activity log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// ^ end of hack
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+// mov x8, #__NR_exit
+// mov x1, #1
+// svc #0
+endfunction
+
+function vl_barf
+ mov x10, x0
+
+ puts "Bad active VL: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
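
The test-pattern encoding described in the comments above (and shared by the pattern helper in fpsimd-test.S) can be restated in C. This is an illustrative restatement of the documented bit layout, not code from the patch:

#include <stdint.h>

/*
 * Test-pattern word built by the pattern function:
 *   bits 31:28  generation (increments once per test loop)
 *   bits 27:22  32-bit lane index within the register
 *   bits 21:16  register number
 *   bits 15:0   PID
 */
static inline uint32_t pattern_word(uint16_t pid, unsigned int regno,
				    unsigned int generation, unsigned int lane)
{
	return ((uint32_t)(generation & 0xf) << 28) |
	       ((uint32_t)(lane & 0x3f) << 22) |
	       ((uint32_t)(regno & 0x3f) << 16) |
	       pid;
}
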
diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c
new file mode 100644
index 000000000000..308d27a68226
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/vlset.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2019 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
+
+static int inherit = 0;
+static int no_inherit = 0;
+static int force = 0;
+static unsigned long vl;
+
+static const struct option options[] = {
+ { "force", no_argument, NULL, 'f' },
+ { "inherit", no_argument, NULL, 'i' },
+ { "max", no_argument, NULL, 'M' },
+ { "no-inherit", no_argument, &no_inherit, 1 },
+ { "help", no_argument, NULL, '?' },
+ {}
+};
+
+static char const *program_name;
+
+static int parse_options(int argc, char **argv)
+{
+ int c;
+ char *rest;
+
+ program_name = strrchr(argv[0], '/');
+ if (program_name)
+ ++program_name;
+ else
+ program_name = argv[0];
+
+ while ((c = getopt_long(argc, argv, "Mfhi", options, NULL)) != -1)
+ switch (c) {
+ case 'M': vl = SVE_VL_MAX; break;
+ case 'f': force = 1; break;
+ case 'i': inherit = 1; break;
+ case 0: break;
+ default: goto error;
+ }
+
+ if (inherit && no_inherit)
+ goto error;
+
+ if (!vl) {
+ /* vector length */
+ if (optind >= argc)
+ goto error;
+
+ errno = 0;
+ vl = strtoul(argv[optind], &rest, 0);
+ if (*rest) {
+ vl = ULONG_MAX;
+ errno = EINVAL;
+ }
+ if (vl == ULONG_MAX && errno) {
+ fprintf(stderr, "%s: %s: %s\n",
+ program_name, argv[optind], strerror(errno));
+ goto error;
+ }
+
+ ++optind;
+ }
+
+ /* command */
+ if (optind >= argc)
+ goto error;
+
+ return 0;
+
+error:
+ fprintf(stderr,
+ "Usage: %s [-f | --force] "
+ "[-i | --inherit | --no-inherit] "
+ "{-M | --max | <vector length>} "
+ "<command> [<arguments> ...]\n",
+ program_name);
+ return -1;
+}
+
+int main(int argc, char **argv)
+{
+ int ret = 126; /* same as sh(1) command-not-executable error */
+ long flags;
+ char *path;
+ int t, e;
+
+ if (parse_options(argc, argv))
+ return 2; /* same as sh(1) builtin incorrect-usage */
+
+ if (vl & ~(vl & PR_SVE_VL_LEN_MASK)) {
+ fprintf(stderr, "%s: Invalid vector length %lu\n",
+ program_name, vl);
+ return 2; /* same as sh(1) builtin incorrect-usage */
+ }
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
+ fprintf(stderr, "%s: Scalable Vector Extension not present\n",
+ program_name);
+
+ if (!force)
+ goto error;
+
+ fputs("Going ahead anyway (--force): "
+ "This is a debug option. Don't rely on it.\n",
+ stderr);
+ }
+
+ flags = PR_SVE_SET_VL_ONEXEC;
+ if (inherit)
+ flags |= PR_SVE_VL_INHERIT;
+
+ t = prctl(PR_SVE_SET_VL, vl | flags);
+ if (t < 0) {
+ fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n",
+ program_name, strerror(errno));
+ goto error;
+ }
+
+ t = prctl(PR_SVE_GET_VL);
+ if (t == -1) {
+ fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n",
+ program_name, strerror(errno));
+ goto error;
+ }
+ flags = PR_SVE_VL_LEN_MASK;
+ flags = t & ~flags;
+
+ assert(optind < argc);
+ path = argv[optind];
+
+ execvp(path, &argv[optind]);
+ e = errno;
+ if (errno == ENOENT)
+ ret = 127; /* same as sh(1) not-found error */
+ fprintf(stderr, "%s: %s: %s\n", program_name, path, strerror(e));
+
+error:
+ return ret; /* same as sh(1) not-executable error */
+}
diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
new file mode 100644
index 000000000000..bc3ac63f3314
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -0,0 +1,6 @@
+check_buffer_fill
+check_tags_inclusion
+check_child_memory
+check_mmap_options
+check_ksm_options
+check_user_mem
diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile
new file mode 100644
index 000000000000..2480226dfe57
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020 ARM Limited
+
+CFLAGS += -std=gnu99 -I.
+SRCS := $(filter-out mte_common_util.c,$(wildcard *.c))
+PROGS := $(patsubst %.c,%,$(SRCS))
+
+# Add the MTE compiler option
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),)
+CFLAGS += -march=armv8.5-a+memtag
+endif
+
+# Check whether the toolchain accepts the MTE flags
+mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+
+ifeq ($(mte_cc_support),1)
+# Generated binaries to be installed by top KSFT script
+TEST_GEN_PROGS := $(PROGS)
+
+# Get Kernel headers installed and use them.
+KSFT_KHDR_INSTALL := 1
+endif
+
+# Include KSFT lib.mk.
+include ../../lib.mk
+
+ifeq ($(mte_cc_support),1)
+$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S
+endif
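
The MTE tests below juggle pointer tags with helpers such as MT_FETCH_TAG(), MT_CLEAR_TAG() and MT_ALIGN_UP() from mte_def.h, which is not part of this diff. Architecturally, the 4-bit logical tag lives in address bits 59:56 and tag checks are performed per 16-byte granule; the stand-ins below are written under that assumption for illustration and are not the real mte_def.h definitions:

#include <stdint.h>

/* Illustrative stand-ins for the mte_def.h helpers used by the MTE tests. */
#define MT_GRANULE_SIZE		16UL	/* tag granule in bytes */
#define MT_TAG_SHIFT		56	/* logical tag occupies bits 59:56 */
#define MT_TAG_MASK		0xFUL

static inline uintptr_t mt_clear_tag(uintptr_t addr)
{
	return addr & ~(MT_TAG_MASK << MT_TAG_SHIFT);
}

static inline unsigned int mt_fetch_tag(uintptr_t addr)
{
	return (addr >> MT_TAG_SHIFT) & MT_TAG_MASK;
}

static inline uintptr_t mt_align_up(uintptr_t size)
{
	return (size + MT_GRANULE_SIZE - 1) & ~(MT_GRANULE_SIZE - 1);
}
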
diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
new file mode 100644
index 000000000000..242635d79035
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define OVERFLOW_RANGE MT_GRANULE_SIZE
+
+static int sizes[] = {
+ 1, 555, 1033, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+	/* page size - 1 */ 0, /* page size */ 0, /* page size + 1 */ 0
+};
+
+enum mte_block_test_alloc {
+ UNTAGGED_TAGGED,
+ TAGGED_UNTAGGED,
+ TAGGED_TAGGED,
+ BLOCK_ALLOC_MAX,
+};
+
+static int check_buffer_by_byte(int mem_type, int mode)
+{
+ char *ptr;
+ int i, j, item;
+ bool err;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = sizeof(sizes)/sizeof(int);
+
+ for (i = 0; i < item; i++) {
+ ptr = (char *)mte_allocate_memory(sizes[i], mem_type, 0, true);
+ if (check_allocated_memory(ptr, sizes[i], mem_type, true) != KSFT_PASS)
+ return KSFT_FAIL;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i]);
+ /* Set some value in tagged memory */
+ for (j = 0; j < sizes[i]; j++)
+ ptr[j] = '1';
+ mte_wait_after_trig();
+ err = cur_mte_cxt.fault_valid;
+ /* Check the buffer whether it is filled. */
+ for (j = 0; j < sizes[i] && !err; j++) {
+ if (ptr[j] != '1')
+ err = true;
+ }
+ mte_free_memory((void *)ptr, sizes[i], mem_type, true);
+
+ if (err)
+ break;
+ }
+ if (!err)
+ return KSFT_PASS;
+ else
+ return KSFT_FAIL;
+}
+
+static int check_buffer_underflow_by_byte(int mem_type, int mode,
+ int underflow_range)
+{
+ char *ptr;
+ int i, j, item, last_index;
+ bool err;
+ char *und_ptr = NULL;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = sizeof(sizes)/sizeof(int);
+ for (i = 0; i < item; i++) {
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
+ underflow_range, 0);
+ if (check_allocated_memory_range(ptr, sizes[i], mem_type,
+ underflow_range, 0) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -underflow_range);
+ last_index = 0;
+ /* Set some value in tagged memory and make the buffer underflow */
+ for (j = sizes[i] - 1; (j >= -underflow_range) &&
+ (cur_mte_cxt.fault_valid == false); j--) {
+ ptr[j] = '1';
+ last_index = j;
+ }
+ mte_wait_after_trig();
+ err = false;
+ /* Check whether the buffer is filled */
+ for (j = 0; j < sizes[i]; j++) {
+ if (ptr[j] != '1') {
+ err = true;
+ ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+ j, ptr);
+ break;
+ }
+ }
+ if (err)
+ goto check_buffer_underflow_by_byte_err;
+
+ switch (mode) {
+ case MTE_NONE_ERR:
+ if (cur_mte_cxt.fault_valid == true || last_index != -underflow_range) {
+ err = true;
+ break;
+ }
+			/* There was no fault, so the underflow area should be filled */
+ und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr - underflow_range);
+ for (j = 0 ; j < underflow_range; j++) {
+ if (und_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_ASYNC_ERR:
+ /* Imprecise fault should occur otherwise return error */
+ if (cur_mte_cxt.fault_valid == false) {
+ err = true;
+ break;
+ }
+ /*
+ * The imprecise fault is checked after the write to the buffer,
+ * so the underflow area before the fault should be filled.
+ */
+ und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+ for (j = last_index ; j < 0 ; j++) {
+ if (und_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_SYNC_ERR:
+ /* Precise fault should occur otherwise return error */
+ if (!cur_mte_cxt.fault_valid || (last_index != (-1))) {
+ err = true;
+ break;
+ }
+ /* Underflow area should not be filled */
+ und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+ if (und_ptr[-1] == '1')
+ err = true;
+ break;
+ default:
+ err = true;
+ break;
+ }
+check_buffer_underflow_by_byte_err:
+ mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, underflow_range, 0);
+ if (err)
+ break;
+ }
+ return (err ? KSFT_FAIL : KSFT_PASS);
+}
+
+static int check_buffer_overflow_by_byte(int mem_type, int mode,
+ int overflow_range)
+{
+ char *ptr;
+ int i, j, item, last_index;
+ bool err;
+ size_t tagged_size, overflow_size;
+ char *over_ptr = NULL;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = sizeof(sizes)/sizeof(int);
+ for (i = 0; i < item; i++) {
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
+ 0, overflow_range);
+ if (check_allocated_memory_range(ptr, sizes[i], mem_type,
+ 0, overflow_range) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ tagged_size = MT_ALIGN_UP(sizes[i]);
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i] + overflow_range);
+
+		/* Set some value in tagged memory and make the buffer overflow */
+ for (j = 0, last_index = 0 ; (j < (sizes[i] + overflow_range)) &&
+ (cur_mte_cxt.fault_valid == false); j++) {
+ ptr[j] = '1';
+ last_index = j;
+ }
+ mte_wait_after_trig();
+ err = false;
+ /* Check whether the buffer is filled */
+ for (j = 0; j < sizes[i]; j++) {
+ if (ptr[j] != '1') {
+ err = true;
+ ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+ j, ptr);
+ break;
+ }
+ }
+ if (err)
+ goto check_buffer_overflow_by_byte_err;
+
+ overflow_size = overflow_range - (tagged_size - sizes[i]);
+
+ switch (mode) {
+ case MTE_NONE_ERR:
+ if ((cur_mte_cxt.fault_valid == true) ||
+ (last_index != (sizes[i] + overflow_range - 1))) {
+ err = true;
+ break;
+ }
+			/* There was no fault, so the overflow area should be filled */
+ over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size);
+ for (j = 0 ; j < overflow_size; j++) {
+ if (over_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_ASYNC_ERR:
+ /* Imprecise fault should occur otherwise return error */
+ if (cur_mte_cxt.fault_valid == false) {
+ err = true;
+ break;
+ }
+ /*
+ * The imprecise fault is checked after the write to the buffer,
+ * so the overflow area should be filled before the fault.
+ */
+ over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+ for (j = tagged_size ; j < last_index; j++) {
+ if (over_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_SYNC_ERR:
+ /* Precise fault should occur otherwise return error */
+ if (!cur_mte_cxt.fault_valid || (last_index != tagged_size)) {
+ err = true;
+ break;
+ }
+			/* Overflow area should not be filled */
+ over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size);
+ for (j = 0 ; j < overflow_size; j++) {
+ if (over_ptr[j] == '1')
+ err = true;
+ }
+ break;
+ default:
+ err = true;
+ break;
+ }
+check_buffer_overflow_by_byte_err:
+ mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, 0, overflow_range);
+ if (err)
+ break;
+ }
+ return (err ? KSFT_FAIL : KSFT_PASS);
+}
+
+static int check_buffer_by_block_iterate(int mem_type, int mode, size_t size)
+{
+ char *src, *dst;
+ int j, result = KSFT_PASS;
+ enum mte_block_test_alloc alloc_type = UNTAGGED_TAGGED;
+
+ for (alloc_type = UNTAGGED_TAGGED; alloc_type < (int) BLOCK_ALLOC_MAX; alloc_type++) {
+ switch (alloc_type) {
+ case UNTAGGED_TAGGED:
+ src = (char *)mte_allocate_memory(size, mem_type, 0, false);
+ if (check_allocated_memory(src, size, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ dst = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) {
+ mte_free_memory((void *)src, size, mem_type, false);
+ return KSFT_FAIL;
+ }
+
+ break;
+ case TAGGED_UNTAGGED:
+ dst = (char *)mte_allocate_memory(size, mem_type, 0, false);
+ if (check_allocated_memory(dst, size, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ src = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS) {
+ mte_free_memory((void *)dst, size, mem_type, false);
+ return KSFT_FAIL;
+ }
+ break;
+ case TAGGED_TAGGED:
+ src = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ dst = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) {
+ mte_free_memory((void *)src, size, mem_type, true);
+ return KSFT_FAIL;
+ }
+ break;
+ default:
+ return KSFT_FAIL;
+ }
+
+ cur_mte_cxt.fault_valid = false;
+ result = KSFT_PASS;
+ mte_initialize_current_context(mode, (uintptr_t)dst, size);
+		/* Set some value in memory and copy */
+ memset((void *)src, (int)'1', size);
+ memcpy((void *)dst, (void *)src, size);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid) {
+ result = KSFT_FAIL;
+ goto check_buffer_by_block_err;
+ }
+ /* Check the buffer whether it is filled. */
+ for (j = 0; j < size; j++) {
+ if (src[j] != dst[j] || src[j] != '1') {
+ result = KSFT_FAIL;
+ break;
+ }
+ }
+check_buffer_by_block_err:
+ mte_free_memory((void *)src, size, mem_type,
+ MT_FETCH_TAG((uintptr_t)src) ? true : false);
+ mte_free_memory((void *)dst, size, mem_type,
+ MT_FETCH_TAG((uintptr_t)dst) ? true : false);
+ if (result != KSFT_PASS)
+ return result;
+ }
+ return result;
+}
+
+static int check_buffer_by_block(int mem_type, int mode)
+{
+ int i, item, result = KSFT_PASS;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = sizeof(sizes)/sizeof(int);
+ cur_mte_cxt.fault_valid = false;
+ for (i = 0; i < item; i++) {
+ result = check_buffer_by_block_iterate(mem_type, mode, sizes[i]);
+ if (result != KSFT_PASS)
+ break;
+ }
+ return result;
+}
+
+static int compare_memory_tags(char *ptr, size_t size, int tag)
+{
+ int i, new_tag;
+
+ for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) {
+ new_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i)));
+ if (tag != new_tag) {
+			ksft_print_msg("FAIL: mte tag mismatch\n");
+ return KSFT_FAIL;
+ }
+ }
+ return KSFT_PASS;
+}
+
+static int check_memory_initial_tags(int mem_type, int mode, int mapping)
+{
+ char *ptr;
+ int run, fd;
+ int total = sizeof(sizes)/sizeof(int);
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ /* check initial tags for anonymous mmap */
+ ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false);
+ if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+ if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) {
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+ return KSFT_FAIL;
+ }
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+
+ /* check initial tags for file mmap */
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+ ptr = (char *)mte_allocate_file_memory(sizes[run], mem_type, mapping, false, fd);
+ if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) {
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+ close(fd);
+ }
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+ size_t page_size = getpagesize();
+ int item = sizeof(sizes)/sizeof(int);
+
+ sizes[item - 3] = page_size - 1;
+ sizes[item - 2] = page_size;
+ sizes[item - 1] = page_size + 1;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+
+ /* Register SIGSEGV handler */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ /* Buffer by byte tests */
+ evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR),
+ "Check buffer correctness by byte with sync err mode and mmap memory\n");
+ evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_ASYNC_ERR),
+ "Check buffer correctness by byte with async err mode and mmap memory\n");
+ evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_SYNC_ERR),
+ "Check buffer correctness by byte with sync err mode and mmap/mprotect memory\n");
+ evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_ASYNC_ERR),
+ "Check buffer correctness by byte with async err mode and mmap/mprotect memory\n");
+
+ /* Check buffer underflow with underflow size as 16 */
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write underflow by byte with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write underflow by byte with async mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE),
+ "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n");
+
+ /* Check buffer underflow with underflow size as page size */
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, page_size),
+ "Check buffer write underflow by byte with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, page_size),
+ "Check buffer write underflow by byte with async mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, page_size),
+ "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n");
+
+ /* Check buffer overflow with overflow size as 16 */
+ evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write overflow by byte with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write overflow by byte with async mode and mmap memory\n");
+ evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE),
+ "Check buffer write overflow by byte with tag fault ignore mode and mmap memory\n");
+
+ /* Buffer by block tests */
+ evaluate_test(check_buffer_by_block(USE_MMAP, MTE_SYNC_ERR),
+ "Check buffer write correctness by block with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_by_block(USE_MMAP, MTE_ASYNC_ERR),
+ "Check buffer write correctness by block with async mode and mmap memory\n");
+ evaluate_test(check_buffer_by_block(USE_MMAP, MTE_NONE_ERR),
+ "Check buffer write correctness by block with tag fault ignore and mmap memory\n");
+
+ /* Initial tags are supposed to be 0 */
+ evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check initial tags with private mapping, sync error mode and mmap memory\n");
+ evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check initial tags with private mapping, sync error mode and mmap/mprotect memory\n");
+ evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check initial tags with shared mapping, sync error mode and mmap memory\n");
+ evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+ "Check initial tags with shared mapping, sync error mode and mmap/mprotect memory\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c
new file mode 100644
index 000000000000..97bebdecd29e
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_child_memory.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define BUFFER_SIZE (5 * MT_GRANULE_SIZE)
+#define RUNS (MT_TAG_COUNT)
+#define UNDERFLOW MT_GRANULE_SIZE
+#define OVERFLOW MT_GRANULE_SIZE
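+/* Guard regions of one MTE granule (16 bytes) on each side of the buffer */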
+
+static size_t page_size;
+static int sizes[] = {
+ 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+	/* page size - 1 */ 0, /* page size */ 0, /* page size + 1 */ 0
+};
+
+static int check_child_tag_inheritance(char *ptr, int size, int mode)
+{
+ int i, parent_tag, child_tag, fault, child_status;
+ pid_t child;
+
+ parent_tag = MT_FETCH_TAG((uintptr_t)ptr);
+ fault = 0;
+
+ child = fork();
+ if (child == -1) {
+ ksft_print_msg("FAIL: child process creation\n");
+ return KSFT_FAIL;
+ } else if (child == 0) {
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+ /* Do copy on write */
+ memset(ptr, '1', size);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true) {
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+ for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) {
+ child_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i)));
+ if (parent_tag != child_tag) {
+ ksft_print_msg("FAIL: child mte tag mismatch\n");
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+ memset(ptr - UNDERFLOW, '2', UNDERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false) {
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+ memset(ptr + size, '3', OVERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false) {
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+check_child_tag_inheritance_err:
+ _exit(fault);
+ }
+ /* Wait for child process to terminate */
+ wait(&child_status);
+ if (WIFEXITED(child_status))
+ fault = WEXITSTATUS(child_status);
+ else
+ fault = 1;
+ return (fault) ? KSFT_FAIL : KSFT_PASS;
+}
+
+static int check_child_memory_mapping(int mem_type, int mode, int mapping)
+{
+ char *ptr;
+ int run, result;
+ int item = sizeof(sizes)/sizeof(int);
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < item; run++) {
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
+ UNDERFLOW, OVERFLOW);
+ if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW) != KSFT_PASS)
+ return KSFT_FAIL;
+ result = check_child_tag_inheritance(ptr, sizes[run], mode);
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+ if (result == KSFT_FAIL)
+ return result;
+ }
+ return KSFT_PASS;
+}
+
+static int check_child_file_mapping(int mem_type, int mode, int mapping)
+{
+ char *ptr, *map_ptr;
+ int run, fd, map_size, result = KSFT_PASS;
+ int total = sizeof(sizes)/sizeof(int);
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+
+ map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+ map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd);
+ if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ ptr = map_ptr + UNDERFLOW;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+		/* Only MTE-enabled memory allows tag insertion */
+ ptr = mte_insert_tags((void *)ptr, sizes[run]);
+ if (!ptr || cur_mte_cxt.fault_valid == true) {
+ ksft_print_msg("FAIL: Insert tags on file based memory\n");
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ result = check_child_tag_inheritance(ptr, sizes[run], mode);
+ mte_clear_tags((void *)ptr, sizes[run]);
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ if (result != KSFT_PASS)
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+ int item = sizeof(sizes)/sizeof(int);
+
+ page_size = getpagesize();
+ if (!page_size) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ sizes[item - 3] = page_size - 1;
+ sizes[item - 2] = page_size;
+ sizes[item - 1] = page_size + 1;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+
+ /* Register SIGSEGV handler */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGBUS, mte_default_handler);
+
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check child anonymous memory with private mapping, precise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check child anonymous memory with shared mapping, precise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+ "Check child anonymous memory with private mapping, imprecise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+ "Check child anonymous memory with shared mapping, imprecise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check child anonymous memory with private mapping, precise mode and mmap/mprotect memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+ "Check child anonymous memory with shared mapping, precise mode and mmap/mprotect memory\n");
+
+ evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check child file memory with private mapping, precise mode and mmap memory\n");
+ evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check child file memory with shared mapping, precise mode and mmap memory\n");
+	evaluate_test(check_child_file_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+		      "Check child file memory with private mapping, imprecise mode and mmap memory\n");
+	evaluate_test(check_child_file_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+		      "Check child file memory with shared mapping, imprecise mode and mmap memory\n");
+	evaluate_test(check_child_file_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+		      "Check child file memory with private mapping, precise mode and mmap/mprotect memory\n");
+	evaluate_test(check_child_file_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+		      "Check child file memory with shared mapping, precise mode and mmap/mprotect memory\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c
new file mode 100644
index 000000000000..bc41ae630c86
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define TEST_UNIT 10
+#define PATH_KSM "/sys/kernel/mm/ksm/"
+#define MAX_LOOP 4
+
+static size_t page_sz;
+static unsigned long ksm_sysfs[5];
+
+static unsigned long read_sysfs(char *str)
+{
+ FILE *f;
+ unsigned long val = 0;
+
+ f = fopen(str, "r");
+ if (!f) {
+ ksft_print_msg("ERR: missing %s\n", str);
+ return 0;
+ }
+ fscanf(f, "%lu", &val);
+ fclose(f);
+ return val;
+}
+
+static void write_sysfs(char *str, unsigned long val)
+{
+ FILE *f;
+
+ f = fopen(str, "w");
+ if (!f) {
+ ksft_print_msg("ERR: missing %s\n", str);
+ return;
+ }
+ fprintf(f, "%lu", val);
+ fclose(f);
+}
+
+static void mte_ksm_setup(void)
+{
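+	/* Save each current KSM setting so mte_ksm_restore() can put it back */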
+ ksm_sysfs[0] = read_sysfs(PATH_KSM "merge_across_nodes");
+ write_sysfs(PATH_KSM "merge_across_nodes", 1);
+ ksm_sysfs[1] = read_sysfs(PATH_KSM "sleep_millisecs");
+ write_sysfs(PATH_KSM "sleep_millisecs", 0);
+ ksm_sysfs[2] = read_sysfs(PATH_KSM "run");
+ write_sysfs(PATH_KSM "run", 1);
+ ksm_sysfs[3] = read_sysfs(PATH_KSM "max_page_sharing");
+ write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3] + TEST_UNIT);
+ ksm_sysfs[4] = read_sysfs(PATH_KSM "pages_to_scan");
+ write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4] + TEST_UNIT);
+}
+
+static void mte_ksm_restore(void)
+{
+ write_sysfs(PATH_KSM "merge_across_nodes", ksm_sysfs[0]);
+ write_sysfs(PATH_KSM "sleep_millisecs", ksm_sysfs[1]);
+ write_sysfs(PATH_KSM "run", ksm_sysfs[2]);
+ write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3]);
+ write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4]);
+}
+
+static void mte_ksm_scan(void)
+{
+ int cur_count = read_sysfs(PATH_KSM "full_scans");
+ int scan_count = cur_count + 1;
+ int max_loop_count = MAX_LOOP;
+
+ while ((cur_count < scan_count) && max_loop_count) {
+ sleep(1);
+ cur_count = read_sysfs(PATH_KSM "full_scans");
+ max_loop_count--;
+ }
+#ifdef DEBUG
+ ksft_print_msg("INFO: pages_shared=%lu pages_sharing=%lu\n",
+ read_sysfs(PATH_KSM "pages_shared"),
+ read_sysfs(PATH_KSM "pages_sharing"));
+#endif
+}
+
+static int check_madvise_options(int mem_type, int mode, int mapping)
+{
+ char *ptr;
+ int err, ret;
+
+ err = KSFT_FAIL;
+ if (access(PATH_KSM, F_OK) == -1) {
+ ksft_print_msg("ERR: Kernel KSM config not enabled\n");
+ return err;
+ }
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true);
+ if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ /* Insert same data in all the pages */
+ memset(ptr, 'A', TEST_UNIT * page_sz);
+ ret = madvise(ptr, TEST_UNIT * page_sz, MADV_MERGEABLE);
+ if (ret) {
+ ksft_print_msg("ERR: madvise failed to set MADV_UNMERGEABLE\n");
+ goto madvise_err;
+ }
+ mte_ksm_scan();
+	/* Tagged pages must not merge, so pass only if KSM failed to share them */
+ if ((read_sysfs(PATH_KSM "pages_shared") < 1) ||
+ (read_sysfs(PATH_KSM "pages_sharing") < (TEST_UNIT - 1)))
+ err = KSFT_PASS;
+madvise_err:
+ mte_free_memory(ptr, TEST_UNIT * page_sz, mem_type, true);
+ return err;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+ page_sz = getpagesize();
+ if (!page_sz) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ /* Register signal handlers */
+ mte_register_signal(SIGBUS, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler);
+ /* Enable KSM */
+ mte_ksm_setup();
+
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check KSM mte page merge for private mapping, sync mode and mmap memory\n");
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+ "Check KSM mte page merge for private mapping, async mode and mmap memory\n");
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check KSM mte page merge for shared mapping, sync mode and mmap memory\n");
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+ "Check KSM mte page merge for shared mapping, async mode and mmap memory\n");
+
+ mte_ksm_restore();
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c
new file mode 100644
index 000000000000..33b13b86199b
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define RUNS (MT_TAG_COUNT)
+#define UNDERFLOW MT_GRANULE_SIZE
+#define OVERFLOW MT_GRANULE_SIZE
+#define TAG_CHECK_ON 0
+#define TAG_CHECK_OFF 1
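+/* Whether a test expects tag check faults (TAG_CHECK_ON) or none (TAG_CHECK_OFF) */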
+
+static size_t page_size;
+static int sizes[] = {
+ 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+	/* page size - 1 */ 0, /* page size */ 0, /* page size + 1 */ 0
+};
+
+static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
+{
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+ memset(ptr, '1', size);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+ memset(ptr - UNDERFLOW, '2', UNDERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON)
+ return KSFT_FAIL;
+ if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+ memset(ptr + size, '3', OVERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON)
+ return KSFT_FAIL;
+ if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
+ return KSFT_FAIL;
+
+ return KSFT_PASS;
+}
+
+static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+{
+ char *ptr, *map_ptr;
+ int run, result, map_size;
+ int item = sizeof(sizes)/sizeof(int);
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < item; run++) {
+ map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+ map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false);
+ if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ ptr = map_ptr + UNDERFLOW;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+		/* Only MTE-enabled memory allows tag insertion */
+ ptr = mte_insert_tags((void *)ptr, sizes[run]);
+ if (!ptr || cur_mte_cxt.fault_valid == true) {
+ ksft_print_msg("FAIL: Insert tags on anonymous mmap memory\n");
+ munmap((void *)map_ptr, map_size);
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ mte_clear_tags((void *)ptr, sizes[run]);
+ mte_free_memory((void *)map_ptr, map_size, mem_type, false);
+ if (result == KSFT_FAIL)
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+{
+ char *ptr, *map_ptr;
+ int run, fd, map_size;
+ int total = sizeof(sizes)/sizeof(int);
+ int result = KSFT_PASS;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+
+ map_size = sizes[run] + UNDERFLOW + OVERFLOW;
+ map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd);
+ if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ ptr = map_ptr + UNDERFLOW;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+		/* Only MTE-enabled memory allows tag insertion */
+ ptr = mte_insert_tags((void *)ptr, sizes[run]);
+ if (!ptr || cur_mte_cxt.fault_valid == true) {
+ ksft_print_msg("FAIL: Insert tags on file based memory\n");
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ mte_clear_tags((void *)ptr, sizes[run]);
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ if (result == KSFT_FAIL)
+ break;
+ }
+ return result;
+}
+
+static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
+{
+ char *ptr, *map_ptr;
+ int run, prot_flag, result, fd, map_size;
+ int total = sizeof(sizes)/sizeof(int);
+
+ prot_flag = PROT_READ | PROT_WRITE;
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
+ UNDERFLOW, OVERFLOW);
+ if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW) != KSFT_PASS)
+ return KSFT_FAIL;
+ map_ptr = ptr - UNDERFLOW;
+ /* Try to clear PROT_MTE property and verify it by tag checking */
+ if (mprotect(map_ptr, map_size, prot_flag)) {
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW);
+ ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+ if (result != KSFT_PASS)
+ return KSFT_FAIL;
+
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+ ptr = (char *)mte_allocate_file_memory_tag_range(sizes[run], mem_type, mapping,
+ UNDERFLOW, OVERFLOW, fd);
+ if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ map_ptr = ptr - UNDERFLOW;
+ /* Try to clear PROT_MTE property and verify it by tag checking */
+ if (mprotect(map_ptr, map_size, prot_flag)) {
+ ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+ close(fd);
+ if (result != KSFT_PASS)
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+ int item = sizeof(sizes)/sizeof(int);
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+ page_size = getpagesize();
+ if (!page_size) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ sizes[item - 3] = page_size - 1;
+ sizes[item - 2] = page_size;
+ sizes[item - 1] = page_size + 1;
+
+ /* Register signal handlers */
+ mte_register_signal(SIGBUS, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ mte_enable_pstate_tco();
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n");
+
+ mte_disable_pstate_tco();
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n");
+
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
+
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
+
+ evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n");
+ evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
new file mode 100644
index 000000000000..94d245a0ed56
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define BUFFER_SIZE (5 * MT_GRANULE_SIZE)
+#define RUNS (MT_TAG_COUNT * 2)
+#define MTE_LAST_TAG_MASK (0x7FFF)
+
+static int verify_mte_pointer_validity(char *ptr, int mode)
+{
+ mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
+ /* Check the validity of the tagged pointer */
+ memset((void *)ptr, '1', BUFFER_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid)
+ return KSFT_FAIL;
+ /* Proceed further for nonzero tags */
+ if (!MT_FETCH_TAG((uintptr_t)ptr))
+ return KSFT_PASS;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE + 1);
+ /* Check the validity outside the range */
+ ptr[BUFFER_SIZE] = '2';
+ mte_wait_after_trig();
+ if (!cur_mte_cxt.fault_valid)
+ return KSFT_FAIL;
+ else
+ return KSFT_PASS;
+}
+
+static int check_single_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int tag, run, result = KSFT_PASS;
+
+ ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+ mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) {
+ mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+		/* Try to catch an excluded tag over a number of tries. */
+ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+ ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ /* Check tag value */
+ if (MT_FETCH_TAG((uintptr_t)ptr) == tag) {
+ ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+ MT_FETCH_TAG((uintptr_t)ptr),
+ MT_INCLUDE_VALID_TAG(tag));
+ result = KSFT_FAIL;
+ break;
+ }
+ result = verify_mte_pointer_validity(ptr, mode);
+ }
+ }
+ mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ return result;
+}
+
+static int check_multiple_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int tag, run, result = KSFT_PASS;
+ unsigned long excl_mask = 0;
+
+ ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+ mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) {
+ excl_mask |= 1 << tag;
+ mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask));
+		/* Try to catch an excluded tag over a number of tries. */
+ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+ ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ /* Check tag value */
+ if (MT_FETCH_TAG((uintptr_t)ptr) < tag) {
+ ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+ MT_FETCH_TAG((uintptr_t)ptr),
+ MT_INCLUDE_VALID_TAGS(excl_mask));
+ result = KSFT_FAIL;
+ break;
+ }
+ result = verify_mte_pointer_validity(ptr, mode);
+ }
+ }
+ mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ return result;
+}
+
+static int check_all_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int run, result = KSFT_PASS;
+
+ ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+ mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+	/* Insert tags repeatedly over a number of tries. */
+ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+ ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ /*
+		 * Here the tag byte can be anywhere between 0x0 and 0xF (the
+		 * full allowed range), so there is no specific value to match;
+		 * just verify that the pointer is writable.
+ */
+ result = verify_mte_pointer_validity(ptr, mode);
+ }
+ mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ return result;
+}
+
+static int check_none_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int run;
+
+ ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+	/* Try to catch an included tag over a number of tries. */
+ for (run = 0; run < RUNS; run++) {
+ ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+		/* All tags are excluded here, so the generated tag value should be 0 */
+ if (MT_FETCH_TAG((uintptr_t)ptr)) {
+ ksft_print_msg("FAIL: included tag value found\n");
+ mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, true);
+ return KSFT_FAIL;
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
+ /* Check the write validity of the untagged pointer */
+ memset((void *)ptr, '1', BUFFER_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid)
+ break;
+ }
+ mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, false);
+ if (cur_mte_cxt.fault_valid)
+ return KSFT_FAIL;
+ else
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+
+ /* Register SIGSEGV handler */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check an included tag value with sync mode\n");
+ evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check different included tags value with sync mode\n");
+ evaluate_test(check_none_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check none included tags value with sync mode\n");
+ evaluate_test(check_all_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check all included tags value with sync mode\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
new file mode 100644
index 000000000000..594e98e76880
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ucontext.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+static size_t page_sz;
+
+static int check_usermem_access_fault(int mem_type, int mode, int mapping)
+{
+ int fd, i, err;
+ char val = 'A';
+ size_t len, read_len;
+ void *ptr, *ptr_next;
+
+ err = KSFT_FAIL;
+ len = 2 * page_sz;
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+ for (i = 0; i < len; i++)
+ write(fd, &val, sizeof(val));
+ lseek(fd, 0, 0);
+ ptr = mte_allocate_memory(len, mem_type, mapping, true);
+ if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, len);
+ /* Copy from file into buffer with valid tag */
+ read_len = read(fd, ptr, len);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid || read_len < len)
+ goto usermem_acc_err;
+ /* Verify same pattern is read */
+ for (i = 0; i < len; i++)
+ if (*(char *)(ptr + i) != val)
+ break;
+ if (i < len)
+ goto usermem_acc_err;
+
+ /* Tag the next half of memory with different value */
+ ptr_next = (void *)((unsigned long)ptr + page_sz);
+ ptr_next = mte_insert_new_tag(ptr_next);
+ mte_set_tag_address_range(ptr_next, page_sz);
+
+ lseek(fd, 0, 0);
+ /* Copy from file into buffer with invalid tag */
+ read_len = read(fd, ptr, len);
+ mte_wait_after_trig();
+ /*
+	 * A kernel access to user memory with an invalid tag should make
+	 * read() return short in sync mode without raising a fault in the
+	 * test itself, but may not fail in async mode, as per the MTE
+	 * userspace support implemented in the arm64 kernel.
+ */
+ if (mode == MTE_SYNC_ERR &&
+ !cur_mte_cxt.fault_valid && read_len < len) {
+ err = KSFT_PASS;
+ } else if (mode == MTE_ASYNC_ERR &&
+ !cur_mte_cxt.fault_valid && read_len == len) {
+ err = KSFT_PASS;
+ }
+usermem_acc_err:
+ mte_free_memory((void *)ptr, len, mem_type, true);
+ close(fd);
+ return err;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+
+ page_sz = getpagesize();
+ if (!page_sz) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ err = mte_default_setup();
+ if (err)
+ return err;
+ /* Register signal handlers */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check memory access from kernel in sync mode, private mapping and mmap memory\n");
+ evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check memory access from kernel in sync mode, shared mapping and mmap memory\n");
+
+ evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+ "Check memory access from kernel in async mode, private mapping and mmap memory\n");
+ evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+ "Check memory access from kernel in async mode, shared mapping and mmap memory\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
new file mode 100644
index 000000000000..39f8908988ea
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/auxvec.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#include <asm/hwcap.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define INIT_BUFFER_SIZE 256
+
+struct mte_fault_cxt cur_mte_cxt;
+static unsigned int mte_cur_mode;
+static unsigned int mte_cur_pstate_tco;
+
+void mte_default_handler(int signum, siginfo_t *si, void *uc)
+{
+ unsigned long addr = (unsigned long)si->si_addr;
+
+ if (signum == SIGSEGV) {
+#ifdef DEBUG
+ ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
+ ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+#endif
+ if (si->si_code == SEGV_MTEAERR) {
+ if (cur_mte_cxt.trig_si_code == si->si_code)
+ cur_mte_cxt.fault_valid = true;
+ return;
+ }
+ /* Compare the context for precise error */
+ else if (si->si_code == SEGV_MTESERR) {
+ if (cur_mte_cxt.trig_si_code == si->si_code &&
+ ((cur_mte_cxt.trig_range >= 0 &&
+ addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ (cur_mte_cxt.trig_range < 0 &&
+ addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) {
+ cur_mte_cxt.fault_valid = true;
+				/*
+				 * Adjust the pc by 4 (one A64 instruction) so
+				 * the faulting store is not replayed when the
+				 * handler returns.
+				 */
+ ((ucontext_t *)uc)->uc_mcontext.pc += 4;
+ } else {
+ ksft_print_msg("Invalid MTE synchronous exception caught!\n");
+ exit(1);
+ }
+ } else {
+ ksft_print_msg("Unknown SIGSEGV exception caught!\n");
+ exit(1);
+ }
+ } else if (signum == SIGBUS) {
+ ksft_print_msg("INFO: SIGBUS signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
+ ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+ if ((cur_mte_cxt.trig_range >= 0 &&
+ addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ (cur_mte_cxt.trig_range < 0 &&
+ addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) {
+ cur_mte_cxt.fault_valid = true;
+			/* Adjust the pc by 4 to skip the faulting instruction */
+ ((ucontext_t *)uc)->uc_mcontext.pc += 4;
+ }
+ }
+}
+
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *))
+{
+ struct sigaction sa;
+
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ sigaction(signal, &sa, NULL);
+}
+
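+/*
+ * Asynchronous tag check faults are only reported on the next kernel entry,
+ * so yield once to give the kernel a chance to deliver SEGV_MTEAERR before
+ * the caller inspects cur_mte_cxt.fault_valid.
+ */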
+void mte_wait_after_trig(void)
+{
+ sched_yield();
+}
+
+void *mte_insert_tags(void *ptr, size_t size)
+{
+ void *tag_ptr;
+ int align_size;
+
+ if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
+ ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+ return NULL;
+ }
+ align_size = MT_ALIGN_UP(size);
+ tag_ptr = mte_insert_random_tag(ptr);
+ mte_set_tag_address_range(tag_ptr, align_size);
+ return tag_ptr;
+}
+
+void mte_clear_tags(void *ptr, size_t size)
+{
+ if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
+ ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+ return;
+ }
+ size = MT_ALIGN_UP(size);
+ ptr = (void *)MT_CLEAR_TAG((unsigned long)ptr);
+ mte_clear_tag_address_range(ptr, size);
+}
+
+static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after,
+ bool tags, int fd)
+{
+ void *ptr;
+ int prot_flag, map_flag;
+ size_t entire_size = size + range_before + range_after;
+
+ if (mem_type != USE_MALLOC && mem_type != USE_MMAP &&
+ mem_type != USE_MPROTECT) {
+ ksft_print_msg("FAIL: Invalid allocate request\n");
+ return NULL;
+ }
+ if (mem_type == USE_MALLOC)
+ return malloc(entire_size) + range_before;
+
+ prot_flag = PROT_READ | PROT_WRITE;
+ if (mem_type == USE_MMAP)
+ prot_flag |= PROT_MTE;
+
+ map_flag = mapping;
+ if (fd == -1)
+ map_flag = MAP_ANONYMOUS | map_flag;
+ if (!(mapping & MAP_SHARED))
+ map_flag |= MAP_PRIVATE;
+ ptr = mmap(NULL, entire_size, prot_flag, map_flag, fd, 0);
+ if (ptr == MAP_FAILED) {
+ ksft_print_msg("FAIL: mmap allocation\n");
+ return NULL;
+ }
+ if (mem_type == USE_MPROTECT) {
+ if (mprotect(ptr, entire_size, prot_flag | PROT_MTE)) {
+			munmap(ptr, entire_size);
+ ksft_print_msg("FAIL: mprotect PROT_MTE property\n");
+ return NULL;
+ }
+ }
+ if (tags)
+ ptr = mte_insert_tags(ptr + range_before, size);
+ return ptr;
+}
+
+void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after)
+{
+ return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
+ range_after, true, -1);
+}
+
+void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags)
+{
+ return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, -1);
+}
+
+void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags, int fd)
+{
+ int index;
+ char buffer[INIT_BUFFER_SIZE];
+
+ if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) {
+ ksft_print_msg("FAIL: Invalid mmap file request\n");
+ return NULL;
+ }
+ /* Initialize the file for mappable size */
+ lseek(fd, 0, SEEK_SET);
+ for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE)
+ write(fd, buffer, INIT_BUFFER_SIZE);
+ index -= INIT_BUFFER_SIZE;
+ write(fd, buffer, size - index);
+ return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd);
+}
+
+void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after, int fd)
+{
+ int index;
+ char buffer[INIT_BUFFER_SIZE];
+ int map_size = size + range_before + range_after;
+
+ if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) {
+ ksft_print_msg("FAIL: Invalid mmap file request\n");
+ return NULL;
+ }
+ /* Initialize the file for mappable size */
+ lseek(fd, 0, SEEK_SET);
+ for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE)
+ write(fd, buffer, INIT_BUFFER_SIZE);
+ index -= INIT_BUFFER_SIZE;
+ write(fd, buffer, map_size - index);
+ return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
+ range_after, true, fd);
+}
+
+static void __mte_free_memory_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after, bool tags)
+{
+ switch (mem_type) {
+ case USE_MALLOC:
+ free(ptr - range_before);
+ break;
+ case USE_MMAP:
+ case USE_MPROTECT:
+ if (tags)
+ mte_clear_tags(ptr, size);
+ munmap(ptr - range_before, size + range_before + range_after);
+ break;
+ default:
+ ksft_print_msg("FAIL: Invalid free request\n");
+ break;
+ }
+}
+
+void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after)
+{
+ __mte_free_memory_range(ptr, size, mem_type, range_before, range_after, true);
+}
+
+void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags)
+{
+ __mte_free_memory_range(ptr, size, mem_type, 0, 0, tags);
+}
+
+void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range)
+{
+ cur_mte_cxt.fault_valid = false;
+ cur_mte_cxt.trig_addr = ptr;
+ cur_mte_cxt.trig_range = range;
+ if (mode == MTE_SYNC_ERR)
+ cur_mte_cxt.trig_si_code = SEGV_MTESERR;
+ else if (mode == MTE_ASYNC_ERR)
+ cur_mte_cxt.trig_si_code = SEGV_MTEAERR;
+ else
+ cur_mte_cxt.trig_si_code = 0;
+}
+
+int mte_switch_mode(int mte_option, unsigned long incl_mask)
+{
+ unsigned long en = 0;
+
+ if (!(mte_option == MTE_SYNC_ERR || mte_option == MTE_ASYNC_ERR ||
+ mte_option == MTE_NONE_ERR || incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) {
+ ksft_print_msg("FAIL: Invalid mte config option\n");
+ return -EINVAL;
+ }
+ en = PR_TAGGED_ADDR_ENABLE;
+ if (mte_option == MTE_SYNC_ERR)
+ en |= PR_MTE_TCF_SYNC;
+ else if (mte_option == MTE_ASYNC_ERR)
+ en |= PR_MTE_TCF_ASYNC;
+ else if (mte_option == MTE_NONE_ERR)
+ en |= PR_MTE_TCF_NONE;
+
+ en |= (incl_mask << PR_MTE_TAG_SHIFT);
+ /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
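+	/*
+	 * For example, sync mode with all non-zero tags allowed encodes as
+	 * PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC |
+	 * (0xfffe << PR_MTE_TAG_SHIFT), since MTE_ALLOW_NON_ZERO_TAG is
+	 * MT_INCLUDE_TAG_MASK with tag 0 excluded.
+	 */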
+	if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) {
+		ksft_print_msg("FAIL: prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+#define ID_AA64PFR1_MTE_SHIFT 8
+#define ID_AA64PFR1_MTE 2
+
+int mte_default_setup(void)
+{
+ unsigned long hwcaps = getauxval(AT_HWCAP);
+ unsigned long en = 0;
+ int ret;
+
+ if (!(hwcaps & HWCAP_CPUID)) {
+ ksft_print_msg("FAIL: CPUID registers unavailable\n");
+ return KSFT_FAIL;
+ }
+	/* Read ID_AA64PFR1_EL1 register via the kernel's EL0 CPUID emulation */
+ asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory");
+ if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) {
+ ksft_print_msg("FAIL: MTE features unavailable\n");
+ return KSFT_SKIP;
+ }
+ /* Get current mte mode */
+ ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0);
+ if (ret < 0) {
+ ksft_print_msg("FAIL:prctl PR_GET_TAGGED_ADDR_CTRL with error =%d\n", ret);
+ return KSFT_FAIL;
+ }
+ if (ret & PR_MTE_TCF_SYNC)
+ mte_cur_mode = MTE_SYNC_ERR;
+ else if (ret & PR_MTE_TCF_ASYNC)
+ mte_cur_mode = MTE_ASYNC_ERR;
+ else if (ret & PR_MTE_TCF_NONE)
+ mte_cur_mode = MTE_NONE_ERR;
+
+ mte_cur_pstate_tco = mte_get_pstate_tco();
+ /* Disable PSTATE.TCO */
+ mte_disable_pstate_tco();
+ return 0;
+}
+
+void mte_restore_setup(void)
+{
+ mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG);
+ if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN)
+ mte_enable_pstate_tco();
+ else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS)
+ mte_disable_pstate_tco();
+}
+
+int create_temp_file(void)
+{
+ int fd;
+ char filename[] = "/dev/shm/tmp_XXXXXX";
+
+ /* Create a file in the tmpfs filesystem */
+ fd = mkstemp(&filename[0]);
+ if (fd == -1) {
+ ksft_print_msg("FAIL: Unable to open temporary file\n");
+		return -1;
+ }
+ unlink(&filename[0]);
+ return fd;
+}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h
new file mode 100644
index 000000000000..195a7d1879e6
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#ifndef _MTE_COMMON_UTIL_H
+#define _MTE_COMMON_UTIL_H
+
+#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include "mte_def.h"
+#include "kselftest.h"
+
+enum mte_mem_type {
+ USE_MALLOC,
+ USE_MMAP,
+ USE_MPROTECT,
+};
+
+enum mte_mode {
+ MTE_NONE_ERR,
+ MTE_SYNC_ERR,
+ MTE_ASYNC_ERR,
+};
+
+struct mte_fault_cxt {
+ /* Address start which triggers mte tag fault */
+ unsigned long trig_addr;
+ /* Address range for mte tag fault and negative value means underflow */
+ ssize_t trig_range;
+ /* siginfo si code */
+ unsigned long trig_si_code;
+ /* Flag to denote if correct fault caught */
+ bool fault_valid;
+};
+
+extern struct mte_fault_cxt cur_mte_cxt;
+
+/* MTE utility functions */
+void mte_default_handler(int signum, siginfo_t *si, void *uc);
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *));
+void mte_wait_after_trig(void);
+void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags);
+void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after);
+void *mte_allocate_file_memory(size_t size, int mem_type, int mapping,
+ bool tags, int fd);
+void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after, int fd);
+void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags);
+void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after);
+void *mte_insert_tags(void *ptr, size_t size);
+void mte_clear_tags(void *ptr, size_t size);
+int mte_default_setup(void);
+void mte_restore_setup(void);
+int mte_switch_mode(int mte_option, unsigned long incl_mask);
+void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range);
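+
+/*
+ * Typical test flow (a sketch based on the check_*.c callers of this API):
+ *
+ *	mte_switch_mode(MTE_SYNC_ERR, MTE_ALLOW_NON_ZERO_TAG);
+ *	ptr = mte_allocate_memory(size, USE_MMAP, 0, true);
+ *	mte_initialize_current_context(MTE_SYNC_ERR, (uintptr_t)ptr, size);
+ *	memset(ptr, '1', size);		// tagged access under test
+ *	mte_wait_after_trig();		// let an async fault be delivered
+ *	// inspect cur_mte_cxt.fault_valid, then:
+ *	mte_free_memory(ptr, size, USE_MMAP, true);
+ */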
+
+/* Common utility functions */
+int create_temp_file(void);
+
+/* Assembly MTE utility functions */
+void *mte_insert_random_tag(void *ptr);
+void *mte_insert_new_tag(void *ptr);
+void *mte_get_tag_address(void *ptr);
+void mte_set_tag_address_range(void *ptr, int range);
+void mte_clear_tag_address_range(void *ptr, int range);
+void mte_disable_pstate_tco(void);
+void mte_enable_pstate_tco(void);
+unsigned int mte_get_pstate_tco(void);
+
+/* Test framework static inline functions/macros */
+static inline void evaluate_test(int err, const char *msg)
+{
+ if (err == KSFT_PASS)
+ ksft_test_result_pass(msg);
+ else if (err == KSFT_FAIL)
+ ksft_test_result_fail(msg);
+}
+
+static inline int check_allocated_memory(void *ptr, size_t size,
+ int mem_type, bool tags)
+{
+ if (ptr == NULL) {
+ ksft_print_msg("FAIL: memory allocation\n");
+ return KSFT_FAIL;
+ }
+
+ if (tags && !MT_FETCH_TAG((uintptr_t)ptr)) {
+ ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr);
+ mte_free_memory((void *)ptr, size, mem_type, false);
+ return KSFT_FAIL;
+ }
+
+ return KSFT_PASS;
+}
+
+static inline int check_allocated_memory_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after)
+{
+ if (ptr == NULL) {
+ ksft_print_msg("FAIL: memory allocation\n");
+ return KSFT_FAIL;
+ }
+
+ if (!MT_FETCH_TAG((uintptr_t)ptr)) {
+ ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr);
+ mte_free_memory_tag_range((void *)ptr, size, mem_type, range_before,
+ range_after);
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+#endif /* _MTE_COMMON_UTIL_H */
diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h
new file mode 100644
index 000000000000..9b188254b61a
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_def.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+/*
+ * The definitions below may also be found in the kernel headers. However,
+ * they are redefined here to decouple the MTE selftests from the kernel
+ * headers at compile time.
+ */
+#ifndef SEGV_MTEAERR
+#define SEGV_MTEAERR 8
+#endif
+#ifndef SEGV_MTESERR
+#define SEGV_MTESERR 9
+#endif
+#ifndef PROT_MTE
+#define PROT_MTE 0x20
+#endif
+#ifndef HWCAP2_MTE
+#define HWCAP2_MTE (1 << 18)
+#endif
+
+#ifndef PR_MTE_TCF_SHIFT
+#define PR_MTE_TCF_SHIFT 1
+#endif
+#ifndef PR_MTE_TCF_NONE
+#define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TCF_SYNC
+#define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TCF_ASYNC
+#define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TAG_SHIFT
+#define PR_MTE_TAG_SHIFT 3
+#endif
+
+/* MTE Hardware feature definitions below. */
+#define MT_TAG_SHIFT 56
+#define MT_TAG_MASK 0xFUL
+#define MT_FREE_TAG 0x0UL
+#define MT_GRANULE_SIZE 16
+#define MT_TAG_COUNT 16
+#define MT_INCLUDE_TAG_MASK 0xFFFF
+#define MT_EXCLUDE_TAG_MASK 0x0
+
+#define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1)
+#define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT))
+#define MT_SET_TAG(x, y) ((x) | (y << MT_TAG_SHIFT))
+#define MT_FETCH_TAG(x) ((x >> MT_TAG_SHIFT) & (MT_TAG_MASK))
+#define MT_ALIGN_UP(x) ((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE))
+
+#define MT_PSTATE_TCO_SHIFT 25
+#define MT_PSTATE_TCO_MASK ~(0x1 << MT_PSTATE_TCO_SHIFT)
+#define MT_PSTATE_TCO_EN 1
+#define MT_PSTATE_TCO_DIS 0
+
+#define MT_EXCLUDE_TAG(x) (1 << (x))
+#define MT_INCLUDE_VALID_TAG(x) (MT_INCLUDE_TAG_MASK ^ MT_EXCLUDE_TAG(x))
+#define MT_INCLUDE_VALID_TAGS(x) (MT_INCLUDE_TAG_MASK ^ (x))
+#define MTE_ALLOW_NON_ZERO_TAG MT_INCLUDE_VALID_TAG(0)
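+
+/*
+ * Example: for an address with logical tag 0x3 in bits 59:56,
+ * MT_FETCH_TAG((uintptr_t)ptr) yields 0x3, MT_CLEAR_TAG() strips the tag,
+ * and MT_SET_TAG(untagged, 0x3UL) inserts it again.
+ */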
diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S
new file mode 100644
index 000000000000..a02c04cd0aac
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_helper.S
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#include "mte_def.h"
+
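+/* Local stand-ins for the kernel's ENTRY/ENDPROC assembler macros */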
+#define ENTRY(name) \
+ .globl name ;\
+ .p2align 2;\
+ .type name, @function ;\
+name:
+
+#define ENDPROC(name) \
+ .size name, .-name ;
+
+ .text
+/*
+ * mte_insert_random_tag: Insert a random tag; the result may equal the
+ * source tag if the source pointer carries one.
+ * Input:
+ * x0 - source pointer with a tag/no-tag
+ * Return:
+ * x0 - pointer with random tag
+ */
+ENTRY(mte_insert_random_tag)
+ irg x0, x0, xzr
+ ret
+ENDPROC(mte_insert_random_tag)
+
+/*
+ * mte_insert_new_tag: Insert a new tag, guaranteed to differ from the
+ * source tag if the source pointer carries one.
+ * Input:
+ * x0 - source pointer with a tag/no-tag
+ * Return:
+ * x0 - pointer with random tag
+ */
+ENTRY(mte_insert_new_tag)
+ gmi x1, x0, xzr
+ irg x0, x0, x1
+ ret
+ENDPROC(mte_insert_new_tag)
+
+/*
+ * mte_get_tag_address: Load the allocation tag stored for the given address.
+ * Input:
+ * x0 - source pointer
+ * Return:
+ * x0 - pointer with appended tag
+ */
+ENTRY(mte_get_tag_address)
+ ldg x0, [x0]
+ ret
+ENDPROC(mte_get_tag_address)
+
+/*
+ * mte_set_tag_address_range: Set the tag range from the given address
+ * Input:
+ * x0 - source pointer with tag data
+ * x1 - range
+ * Return:
+ * none
+ */
+ENTRY(mte_set_tag_address_range)
+ cbz x1, 2f
+1:
+ stg x0, [x0, #0x0]
+ add x0, x0, #MT_GRANULE_SIZE
+ sub x1, x1, #MT_GRANULE_SIZE
+ cbnz x1, 1b
+2:
+ ret
+ENDPROC(mte_set_tag_address_range)
+
+/*
+ * mte_clear_tag_address_range: Clear the tag range from the given address
+ * Input:
+ * x0 - source pointer with tag data
+ * x1 - range
+ * Return:
+ * none
+ */
+ENTRY(mte_clear_tag_address_range)
+ cbz x1, 2f
+1:
+ stzg x0, [x0, #0x0]
+ add x0, x0, #MT_GRANULE_SIZE
+ sub x1, x1, #MT_GRANULE_SIZE
+ cbnz x1, 1b
+2:
+ ret
+ENDPROC(mte_clear_tag_address_range)
+
+/*
+ * mte_enable_pstate_tco: Enable PSTATE.TCO (tag check override) field
+ * Input:
+ * none
+ * Return:
+ * none
+ */
+ENTRY(mte_enable_pstate_tco)
+ msr tco, #MT_PSTATE_TCO_EN
+ ret
+ENDPROC(mte_enable_pstate_tco)
+
+/*
+ * mte_disable_pstate_tco: Disable PSTATE.TCO (tag check override) field
+ * Input:
+ * none
+ * Return:
+ * none
+ */
+ENTRY(mte_disable_pstate_tco)
+ msr tco, #MT_PSTATE_TCO_DIS
+ ret
+ENDPROC(mte_disable_pstate_tco)
+
+/*
+ * mte_get_pstate_tco: Get PSTATE.TCO (tag check override) field
+ * Input:
+ * none
+ * Return:
+ * x0
+ */
+ENTRY(mte_get_pstate_tco)
+ mrs x0, tco
+ ubfx x0, x0, #MT_PSTATE_TCO_SHIFT, #1
+ ret
+ENDPROC(mte_get_pstate_tco)
diff --git a/tools/testing/selftests/arm64/pauth/.gitignore b/tools/testing/selftests/arm64/pauth/.gitignore
new file mode 100644
index 000000000000..155137d92722
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/.gitignore
@@ -0,0 +1,2 @@
+exec_target
+pac
diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile
new file mode 100644
index 000000000000..72e290b0b10c
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/Makefile
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020 ARM Limited
+
+# preserve CC value from top level Makefile
+ifeq ($(CC),cc)
+CC := $(CROSS_COMPILE)gcc
+endif
+
+CFLAGS += -mbranch-protection=pac-ret
+# check if the compiler supports ARMv8.3 and branch protection with PAuth
+pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+
+ifeq ($(pauth_cc_support),1)
+TEST_GEN_PROGS := pac
+TEST_GEN_FILES := pac_corruptor.o helper.o
+TEST_GEN_PROGS_EXTENDED := exec_target
+endif
+
+include ../../lib.mk
+
+ifeq ($(pauth_cc_support),1)
+# pac* and aut* instructions are not available on architectures before
+# ARMv8.3. Therefore target ARMv8.3 wherever they are used directly.
+$(OUTPUT)/pac_corruptor.o: pac_corruptor.S
+ $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a
+
+$(OUTPUT)/helper.o: helper.c
+ $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a
+
+# when -mbranch-protection is enabled and the target architecture is ARMv8.3 or
+# greater, gcc emits pac* instructions which are not in the HINT NOP space,
+# preventing the tests from running at all on earlier parts. Compile for ARMv8.2
+# so the tests can run on earlier targets and print a meaningful error message.
+$(OUTPUT)/exec_target: exec_target.c $(OUTPUT)/helper.o
+ $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
+
+$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o $(OUTPUT)/helper.o
+ $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
+endif
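+
+# To probe toolchain support by hand (illustrative; mirrors the check above):
+#   $(CC) -mbranch-protection=pac-ret -march=armv8.3-a -E -x c /dev/null -o /dev/null
+# An exit status of 0 means the PAuth tests will be built.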
diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c
new file mode 100644
index 000000000000..4435600ca400
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/exec_target.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+
+#include "helper.h"
+
+int main(void)
+{
+ struct signatures signed_vals;
+ unsigned long hwcaps;
+ size_t val;
+
+ fread(&val, sizeof(size_t), 1, stdin);
+
+ /* don't try to execute illegal (unimplemented) instructions; the caller
+ * should have checked this already, which keeps this worker simple
+ */
+ hwcaps = getauxval(AT_HWCAP);
+
+ if (hwcaps & HWCAP_PACA) {
+ signed_vals.keyia = keyia_sign(val);
+ signed_vals.keyib = keyib_sign(val);
+ signed_vals.keyda = keyda_sign(val);
+ signed_vals.keydb = keydb_sign(val);
+ }
+ signed_vals.keyg = (hwcaps & HWCAP_PACG) ? keyg_sign(val) : 0;
+
+ fwrite(&signed_vals, sizeof(struct signatures), 1, stdout);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/pauth/helper.c b/tools/testing/selftests/arm64/pauth/helper.c
new file mode 100644
index 000000000000..2c201e7d0d50
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/helper.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include "helper.h"
+
+size_t keyia_sign(size_t ptr)
+{
+ asm volatile("paciza %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keyib_sign(size_t ptr)
+{
+ asm volatile("pacizb %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keyda_sign(size_t ptr)
+{
+ asm volatile("pacdza %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keydb_sign(size_t ptr)
+{
+ asm volatile("pacdzb %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keyg_sign(size_t ptr)
+{
+ /* output is encoded in the upper 32 bits */
+ size_t dest = 0;
+ size_t modifier = 0;
+
+ asm volatile("pacga %0, %1, %2" : "=r" (dest) : "r" (ptr), "r" (modifier));
+
+ return dest;
+}
diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h
new file mode 100644
index 000000000000..652496c7b411
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/helper.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#ifndef _HELPER_H_
+#define _HELPER_H_
+
+#include <stdlib.h>
+
+#define NKEYS 5
+
+struct signatures {
+ size_t keyia;
+ size_t keyib;
+ size_t keyda;
+ size_t keydb;
+ size_t keyg;
+};
+
+void pac_corruptor(void);
+
+/* PAuth sign a value with key ia and modifier value 0 */
+size_t keyia_sign(size_t val);
+size_t keyib_sign(size_t val);
+size_t keyda_sign(size_t val);
+size_t keydb_sign(size_t val);
+size_t keyg_sign(size_t val);
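+
+/*
+ * Illustrative example (a sketch only): signing the same value with two
+ * different keys should normally yield two different signed values:
+ *
+ *	size_t a = keyia_sign(0x1000);
+ *	size_t b = keyib_sign(0x1000);
+ *	// with distinct keys, a == b only on a rare PAC collision
+ */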
+
+#endif
diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
new file mode 100644
index 000000000000..592fe538506e
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <sys/auxv.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <sched.h>
+
+#include "../../kselftest_harness.h"
+#include "helper.h"
+
+#define PAC_COLLISION_ATTEMPTS 10
+/*
+ * The kernel sets TBID by default. So bits 55 and above should remain
+ * untouched no matter what.
+ * The VA space size is 48 bits. Bigger is opt-in.
+ */
+#define PAC_MASK (~0xff80ffffffffffff)
+#define ARBITRARY_VALUE (0x1234)
+#define ASSERT_PAUTH_ENABLED() \
+do { \
+ unsigned long hwcaps = getauxval(AT_HWCAP); \
+ /* data key instructions are not in NOP space. This prevents a SIGILL */ \
+ ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \
+} while (0)
+#define ASSERT_GENERIC_PAUTH_ENABLED() \
+do { \
+ unsigned long hwcaps = getauxval(AT_HWCAP); \
+ /* generic key instructions are not in NOP space. This prevents a SIGILL */ \
+ ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \
+} while (0)
+
+void sign_specific(struct signatures *sign, size_t val)
+{
+ sign->keyia = keyia_sign(val);
+ sign->keyib = keyib_sign(val);
+ sign->keyda = keyda_sign(val);
+ sign->keydb = keydb_sign(val);
+}
+
+void sign_all(struct signatures *sign, size_t val)
+{
+ sign->keyia = keyia_sign(val);
+ sign->keyib = keyib_sign(val);
+ sign->keyda = keyda_sign(val);
+ sign->keydb = keydb_sign(val);
+ sign->keyg = keyg_sign(val);
+}
+
+int n_same(struct signatures *old, struct signatures *new, int nkeys)
+{
+ int res = 0;
+
+ res += old->keyia == new->keyia;
+ res += old->keyib == new->keyib;
+ res += old->keyda == new->keyda;
+ res += old->keydb == new->keydb;
+ if (nkeys == NKEYS)
+ res += old->keyg == new->keyg;
+
+ return res;
+}
+
+int n_same_single_set(struct signatures *sign, int nkeys)
+{
+ size_t vals[nkeys];
+ int same = 0;
+
+ vals[0] = sign->keyia & PAC_MASK;
+ vals[1] = sign->keyib & PAC_MASK;
+ vals[2] = sign->keyda & PAC_MASK;
+ vals[3] = sign->keydb & PAC_MASK;
+
+ /* vals[] has a slot for keyg only when all NKEYS keys are present */
+ if (nkeys == NKEYS)
+ vals[4] = sign->keyg & PAC_MASK;
+
+ for (int i = 0; i < nkeys - 1; i++) {
+ for (int j = i + 1; j < nkeys; j++) {
+ if (vals[i] == vals[j])
+ same += 1;
+ }
+ }
+ return same;
+}
+
+int exec_sign_all(struct signatures *signed_vals, size_t val)
+{
+ int new_stdin[2];
+ int new_stdout[2];
+ int status;
+ int i;
+ ssize_t ret;
+ pid_t pid;
+ cpu_set_t mask;
+
+ ret = pipe(new_stdin);
+ if (ret == -1) {
+ perror("pipe returned error");
+ return -1;
+ }
+
+ ret = pipe(new_stdout);
+ if (ret == -1) {
+ perror("pipe returned error");
+ return -1;
+ }
+
+ /*
+ * pin this process and all its children to a single CPU, so that a
+ * context switch with the child is guaranteed
+ */
+ sched_getaffinity(0, sizeof(mask), &mask);
+
+ for (i = 0; i < sizeof(cpu_set_t); i++)
+ if (CPU_ISSET(i, &mask))
+ break;
+
+ CPU_ZERO(&mask);
+ CPU_SET(i, &mask);
+ sched_setaffinity(0, sizeof(mask), &mask);
+
+ pid = fork();
+ // child
+ if (pid == 0) {
+ ret = dup2(new_stdin[0], STDIN_FILENO);
+ if (ret == -1) {
+ perror("dup2 returned error");
+ exit(1);
+ }
+
+ ret = dup2(new_stdout[1], STDOUT_FILENO);
+ if (ret == -1) {
+ perror("dup2 returned error");
+ exit(1);
+ }
+
+ close(new_stdin[0]);
+ close(new_stdin[1]);
+ close(new_stdout[0]);
+ close(new_stdout[1]);
+
+ ret = execl("exec_target", "exec_target", (char *)NULL);
+ if (ret == -1) {
+ perror("exec returned error");
+ exit(1);
+ }
+ }
+
+ close(new_stdin[0]);
+ close(new_stdout[1]);
+
+ ret = write(new_stdin[1], &val, sizeof(size_t));
+ if (ret == -1) {
+ perror("write returned error");
+ return -1;
+ }
+
+ /*
+ * wait for the worker to finish, so that read() sees all of its data. This
+ * also context switches with the worker, so this function can be used
+ * for context switch tests
+ */
+ waitpid(pid, &status, 0);
+ if (WIFEXITED(status) == 0) {
+ fprintf(stderr, "worker exited unexpectedly\n");
+ return -1;
+ }
+ if (WEXITSTATUS(status) != 0) {
+ fprintf(stderr, "worker exited with error\n");
+ return -1;
+ }
+
+ ret = read(new_stdout[0], signed_vals, sizeof(struct signatures));
+ if (ret == -1) {
+ perror("read returned error");
+ return -1;
+ }
+
+ return 0;
+}
+
+sigjmp_buf jmpbuf;
+void pac_signal_handler(int signum, siginfo_t *si, void *uc)
+{
+ if (signum == SIGSEGV || signum == SIGILL)
+ siglongjmp(jmpbuf, 1);
+}
+
+/* check that a corrupted PAC results in SIGSEGV or SIGILL */
+TEST(corrupt_pac)
+{
+ struct sigaction sa;
+
+ ASSERT_PAUTH_ENABLED();
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ sa.sa_sigaction = pac_signal_handler;
+ sa.sa_flags = SA_SIGINFO | SA_RESETHAND;
+ sigemptyset(&sa.sa_mask);
+
+ sigaction(SIGSEGV, &sa, NULL);
+ sigaction(SIGILL, &sa, NULL);
+
+ pac_corruptor();
+ ASSERT_TRUE(0) TH_LOG("SIGSEGV/SIGILL signal did not occur");
+ }
+}
+
+/*
+ * There are no separate pac* and aut* controls so checking only the pac*
+ * instructions is sufficient
+ */
+TEST(pac_instructions_not_nop)
+{
+ size_t keyia = 0;
+ size_t keyib = 0;
+ size_t keyda = 0;
+ size_t keydb = 0;
+
+ ASSERT_PAUTH_ENABLED();
+
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+ keyia |= keyia_sign(i) & PAC_MASK;
+ keyib |= keyib_sign(i) & PAC_MASK;
+ keyda |= keyda_sign(i) & PAC_MASK;
+ keydb |= keydb_sign(i) & PAC_MASK;
+ }
+
+ ASSERT_NE(0, keyia) TH_LOG("keyia instructions did nothing");
+ ASSERT_NE(0, keyib) TH_LOG("keyib instructions did nothing");
+ ASSERT_NE(0, keyda) TH_LOG("keyda instructions did nothing");
+ ASSERT_NE(0, keydb) TH_LOG("keydb instructions did nothing");
+}
+
+TEST(pac_instructions_not_nop_generic)
+{
+ size_t keyg = 0;
+
+ ASSERT_GENERIC_PAUTH_ENABLED();
+
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++)
+ keyg |= keyg_sign(i) & PAC_MASK;
+
+ ASSERT_NE(0, keyg) TH_LOG("keyg instructions did nothing");
+}
+
+TEST(single_thread_different_keys)
+{
+ int same = 10;
+ int nkeys = NKEYS;
+ int tmp;
+ struct signatures signed_vals;
+ unsigned long hwcaps = getauxval(AT_HWCAP);
+
+ /* generic and data key instructions are not in NOP space. This prevents a SIGILL */
+ ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+ if (!(hwcaps & HWCAP_PACG)) {
+ TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
+ nkeys = NKEYS - 1;
+ }
+
+ /*
+ * In Linux the PAC field can be up to 7 bits wide. Even if the keys are
+ * different, there is about a 5% chance that the PACs of two different
+ * addresses collide, and this chance rises rapidly as fewer bits are
+ * allocated to the PAC (e.g. with wider addresses). Comparing the signed
+ * values directly is therefore more reliable.
+ * All signed values need to differ at least once out of n attempts
+ * before we can be certain that the keys are different
+ */
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+ if (nkeys == NKEYS)
+ sign_all(&signed_vals, i);
+ else
+ sign_specific(&signed_vals, i);
+
+ tmp = n_same_single_set(&signed_vals, nkeys);
+ if (tmp < same)
+ same = tmp;
+ }
+
+ ASSERT_EQ(0, same) TH_LOG("%d keys clashed every time", same);
+}
+
+/*
+ * fork() does not change keys; only exec() does. So call a worker program
+ * whose only job is to sign a value and report back the results
+ */
+TEST(exec_changed_keys)
+{
+ struct signatures new_keys;
+ struct signatures old_keys;
+ int ret;
+ int same = 10;
+ int nkeys = NKEYS;
+ unsigned long hwcaps = getauxval(AT_HWCAP);
+
+ /* generic and data key instructions are not in NOP space. This prevents a SIGILL */
+ ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+ if (!(hwcaps & HWCAP_PACG)) {
+ TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
+ nkeys = NKEYS - 1;
+ }
+
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+ ret = exec_sign_all(&new_keys, i);
+ ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+ if (nkeys == NKEYS)
+ sign_all(&old_keys, i);
+ else
+ sign_specific(&old_keys, i);
+
+ ret = n_same(&old_keys, &new_keys, nkeys);
+ if (ret < same)
+ same = ret;
+ }
+
+ ASSERT_EQ(0, same) TH_LOG("exec() did not change %d keys", same);
+}
+
+TEST(context_switch_keep_keys)
+{
+ int ret;
+ struct signatures trash;
+ struct signatures before;
+ struct signatures after;
+
+ ASSERT_PAUTH_ENABLED();
+
+ sign_specific(&before, ARBITRARY_VALUE);
+
+ /* will context switch with a process with different keys at least once */
+ ret = exec_sign_all(&trash, ARBITRARY_VALUE);
+ ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+ sign_specific(&after, ARBITRARY_VALUE);
+
+ ASSERT_EQ(before.keyia, after.keyia) TH_LOG("keyia changed after context switching");
+ ASSERT_EQ(before.keyib, after.keyib) TH_LOG("keyib changed after context switching");
+ ASSERT_EQ(before.keyda, after.keyda) TH_LOG("keyda changed after context switching");
+ ASSERT_EQ(before.keydb, after.keydb) TH_LOG("keydb changed after context switching");
+}
+
+TEST(context_switch_keep_keys_generic)
+{
+ int ret;
+ struct signatures trash;
+ size_t before;
+ size_t after;
+
+ ASSERT_GENERIC_PAUTH_ENABLED();
+
+ before = keyg_sign(ARBITRARY_VALUE);
+
+ /* will context switch with a process with different keys at least once */
+ ret = exec_sign_all(&trash, ARBITRARY_VALUE);
+ ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+ after = keyg_sign(ARBITRARY_VALUE);
+
+ ASSERT_EQ(before, after) TH_LOG("keyg changed after context switching");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/arm64/pauth/pac_corruptor.S b/tools/testing/selftests/arm64/pauth/pac_corruptor.S
new file mode 100644
index 000000000000..aa6588050752
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/pac_corruptor.S
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+.global pac_corruptor
+
+.text
+/*
+ * Corrupting a single bit of the PAC guarantees that authentication will
+ * fail: no collision is possible. TCR_EL1.TBI0 is set by default, so the
+ * top byte carries no PAC bits and is not tested here
+ */
+ pac_corruptor:
+ paciasp
+
+ /* corrupt the top bit of the PAC */
+ eor lr, lr, #1 << 53
+
+ autiasp
+ ret
diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile
index b497cfea4643..ac4ad0005715 100644
--- a/tools/testing/selftests/arm64/signal/Makefile
+++ b/tools/testing/selftests/arm64/signal/Makefile
@@ -21,10 +21,6 @@ include ../../lib.mk
$(TEST_GEN_PROGS): $(PROGS)
cp $(PROGS) $(OUTPUT)/
-clean:
- $(CLEAN)
- rm -f $(PROGS)
-
# Common test-unit targets to build common-layout test-cases executables
# Needs secondary expansion to properly include the testcase c-file in pre-reqs
.SECONDEXPANSION:
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index c30079c86998..3ab1200e172f 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -6,7 +6,6 @@ test_lpm_map
test_tag
FEATURE-DUMP.libbpf
fixdep
-test_align
test_dev_cgroup
/test_progs*
test_tcpbpf_user
@@ -14,9 +13,7 @@ test_verifier_log
feature
test_sock
test_sock_addr
-test_sock_fields
urandom_read
-test_btf
test_sockmap
test_lirc_mode2_user
get_cgroup_id_user
@@ -30,8 +27,6 @@ test_tcpnotify_user
test_libbpf
test_tcp_check_syncookie_user
test_sysctl
-test_hashmap
-test_btf_dump
test_current_pid_tgid_new_ns
xdping
test_cpp
@@ -39,4 +34,5 @@ test_cpp
/no_alu32
/bpf_gcc
/tools
-
+/runqslower
+/bench
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 7729892e0b04..542768f5195b 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -2,6 +2,8 @@
include ../../../../scripts/Kbuild.include
include ../../../scripts/Makefile.arch
+CXX ?= $(CROSS_COMPILE)g++
+
CURDIR := $(abspath .)
TOOLSDIR := $(abspath ../../..)
LIBDIR := $(TOOLSDIR)/lib
@@ -20,19 +22,20 @@ CLANG ?= clang
LLC ?= llc
LLVM_OBJCOPY ?= llvm-objcopy
BPF_GCC ?= $(shell command -v bpf-gcc;)
-CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) \
- -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR) \
- -I$(APIDIR) \
+SAN_CFLAGS ?=
+CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) $(SAN_CFLAGS) \
+ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
+ -I$(TOOLSINCDIR) -I$(APIDIR) \
-Dbpf_prog_load=bpf_prog_test_load \
-Dbpf_load_program=bpf_test_load_program
LDLIBS += -lcap -lelf -lz -lrt -lpthread
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
- test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
- test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
+ test_verifier_log test_dev_cgroup test_tcpbpf_user \
+ test_sock test_sockmap get_cgroup_id_user test_socket_cookie \
test_cgroup_storage \
- test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
+ test_netcnt test_tcpnotify_user test_sysctl \
test_progs-no_alu32 \
test_current_pid_tgid_new_ns
@@ -65,7 +68,8 @@ TEST_PROGS := test_kmod.sh \
test_tc_edt.sh \
test_xdping.sh \
test_bpftool_build.sh \
- test_bpftool.sh
+ test_bpftool.sh \
+ test_bpftool_metadata.sh
TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \
@@ -76,7 +80,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
- test_lirc_mode2_user xdping test_cpp runqslower
+ test_lirc_mode2_user xdping test_cpp runqslower bench
TEST_CUSTOM_PROGS = urandom_read
@@ -99,7 +103,7 @@ endif
OVERRIDE_TARGETS := 1
override define CLEAN
$(call msg,CLEAN)
- $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
+ $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
endef
include ../lib.mk
@@ -108,6 +112,7 @@ SCRATCH_DIR := $(OUTPUT)/tools
BUILD_DIR := $(SCRATCH_DIR)/build
INCLUDE_DIR := $(SCRATCH_DIR)/include
BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+RESOLVE_BTFIDS := $(BUILD_DIR)/resolve_btfids/resolve_btfids
# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
# to build individual tests.
@@ -119,29 +124,36 @@ $(notdir $(TEST_GEN_PROGS) \
$(TEST_GEN_PROGS_EXTENDED) \
$(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
+$(OUTPUT)/%.o: %.c
+ $(call msg,CC,,$@)
+ $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+
$(OUTPUT)/%:%.c
$(call msg,BINARY,,$@)
- $(LINK.c) $^ $(LDLIBS) -o $@
+ $(Q)$(LINK.c) $^ $(LDLIBS) -o $@
$(OUTPUT)/urandom_read: urandom_read.c
$(call msg,BINARY,,$@)
- $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id
+ $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id=sha1
$(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
$(call msg,CC,,$@)
- $(CC) -c $(CFLAGS) -o $@ $<
+ $(Q)$(CC) -c $(CFLAGS) -o $@ $<
-VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
../../../../vmlinux \
/sys/kernel/btf/vmlinux \
/boot/vmlinux-$(shell uname -r)
-VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
-$(OUTPUT)/runqslower: $(BPFOBJ)
+DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
+
+$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \
- BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR)
+ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \
+ cp $(SCRATCH_DIR)/runqslower $@
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
@@ -159,13 +171,17 @@ $(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c
-DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
BPFTOOL ?= $(DEFAULT_BPFTOOL)
$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(BPFOBJ) | $(BUILD_DIR)/bpftool
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
OUTPUT=$(BUILD_DIR)/bpftool/ \
prefix= DESTDIR=$(SCRATCH_DIR)/ install
+ $(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation
+ $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
+ -C $(BPFTOOLDIR)/Documentation \
+ OUTPUT=$(BUILD_DIR)/bpftool/Documentation/ \
+ prefix= DESTDIR=$(SCRATCH_DIR)/ install
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
../../../include/uapi/linux/bpf.h \
@@ -173,13 +189,28 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
-$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR):
+$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(BUILD_DIR)/resolve_btfids $(INCLUDE_DIR):
$(call msg,MKDIR,,$@)
- mkdir -p $@
+ $(Q)mkdir -p $@
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
+ifeq ($(VMLINUX_H),)
$(call msg,GEN,,$@)
- $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+ $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+else
+ $(call msg,CP,,$@)
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
+
+$(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \
+ $(TOOLSDIR)/bpf/resolve_btfids/main.c \
+ $(TOOLSDIR)/lib/rbtree.c \
+ $(TOOLSDIR)/lib/zalloc.c \
+ $(TOOLSDIR)/lib/string.c \
+ $(TOOLSDIR)/lib/ctype.c \
+ $(TOOLSDIR)/lib/str_error_r.c
+ $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \
+ OUTPUT=$(BUILD_DIR)/resolve_btfids/ BPFOBJ=$(BPFOBJ)
# Get Clang's default includes on this system, as opposed to those seen by
# '-target bpf'. This fixes "missing" files on some architectures/distros,
@@ -217,31 +248,31 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
# $4 - LDFLAGS
define CLANG_BPF_BUILD_RULE
$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
- ($(CLANG) $3 -O2 -target bpf -emit-llvm \
+ $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
- ($(CLANG) $3 -O2 -target bpf -emit-llvm \
+ $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC
define CLANG_NATIVE_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
- ($(CLANG) $3 -O2 -emit-llvm \
+ $(Q)($(CLANG) $3 -O2 -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
$(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
- $(BPF_GCC) $3 $4 -O2 -c $1 -o $2
+ $(Q)$(BPF_GCC) $3 $4 -O2 -c $1 -o $2
endef
-SKEL_BLACKLIST := btf__% test_pinning_invalid.c
+SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
# Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
@@ -263,6 +294,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)
TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
$$(filter-out $(SKEL_BLACKLIST), \
$$(TRUNNER_BPF_SRCS)))
+TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
# Evaluate rules now with extra TRUNNER_XXX variables above already defined
$$(eval $$(call DEFINE_TEST_RUNNER_RULES,$1,$2))
@@ -279,7 +311,7 @@ ifeq ($($(TRUNNER_OUTPUT)-dir),)
$(TRUNNER_OUTPUT)-dir := y
$(TRUNNER_OUTPUT):
$$(call msg,MKDIR,,$$@)
- mkdir -p $$@
+ $(Q)mkdir -p $$@
endif
# ensure we set up BPF objects generation rule just once for a given
@@ -290,7 +322,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_BPF_PROGS_DIR)/%.c \
$(TRUNNER_BPF_PROGS_DIR)/*.h \
$$(INCLUDE_DIR)/vmlinux.h \
- $$(BPFOBJ) | $(TRUNNER_OUTPUT)
+ $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS), \
$(TRUNNER_BPF_LDFLAGS))
@@ -299,7 +331,7 @@ $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \
$(TRUNNER_OUTPUT)/%.o \
| $(BPFTOOL) $(TRUNNER_OUTPUT)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
- $$(BPFTOOL) gen skeleton $$< > $$@
+ $(Q)$$(BPFTOOL) gen skeleton $$< > $$@
endif
# ensure we set up tests.h header generation rule just once
@@ -323,7 +355,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
$(TRUNNER_BPF_SKELS) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
- cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
+ $(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
%.c \
@@ -331,20 +363,22 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_TESTS_HDR) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
- $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+ $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
# only copy extra resources if in flavored build
$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
ifneq ($2,)
$$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
- cp -a $$^ $(TRUNNER_OUTPUT)/
+ $(Q)cp -a $$^ $(TRUNNER_OUTPUT)/
endif
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
+ $(RESOLVE_BTFIDS) \
| $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
- $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
+ $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
+ $(Q)$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@
endef
@@ -352,6 +386,7 @@ endef
TRUNNER_TESTS_DIR := prog_tests
TRUNNER_BPF_PROGS_DIR := progs
TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
+ network_helpers.c testing_helpers.c \
flow_dissector_load.h
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(wildcard progs/btf_dump_test_case_*.c)
@@ -396,12 +431,30 @@ verifier/tests.h: verifier/*.c
) > verifier/tests.h)
$(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BINARY,,$@)
- $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+ $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
# Make sure we are able to include and link libbpf against c++.
$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
$(call msg,CXX,,$@)
- $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
+ $(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
+
+# Benchmark runner
+$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
+ $(call msg,CC,,$@)
+ $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+$(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
+$(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
+$(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
+ $(OUTPUT)/perfbuf_bench.skel.h
+$(OUTPUT)/bench.o: bench.h testing_helpers.h
+$(OUTPUT)/bench: LDLIBS += -lm
+$(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
+ $(OUTPUT)/bench_count.o \
+ $(OUTPUT)/bench_rename.o \
+ $(OUTPUT)/bench_trigger.o \
+ $(OUTPUT)/bench_ringbufs.o
+ $(call msg,BINARY,,$@)
+ $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
new file mode 100644
index 000000000000..ac9eda830187
--- /dev/null
+++ b/tools/testing/selftests/bpf/README.rst
@@ -0,0 +1,104 @@
+==================
+BPF Selftest Notes
+==================
+General instructions on running selftests can be found in
+`Documentation/bpf/bpf_devel_QA.rst`_.
+
+Additional information about selftest failures is
+documented here.
+
+profiler[23] test failures with clang/llvm <12.0.0
+==================================================
+
+With clang/llvm <12.0.0, the profiler[23] test may fail.
+The symptom looks like
+
+.. code-block:: c
+
+ // r9 is a pointer to map_value
+ // r7 is a scalar
+ 17: bf 96 00 00 00 00 00 00 r6 = r9
+ 18: 0f 76 00 00 00 00 00 00 r6 += r7
+ math between map_value pointer and register with unbounded min value is not allowed
+
+ // the instructions below will not be seen in the verifier log
+ 19: a5 07 01 00 01 01 00 00 if r7 < 257 goto +1
+ 20: bf 96 00 00 00 00 00 00 r6 = r9
+ // r6 is used here
+
+The verifier will reject such code with the above error.
+At insn 18, r7 is indeed unbounded. The later insn 19 checks the bounds and
+insn 20 undoes the map_value addition. It is currently impossible for the
+verifier to understand such speculative pointer arithmetic.
+Hence
+ https://reviews.llvm.org/D85570
+addresses it on the compiler side. It was committed in llvm 12.
+
+The corresponding C code:
+
+.. code-block:: c
+
+ for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
+ filepart_length = bpf_probe_read_str(payload, ...);
+ if (filepart_length <= MAX_PATH) {
+ barrier_var(filepart_length); // workaround
+ payload += filepart_length;
+ }
+ }
+
+bpf_iter test failures with clang/llvm 10.0.0
+=============================================
+
+With clang/llvm 10.0.0, the following two bpf_iter tests failed:
+
+ * ``bpf_iter/ipv6_route``
+ * ``bpf_iter/netlink``
+
+The symptom for ``bpf_iter/ipv6_route`` looks like
+
+.. code-block:: c
+
+ 2: (79) r8 = *(u64 *)(r1 +8)
+ ...
+ 14: (bf) r2 = r8
+ 15: (0f) r2 += r1
+ ; BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
+ 16: (7b) *(u64 *)(r8 +64) = r2
+ only read is supported
+
+The symptom for ``bpf_iter/netlink`` looks like
+
+.. code-block:: c
+
+ ; struct netlink_sock *nlk = ctx->sk;
+ 2: (79) r7 = *(u64 *)(r1 +8)
+ ...
+ 15: (bf) r2 = r7
+ 16: (0f) r2 += r1
+ ; BPF_SEQ_PRINTF(seq, "%pK %-3d ", s, s->sk_protocol);
+ 17: (7b) *(u64 *)(r7 +0) = r2
+ only read is supported
+
+This is due to a llvm BPF backend bug. The fix
+ https://reviews.llvm.org/D78466
+has been pushed to the llvm 10.x release branch and will be
+available in 10.0.1. The fix is also present in llvm 11.0.0 trunk.
+
+BPF CO-RE-based tests and Clang version
+=======================================
+
+A set of selftests use BPF target-specific built-ins, which might require
+bleeding-edge Clang versions (Clang 12 nightly at this time).
+
+A few sub-tests of the core_reloc test suite (part of the test_progs test
+runner) require the following built-ins, listed with the corresponding
+Clang diffs that introduced them. These sub-tests are skipped if Clang is
+too old to support them; they shouldn't cause build failures or runtime
+test failures (a usage sketch follows the links below):
+
+ - __builtin_btf_type_id() ([0], [1], [2]);
+ - __builtin_preserve_type_info(), __builtin_preserve_enum_value() ([3], [4]).
+
+ [0] https://reviews.llvm.org/D74572
+ [1] https://reviews.llvm.org/D74668
+ [2] https://reviews.llvm.org/D85174
+ [3] https://reviews.llvm.org/D83878
+ [4] https://reviews.llvm.org/D83242
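+
+A minimal sketch of how these built-ins are typically used, via the libbpf
+wrappers in ``bpf_core_read.h`` (illustrative only; assumes a libbpf recent
+enough to provide these macros):
+
+.. code-block:: c
+
+ /* wraps __builtin_btf_type_id(): kernel BTF type ID, CO-RE relocated */
+ __u32 id = bpf_core_type_id_kernel(struct task_struct);
+
+ /* wraps __builtin_preserve_enum_value(): enum value from kernel BTF */
+ __u64 val = bpf_core_enum_value(enum bpf_map_type, BPF_MAP_TYPE_RINGBUF);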
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
new file mode 100644
index 000000000000..332ed2f7b402
--- /dev/null
+++ b/tools/testing/selftests/bpf/bench.c
@@ -0,0 +1,464 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <argp.h>
+#include <linux/compiler.h>
+#include <sys/time.h>
+#include <sched.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sys/sysinfo.h>
+#include <sys/resource.h>
+#include <signal.h>
+#include "bench.h"
+#include "testing_helpers.h"
+
+struct env env = {
+ .warmup_sec = 1,
+ .duration_sec = 5,
+ .affinity = false,
+ .consumer_cnt = 1,
+ .producer_cnt = 1,
+};
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ if (level == LIBBPF_DEBUG && !env.verbose)
+ return 0;
+ return vfprintf(stderr, format, args);
+}
+
+static int bump_memlock_rlimit(void)
+{
+ struct rlimit rlim_new = {
+ .rlim_cur = RLIM_INFINITY,
+ .rlim_max = RLIM_INFINITY,
+ };
+
+ return setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+}
+
+void setup_libbpf()
+{
+ int err;
+
+ libbpf_set_print(libbpf_print_fn);
+
+ err = bump_memlock_rlimit();
+ if (err)
+ fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err);
+}
+
+void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double hits_per_sec, drops_per_sec;
+ double hits_per_prod;
+
+ hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
+ hits_per_prod = hits_per_sec / env.producer_cnt;
+ drops_per_sec = res->drops / 1000000.0 / (delta_ns / 1000000000.0);
+
+ printf("Iter %3d (%7.3lfus): ",
+ iter, (delta_ns - 1000000000) / 1000.0);
+
+ printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n",
+ hits_per_sec, hits_per_prod, drops_per_sec);
+}
+
+void hits_drops_report_final(struct bench_res res[], int res_cnt)
+{
+ int i;
+ double hits_mean = 0.0, drops_mean = 0.0;
+ double hits_stddev = 0.0, drops_stddev = 0.0;
+
+ for (i = 0; i < res_cnt; i++) {
+ hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+ drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt);
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
+ (hits_mean - res[i].hits / 1000000.0) /
+ (res_cnt - 1.0);
+ drops_stddev += (drops_mean - res[i].drops / 1000000.0) *
+ (drops_mean - res[i].drops / 1000000.0) /
+ (res_cnt - 1.0);
+ }
+ hits_stddev = sqrt(hits_stddev);
+ drops_stddev = sqrt(drops_stddev);
+ }
+ printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ",
+ hits_mean, hits_stddev, hits_mean / env.producer_cnt);
+ printf("drops %8.3lf \u00B1 %5.3lfM/s\n",
+ drops_mean, drops_stddev);
+}
+
+const char *argp_program_version = "benchmark";
+const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
+const char argp_program_doc[] =
+"benchmark Generic benchmarking framework.\n"
+"\n"
+"This tool runs benchmarks.\n"
+"\n"
+"USAGE: benchmark <bench-name>\n"
+"\n"
+"EXAMPLES:\n"
+" # run 'count-local' benchmark with 1 producer and 1 consumer\n"
+" benchmark count-local\n"
+" # run 'count-local' with 16 producer and 8 consumer thread, pinned to CPUs\n"
+" benchmark -p16 -c8 -a count-local\n";
+
+enum {
+ ARG_PROD_AFFINITY_SET = 1000,
+ ARG_CONS_AFFINITY_SET = 1001,
+};
+
+static const struct argp_option opts[] = {
+ { "list", 'l', NULL, 0, "List available benchmarks"},
+ { "duration", 'd', "SEC", 0, "Duration of benchmark, seconds"},
+ { "warmup", 'w', "SEC", 0, "Warm-up period, seconds"},
+ { "producers", 'p', "NUM", 0, "Number of producer threads"},
+ { "consumers", 'c', "NUM", 0, "Number of consumer threads"},
+ { "verbose", 'v', NULL, 0, "Verbose debug output"},
+ { "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
+ { "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
+ "Set of CPUs for producer threads; implies --affinity"},
+ { "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
+ "Set of CPUs for consumer threads; implies --affinity"},
+ {},
+};
+
+extern struct argp bench_ringbufs_argp;
+
+static const struct argp_child bench_parsers[] = {
+ { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ static int pos_args;
+
+ switch (key) {
+ case 'v':
+ env.verbose = true;
+ break;
+ case 'l':
+ env.list = true;
+ break;
+ case 'd':
+ env.duration_sec = strtol(arg, NULL, 10);
+ if (env.duration_sec <= 0) {
+ fprintf(stderr, "Invalid duration: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 'w':
+ env.warmup_sec = strtol(arg, NULL, 10);
+ if (env.warmup_sec <= 0) {
+ fprintf(stderr, "Invalid warm-up duration: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 'p':
+ env.producer_cnt = strtol(arg, NULL, 10);
+ if (env.producer_cnt <= 0) {
+ fprintf(stderr, "Invalid producer count: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 'c':
+ env.consumer_cnt = strtol(arg, NULL, 10);
+ if (env.consumer_cnt <= 0) {
+ fprintf(stderr, "Invalid consumer count: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 'a':
+ env.affinity = true;
+ break;
+ case ARG_PROD_AFFINITY_SET:
+ env.affinity = true;
+ if (parse_num_list(arg, &env.prod_cpus.cpus,
+ &env.prod_cpus.cpus_len)) {
+ fprintf(stderr, "Invalid format of CPU set for producers.");
+ argp_usage(state);
+ }
+ break;
+ case ARG_CONS_AFFINITY_SET:
+ env.affinity = true;
+ if (parse_num_list(arg, &env.cons_cpus.cpus,
+ &env.cons_cpus.cpus_len)) {
+ fprintf(stderr, "Invalid format of CPU set for consumers.");
+ argp_usage(state);
+ }
+ break;
+ case ARGP_KEY_ARG:
+ if (pos_args++) {
+ fprintf(stderr,
+ "Unrecognized positional argument: %s\n", arg);
+ argp_usage(state);
+ }
+ env.bench_name = strdup(arg);
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+static void parse_cmdline_args(int argc, char **argv)
+{
+ static const struct argp argp = {
+ .options = opts,
+ .parser = parse_arg,
+ .doc = argp_program_doc,
+ .children = bench_parsers,
+ };
+ if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
+ exit(1);
+ if (!env.list && !env.bench_name) {
+ argp_help(&argp, stderr, ARGP_HELP_DOC, "bench");
+ exit(1);
+ }
+}
+
+static void collect_measurements(long delta_ns);
+
+static __u64 last_time_ns;
+static void sigalarm_handler(int signo)
+{
+ long new_time_ns = get_time_ns();
+ long delta_ns = new_time_ns - last_time_ns;
+
+ collect_measurements(delta_ns);
+
+ last_time_ns = new_time_ns;
+}
+
+/* set up periodic 1-second timer */
+static void setup_timer()
+{
+ static struct sigaction sigalarm_action = {
+ .sa_handler = sigalarm_handler,
+ };
+ struct itimerval timer_settings = {};
+ int err;
+
+ last_time_ns = get_time_ns();
+ err = sigaction(SIGALRM, &sigalarm_action, NULL);
+ if (err < 0) {
+ fprintf(stderr, "failed to install SIGALRM handler: %d\n", -errno);
+ exit(1);
+ }
+ timer_settings.it_interval.tv_sec = 1;
+ timer_settings.it_value.tv_sec = 1;
+ err = setitimer(ITIMER_REAL, &timer_settings, NULL);
+ if (err < 0) {
+ fprintf(stderr, "failed to arm interval timer: %d\n", -errno);
+ exit(1);
+ }
+}
+
+static void set_thread_affinity(pthread_t thread, int cpu)
+{
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+ if (pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset)) {
+ fprintf(stderr, "setting affinity to CPU #%d failed: %d\n",
+ cpu, errno);
+ exit(1);
+ }
+}
+
+static int next_cpu(struct cpu_set *cpu_set)
+{
+ if (cpu_set->cpus) {
+ int i;
+
+ /* find next available CPU */
+ for (i = cpu_set->next_cpu; i < cpu_set->cpus_len; i++) {
+ if (cpu_set->cpus[i]) {
+ cpu_set->next_cpu = i + 1;
+ return i;
+ }
+ }
+ fprintf(stderr, "Not enough CPUs specified, need CPU #%d or higher.\n", i);
+ exit(1);
+ }
+
+ return cpu_set->next_cpu++;
+}
+
+static struct bench_state {
+ int res_cnt;
+ struct bench_res *results;
+ pthread_t *consumers;
+ pthread_t *producers;
+} state;
+
+const struct bench *bench = NULL;
+
+extern const struct bench bench_count_global;
+extern const struct bench bench_count_local;
+extern const struct bench bench_rename_base;
+extern const struct bench bench_rename_kprobe;
+extern const struct bench bench_rename_kretprobe;
+extern const struct bench bench_rename_rawtp;
+extern const struct bench bench_rename_fentry;
+extern const struct bench bench_rename_fexit;
+extern const struct bench bench_trig_base;
+extern const struct bench bench_trig_tp;
+extern const struct bench bench_trig_rawtp;
+extern const struct bench bench_trig_kprobe;
+extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_fentry_sleep;
+extern const struct bench bench_trig_fmodret;
+extern const struct bench bench_rb_libbpf;
+extern const struct bench bench_rb_custom;
+extern const struct bench bench_pb_libbpf;
+extern const struct bench bench_pb_custom;
+
+static const struct bench *benchs[] = {
+ &bench_count_global,
+ &bench_count_local,
+ &bench_rename_base,
+ &bench_rename_kprobe,
+ &bench_rename_kretprobe,
+ &bench_rename_rawtp,
+ &bench_rename_fentry,
+ &bench_rename_fexit,
+ &bench_trig_base,
+ &bench_trig_tp,
+ &bench_trig_rawtp,
+ &bench_trig_kprobe,
+ &bench_trig_fentry,
+ &bench_trig_fentry_sleep,
+ &bench_trig_fmodret,
+ &bench_rb_libbpf,
+ &bench_rb_custom,
+ &bench_pb_libbpf,
+ &bench_pb_custom,
+};
+
+static void setup_benchmark()
+{
+ int i, err;
+
+ if (!env.bench_name) {
+ fprintf(stderr, "benchmark name is not specified\n");
+ exit(1);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(benchs); i++) {
+ if (strcmp(benchs[i]->name, env.bench_name) == 0) {
+ bench = benchs[i];
+ break;
+ }
+ }
+ if (!bench) {
+ fprintf(stderr, "benchmark '%s' not found\n", env.bench_name);
+ exit(1);
+ }
+
+ printf("Setting up benchmark '%s'...\n", bench->name);
+
+ state.producers = calloc(env.producer_cnt, sizeof(*state.producers));
+ state.consumers = calloc(env.consumer_cnt, sizeof(*state.consumers));
+ state.results = calloc(env.duration_sec + env.warmup_sec + 2,
+ sizeof(*state.results));
+ if (!state.producers || !state.consumers || !state.results)
+ exit(1);
+
+ if (bench->validate)
+ bench->validate();
+ if (bench->setup)
+ bench->setup();
+
+ for (i = 0; i < env.consumer_cnt; i++) {
+ err = pthread_create(&state.consumers[i], NULL,
+ bench->consumer_thread, (void *)(long)i);
+ if (err) {
+ fprintf(stderr, "failed to create consumer thread #%d: %d\n",
+ i, -errno);
+ exit(1);
+ }
+ if (env.affinity)
+ set_thread_affinity(state.consumers[i],
+ next_cpu(&env.cons_cpus));
+ }
+
+ /* unless an explicit producer CPU list is specified, continue after
+ * the last consumer CPU
+ */
+ if (!env.prod_cpus.cpus)
+ env.prod_cpus.next_cpu = env.cons_cpus.next_cpu;
+
+ for (i = 0; i < env.producer_cnt; i++) {
+ err = pthread_create(&state.producers[i], NULL,
+ bench->producer_thread, (void *)(long)i);
+ if (err) {
+ fprintf(stderr, "failed to create producer thread #%d: %d\n",
+ i, -errno);
+ exit(1);
+ }
+ if (env.affinity)
+ set_thread_affinity(state.producers[i],
+ next_cpu(&env.prod_cpus));
+ }
+
+ printf("Benchmark '%s' started.\n", bench->name);
+}
+
+static pthread_mutex_t bench_done_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t bench_done = PTHREAD_COND_INITIALIZER;
+
+static void collect_measurements(long delta_ns) {
+ int iter = state.res_cnt++;
+ struct bench_res *res = &state.results[iter];
+
+ bench->measure(res);
+
+ if (bench->report_progress)
+ bench->report_progress(iter, res, delta_ns);
+
+ if (iter == env.duration_sec + env.warmup_sec) {
+ pthread_mutex_lock(&bench_done_mtx);
+ pthread_cond_signal(&bench_done);
+ pthread_mutex_unlock(&bench_done_mtx);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_cmdline_args(argc, argv);
+
+ if (env.list) {
+ int i;
+
+ printf("Available benchmarks:\n");
+ for (i = 0; i < ARRAY_SIZE(benchs); i++) {
+ printf("- %s\n", benchs[i]->name);
+ }
+ return 0;
+ }
+
+ setup_benchmark();
+
+ setup_timer();
+
+ pthread_mutex_lock(&bench_done_mtx);
+ pthread_cond_wait(&bench_done, &bench_done_mtx);
+ pthread_mutex_unlock(&bench_done_mtx);
+
+ if (bench->report_final)
+ /* skip the warm-up samples */
+ bench->report_final(state.results + env.warmup_sec,
+ state.res_cnt - env.warmup_sec);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
new file mode 100644
index 000000000000..c1f48a473b02
--- /dev/null
+++ b/tools/testing/selftests/bpf/bench.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#pragma once
+#include <stdlib.h>
+#include <stdbool.h>
+#include <linux/err.h>
+#include <errno.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <math.h>
+#include <time.h>
+#include <sys/syscall.h>
+
+struct cpu_set {
+ bool *cpus;
+ int cpus_len;
+ int next_cpu;
+};
+
+struct env {
+ char *bench_name;
+ int duration_sec;
+ int warmup_sec;
+ bool verbose;
+ bool list;
+ bool affinity;
+ int consumer_cnt;
+ int producer_cnt;
+ struct cpu_set prod_cpus;
+ struct cpu_set cons_cpus;
+};
+
+struct bench_res {
+ long hits;
+ long drops;
+};
+
+struct bench {
+ const char *name;
+ void (*validate)();
+ void (*setup)();
+ void *(*producer_thread)(void *ctx);
+ void *(*consumer_thread)(void *ctx);
+ void (*measure)(struct bench_res* res);
+ void (*report_progress)(int iter, struct bench_res* res, long delta_ns);
+ void (*report_final)(struct bench_res res[], int res_cnt);
+};
+
+struct counter {
+ long value;
+} __attribute__((aligned(128)));
+
+extern struct env env;
+extern const struct bench *bench;
+
+void setup_libbpf();
+void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns);
+void hits_drops_report_final(struct bench_res res[], int res_cnt);
+
+static inline __u64 get_time_ns() {
+ struct timespec t;
+
+ clock_gettime(CLOCK_MONOTONIC, &t);
+
+ return (__u64)t.tv_sec * 1000000000 + t.tv_nsec;
+}
+
+static inline void atomic_inc(long *value)
+{
+ (void)__atomic_add_fetch(value, 1, __ATOMIC_RELAXED);
+}
+
+static inline void atomic_add(long *value, long n)
+{
+ (void)__atomic_add_fetch(value, n, __ATOMIC_RELAXED);
+}
+
+static inline long atomic_swap(long *value, long n)
+{
+ return __atomic_exchange_n(value, n, __ATOMIC_RELAXED);
+}
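+
+/*
+ * Sketch of wiring up a new benchmark (illustrative; the my_* names are
+ * hypothetical). Define the callbacks, then:
+ *
+ *	const struct bench bench_my_bench = {
+ *		.name = "my-bench",
+ *		.setup = my_setup,
+ *		.producer_thread = my_producer,
+ *		.consumer_thread = my_consumer,
+ *		.measure = my_measure,
+ *		.report_progress = hits_drops_report_progress,
+ *		.report_final = hits_drops_report_final,
+ *	};
+ *
+ * bench.c must also declare the struct as extern and list it in its
+ * benchs[] array so it can be selected by name.
+ */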
diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c
new file mode 100644
index 000000000000..befba7a82643
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_count.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bench.h"
+
+/* COUNT-GLOBAL benchmark */
+
+static struct count_global_ctx {
+ struct counter hits;
+} count_global_ctx;
+
+static void *count_global_producer(void *input)
+{
+ struct count_global_ctx *ctx = &count_global_ctx;
+
+ while (true) {
+ atomic_inc(&ctx->hits.value);
+ }
+ return NULL;
+}
+
+static void *count_global_consumer(void *input)
+{
+ return NULL;
+}
+
+static void count_global_measure(struct bench_res *res)
+{
+ struct count_global_ctx *ctx = &count_global_ctx;
+
+ res->hits = atomic_swap(&ctx->hits.value, 0);
+}
+
+/* COUNT-local benchmark */
+
+static struct count_local_ctx {
+ struct counter *hits;
+} count_local_ctx;
+
+static void count_local_setup()
+{
+ struct count_local_ctx *ctx = &count_local_ctx;
+
+ ctx->hits = calloc(env.consumer_cnt, sizeof(*ctx->hits));
+ if (!ctx->hits)
+ exit(1);
+}
+
+static void *count_local_producer(void *input)
+{
+ struct count_local_ctx *ctx = &count_local_ctx;
+ int idx = (long)input;
+
+ while (true) {
+ atomic_inc(&ctx->hits[idx].value);
+ }
+ return NULL;
+}
+
+static void *count_local_consumer(void *input)
+{
+ return NULL;
+}
+
+static void count_local_measure(struct bench_res *res)
+{
+ struct count_local_ctx *ctx = &count_local_ctx;
+ int i;
+
+ for (i = 0; i < env.producer_cnt; i++) {
+ res->hits += atomic_swap(&ctx->hits[i].value, 0);
+ }
+}
+
+const struct bench bench_count_global = {
+ .name = "count-global",
+ .producer_thread = count_global_producer,
+ .consumer_thread = count_global_consumer,
+ .measure = count_global_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_count_local = {
+ .name = "count-local",
+ .setup = count_local_setup,
+ .producer_thread = count_local_producer,
+ .consumer_thread = count_local_consumer,
+ .measure = count_local_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c
new file mode 100644
index 000000000000..a967674098ad
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_rename.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <fcntl.h>
+#include "bench.h"
+#include "test_overhead.skel.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+ struct test_overhead *skel;
+ struct counter hits;
+ int fd;
+} ctx;
+
+static void validate()
+{
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+ exit(1);
+ }
+ if (env.consumer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ exit(1);
+ }
+}
+
+static void *producer(void *input)
+{
+ char buf[] = "test_overhead";
+ int err;
+
+ while (true) {
+ err = write(ctx.fd, buf, sizeof(buf));
+ if (err < 0) {
+ fprintf(stderr, "write failed\n");
+ exit(1);
+ }
+ atomic_inc(&ctx.hits.value);
+ }
+}
+
+static void measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.hits.value, 0);
+}
+
+static void setup_ctx()
+{
+ setup_libbpf();
+
+ ctx.skel = test_overhead__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ ctx.fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+ if (ctx.fd < 0) {
+ fprintf(stderr, "failed to open /proc/self/comm: %d\n", -errno);
+ exit(1);
+ }
+}
+
+static void attach_bpf(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(prog);
+ if (IS_ERR(link)) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void setup_base()
+{
+ setup_ctx();
+}
+
+static void setup_kprobe()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.prog1);
+}
+
+static void setup_kretprobe()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.prog2);
+}
+
+static void setup_rawtp()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.prog3);
+}
+
+static void setup_fentry()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.prog4);
+}
+
+static void setup_fexit()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.prog5);
+}
+
+static void *consumer(void *input)
+{
+ return NULL;
+}
+
+const struct bench bench_rename_base = {
+ .name = "rename-base",
+ .validate = validate,
+ .setup = setup_base,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_kprobe = {
+ .name = "rename-kprobe",
+ .validate = validate,
+ .setup = setup_kprobe,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_kretprobe = {
+ .name = "rename-kretprobe",
+ .validate = validate,
+ .setup = setup_kretprobe,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_rawtp = {
+ .name = "rename-rawtp",
+ .validate = validate,
+ .setup = setup_rawtp,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_fentry = {
+ .name = "rename-fentry",
+ .validate = validate,
+ .setup = setup_fentry,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_fexit = {
+ .name = "rename-fexit",
+ .validate = validate,
+ .setup = setup_fexit,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
new file mode 100644
index 000000000000..da87c7f31891
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <asm/barrier.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <argp.h>
+#include <stdlib.h>
+#include "bench.h"
+#include "ringbuf_bench.skel.h"
+#include "perfbuf_bench.skel.h"
+
+static struct {
+ bool back2back;
+ int batch_cnt;
+ bool sampled;
+ int sample_rate;
+ int ringbuf_sz; /* per-ringbuf, in bytes */
+ bool ringbuf_use_output; /* use slower output API */
+ int perfbuf_sz; /* per-CPU size, in pages */
+} args = {
+ .back2back = false,
+ .batch_cnt = 500,
+ .sampled = false,
+ .sample_rate = 500,
+ .ringbuf_sz = 512 * 1024,
+ .ringbuf_use_output = false,
+ .perfbuf_sz = 128,
+};
+
+enum {
+ ARG_RB_BACK2BACK = 2000,
+ ARG_RB_USE_OUTPUT = 2001,
+ ARG_RB_BATCH_CNT = 2002,
+ ARG_RB_SAMPLED = 2003,
+ ARG_RB_SAMPLE_RATE = 2004,
+};
+
+static const struct argp_option opts[] = {
+ { "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"},
+ { "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"},
+ { "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
+ { "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
+ { "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_RB_BACK2BACK:
+ args.back2back = true;
+ break;
+ case ARG_RB_USE_OUTPUT:
+ args.ringbuf_use_output = true;
+ break;
+ case ARG_RB_BATCH_CNT:
+ args.batch_cnt = strtol(arg, NULL, 10);
+ if (args.batch_cnt < 0) {
+ fprintf(stderr, "Invalid batch count.");
+ argp_usage(state);
+ }
+ break;
+ case ARG_RB_SAMPLED:
+ args.sampled = true;
+ break;
+ case ARG_RB_SAMPLE_RATE:
+ args.sample_rate = strtol(arg, NULL, 10);
+ if (args.sample_rate < 0) {
+ fprintf(stderr, "Invalid perfbuf sample rate.");
+ argp_usage(state);
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_ringbufs_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* RINGBUF-LIBBPF benchmark */
+
+static struct counter buf_hits;
+
+static inline void bufs_trigger_batch()
+{
+ (void)syscall(__NR_getpgid);
+}
+
+static void bufs_validate()
+{
+ if (env.consumer_cnt != 1) {
+ fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n");
+ exit(1);
+ }
+
+ if (args.back2back && env.producer_cnt > 1) {
+ fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
+ exit(1);
+ }
+}
+
+static void *bufs_sample_producer(void *input)
+{
+ if (args.back2back) {
+ /* initial batch to get everything started */
+ bufs_trigger_batch();
+ return NULL;
+ }
+
+ while (true)
+ bufs_trigger_batch();
+ return NULL;
+}
+
+static struct ringbuf_libbpf_ctx {
+ struct ringbuf_bench *skel;
+ struct ring_buffer *ringbuf;
+} ringbuf_libbpf_ctx;
+
+static void ringbuf_libbpf_measure(struct bench_res *res)
+{
+ struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+
+ res->hits = atomic_swap(&buf_hits.value, 0);
+ res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static struct ringbuf_bench *ringbuf_setup_skeleton()
+{
+ struct ringbuf_bench *skel;
+
+ setup_libbpf();
+
+ skel = ringbuf_bench__open();
+ if (!skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ skel->rodata->batch_cnt = args.batch_cnt;
+ skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
+
+ if (args.sampled)
+ /* record data + header take 16 bytes */
+ skel->rodata->wakeup_data_size = args.sample_rate * 16;
+
+ bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz);
+
+ if (ringbuf_bench__load(skel)) {
+ fprintf(stderr, "failed to load skeleton\n");
+ exit(1);
+ }
+
+ return skel;
+}
+
+static int buf_process_sample(void *ctx, void *data, size_t len)
+{
+ atomic_inc(&buf_hits.value);
+ return 0;
+}
+
+static void ringbuf_libbpf_setup()
+{
+ struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+ struct bpf_link *link;
+
+ ctx->skel = ringbuf_setup_skeleton();
+ ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
+ buf_process_sample, NULL, NULL);
+ if (!ctx->ringbuf) {
+ fprintf(stderr, "failed to create ringbuf\n");
+ exit(1);
+ }
+
+ link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
+ if (IS_ERR(link)) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void *ringbuf_libbpf_consumer(void *input)
+{
+ struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+
+ while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) {
+ if (args.back2back)
+ bufs_trigger_batch();
+ }
+ fprintf(stderr, "ringbuf polling failed!\n");
+ return NULL;
+}
+
+/* RINGBUF-CUSTOM benchmark */
+struct ringbuf_custom {
+ __u64 *consumer_pos;
+ __u64 *producer_pos;
+ __u64 mask;
+ void *data;
+ int map_fd;
+};
+
+static struct ringbuf_custom_ctx {
+ struct ringbuf_bench *skel;
+ struct ringbuf_custom ringbuf;
+ int epoll_fd;
+ struct epoll_event event;
+} ringbuf_custom_ctx;
+
+static void ringbuf_custom_measure(struct bench_res *res)
+{
+ struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+
+ res->hits = atomic_swap(&buf_hits.value, 0);
+ res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static void ringbuf_custom_setup()
+{
+ struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+ const size_t page_size = getpagesize();
+ struct bpf_link *link;
+ struct ringbuf_custom *r;
+ void *tmp;
+ int err;
+
+ ctx->skel = ringbuf_setup_skeleton();
+
+ ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (ctx->epoll_fd < 0) {
+ fprintf(stderr, "failed to create epoll fd: %d\n", -errno);
+ exit(1);
+ }
+
+ r = &ctx->ringbuf;
+ r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
+ r->mask = args.ringbuf_sz - 1;
+
+ /* Map writable consumer page */
+ tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ r->map_fd, 0);
+ if (tmp == MAP_FAILED) {
+ fprintf(stderr, "failed to mmap consumer page: %d\n", -errno);
+ exit(1);
+ }
+ r->consumer_pos = tmp;
+
+ /* Map read-only producer page and data pages. */
+ tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED,
+ r->map_fd, page_size);
+ if (tmp == MAP_FAILED) {
+ fprintf(stderr, "failed to mmap data pages: %d\n", -errno);
+ exit(1);
+ }
+ r->producer_pos = tmp;
+ r->data = tmp + page_size;
+
+ ctx->event.events = EPOLLIN;
+ err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event);
+ if (err < 0) {
+ fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno);
+ exit(1);
+ }
+
+ link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
+ if (IS_ERR(link)) {
+ fprintf(stderr, "failed to attach program\n");
+ exit(1);
+ }
+}
+
+#define RINGBUF_BUSY_BIT (1 << 31)
+#define RINGBUF_DISCARD_BIT (1 << 30)
+#define RINGBUF_META_LEN 8
+
+static inline int roundup_len(__u32 len)
+{
+ /* clear out top 2 bits */
+ len <<= 2;
+ len >>= 2;
+ /* add length prefix */
+ len += RINGBUF_META_LEN;
+ /* round up to 8 byte alignment */
+ return (len + 7) / 8 * 8;
+}
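+/*
+ * Worked example (hypothetical numbers): a 5-byte sample is stored behind an
+ * 8-byte record header, so roundup_len(5) = ((5 + 8) + 7) / 8 * 8 = 16 bytes
+ * consumed from the ring; the BUSY/DISCARD flags in the top two bits are
+ * masked off first by the shift pair above.
+ */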
+
+static void ringbuf_custom_process_ring(struct ringbuf_custom *r)
+{
+ unsigned long cons_pos, prod_pos;
+ int *len_ptr, len;
+ bool got_new_data;
+
+ cons_pos = smp_load_acquire(r->consumer_pos);
+ while (true) {
+ got_new_data = false;
+ prod_pos = smp_load_acquire(r->producer_pos);
+ while (cons_pos < prod_pos) {
+ len_ptr = r->data + (cons_pos & r->mask);
+ len = smp_load_acquire(len_ptr);
+
+ /* sample not committed yet, bail out for now */
+ if (len & RINGBUF_BUSY_BIT)
+ return;
+
+ got_new_data = true;
+ cons_pos += roundup_len(len);
+
+ atomic_inc(&buf_hits.value);
+ }
+ if (got_new_data)
+ smp_store_release(r->consumer_pos, cons_pos);
+ else
+ break;
+ };
+}
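+/*
+ * Note (consumer-side sketch of the BPF ring buffer protocol as used above):
+ * the kernel is expected to publish a record's length with release semantics
+ * once the sample is committed, so the smp_load_acquire() of the length word
+ * pairs with it and makes the sample data visible whenever RINGBUF_BUSY_BIT
+ * is clear; advancing consumer_pos with smp_store_release() then returns the
+ * space to the producer.
+ */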
+
+static void *ringbuf_custom_consumer(void *input)
+{
+ struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+ int cnt;
+
+ do {
+ if (args.back2back)
+ bufs_trigger_batch();
+ cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1);
+ if (cnt > 0)
+ ringbuf_custom_process_ring(&ctx->ringbuf);
+ } while (cnt >= 0);
+ fprintf(stderr, "ringbuf polling failed!\n");
+ return 0;
+}
+
+/* PERFBUF-LIBBPF benchmark */
+static struct perfbuf_libbpf_ctx {
+ struct perfbuf_bench *skel;
+ struct perf_buffer *perfbuf;
+} perfbuf_libbpf_ctx;
+
+static void perfbuf_measure(struct bench_res *res)
+{
+ struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+
+ res->hits = atomic_swap(&buf_hits.value, 0);
+ res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static struct perfbuf_bench *perfbuf_setup_skeleton()
+{
+ struct perfbuf_bench *skel;
+
+ setup_libbpf();
+
+ skel = perfbuf_bench__open();
+ if (!skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ skel->rodata->batch_cnt = args.batch_cnt;
+
+ if (perfbuf_bench__load(skel)) {
+ fprintf(stderr, "failed to load skeleton\n");
+ exit(1);
+ }
+
+ return skel;
+}
+
+static enum bpf_perf_event_ret
+perfbuf_process_sample_raw(void *input_ctx, int cpu,
+ struct perf_event_header *e)
+{
+ switch (e->type) {
+ case PERF_RECORD_SAMPLE:
+ atomic_inc(&buf_hits.value);
+ break;
+ case PERF_RECORD_LOST:
+ break;
+ default:
+ return LIBBPF_PERF_EVENT_ERROR;
+ }
+ return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void perfbuf_libbpf_setup()
+{
+ struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+ struct perf_event_attr attr;
+ struct perf_buffer_raw_opts pb_opts = {
+ .event_cb = perfbuf_process_sample_raw,
+ .ctx = (void *)(long)0,
+ .attr = &attr,
+ };
+ struct bpf_link *link;
+
+ ctx->skel = perfbuf_setup_skeleton();
+
+ memset(&attr, 0, sizeof(attr));
+ attr.config = PERF_COUNT_SW_BPF_OUTPUT;
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ /* notify only every Nth sample */
+ if (args.sampled) {
+ attr.sample_period = args.sample_rate;
+ attr.wakeup_events = args.sample_rate;
+ } else {
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ }
+
+ if (args.sample_rate > args.batch_cnt) {
+ fprintf(stderr, "sample rate %d is too high for given batch count %d\n",
+ args.sample_rate, args.batch_cnt);
+ exit(1);
+ }
+
+ ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
+ args.perfbuf_sz, &pb_opts);
+ if (!ctx->perfbuf) {
+ fprintf(stderr, "failed to create perfbuf\n");
+ exit(1);
+ }
+
+ link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
+ if (IS_ERR(link)) {
+ fprintf(stderr, "failed to attach program\n");
+ exit(1);
+ }
+}
+
+static void *perfbuf_libbpf_consumer(void *input)
+{
+ struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+
+ while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) {
+ if (args.back2back)
+ bufs_trigger_batch();
+ }
+ fprintf(stderr, "perfbuf polling failed!\n");
+ return NULL;
+}
+
+/* PERFBUF-CUSTOM benchmark */
+
+/* copies of internal libbpf definitions */
+struct perf_cpu_buf {
+ struct perf_buffer *pb;
+ void *base; /* mmap()'ed memory */
+ void *buf; /* for reconstructing segmented data */
+ size_t buf_size;
+ int fd;
+ int cpu;
+ int map_key;
+};
+
+struct perf_buffer {
+ perf_buffer_event_fn event_cb;
+ perf_buffer_sample_fn sample_cb;
+ perf_buffer_lost_fn lost_cb;
+ void *ctx; /* passed into callbacks */
+
+ size_t page_size;
+ size_t mmap_size;
+ struct perf_cpu_buf **cpu_bufs;
+ struct epoll_event *events;
+ int cpu_cnt; /* number of allocated CPU buffers */
+ int epoll_fd; /* perf event FD */
+ int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
+};
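+/*
+ * Caveat: these layouts are copied by hand and have to stay in sync with
+ * libbpf's internal struct perf_buffer/perf_cpu_buf; if libbpf changes them,
+ * the raw consumer below would read garbage.
+ */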
+
+static void *perfbuf_custom_consumer(void *input)
+{
+ struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+ struct perf_buffer *pb = ctx->perfbuf;
+ struct perf_cpu_buf *cpu_buf;
+ struct perf_event_mmap_page *header;
+ size_t mmap_mask = pb->mmap_size - 1;
+ struct perf_event_header *ehdr;
+ __u64 data_head, data_tail;
+ size_t ehdr_size;
+ void *base;
+ int i, cnt;
+
+ while (true) {
+ if (args.back2back)
+ bufs_trigger_batch();
+ cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1);
+ if (cnt <= 0) {
+ fprintf(stderr, "perf epoll failed: %d\n", -errno);
+ exit(1);
+ }
+
+ for (i = 0; i < cnt; ++i) {
+ cpu_buf = pb->events[i].data.ptr;
+ header = cpu_buf->base;
+ base = ((void *)header) + pb->page_size;
+
+ data_head = ring_buffer_read_head(header);
+ data_tail = header->data_tail;
+ while (data_head != data_tail) {
+ ehdr = base + (data_tail & mmap_mask);
+ ehdr_size = ehdr->size;
+
+ if (ehdr->type == PERF_RECORD_SAMPLE)
+ atomic_inc(&buf_hits.value);
+
+ data_tail += ehdr_size;
+ }
+ ring_buffer_write_tail(header, data_tail);
+ }
+ }
+ return NULL;
+}
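+/*
+ * Note: ring_buffer_read_head()/ring_buffer_write_tail() come from
+ * tools/include/linux/ring_buffer.h and are expected to provide the
+ * acquire/release ordering the perf mmap ring requires, so the plain reads
+ * of each record header above are safe once data_head has been observed.
+ */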
+
+const struct bench bench_rb_libbpf = {
+ .name = "rb-libbpf",
+ .validate = bufs_validate,
+ .setup = ringbuf_libbpf_setup,
+ .producer_thread = bufs_sample_producer,
+ .consumer_thread = ringbuf_libbpf_consumer,
+ .measure = ringbuf_libbpf_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rb_custom = {
+ .name = "rb-custom",
+ .validate = bufs_validate,
+ .setup = ringbuf_custom_setup,
+ .producer_thread = bufs_sample_producer,
+ .consumer_thread = ringbuf_custom_consumer,
+ .measure = ringbuf_custom_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_pb_libbpf = {
+ .name = "pb-libbpf",
+ .validate = bufs_validate,
+ .setup = perfbuf_libbpf_setup,
+ .producer_thread = bufs_sample_producer,
+ .consumer_thread = perfbuf_libbpf_consumer,
+ .measure = perfbuf_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_pb_custom = {
+ .name = "pb-custom",
+ .validate = bufs_validate,
+ .setup = perfbuf_libbpf_setup,
+ .producer_thread = bufs_sample_producer,
+ .consumer_thread = perfbuf_custom_consumer,
+ .measure = perfbuf_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
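+/*
+ * Sketch of how the harness presumably drives these definitions (bench.c is
+ * not part of this hunk): validate() runs first, then setup(), then
+ * producer_thread/consumer_thread are spawned per env.producer_cnt and
+ * env.consumer_cnt, and measure() is called periodically to snapshot
+ * hits/drops for report_progress()/report_final().
+ */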
+
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
new file mode 100644
index 000000000000..2a0b6c9885a4
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bench.h"
+#include "trigger_bench.skel.h"
+
+/* BPF triggering benchmarks */
+static struct trigger_ctx {
+ struct trigger_bench *skel;
+} ctx;
+
+static struct counter base_hits;
+
+static void trigger_validate()
+{
+ if (env.consumer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ exit(1);
+ }
+}
+
+static void *trigger_base_producer(void *input)
+{
+ while (true) {
+ (void)syscall(__NR_getpgid);
+ atomic_inc(&base_hits.value);
+ }
+ return NULL;
+}
+
+static void trigger_base_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&base_hits.value, 0);
+}
+
+static void *trigger_producer(void *input)
+{
+ while (true)
+ (void)syscall(__NR_getpgid);
+ return NULL;
+}
+
+static void trigger_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+static void setup_ctx()
+{
+ setup_libbpf();
+
+ ctx.skel = trigger_bench__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+}
+
+static void attach_bpf(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(prog);
+ if (IS_ERR(link)) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void trigger_tp_setup()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_tp);
+}
+
+static void trigger_rawtp_setup()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_raw_tp);
+}
+
+static void trigger_kprobe_setup()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
+}
+
+static void trigger_fentry_setup()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_fentry);
+}
+
+static void trigger_fentry_sleep_setup()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep);
+}
+
+static void trigger_fmodret_setup()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
+}
+
+static void *trigger_consumer(void *input)
+{
+ return NULL;
+}
+
+const struct bench bench_trig_base = {
+ .name = "trig-base",
+ .validate = trigger_validate,
+ .producer_thread = trigger_base_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_base_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_tp = {
+ .name = "trig-tp",
+ .validate = trigger_validate,
+ .setup = trigger_tp_setup,
+ .producer_thread = trigger_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_rawtp = {
+ .name = "trig-rawtp",
+ .validate = trigger_validate,
+ .setup = trigger_rawtp_setup,
+ .producer_thread = trigger_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_kprobe = {
+ .name = "trig-kprobe",
+ .validate = trigger_validate,
+ .setup = trigger_kprobe_setup,
+ .producer_thread = trigger_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_fentry = {
+ .name = "trig-fentry",
+ .validate = trigger_validate,
+ .setup = trigger_fentry_setup,
+ .producer_thread = trigger_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_fentry_sleep = {
+ .name = "trig-fentry-sleep",
+ .validate = trigger_validate,
+ .setup = trigger_fentry_sleep_setup,
+ .producer_thread = trigger_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_fmodret = {
+ .name = "trig-fmodret",
+ .validate = trigger_validate,
+ .setup = trigger_fmodret_setup,
+ .producer_thread = trigger_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
new file mode 100755
index 000000000000..16f774b1cdbe
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+for i in base kprobe kretprobe rawtp fentry fexit fmodret
+do
+ summary=$(sudo ./bench -w2 -d5 -a rename-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+ printf "%-10s: %s\n" $i "$summary"
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
new file mode 100755
index 000000000000..af4aa04caba6
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+RUN_BENCH="sudo ./bench -w3 -d10 -a"
+
+function hits()
+{
+ echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function drops()
+{
+ echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
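+# Example of a summary line these regexes are meant to match (format is
+# illustrative, not verbatim bench output):
+#   Summary: hits   12.345 ± 0.678M/s, drops 0.000 ± 0.000M/s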
+
+function header()
+{
+ local len=${#1}
+
+ printf "\n%s\n" "$1"
+ for i in $(seq 1 $len); do printf '='; done
+ printf '\n'
+}
+
+function summarize()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)"
+}
+
+header "Single-producer, parallel producer"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+ summarize $b "$($RUN_BENCH $b)"
+done
+
+header "Single-producer, parallel producer, sampled notification"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+ summarize $b "$($RUN_BENCH --rb-sampled $b)"
+done
+
+header "Single-producer, back-to-back mode"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+ summarize $b "$($RUN_BENCH --rb-b2b $b)"
+ summarize $b-sampled "$($RUN_BENCH --rb-sampled --rb-b2b $b)"
+done
+
+header "Ringbuf back-to-back, effect of sample rate"
+for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
+ summarize "rb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)"
+done
+header "Perfbuf back-to-back, effect of sample rate"
+for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
+ summarize "pb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)"
+done
+
+header "Ringbuf back-to-back, reserve+commit vs output"
+summarize "reserve" "$($RUN_BENCH --rb-b2b rb-custom)"
+summarize "output" "$($RUN_BENCH --rb-b2b --rb-use-output rb-custom)"
+
+header "Ringbuf sampled, reserve+commit vs output"
+summarize "reserve-sampled" "$($RUN_BENCH --rb-sampled rb-custom)"
+summarize "output-sampled" "$($RUN_BENCH --rb-sampled --rb-use-output rb-custom)"
+
+header "Single-producer, consumer/producer competing on the same CPU, low batch count"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+ summarize $b "$($RUN_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)"
+done
+
+header "Ringbuf, multi-producer contention"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+ summarize "rb-libbpf nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
+done
+
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
new file mode 100755
index 000000000000..78e83f243294
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+for i in base tp rawtp kprobe fentry fmodret
+do
+ summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+ printf "%-10s: %s\n" $i "$summary"
+done
diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h
index 6f8988738bc1..719ab56cdb5d 100644
--- a/tools/testing/selftests/bpf/bpf_legacy.h
+++ b/tools/testing/selftests/bpf/bpf_legacy.h
@@ -2,20 +2,6 @@
#ifndef __BPF_LEGACY__
#define __BPF_LEGACY__
-/*
- * legacy bpf_map_def with extra fields supported only by bpf_load(), do not
- * use outside of samples/bpf
- */
-struct bpf_map_def_legacy {
- unsigned int type;
- unsigned int key_size;
- unsigned int value_size;
- unsigned int max_entries;
- unsigned int map_flags;
- unsigned int inner_map_idx;
- unsigned int numa_node;
-};
-
#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \
struct ____btf_map_##name { \
type_key key; \
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 5bf2fe9b1efa..2915664c335d 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -16,6 +16,7 @@ BPF_PROG(name, args)
struct sock_common {
unsigned char skc_state;
+ __u16 skc_num;
} __attribute__((preserve_access_index));
enum sk_pacing {
@@ -45,6 +46,10 @@ struct inet_connection_sock {
__u64 icsk_ca_priv[104 / sizeof(__u64)];
} __attribute__((preserve_access_index));
+struct request_sock {
+ struct sock_common __req_common;
+} __attribute__((preserve_access_index));
+
struct tcp_sock {
struct inet_connection_sock inet_conn;
@@ -115,14 +120,6 @@ enum tcp_ca_event {
CA_EVENT_ECN_IS_CE = 5,
};
-enum tcp_ca_state {
- TCP_CA_Open = 0,
- TCP_CA_Disorder = 1,
- TCP_CA_CWR = 2,
- TCP_CA_Recovery = 3,
- TCP_CA_Loss = 4
-};
-
struct ack_sample {
__u32 pkts_acked;
__s32 rtt_us;
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 0fb910df5387..033051717ba5 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -290,3 +290,26 @@ free_mem:
free(fhp);
return ret;
}
+
+int cgroup_setup_and_join(const char *path) {
+ int cg_fd;
+
+ if (setup_cgroup_environment()) {
+ fprintf(stderr, "Failed to setup cgroup environment\n");
+ return -EINVAL;
+ }
+
+ cg_fd = create_and_get_cgroup(path);
+ if (cg_fd < 0) {
+ fprintf(stderr, "Failed to create test cgroup\n");
+ cleanup_cgroup_environment();
+ return cg_fd;
+ }
+
+ if (join_cgroup(path)) {
+ fprintf(stderr, "Failed to join cgroup\n");
+ cleanup_cgroup_environment();
+ return -EINVAL;
+ }
+ return cg_fd;
+}
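+/*
+ * Typical usage (sketch, mirroring get_cgroup_id_user.c below): call
+ * cgroup_setup_and_join(TEST_CGROUP) at test start, use the returned FD for
+ * cgroup BPF attachments, then close it and call cleanup_cgroup_environment()
+ * before exiting.
+ */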
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index d64bb8957090..5fe3d88e4f0d 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -9,6 +9,7 @@
__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+int cgroup_setup_and_join(const char *path);
int create_and_get_cgroup(const char *path);
int join_cgroup(const char *path);
int setup_cgroup_environment(void);
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 60e3ae5d4e48..2118e23ac07a 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -25,6 +25,7 @@ CONFIG_XDP_SOCKETS=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_IPV6_TUNNEL=y
CONFIG_IPV6_GRE=y
+CONFIG_IPV6_SEG6_BPF=y
CONFIG_NET_FOU=m
CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_IPV6_FOU=m
@@ -37,3 +38,4 @@ CONFIG_IPV6_SIT=m
CONFIG_BPF_JIT=y
CONFIG_BPF_LSM=y
CONFIG_SECURITY=y
+CONFIG_LIRC=y
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h
index daeaeb518894..7290401ec172 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.h
+++ b/tools/testing/selftests/bpf/flow_dissector_load.h
@@ -23,7 +23,13 @@ static inline int bpf_flow_load(struct bpf_object **obj,
if (ret)
return ret;
- main_prog = bpf_object__find_program_by_title(*obj, section_name);
+ main_prog = NULL;
+ bpf_object__for_each_program(prog, *obj) {
+ if (strcmp(section_name, bpf_program__section_name(prog)) == 0) {
+ main_prog = prog;
+ break;
+ }
+ }
if (!main_prog)
return -1;
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
index e8da7b39158d..b8d6aef99db4 100644
--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -58,20 +58,10 @@ int main(int argc, char **argv)
int exit_code = 1;
char buf[256];
- err = setup_cgroup_environment();
- if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err,
- errno))
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+ if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno))
return 1;
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- if (CHECK(cgroup_fd < 0, "create_and_get_cgroup", "err %d errno %d\n",
- cgroup_fd, errno))
- goto cleanup_cgroup_env;
-
- err = join_cgroup(TEST_CGROUP);
- if (CHECK(err, "join_cgroup", "err %d errno %d\n", err, errno))
- goto cleanup_cgroup_env;
-
err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
goto cleanup_cgroup_env;
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
new file mode 100644
index 000000000000..12ee40284da0
--- /dev/null
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+
+#include <linux/err.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+
+#include "bpf_util.h"
+#include "network_helpers.h"
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) ({ \
+ int __save = errno; \
+ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+ __FILE__, __LINE__, clean_errno(), \
+ ##__VA_ARGS__); \
+ errno = __save; \
+})
+
+struct ipv4_packet pkt_v4 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = IPPROTO_TCP,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .tcp.urg_ptr = 123,
+ .tcp.doff = 5,
+};
+
+struct ipv6_packet pkt_v6 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .iph.nexthdr = IPPROTO_TCP,
+ .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .tcp.urg_ptr = 123,
+ .tcp.doff = 5,
+};
+
+static int settimeo(int fd, int timeout_ms)
+{
+ struct timeval timeout = { .tv_sec = 3 };
+
+ if (timeout_ms > 0) {
+ timeout.tv_sec = timeout_ms / 1000;
+ timeout.tv_usec = (timeout_ms % 1000) * 1000;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout,
+ sizeof(timeout))) {
+ log_err("Failed to set SO_RCVTIMEO");
+ return -1;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeout,
+ sizeof(timeout))) {
+ log_err("Failed to set SO_SNDTIMEO");
+ return -1;
+ }
+
+ return 0;
+}
+
+#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
+
+int start_server(int family, int type, const char *addr_str, __u16 port,
+ int timeout_ms)
+{
+ struct sockaddr_storage addr = {};
+ socklen_t len;
+ int fd;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &len))
+ return -1;
+
+ fd = socket(family, type, 0);
+ if (fd < 0) {
+ log_err("Failed to create server socket");
+ return -1;
+ }
+
+ if (settimeo(fd, timeout_ms))
+ goto error_close;
+
+ if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
+ log_err("Failed to bind socket");
+ goto error_close;
+ }
+
+ if (type == SOCK_STREAM) {
+ if (listen(fd, 1) < 0) {
+ log_err("Failed to listed on socket");
+ goto error_close;
+ }
+ }
+
+ return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
+}
+
+int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
+ int timeout_ms)
+{
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ struct sockaddr_in *addr_in;
+ int fd, ret;
+
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
+ log_err("Failed to get server addr");
+ return -1;
+ }
+
+ addr_in = (struct sockaddr_in *)&addr;
+ fd = socket(addr_in->sin_family, SOCK_STREAM, 0);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
+ return -1;
+ }
+
+ if (settimeo(fd, timeout_ms))
+ goto error_close;
+
+ ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr,
+ addrlen);
+ if (ret != data_len) {
+ log_err("sendto(data, %u) != %d\n", data_len, ret);
+ goto error_close;
+ }
+
+ return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
+}
+
+static int connect_fd_to_addr(int fd,
+ const struct sockaddr_storage *addr,
+ socklen_t addrlen)
+{
+ if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
+ log_err("Failed to connect to server");
+ return -1;
+ }
+
+ return 0;
+}
+
+int connect_to_fd(int server_fd, int timeout_ms)
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in *addr_in;
+ socklen_t addrlen, optlen;
+ int fd, type;
+
+ optlen = sizeof(type);
+ if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
+ log_err("getsockopt(SOL_TYPE)");
+ return -1;
+ }
+
+ addrlen = sizeof(addr);
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
+ log_err("Failed to get server addr");
+ return -1;
+ }
+
+ addr_in = (struct sockaddr_in *)&addr;
+ fd = socket(addr_in->sin_family, type, 0);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
+ return -1;
+ }
+
+ if (settimeo(fd, timeout_ms))
+ goto error_close;
+
+ if (connect_fd_to_addr(fd, &addr, addrlen))
+ goto error_close;
+
+ return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
+}
+
+int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+
+ if (settimeo(client_fd, timeout_ms))
+ return -1;
+
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
+ return -1;
+ }
+
+ if (connect_fd_to_addr(client_fd, &addr, len))
+ return -1;
+
+ return 0;
+}
+
+int make_sockaddr(int family, const char *addr_str, __u16 port,
+ struct sockaddr_storage *addr, socklen_t *len)
+{
+ if (family == AF_INET) {
+ struct sockaddr_in *sin = (void *)addr;
+
+ sin->sin_family = AF_INET;
+ sin->sin_port = htons(port);
+ if (addr_str &&
+ inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
+ log_err("inet_pton(AF_INET, %s)", addr_str);
+ return -1;
+ }
+ if (len)
+ *len = sizeof(*sin);
+ return 0;
+ } else if (family == AF_INET6) {
+ struct sockaddr_in6 *sin6 = (void *)addr;
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = htons(port);
+ if (addr_str &&
+ inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
+ log_err("inet_pton(AF_INET6, %s)", addr_str);
+ return -1;
+ }
+ if (len)
+ *len = sizeof(*sin6);
+ return 0;
+ }
+ return -1;
+}
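+/*
+ * Usage sketch (assumed pattern, address and ports illustrative):
+ *   int srv = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", 0, 0);
+ *   int cli = connect_to_fd(srv, 0);
+ * Passing port 0 lets the kernel pick a free port; connect_to_fd() recovers
+ * the server's actual address via getsockname() before connecting.
+ */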
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
new file mode 100644
index 000000000000..7205f8afdba1
--- /dev/null
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NETWORK_HELPERS_H
+#define __NETWORK_HELPERS_H
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <linux/types.h>
+typedef __u16 __sum16;
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <netinet/tcp.h>
+#include <bpf/bpf_endian.h>
+
+#define MAGIC_VAL 0x1234
+#define NUM_ITER 100000
+#define VIP_NUM 5
+#define MAGIC_BYTES 123
+
+/* ipv4 test vector */
+struct ipv4_packet {
+ struct ethhdr eth;
+ struct iphdr iph;
+ struct tcphdr tcp;
+} __packed;
+extern struct ipv4_packet pkt_v4;
+
+/* ipv6 test vector */
+struct ipv6_packet {
+ struct ethhdr eth;
+ struct ipv6hdr iph;
+ struct tcphdr tcp;
+} __packed;
+extern struct ipv6_packet pkt_v6;
+
+int start_server(int family, int type, const char *addr, __u16 port,
+ int timeout_ms);
+int connect_to_fd(int server_fd, int timeout_ms);
+int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
+int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
+ int timeout_ms);
+int make_sockaddr(int family, const char *addr_str, __u16 port,
+ struct sockaddr_storage *addr, socklen_t *len);
+
+#endif
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 0262f7b374f9..52414058a627 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -1,24 +1,5 @@
-#include <asm/types.h>
-#include <linux/types.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <stddef.h>
-#include <stdbool.h>
-
-#include <linux/unistd.h>
-#include <linux/filter.h>
-#include <linux/bpf_perf_event.h>
-#include <linux/bpf.h>
-
-#include <bpf/bpf.h>
-
-#include "../../../include/linux/filter.h"
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
#define MAX_INSNS 512
#define MAX_MATCHES 16
@@ -214,13 +195,13 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {8, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
{9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {10, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
{11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
- {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {12, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
{13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {14, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
{15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
{16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
},
@@ -359,15 +340,15 @@ static struct bpf_align_test tests[] = {
* is still (4n), fixed offset is not changed.
* Also, we create a new reg->id.
*/
- {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
+ {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (18)
* which is 20. Then the variable offset is (4n), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
- {33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
+ {33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
+ {33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
},
},
{
@@ -410,15 +391,15 @@ static struct bpf_align_test tests[] = {
/* Adding 14 makes R6 be (4n+2) */
{9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
/* Packet pointer has (4n+2) offset */
- {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
- {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
+ {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
/* Newly read value in R6 was shifted left by 2, so has
* known alignment of 4.
*/
@@ -426,15 +407,15 @@ static struct bpf_align_test tests[] = {
/* Added (4n) to packet pointer's (4n+2) var_off, giving
* another (4n+2).
*/
- {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
- {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+ {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
+ {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+ {23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
},
},
{
@@ -469,16 +450,16 @@ static struct bpf_align_test tests[] = {
.matches = {
{4, "R5_w=pkt_end(id=0,off=0,imm=0)"},
/* (ptr - ptr) << 2 == unknown, (4n) */
- {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
+ {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
* the add could overflow.
*/
- {7, "R5_w=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
+ {7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>=0 */
- {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ {9, "R5=inv(id=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
/* packet pointer + nonnegative (4n+2) */
- {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
- {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+ {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
* We checked the bounds, but it might have been able
* to overflow if the packet pointer started in the
@@ -486,7 +467,7 @@ static struct bpf_align_test tests[] = {
* So we did not get a 'range' on R6, and the access
* attempt will fail.
*/
- {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
}
},
{
@@ -528,7 +509,7 @@ static struct bpf_align_test tests[] = {
/* New unknown value in R7 is (4n) */
{11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Subtracting it from R6 blows our unsigned bounds */
- {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"},
+ {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>= 0 */
{14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
/* At the time the word size load is performed from R5,
@@ -537,7 +518,8 @@ static struct bpf_align_test tests[] = {
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
+
},
},
{
@@ -579,18 +561,18 @@ static struct bpf_align_test tests[] = {
/* Adding 14 makes R6 be (4n+2) */
{11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
/* Subtracting from packet pointer overflows ubounds */
- {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
+ {13, "R5_w=pkt(id=2,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
/* New unknown value in R7 is (4n), >= 76 */
{15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
/* Adding it to packet pointer gives nice bounds again */
- {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+ {16, "R5_w=pkt(id=3,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+ {20, "R5=pkt(id=3,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
},
},
};
@@ -669,51 +651,16 @@ static int do_test_single(struct bpf_align_test *test)
return ret;
}
-static int do_test(unsigned int from, unsigned int to)
+void test_align(void)
{
- int all_pass = 0;
- int all_fail = 0;
unsigned int i;
- for (i = from; i < to; i++) {
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct bpf_align_test *test = &tests[i];
- int fail;
-
- printf("Test %3d: %s ... ",
- i, test->descr);
- fail = do_test_single(test);
- if (fail) {
- all_fail++;
- printf("FAIL\n");
- } else {
- all_pass++;
- printf("PASS\n");
- }
- }
- printf("Results: %d pass %d fail\n",
- all_pass, all_fail);
- return all_fail ? EXIT_FAILURE : EXIT_SUCCESS;
-}
-
-int main(int argc, char **argv)
-{
- unsigned int from = 0, to = ARRAY_SIZE(tests);
- if (argc == 3) {
- unsigned int l = atoi(argv[argc - 2]);
- unsigned int u = atoi(argv[argc - 1]);
+ if (!test__start_subtest(test->descr))
+ continue;
- if (l < to && u < to) {
- from = l;
- to = u + 1;
- }
- } else if (argc == 2) {
- unsigned int t = atoi(argv[argc - 1]);
-
- if (t < to) {
- from = t;
- to = t + 1;
- }
+ CHECK_FAIL(do_test_single(test));
}
- return do_test(from, to);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/autoload.c b/tools/testing/selftests/bpf/prog_tests/autoload.c
new file mode 100644
index 000000000000..3693f7d133eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/autoload.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_autoload.skel.h"
+
+void test_autoload(void)
+{
+ int duration = 0, err;
+ struct test_autoload* skel;
+
+ skel = test_autoload__open_and_load();
+ /* prog3 should be broken */
+ if (CHECK(skel, "skel_open_and_load", "unexpected success\n"))
+ goto cleanup;
+
+ skel = test_autoload__open();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ goto cleanup;
+
+ /* don't load prog3 */
+ bpf_program__set_autoload(skel->progs.prog3, false);
+
+ err = test_autoload__load(skel);
+ if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+ goto cleanup;
+
+ err = test_autoload__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ usleep(1);
+
+ CHECK(!skel->bss->prog1_called, "prog1", "not called\n");
+ CHECK(!skel->bss->prog2_called, "prog2", "not called\n");
+ CHECK(skel->bss->prog3_called, "prog3", "called?!\n");
+
+cleanup:
+ test_autoload__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
new file mode 100644
index 000000000000..448885b95eed
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -0,0 +1,1074 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include "bpf_iter_ipv6_route.skel.h"
+#include "bpf_iter_netlink.skel.h"
+#include "bpf_iter_bpf_map.skel.h"
+#include "bpf_iter_task.skel.h"
+#include "bpf_iter_task_stack.skel.h"
+#include "bpf_iter_task_file.skel.h"
+#include "bpf_iter_task_btf.skel.h"
+#include "bpf_iter_tcp4.skel.h"
+#include "bpf_iter_tcp6.skel.h"
+#include "bpf_iter_udp4.skel.h"
+#include "bpf_iter_udp6.skel.h"
+#include "bpf_iter_test_kern1.skel.h"
+#include "bpf_iter_test_kern2.skel.h"
+#include "bpf_iter_test_kern3.skel.h"
+#include "bpf_iter_test_kern4.skel.h"
+#include "bpf_iter_bpf_hash_map.skel.h"
+#include "bpf_iter_bpf_percpu_hash_map.skel.h"
+#include "bpf_iter_bpf_array_map.skel.h"
+#include "bpf_iter_bpf_percpu_array_map.skel.h"
+#include "bpf_iter_bpf_sk_storage_map.skel.h"
+#include "bpf_iter_test_kern5.skel.h"
+#include "bpf_iter_test_kern6.skel.h"
+
+static int duration;
+
+static void test_btf_id_or_null(void)
+{
+ struct bpf_iter_test_kern3 *skel;
+
+ skel = bpf_iter_test_kern3__open_and_load();
+ if (CHECK(skel, "bpf_iter_test_kern3__open_and_load",
+ "skeleton open_and_load unexpectedly succeeded\n")) {
+ bpf_iter_test_kern3__destroy(skel);
+ return;
+ }
+}
+
+static void do_dummy_read(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+ char buf[16] = {};
+ int iter_fd, len;
+
+ link = bpf_program__attach_iter(prog, NULL);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ return;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ /* don't check contents, just ensure read() ends without error */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ CHECK(len < 0, "read", "read failed: %s\n", strerror(errno));
+
+ close(iter_fd);
+
+free_link:
+ bpf_link__destroy(link);
+}
+
+static void test_ipv6_route(void)
+{
+ struct bpf_iter_ipv6_route *skel;
+
+ skel = bpf_iter_ipv6_route__open_and_load();
+ if (CHECK(!skel, "bpf_iter_ipv6_route__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_ipv6_route);
+
+ bpf_iter_ipv6_route__destroy(skel);
+}
+
+static void test_netlink(void)
+{
+ struct bpf_iter_netlink *skel;
+
+ skel = bpf_iter_netlink__open_and_load();
+ if (CHECK(!skel, "bpf_iter_netlink__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_netlink);
+
+ bpf_iter_netlink__destroy(skel);
+}
+
+static void test_bpf_map(void)
+{
+ struct bpf_iter_bpf_map *skel;
+
+ skel = bpf_iter_bpf_map__open_and_load();
+ if (CHECK(!skel, "bpf_iter_bpf_map__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_bpf_map);
+
+ bpf_iter_bpf_map__destroy(skel);
+}
+
+static void test_task(void)
+{
+ struct bpf_iter_task *skel;
+
+ skel = bpf_iter_task__open_and_load();
+ if (CHECK(!skel, "bpf_iter_task__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_task);
+
+ bpf_iter_task__destroy(skel);
+}
+
+static void test_task_stack(void)
+{
+ struct bpf_iter_task_stack *skel;
+
+ skel = bpf_iter_task_stack__open_and_load();
+ if (CHECK(!skel, "bpf_iter_task_stack__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_task_stack);
+
+ bpf_iter_task_stack__destroy(skel);
+}
+
+static void *do_nothing(void *arg)
+{
+ pthread_exit(arg);
+}
+
+static void test_task_file(void)
+{
+ struct bpf_iter_task_file *skel;
+ pthread_t thread_id;
+ void *ret;
+
+ skel = bpf_iter_task_file__open_and_load();
+ if (CHECK(!skel, "bpf_iter_task_file__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ skel->bss->tgid = getpid();
+
+ if (CHECK(pthread_create(&thread_id, NULL, &do_nothing, NULL),
+ "pthread_create", "pthread_create failed\n"))
+ goto done;
+
+ do_dummy_read(skel->progs.dump_task_file);
+
+ if (CHECK(pthread_join(thread_id, &ret) || ret != NULL,
+ "pthread_join", "pthread_join failed\n"))
+ goto done;
+
+ CHECK(skel->bss->count != 0, "check_count",
+ "invalid non pthread file visit count %d\n", skel->bss->count);
+
+done:
+ bpf_iter_task_file__destroy(skel);
+}
+
+#define TASKBUFSZ 32768
+
+static char taskbuf[TASKBUFSZ];
+
+static int do_btf_read(struct bpf_iter_task_btf *skel)
+{
+ struct bpf_program *prog = skel->progs.dump_task_struct;
+ struct bpf_iter_task_btf__bss *bss = skel->bss;
+ int iter_fd = -1, len = 0, bufleft = TASKBUFSZ;
+ struct bpf_link *link;
+ char *buf = taskbuf;
+ int ret = 0;
+
+ link = bpf_program__attach_iter(prog, NULL);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ return ret;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ do {
+ len = read(iter_fd, buf, bufleft);
+ if (len > 0) {
+ buf += len;
+ bufleft -= len;
+ }
+ } while (len > 0);
+
+ if (bss->skip) {
+ printf("%s:SKIP:no __builtin_btf_type_id\n", __func__);
+ ret = 1;
+ test__skip();
+ goto free_link;
+ }
+
+ if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ goto free_link;
+
+ CHECK(strstr(taskbuf, "(struct task_struct)") == NULL,
+ "check for btf representation of task_struct in iter data",
+ "struct task_struct not found");
+free_link:
+ if (iter_fd > 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ return ret;
+}
+
+static void test_task_btf(void)
+{
+ struct bpf_iter_task_btf__bss *bss;
+ struct bpf_iter_task_btf *skel;
+ int ret;
+
+ skel = bpf_iter_task_btf__open_and_load();
+ if (CHECK(!skel, "bpf_iter_task_btf__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ bss = skel->bss;
+
+ ret = do_btf_read(skel);
+ if (ret)
+ goto cleanup;
+
+ if (CHECK(bss->tasks == 0, "check if iterated over tasks",
+ "no task iteration, did BPF program run?\n"))
+ goto cleanup;
+
+ CHECK(bss->seq_err != 0, "check for unexpected err",
+ "bpf_seq_printf_btf returned %ld", bss->seq_err);
+
+cleanup:
+ bpf_iter_task_btf__destroy(skel);
+}
+
+static void test_tcp4(void)
+{
+ struct bpf_iter_tcp4 *skel;
+
+ skel = bpf_iter_tcp4__open_and_load();
+ if (CHECK(!skel, "bpf_iter_tcp4__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_tcp4);
+
+ bpf_iter_tcp4__destroy(skel);
+}
+
+static void test_tcp6(void)
+{
+ struct bpf_iter_tcp6 *skel;
+
+ skel = bpf_iter_tcp6__open_and_load();
+ if (CHECK(!skel, "bpf_iter_tcp6__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_tcp6);
+
+ bpf_iter_tcp6__destroy(skel);
+}
+
+static void test_udp4(void)
+{
+ struct bpf_iter_udp4 *skel;
+
+ skel = bpf_iter_udp4__open_and_load();
+ if (CHECK(!skel, "bpf_iter_udp4__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_udp4);
+
+ bpf_iter_udp4__destroy(skel);
+}
+
+static void test_udp6(void)
+{
+ struct bpf_iter_udp6 *skel;
+
+ skel = bpf_iter_udp6__open_and_load();
+ if (CHECK(!skel, "bpf_iter_udp6__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_udp6);
+
+ bpf_iter_udp6__destroy(skel);
+}
+
+/* The expected string is less than 16 bytes */
+static int do_read_with_fd(int iter_fd, const char *expected,
+ bool read_one_char)
+{
+ int err = -1, len, read_buf_len, start;
+ char buf[16] = {};
+
+ read_buf_len = read_one_char ? 1 : 16;
+ start = 0;
+ while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) {
+ start += len;
+ if (CHECK(start >= 16, "read", "read len %d\n", len))
+ return -1;
+ read_buf_len = read_one_char ? 1 : 16 - start;
+ }
+ if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ return -1;
+
+ err = strcmp(buf, expected);
+ if (CHECK(err, "read", "incorrect read result: buf %s, expected %s\n",
+ buf, expected))
+ return -1;
+
+ return 0;
+}
+
+static void test_anon_iter(bool read_one_char)
+{
+ struct bpf_iter_test_kern1 *skel;
+ struct bpf_link *link;
+ int iter_fd, err;
+
+ skel = bpf_iter_test_kern1__open_and_load();
+ if (CHECK(!skel, "bpf_iter_test_kern1__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ err = bpf_iter_test_kern1__attach(skel);
+ if (CHECK(err, "bpf_iter_test_kern1__attach",
+ "skeleton attach failed\n")) {
+ goto out;
+ }
+
+ link = skel->links.dump_task;
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto out;
+
+ do_read_with_fd(iter_fd, "abcd", read_one_char);
+ close(iter_fd);
+
+out:
+ bpf_iter_test_kern1__destroy(skel);
+}
+
+static int do_read(const char *path, const char *expected)
+{
+ int err, iter_fd;
+
+ iter_fd = open(path, O_RDONLY);
+ if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n",
+ path, strerror(errno)))
+ return -1;
+
+ err = do_read_with_fd(iter_fd, expected, false);
+ close(iter_fd);
+ return err;
+}
+
+static void test_file_iter(void)
+{
+ const char *path = "/sys/fs/bpf/bpf_iter_test1";
+ struct bpf_iter_test_kern1 *skel1;
+ struct bpf_iter_test_kern2 *skel2;
+ struct bpf_link *link;
+ int err;
+
+ skel1 = bpf_iter_test_kern1__open_and_load();
+ if (CHECK(!skel1, "bpf_iter_test_kern1__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ goto out;
+
+ /* unlink this path if it exists. */
+ unlink(path);
+
+ err = bpf_link__pin(link, path);
+ if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err))
+ goto free_link;
+
+ err = do_read(path, "abcd");
+ if (err)
+ goto unlink_path;
+
+ /* file based iterator seems to be working fine. Let us do a link
+ * update of the underlying link and `cat` the iterator again; its
+ * content should change.
+ */
+ skel2 = bpf_iter_test_kern2__open_and_load();
+ if (CHECK(!skel2, "bpf_iter_test_kern2__open_and_load",
+ "skeleton open_and_load failed\n"))
+ goto unlink_path;
+
+ err = bpf_link__update_program(link, skel2->progs.dump_task);
+ if (CHECK(err, "update_prog", "update_prog failed\n"))
+ goto destroy_skel2;
+
+ do_read(path, "ABCD");
+
+destroy_skel2:
+ bpf_iter_test_kern2__destroy(skel2);
+unlink_path:
+ unlink(path);
+free_link:
+ bpf_link__destroy(link);
+out:
+ bpf_iter_test_kern1__destroy(skel1);
+}
+
+static void test_overflow(bool test_e2big_overflow, bool ret1)
+{
+ __u32 map_info_len, total_read_len, expected_read_len;
+ int err, iter_fd, map1_fd, map2_fd, len;
+ struct bpf_map_info map_info = {};
+ struct bpf_iter_test_kern4 *skel;
+ struct bpf_link *link;
+ __u32 iter_size;
+ char *buf;
+
+ skel = bpf_iter_test_kern4__open();
+ if (CHECK(!skel, "bpf_iter_test_kern4__open",
+ "skeleton open failed\n"))
+ return;
+
+ /* create two maps: bpf program will only do bpf_seq_write
+ * for these two maps. The goal is one map output almost
+ * fills seq_file buffer and then the other will trigger
+ * overflow and needs restart.
+ */
+ map1_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+ if (CHECK(map1_fd < 0, "bpf_create_map",
+ "map_creation failed: %s\n", strerror(errno)))
+ goto out;
+ map2_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+ if (CHECK(map2_fd < 0, "bpf_create_map",
+ "map_creation failed: %s\n", strerror(errno)))
+ goto free_map1;
+
+ /* bpf_seq_printf kernel buffer is 8 pages, so one map
+ * bpf_seq_write will mostly fill it, and the other map
+ * will partially fill and then trigger overflow and need
+ * bpf_seq_read restart.
+ */
+ iter_size = sysconf(_SC_PAGE_SIZE) << 3;
+
+ if (test_e2big_overflow) {
+ skel->rodata->print_len = (iter_size + 8) / 8;
+ expected_read_len = 2 * (iter_size + 8);
+ } else if (!ret1) {
+ skel->rodata->print_len = (iter_size - 8) / 8;
+ expected_read_len = 2 * (iter_size - 8);
+ } else {
+ skel->rodata->print_len = 1;
+ expected_read_len = 2 * 8;
+ }
+ skel->rodata->ret1 = ret1;
+
+ if (CHECK(bpf_iter_test_kern4__load(skel),
+ "bpf_iter_test_kern4__load", "skeleton load failed\n"))
+ goto free_map2;
+
+ /* setup filtering map_id in bpf program */
+ map_info_len = sizeof(map_info);
+ err = bpf_obj_get_info_by_fd(map1_fd, &map_info, &map_info_len);
+ if (CHECK(err, "get_map_info", "get map info failed: %s\n",
+ strerror(errno)))
+ goto free_map2;
+ skel->bss->map1_id = map_info.id;
+
+ err = bpf_obj_get_info_by_fd(map2_fd, &map_info, &map_info_len);
+ if (CHECK(err, "get_map_info", "get map info failed: %s\n",
+ strerror(errno)))
+ goto free_map2;
+ skel->bss->map2_id = map_info.id;
+
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ goto free_map2;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ buf = malloc(expected_read_len);
+ if (!buf)
+ goto close_iter;
+
+ /* do read */
+ total_read_len = 0;
+ if (test_e2big_overflow) {
+ while ((len = read(iter_fd, buf, expected_read_len)) > 0)
+ total_read_len += len;
+
+ CHECK(len != -1 || errno != E2BIG, "read",
+ "expected ret -1, errno E2BIG, but get ret %d, error %s\n",
+ len, strerror(errno));
+ goto free_buf;
+ } else if (!ret1) {
+ while ((len = read(iter_fd, buf, expected_read_len)) > 0)
+ total_read_len += len;
+
+ if (CHECK(len < 0, "read", "read failed: %s\n",
+ strerror(errno)))
+ goto free_buf;
+ } else {
+ do {
+ len = read(iter_fd, buf, expected_read_len);
+ if (len > 0)
+ total_read_len += len;
+ } while (len > 0 || (len == -1 && errno == EAGAIN));
+
+ if (CHECK(len < 0, "read", "read failed: %s\n",
+ strerror(errno)))
+ goto free_buf;
+ }
+
+ if (CHECK(total_read_len != expected_read_len, "read",
+ "total len %u, expected len %u\n", total_read_len,
+ expected_read_len))
+ goto free_buf;
+
+ if (CHECK(skel->bss->map1_accessed != 1, "map1_accessed",
+ "expected 1 actual %d\n", skel->bss->map1_accessed))
+ goto free_buf;
+
+ if (CHECK(skel->bss->map2_accessed != 2, "map2_accessed",
+ "expected 2 actual %d\n", skel->bss->map2_accessed))
+ goto free_buf;
+
+ CHECK(skel->bss->map2_seqnum1 != skel->bss->map2_seqnum2,
+ "map2_seqnum", "two different seqnum %lld %lld\n",
+ skel->bss->map2_seqnum1, skel->bss->map2_seqnum2);
+
+free_buf:
+ free(buf);
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+free_map2:
+ close(map2_fd);
+free_map1:
+ close(map1_fd);
+out:
+ bpf_iter_test_kern4__destroy(skel);
+}
+
+static void test_bpf_hash_map(void)
+{
+ __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ struct bpf_iter_bpf_hash_map *skel;
+ int err, i, len, map_fd, iter_fd;
+ union bpf_iter_link_info linfo;
+ __u64 val, expected_val = 0;
+ struct bpf_link *link;
+ struct key_t {
+ int a;
+ int b;
+ int c;
+ } key;
+ char buf[64];
+
+ skel = bpf_iter_bpf_hash_map__open();
+ if (CHECK(!skel, "bpf_iter_bpf_hash_map__open",
+ "skeleton open failed\n"))
+ return;
+
+ skel->bss->in_test_mode = true;
+
+ err = bpf_iter_bpf_hash_map__load(skel);
+ if (CHECK(!skel, "bpf_iter_bpf_hash_map__load",
+ "skeleton load failed\n"))
+ goto out;
+
+ /* iterator with hashmap2 and hashmap3 should fail */
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap2);
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+ if (CHECK(!IS_ERR(link), "attach_iter",
+ "attach_iter for hashmap2 unexpected succeeded\n"))
+ goto out;
+
+ linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3);
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+ if (CHECK(!IS_ERR(link), "attach_iter",
+ "attach_iter for hashmap3 unexpected succeeded\n"))
+ goto out;
+
+ /* hashmap1 should be good, update map values here */
+ map_fd = bpf_map__fd(skel->maps.hashmap1);
+ for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) {
+ key.a = i + 1;
+ key.b = i + 2;
+ key.c = i + 3;
+ val = i + 4;
+ expected_key_a += key.a;
+ expected_key_b += key.b;
+ expected_key_c += key.c;
+ expected_val += val;
+
+ err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
+ if (CHECK(err, "map_update", "map_update failed\n"))
+ goto out;
+ }
+
+ linfo.map.map_fd = map_fd;
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ /* do some tests */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ goto close_iter;
+
+ /* test results */
+ if (CHECK(skel->bss->key_sum_a != expected_key_a,
+ "key_sum_a", "got %u expected %u\n",
+ skel->bss->key_sum_a, expected_key_a))
+ goto close_iter;
+ if (CHECK(skel->bss->key_sum_b != expected_key_b,
+ "key_sum_b", "got %u expected %u\n",
+ skel->bss->key_sum_b, expected_key_b))
+ goto close_iter;
+ if (CHECK(skel->bss->val_sum != expected_val,
+ "val_sum", "got %llu expected %llu\n",
+ skel->bss->val_sum, expected_val))
+ goto close_iter;
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ bpf_iter_bpf_hash_map__destroy(skel);
+}
+
+static void test_bpf_percpu_hash_map(void)
+{
+ __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ struct bpf_iter_bpf_percpu_hash_map *skel;
+ int err, i, j, len, map_fd, iter_fd;
+ union bpf_iter_link_info linfo;
+ __u32 expected_val = 0;
+ struct bpf_link *link;
+ struct key_t {
+ int a;
+ int b;
+ int c;
+ } key;
+ char buf[64];
+ void *val;
+
+ val = malloc(8 * bpf_num_possible_cpus());
+
+ skel = bpf_iter_bpf_percpu_hash_map__open();
+ if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open",
+ "skeleton open failed\n"))
+ return;
+
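+ /* tell the prog how many per-CPU values to expect (one per possible CPU) */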
+ skel->rodata->num_cpus = bpf_num_possible_cpus();
+
+ err = bpf_iter_bpf_percpu_hash_map__load(skel);
+ if (CHECK(err, "bpf_iter_bpf_percpu_hash_map__load",
+ "skeleton load failed\n"))
+ goto out;
+
+ /* update map values here */
+ map_fd = bpf_map__fd(skel->maps.hashmap1);
+ for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) {
+ key.a = i + 1;
+ key.b = i + 2;
+ key.c = i + 3;
+ expected_key_a += key.a;
+ expected_key_b += key.b;
+ expected_key_c += key.c;
+
+ for (j = 0; j < bpf_num_possible_cpus(); j++) {
+ *(__u32 *)(val + j * 8) = i + j;
+ expected_val += i + j;
+ }
+
+ err = bpf_map_update_elem(map_fd, &key, val, BPF_ANY);
+ if (CHECK(err, "map_update", "map_update failed\n"))
+ goto out;
+ }
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ /* drive the prog over all elements; the output itself is ignored */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ goto close_iter;
+
+ /* test results */
+ if (CHECK(skel->bss->key_sum_a != expected_key_a,
+ "key_sum_a", "got %u expected %u\n",
+ skel->bss->key_sum_a, expected_key_a))
+ goto close_iter;
+ if (CHECK(skel->bss->key_sum_b != expected_key_b,
+ "key_sum_b", "got %u expected %u\n",
+ skel->bss->key_sum_b, expected_key_b))
+ goto close_iter;
+ if (CHECK(skel->bss->val_sum != expected_val,
+ "val_sum", "got %u expected %u\n",
+ skel->bss->val_sum, expected_val))
+ goto close_iter;
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ bpf_iter_bpf_percpu_hash_map__destroy(skel);
+}
+
+static void test_bpf_array_map(void)
+{
+ __u64 val, expected_val = 0, res_first_val, first_val = 0;
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ __u32 expected_key = 0, res_first_key;
+ struct bpf_iter_bpf_array_map *skel;
+ union bpf_iter_link_info linfo;
+ int err, i, map_fd, iter_fd;
+ struct bpf_link *link;
+ char buf[64] = {};
+ int len, start;
+
+ skel = bpf_iter_bpf_array_map__open_and_load();
+ if (CHECK(!skel, "bpf_iter_bpf_array_map__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.arraymap1);
+ for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
+ val = i + 4;
+ expected_key += i;
+ expected_val += val;
+
+ if (i == 0)
+ first_val = val;
+
+ err = bpf_map_update_elem(map_fd, &i, &val, BPF_ANY);
+ if (CHECK(err, "map_update", "map_update failed\n"))
+ goto out;
+ }
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ /* collect the full iterator output into buf; it is checked below */
+ start = 0;
+ while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0)
+ start += len;
+ if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ goto close_iter;
+
+ /* test results */
+ res_first_key = *(__u32 *)buf;
+ res_first_val = *(__u64 *)(buf + sizeof(__u32));
+ if (CHECK(res_first_key != 0 || res_first_val != first_val,
+ "bpf_seq_write",
+ "seq_write failure: first key %u vs expected 0, "
+ " first value %llu vs expected %llu\n",
+ res_first_key, res_first_val, first_val))
+ goto close_iter;
+
+ if (CHECK(skel->bss->key_sum != expected_key,
+ "key_sum", "got %u expected %u\n",
+ skel->bss->key_sum, expected_key))
+ goto close_iter;
+ if (CHECK(skel->bss->val_sum != expected_val,
+ "val_sum", "got %llu expected %llu\n",
+ skel->bss->val_sum, expected_val))
+ goto close_iter;
+
+ for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
+ err = bpf_map_lookup_elem(map_fd, &i, &val);
+ if (CHECK(err, "map_lookup", "map_lookup failed\n"))
+ goto close_iter;
+ if (CHECK(i != val, "invalid_val",
+ "got value %llu expected %u\n", val, i))
+ goto close_iter;
+ }
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ bpf_iter_bpf_array_map__destroy(skel);
+}
+
+static void test_bpf_percpu_array_map(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ struct bpf_iter_bpf_percpu_array_map *skel;
+ __u32 expected_key = 0, expected_val = 0;
+ union bpf_iter_link_info linfo;
+ int err, i, j, map_fd, iter_fd;
+ struct bpf_link *link;
+ char buf[64];
+ void *val;
+ int len;
+
+ val = malloc(8 * bpf_num_possible_cpus());
+
+ skel = bpf_iter_bpf_percpu_array_map__open();
+ if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open",
+ "skeleton open failed\n"))
+ return;
+
+ skel->rodata->num_cpus = bpf_num_possible_cpus();
+
+ err = bpf_iter_bpf_percpu_array_map__load(skel);
+ if (CHECK(err, "bpf_iter_bpf_percpu_array_map__load",
+ "skeleton load failed\n"))
+ goto out;
+
+ /* update map values here */
+ map_fd = bpf_map__fd(skel->maps.arraymap1);
+ for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
+ expected_key += i;
+
+ for (j = 0; j < bpf_num_possible_cpus(); j++) {
+ *(__u32 *)(val + j * 8) = i + j;
+ expected_val += i + j;
+ }
+
+ err = bpf_map_update_elem(map_fd, &i, val, BPF_ANY);
+ if (CHECK(err, "map_update", "map_update failed\n"))
+ goto out;
+ }
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ /* drive the prog over all elements; the output itself is ignored */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ goto close_iter;
+
+ /* test results */
+ if (CHECK(skel->bss->key_sum != expected_key,
+ "key_sum", "got %u expected %u\n",
+ skel->bss->key_sum, expected_key))
+ goto close_iter;
+ if (CHECK(skel->bss->val_sum != expected_val,
+ "val_sum", "got %u expected %u\n",
+ skel->bss->val_sum, expected_val))
+ goto close_iter;
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ bpf_iter_bpf_percpu_array_map__destroy(skel);
+}
+
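+/* Create a few AF_INET6 sockets, seed one sk_storage value per socket, then
+ * iterate the sk_storage map and compare the counts and sums accumulated by
+ * the iterator program against the expected ones.
+ */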
+static void test_bpf_sk_storage_map(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ int err, i, len, map_fd, iter_fd, num_sockets;
+ struct bpf_iter_bpf_sk_storage_map *skel;
+ union bpf_iter_link_info linfo;
+ int sock_fd[3] = {-1, -1, -1};
+ __u32 val, expected_val = 0;
+ struct bpf_link *link;
+ char buf[64];
+
+ skel = bpf_iter_bpf_sk_storage_map__open_and_load();
+ if (CHECK(!skel, "bpf_iter_bpf_sk_storage_map__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.sk_stg_map);
+ num_sockets = ARRAY_SIZE(sock_fd);
+ for (i = 0; i < num_sockets; i++) {
+ sock_fd[i] = socket(AF_INET6, SOCK_STREAM, 0);
+ if (CHECK(sock_fd[i] < 0, "socket", "errno: %d\n", errno))
+ goto out;
+
+ val = i + 1;
+ expected_val += val;
+
+ err = bpf_map_update_elem(map_fd, &sock_fd[i], &val,
+ BPF_NOEXIST);
+ if (CHECK(err, "map_update", "map_update failed\n"))
+ goto out;
+ }
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ /* drive the prog over all elements; the output itself is ignored */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ goto close_iter;
+
+ /* test results */
+ if (CHECK(skel->bss->ipv6_sk_count != num_sockets,
+ "ipv6_sk_count", "got %u expected %u\n",
+ skel->bss->ipv6_sk_count, num_sockets))
+ goto close_iter;
+
+ if (CHECK(skel->bss->val_sum != expected_val,
+ "val_sum", "got %u expected %u\n",
+ skel->bss->val_sum, expected_val))
+ goto close_iter;
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ for (i = 0; i < num_sockets; i++) {
+ if (sock_fd[i] >= 0)
+ close(sock_fd[i]);
+ }
+ bpf_iter_bpf_sk_storage_map__destroy(skel);
+}
+
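+/* The prog in bpf_iter_test_kern5.c accesses its read-only buffer out of
+ * bounds, so attaching it to hashmap1 is expected to be rejected.
+ */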
+static void test_rdonly_buf_out_of_bound(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ struct bpf_iter_test_kern5 *skel;
+ union bpf_iter_link_info linfo;
+ struct bpf_link *link;
+
+ skel = bpf_iter_test_kern5__open_and_load();
+ if (CHECK(!skel, "bpf_iter_test_kern5__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap1);
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+ if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n"))
+ bpf_link__destroy(link);
+
+ bpf_iter_test_kern5__destroy(skel);
+}
+
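+/* bpf_iter_test_kern6.c uses a negative buffer offset, so even loading the
+ * skeleton is expected to fail.
+ */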
+static void test_buf_neg_offset(void)
+{
+ struct bpf_iter_test_kern6 *skel;
+
+ skel = bpf_iter_test_kern6__open_and_load();
+ if (CHECK(skel, "bpf_iter_test_kern6__open_and_load",
+ "skeleton open_and_load unexpected success\n"))
+ bpf_iter_test_kern6__destroy(skel);
+}
+
+void test_bpf_iter(void)
+{
+ if (test__start_subtest("btf_id_or_null"))
+ test_btf_id_or_null();
+ if (test__start_subtest("ipv6_route"))
+ test_ipv6_route();
+ if (test__start_subtest("netlink"))
+ test_netlink();
+ if (test__start_subtest("bpf_map"))
+ test_bpf_map();
+ if (test__start_subtest("task"))
+ test_task();
+ if (test__start_subtest("task_stack"))
+ test_task_stack();
+ if (test__start_subtest("task_file"))
+ test_task_file();
+ if (test__start_subtest("task_btf"))
+ test_task_btf();
+ if (test__start_subtest("tcp4"))
+ test_tcp4();
+ if (test__start_subtest("tcp6"))
+ test_tcp6();
+ if (test__start_subtest("udp4"))
+ test_udp4();
+ if (test__start_subtest("udp6"))
+ test_udp6();
+ if (test__start_subtest("anon"))
+ test_anon_iter(false);
+ if (test__start_subtest("anon-read-one-char"))
+ test_anon_iter(true);
+ if (test__start_subtest("file"))
+ test_file_iter();
+ if (test__start_subtest("overflow"))
+ test_overflow(false, false);
+ if (test__start_subtest("overflow-e2big"))
+ test_overflow(true, false);
+ if (test__start_subtest("prog-ret-1"))
+ test_overflow(false, true);
+ if (test__start_subtest("bpf_hash_map"))
+ test_bpf_hash_map();
+ if (test__start_subtest("bpf_percpu_hash_map"))
+ test_bpf_percpu_hash_map();
+ if (test__start_subtest("bpf_array_map"))
+ test_bpf_array_map();
+ if (test__start_subtest("bpf_percpu_array_map"))
+ test_bpf_percpu_array_map();
+ if (test__start_subtest("bpf_sk_storage_map"))
+ test_bpf_sk_storage_map();
+ if (test__start_subtest("rdonly-buf-out-of-bound"))
+ test_rdonly_buf_out_of_bound();
+ if (test__start_subtest("buf-neg-offset"))
+ test_buf_neg_offset();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index f10029821e16..284d5921c345 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -1,26 +1,30 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#define nr_iters 2
+
void test_bpf_obj_id(void)
{
const __u64 array_magic_value = 0xfaceb00c;
const __u32 array_key = 0;
- const int nr_iters = 2;
const char *file = "./test_obj_id.o";
const char *expected_prog_name = "test_obj_id";
const char *expected_map_name = "test_map_id";
const __u64 nsec_per_sec = 1000000000;
- struct bpf_object *objs[nr_iters];
+ struct bpf_object *objs[nr_iters] = {};
+ struct bpf_link *links[nr_iters] = {};
+ struct bpf_program *prog;
int prog_fds[nr_iters], map_fds[nr_iters];
/* +1 to test for the info_len returned by kernel */
struct bpf_prog_info prog_infos[nr_iters + 1];
struct bpf_map_info map_infos[nr_iters + 1];
+ struct bpf_link_info link_infos[nr_iters + 1];
/* Each prog only uses one map. +1 to test nr_map_ids
* returned by kernel.
*/
__u32 map_ids[nr_iters + 1];
- char jited_insns[128], xlated_insns[128], zeros[128];
+ char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128];
__u32 i, next_id, info_len, nr_id_found, duration = 0;
struct timespec real_time_ts, boot_time_ts;
int err = 0;
@@ -36,14 +40,15 @@ void test_bpf_obj_id(void)
CHECK(err >= 0 || errno != ENOENT,
"get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno);
- for (i = 0; i < nr_iters; i++)
- objs[i] = NULL;
+ err = bpf_link_get_fd_by_id(0);
+ CHECK(err >= 0 || errno != ENOENT,
+ "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno);
/* Check bpf_obj_get_info_by_fd() */
bzero(zeros, sizeof(zeros));
for (i = 0; i < nr_iters; i++) {
now = time(NULL);
- err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER,
+ err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT,
&objs[i], &prog_fds[i]);
/* test_obj_id.o is a dumb prog. It should never fail
* to load.
@@ -60,6 +65,17 @@ void test_bpf_obj_id(void)
if (CHECK_FAIL(err))
goto done;
+ prog = bpf_object__find_program_by_title(objs[i],
+ "raw_tp/sys_enter");
+ if (CHECK_FAIL(!prog))
+ goto done;
+ links[i] = bpf_program__attach(prog);
+ err = libbpf_get_error(links[i]);
+ if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) {
+ links[i] = NULL;
+ goto done;
+ }
+
/* Check getting map info */
info_len = sizeof(struct bpf_map_info) * 2;
bzero(&map_infos[i], info_len);
@@ -107,7 +123,7 @@ void test_bpf_obj_id(void)
load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
+ (prog_infos[i].load_time / nsec_per_sec);
if (CHECK(err ||
- prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER ||
+ prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT ||
info_len != sizeof(struct bpf_prog_info) ||
(env.jit_enabled && !prog_infos[i].jited_prog_len) ||
(env.jit_enabled &&
@@ -120,7 +136,11 @@ void test_bpf_obj_id(void)
*(int *)(long)prog_infos[i].map_ids != map_infos[i].id ||
strcmp((char *)prog_infos[i].name, expected_prog_name),
"get-prog-info(fd)",
- "err %d errno %d i %d type %d(%d) info_len %u(%zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
+ "err %d errno %d i %d type %d(%d) info_len %u(%zu) "
+ "jit_enabled %d jited_prog_len %u xlated_prog_len %u "
+ "jited_prog %d xlated_prog %d load_time %lu(%lu) "
+ "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) "
+ "name %s(%s)\n",
err, errno, i,
prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
info_len, sizeof(struct bpf_prog_info),
@@ -135,6 +155,33 @@ void test_bpf_obj_id(void)
*(int *)(long)prog_infos[i].map_ids, map_infos[i].id,
prog_infos[i].name, expected_prog_name))
goto done;
+
+ /* Check getting link info */
+ info_len = sizeof(struct bpf_link_info) * 2;
+ bzero(&link_infos[i], info_len);
+ link_infos[i].raw_tracepoint.tp_name = ptr_to_u64(&tp_name);
+ link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name);
+ err = bpf_obj_get_info_by_fd(bpf_link__fd(links[i]),
+ &link_infos[i], &info_len);
+ if (CHECK(err ||
+ link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT ||
+ link_infos[i].prog_id != prog_infos[i].id ||
+ link_infos[i].raw_tracepoint.tp_name != ptr_to_u64(&tp_name) ||
+ strcmp(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name),
+ "sys_enter") ||
+ info_len != sizeof(struct bpf_link_info),
+ "get-link-info(fd)",
+ "err %d errno %d info_len %u(%zu) type %d(%d) id %d "
+ "prog_id %d (%d) tp_name %s(%s)\n",
+ err, errno,
+ info_len, sizeof(struct bpf_link_info),
+ link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT,
+ link_infos[i].id,
+ link_infos[i].prog_id, prog_infos[i].id,
+ (const char *)u64_to_ptr(link_infos[i].raw_tracepoint.tp_name),
+ "sys_enter"))
+ goto done;
+
}
/* Check bpf_prog_get_next_id() */
@@ -247,7 +294,52 @@ void test_bpf_obj_id(void)
"nr_id_found %u(%u)\n",
nr_id_found, nr_iters);
+ /* Check bpf_link_get_next_id() */
+ nr_id_found = 0;
+ next_id = 0;
+ while (!bpf_link_get_next_id(next_id, &next_id)) {
+ struct bpf_link_info link_info;
+ int link_fd, cmp_res;
+
+ info_len = sizeof(link_info);
+ memset(&link_info, 0, info_len);
+
+ link_fd = bpf_link_get_fd_by_id(next_id);
+ if (link_fd < 0 && errno == ENOENT)
+ /* The bpf_link is already being destroyed */
+ continue;
+ if (CHECK(link_fd < 0, "get-link-fd(next_id)",
+ "link_fd %d next_id %u errno %d\n",
+ link_fd, next_id, errno))
+ break;
+
+ for (i = 0; i < nr_iters; i++)
+ if (link_infos[i].id == next_id)
+ break;
+
+ if (i == nr_iters)
+ continue;
+
+ nr_id_found++;
+
+ err = bpf_obj_get_info_by_fd(link_fd, &link_info, &info_len);
+ cmp_res = memcmp(&link_info, &link_infos[i],
+ offsetof(struct bpf_link_info, raw_tracepoint));
+ CHECK(err || info_len != sizeof(link_info) || cmp_res,
+ "check get-link-info(next_id->fd)",
+ "err %d errno %d info_len %u(%zu) memcmp %d\n",
+ err, errno, info_len, sizeof(struct bpf_link_info),
+ cmp_res);
+
+ close(link_fd);
+ }
+ CHECK(nr_id_found != nr_iters,
+ "check total link id found by get_next_id",
+ "nr_id_found %u(%u)\n", nr_id_found, nr_iters);
+
done:
- for (i = 0; i < nr_iters; i++)
+ for (i = 0; i < nr_iters; i++) {
+ bpf_link__destroy(links[i]);
bpf_object__close(objs[i]);
+ }
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index e9f2f12ba06b..e698ee6bb6c2 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -49,6 +49,7 @@ void test_bpf_verif_scale(void)
{ "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS },
{ "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+ { "pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
/* full unroll by llvm */
{ "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
@@ -86,6 +87,9 @@ void test_bpf_verif_scale(void)
{ "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
{ "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+ /* non-inlined subprogs */
+ { "strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+
{ "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
{ "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 305fae8f80a9..93162484c2ca 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -24,40 +24,17 @@
#include "bpf_rlimit.h"
#include "bpf_util.h"
-#include "test_btf.h"
+#include "../test_btf.h"
+#include "test_progs.h"
#define MAX_INSNS 512
#define MAX_SUBPROGS 16
-static uint32_t pass_cnt;
-static uint32_t error_cnt;
-static uint32_t skip_cnt;
+static int duration = 0;
+static bool always_log;
-#define CHECK(condition, format...) ({ \
- int __ret = !!(condition); \
- if (__ret) { \
- fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \
- fprintf(stderr, format); \
- } \
- __ret; \
-})
-
-static int count_result(int err)
-{
- if (err)
- error_cnt++;
- else
- pass_cnt++;
-
- fprintf(stderr, "\n");
- return err;
-}
-
-static int __base_pr(enum libbpf_print_level level __attribute__((unused)),
- const char *format, va_list args)
-{
- return vfprintf(stderr, format, args);
-}
+#undef CHECK
+#define CHECK(condition, format...) _CHECK(condition, "check", duration, format)
#define BTF_END_RAW 0xdeadbeef
#define NAME_TBD 0xdeadb33f
@@ -69,21 +46,6 @@ static int __base_pr(enum libbpf_print_level level __attribute__((unused)),
#define MAX_NR_RAW_U32 1024
#define BTF_LOG_BUF_SIZE 65535
-static struct args {
- unsigned int raw_test_num;
- unsigned int file_test_num;
- unsigned int get_info_test_num;
- unsigned int info_raw_test_num;
- unsigned int dedup_test_num;
- bool raw_test;
- bool file_test;
- bool get_info_test;
- bool pprint_test;
- bool always_log;
- bool info_raw_test;
- bool dedup_test;
-} args;
-
static char btf_log_buf[BTF_LOG_BUF_SIZE];
static struct btf_header hdr_tmpl = {
@@ -3664,7 +3626,7 @@ done:
return raw_btf;
}
-static int do_test_raw(unsigned int test_num)
+static void do_test_raw(unsigned int test_num)
{
struct btf_raw_test *test = &raw_tests[test_num - 1];
struct bpf_create_map_attr create_attr = {};
@@ -3674,15 +3636,16 @@ static int do_test_raw(unsigned int test_num)
void *raw_btf;
int err;
- fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
+
raw_btf = btf_raw_create(&hdr_tmpl,
test->raw_types,
test->str_sec,
test->str_sec_size,
&raw_btf_size, NULL);
-
if (!raw_btf)
- return -1;
+ return;
hdr = raw_btf;
@@ -3694,7 +3657,7 @@ static int do_test_raw(unsigned int test_num)
*btf_log_buf = '\0';
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
free(raw_btf);
err = ((btf_fd == -1) != test->btf_load_err);
@@ -3725,32 +3688,12 @@ static int do_test_raw(unsigned int test_num)
map_fd, test->map_create_err);
done:
- if (!err)
- fprintf(stderr, "OK");
-
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
-
if (btf_fd != -1)
close(btf_fd);
if (map_fd != -1)
close(map_fd);
-
- return err;
-}
-
-static int test_raw(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.raw_test_num)
- return count_result(do_test_raw(args.raw_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
- err |= count_result(do_test_raw(i));
-
- return err;
}
struct btf_get_info_test {
@@ -3814,11 +3757,6 @@ const struct btf_get_info_test get_info_tests[] = {
},
};
-static inline __u64 ptr_to_u64(const void *ptr)
-{
- return (__u64)(unsigned long)ptr;
-}
-
static int test_big_btf_info(unsigned int test_num)
{
const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
@@ -3851,7 +3789,7 @@ static int test_big_btf_info(unsigned int test_num)
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
if (CHECK(btf_fd == -1, "errno:%d", errno)) {
err = -1;
goto done;
@@ -3883,7 +3821,7 @@ static int test_big_btf_info(unsigned int test_num)
info_garbage.garbage = 0;
err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
if (CHECK(err || info_len != sizeof(*info),
- "err:%d errno:%d info_len:%u sizeof(*info):%lu",
+ "err:%d errno:%d info_len:%u sizeof(*info):%zu",
err, errno, info_len, sizeof(*info))) {
err = -1;
goto done;
@@ -3892,7 +3830,7 @@ static int test_big_btf_info(unsigned int test_num)
fprintf(stderr, "OK");
done:
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
@@ -3939,7 +3877,7 @@ static int test_btf_id(unsigned int test_num)
btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
err = -1;
goto done;
@@ -4024,7 +3962,7 @@ static int test_btf_id(unsigned int test_num)
fprintf(stderr, "OK");
done:
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
@@ -4039,7 +3977,7 @@ done:
return err;
}
-static int do_test_get_info(unsigned int test_num)
+static void do_test_get_info(unsigned int test_num)
{
const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
unsigned int raw_btf_size, user_btf_size, expected_nbytes;
@@ -4048,11 +3986,14 @@ static int do_test_get_info(unsigned int test_num)
int btf_fd = -1, err, ret;
uint32_t info_len;
- fprintf(stderr, "BTF GET_INFO test[%u] (%s): ",
- test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
- if (test->special_test)
- return test->special_test(test_num);
+ if (test->special_test) {
+ err = test->special_test(test_num);
+ if (CHECK(err, "failed: %d\n", err))
+ return;
+ }
raw_btf = btf_raw_create(&hdr_tmpl,
test->raw_types,
@@ -4061,7 +4002,7 @@ static int do_test_get_info(unsigned int test_num)
&raw_btf_size, NULL);
if (!raw_btf)
- return -1;
+ return;
*btf_log_buf = '\0';
@@ -4073,7 +4014,7 @@ static int do_test_get_info(unsigned int test_num)
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
if (CHECK(btf_fd == -1, "errno:%d", errno)) {
err = -1;
goto done;
@@ -4094,7 +4035,7 @@ static int do_test_get_info(unsigned int test_num)
if (CHECK(err || !info.id || info_len != sizeof(info) ||
info.btf_size != raw_btf_size ||
(ret = memcmp(raw_btf, user_btf, expected_nbytes)),
- "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%lu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d",
+ "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%zu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d",
err, errno, info.id, info_len, sizeof(info),
raw_btf_size, info.btf_size, expected_nbytes, ret)) {
err = -1;
@@ -4114,7 +4055,7 @@ static int do_test_get_info(unsigned int test_num)
fprintf(stderr, "OK");
done:
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
@@ -4122,22 +4063,6 @@ done:
if (btf_fd != -1)
close(btf_fd);
-
- return err;
-}
-
-static int test_get_info(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.get_info_test_num)
- return count_result(do_test_get_info(args.get_info_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
- err |= count_result(do_test_get_info(i));
-
- return err;
}
struct btf_file_test {
@@ -4151,7 +4076,7 @@ static struct btf_file_test file_tests[] = {
{ .file = "test_btf_nokv.o", .btf_kv_notfound = true, },
};
-static int do_test_file(unsigned int test_num)
+static void do_test_file(unsigned int test_num)
{
const struct btf_file_test *test = &file_tests[test_num - 1];
const char *expected_fnames[] = {"_dummy_tracepoint",
@@ -4169,17 +4094,17 @@ static int do_test_file(unsigned int test_num)
struct bpf_map *map;
int i, err, prog_fd;
- fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
- test->file);
+ if (!test__start_subtest(test->file))
+ return;
btf = btf__parse_elf(test->file, &btf_ext);
if (IS_ERR(btf)) {
if (PTR_ERR(btf) == -ENOENT) {
- fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
- skip_cnt++;
- return 0;
+ printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC);
+ test__skip();
+ return;
}
- return PTR_ERR(btf);
+ return;
}
btf__free(btf);
@@ -4188,7 +4113,7 @@ static int do_test_file(unsigned int test_num)
obj = bpf_object__open(test->file);
if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
- return PTR_ERR(obj);
+ return;
prog = bpf_program__next(NULL, obj);
if (CHECK(!prog, "Cannot find bpf_prog")) {
@@ -4310,21 +4235,6 @@ skip:
done:
free(func_info);
bpf_object__close(obj);
- return err;
-}
-
-static int test_file(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.file_test_num)
- return count_result(do_test_file(args.file_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
- err |= count_result(do_test_file(i));
-
- return err;
}
const char *pprint_enum_str[] = {
@@ -4428,7 +4338,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
.value_type_id = 16, /* struct pprint_mapv */
- .max_entries = 128 * 1024,
+ .max_entries = 128,
},
{
@@ -4493,7 +4403,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
.value_type_id = 16, /* struct pprint_mapv */
- .max_entries = 128 * 1024,
+ .max_entries = 128,
},
{
@@ -4564,7 +4474,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
.value_type_id = 16, /* struct pprint_mapv */
- .max_entries = 128 * 1024,
+ .max_entries = 128,
},
#ifdef __SIZEOF_INT128__
@@ -4591,7 +4501,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv_int128),
.key_type_id = 1,
.value_type_id = 4,
- .max_entries = 128 * 1024,
+ .max_entries = 128,
.mapv_kind = PPRINT_MAPV_KIND_INT128,
},
#endif
@@ -4730,7 +4640,7 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind,
nexpected_line = snprintf(expected_line, line_size,
"%s%u: {%u,0,%d,0x%x,0x%x,0x%x,"
- "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s,"
+ "{%llu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s,"
"%u,0x%x,[[%d,%d],[%d,%d]]}\n",
percpu_map ? "\tcpu" : "",
percpu_map ? cpu : next_key,
@@ -4738,7 +4648,7 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind,
v->unused_bits2a,
v->bits28,
v->unused_bits2b,
- v->ui64,
+ (__u64)v->ui64,
v->ui8a[0], v->ui8a[1],
v->ui8a[2], v->ui8a[3],
v->ui8a[4], v->ui8a[5],
@@ -4790,7 +4700,7 @@ static int check_line(const char *expected_line, int nexpected_line,
}
-static int do_test_pprint(int test_num)
+static void do_test_pprint(int test_num)
{
const struct btf_raw_test *test = &pprint_test_template[test_num];
enum pprint_mapv_kind_t mapv_kind = test->mapv_kind;
@@ -4809,18 +4719,20 @@ static int do_test_pprint(int test_num)
uint8_t *raw_btf;
ssize_t nread;
- fprintf(stderr, "%s(#%d)......", test->descr, test_num);
+ if (!test__start_subtest(test->descr))
+ return;
+
raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
test->str_sec, test->str_sec_size,
&raw_btf_size, NULL);
if (!raw_btf)
- return -1;
+ return;
*btf_log_buf = '\0';
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
free(raw_btf);
if (CHECK(btf_fd == -1, "errno:%d", errno)) {
@@ -4971,7 +4883,7 @@ done:
free(mapv);
if (!err)
fprintf(stderr, "OK");
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
if (btf_fd != -1)
close(btf_fd);
@@ -4981,14 +4893,11 @@ done:
fclose(pin_file);
unlink(pin_path);
free(line);
-
- return err;
}
-static int test_pprint(void)
+static void test_pprint(void)
{
unsigned int i;
- int err = 0;
/* test various maps with the first test template */
for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) {
@@ -4999,7 +4908,7 @@ static int test_pprint(void)
pprint_test_template[0].lossless_map = pprint_tests_meta[i].lossless_map;
pprint_test_template[0].percpu_map = pprint_tests_meta[i].percpu_map;
- err |= count_result(do_test_pprint(0));
+ do_test_pprint(0);
}
/* test rest test templates with the first map */
@@ -5010,10 +4919,8 @@ static int test_pprint(void)
pprint_test_template[i].ordered_map = pprint_tests_meta[0].ordered_map;
pprint_test_template[i].lossless_map = pprint_tests_meta[0].lossless_map;
pprint_test_template[i].percpu_map = pprint_tests_meta[0].percpu_map;
- err |= count_result(do_test_pprint(i));
+ do_test_pprint(i);
}
-
- return err;
}
#define BPF_LINE_INFO_ENC(insn_off, file_off, line_off, line_num, line_col) \
@@ -6178,7 +6085,7 @@ done:
return err;
}
-static int do_test_info_raw(unsigned int test_num)
+static void do_test_info_raw(unsigned int test_num)
{
const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1];
unsigned int raw_btf_size, linfo_str_off, linfo_size;
@@ -6187,18 +6094,19 @@ static int do_test_info_raw(unsigned int test_num)
const char *ret_next_str;
union bpf_attr attr = {};
- fprintf(stderr, "BTF prog info raw test[%u] (%s): ", test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
+
raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
test->str_sec, test->str_sec_size,
&raw_btf_size, &ret_next_str);
-
if (!raw_btf)
- return -1;
+ return;
*btf_log_buf = '\0';
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
free(raw_btf);
if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) {
@@ -6206,7 +6114,7 @@ static int do_test_info_raw(unsigned int test_num)
goto done;
}
- if (*btf_log_buf && args.always_log)
+ if (*btf_log_buf && always_log)
fprintf(stderr, "\n%s", btf_log_buf);
*btf_log_buf = '\0';
@@ -6261,10 +6169,7 @@ static int do_test_info_raw(unsigned int test_num)
goto done;
done:
- if (!err)
- fprintf(stderr, "OK");
-
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
if (btf_fd != -1)
@@ -6274,22 +6179,6 @@ done:
if (!IS_ERR(patched_linfo))
free(patched_linfo);
-
- return err;
-}
-
-static int test_info_raw(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.info_raw_test_num)
- return count_result(do_test_info_raw(args.info_raw_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++)
- err |= count_result(do_test_info_raw(i));
-
- return err;
}
struct btf_raw_data {
@@ -6754,7 +6643,7 @@ static void dump_btf_strings(const char *strs, __u32 len)
}
}
-static int do_test_dedup(unsigned int test_num)
+static void do_test_dedup(unsigned int test_num)
{
const struct btf_dedup_test *test = &dedup_tests[test_num - 1];
__u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size;
@@ -6769,13 +6658,15 @@ static int do_test_dedup(unsigned int test_num)
void *raw_btf;
int err = 0, i;
- fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
raw_btf = btf_raw_create(&hdr_tmpl, test->input.raw_types,
test->input.str_sec, test->input.str_sec_size,
&raw_btf_size, &ret_test_next_str);
if (!raw_btf)
- return -1;
+ return;
+
test_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
free(raw_btf);
if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld",
@@ -6789,7 +6680,7 @@ static int do_test_dedup(unsigned int test_num)
test->expect.str_sec_size,
&raw_btf_size, &ret_expect_next_str);
if (!raw_btf)
- return -1;
+ return;
expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
free(raw_btf);
if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld",
@@ -6894,174 +6785,27 @@ static int do_test_dedup(unsigned int test_num)
}
done:
- if (!err)
- fprintf(stderr, "OK");
if (!IS_ERR(test_btf))
btf__free(test_btf);
if (!IS_ERR(expect_btf))
btf__free(expect_btf);
-
- return err;
}
-static int test_dedup(void)
+void test_btf(void)
{
- unsigned int i;
- int err = 0;
+ int i;
- if (args.dedup_test_num)
- return count_result(do_test_dedup(args.dedup_test_num));
+ always_log = env.verbosity > VERBOSE_NONE;
+ for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
+ do_test_raw(i);
+ for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
+ do_test_get_info(i);
+ for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
+ do_test_file(i);
+ for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++)
+ do_test_info_raw(i);
for (i = 1; i <= ARRAY_SIZE(dedup_tests); i++)
- err |= count_result(do_test_dedup(i));
-
- return err;
-}
-
-static void usage(const char *cmd)
-{
- fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n"
- "\t[-g btf_get_info_test_num (1 - %zu)] |\n"
- "\t[-f btf_file_test_num (1 - %zu)] |\n"
- "\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n"
- "\t[-p (pretty print test)] |\n"
- "\t[-d btf_dedup_test_num (1 - %zu)]]\n",
- cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests),
- ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests),
- ARRAY_SIZE(dedup_tests));
-}
-
-static int parse_args(int argc, char **argv)
-{
- const char *optstr = "hlpk:f:r:g:d:";
- int opt;
-
- while ((opt = getopt(argc, argv, optstr)) != -1) {
- switch (opt) {
- case 'l':
- args.always_log = true;
- break;
- case 'f':
- args.file_test_num = atoi(optarg);
- args.file_test = true;
- break;
- case 'r':
- args.raw_test_num = atoi(optarg);
- args.raw_test = true;
- break;
- case 'g':
- args.get_info_test_num = atoi(optarg);
- args.get_info_test = true;
- break;
- case 'p':
- args.pprint_test = true;
- break;
- case 'k':
- args.info_raw_test_num = atoi(optarg);
- args.info_raw_test = true;
- break;
- case 'd':
- args.dedup_test_num = atoi(optarg);
- args.dedup_test = true;
- break;
- case 'h':
- usage(argv[0]);
- exit(0);
- default:
- usage(argv[0]);
- return -1;
- }
- }
-
- if (args.raw_test_num &&
- (args.raw_test_num < 1 ||
- args.raw_test_num > ARRAY_SIZE(raw_tests))) {
- fprintf(stderr, "BTF raw test number must be [1 - %zu]\n",
- ARRAY_SIZE(raw_tests));
- return -1;
- }
-
- if (args.file_test_num &&
- (args.file_test_num < 1 ||
- args.file_test_num > ARRAY_SIZE(file_tests))) {
- fprintf(stderr, "BTF file test number must be [1 - %zu]\n",
- ARRAY_SIZE(file_tests));
- return -1;
- }
-
- if (args.get_info_test_num &&
- (args.get_info_test_num < 1 ||
- args.get_info_test_num > ARRAY_SIZE(get_info_tests))) {
- fprintf(stderr, "BTF get info test number must be [1 - %zu]\n",
- ARRAY_SIZE(get_info_tests));
- return -1;
- }
-
- if (args.info_raw_test_num &&
- (args.info_raw_test_num < 1 ||
- args.info_raw_test_num > ARRAY_SIZE(info_raw_tests))) {
- fprintf(stderr, "BTF prog info raw test number must be [1 - %zu]\n",
- ARRAY_SIZE(info_raw_tests));
- return -1;
- }
-
- if (args.dedup_test_num &&
- (args.dedup_test_num < 1 ||
- args.dedup_test_num > ARRAY_SIZE(dedup_tests))) {
- fprintf(stderr, "BTF dedup test number must be [1 - %zu]\n",
- ARRAY_SIZE(dedup_tests));
- return -1;
- }
-
- return 0;
-}
-
-static void print_summary(void)
-{
- fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n",
- pass_cnt - skip_cnt, skip_cnt, error_cnt);
-}
-
-int main(int argc, char **argv)
-{
- int err = 0;
-
- err = parse_args(argc, argv);
- if (err)
- return err;
-
- if (args.always_log)
- libbpf_set_print(__base_pr);
-
- if (args.raw_test)
- err |= test_raw();
-
- if (args.get_info_test)
- err |= test_get_info();
-
- if (args.file_test)
- err |= test_file();
-
- if (args.pprint_test)
- err |= test_pprint();
-
- if (args.info_raw_test)
- err |= test_info_raw();
-
- if (args.dedup_test)
- err |= test_dedup();
-
- if (args.raw_test || args.get_info_test || args.file_test ||
- args.pprint_test || args.info_raw_test || args.dedup_test)
- goto done;
-
- err |= test_raw();
- err |= test_get_info();
- err |= test_file();
- err |= test_info_raw();
- err |= test_dedup();
-
-done:
- print_summary();
- return err;
+ do_test_dedup(i);
+ test_pprint();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index cb33a7ee4e04..c60091ee8a21 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -12,15 +12,16 @@ void btf_dump_printf(void *ctx, const char *fmt, va_list args)
static struct btf_dump_test_case {
const char *name;
const char *file;
+ bool known_ptr_sz;
struct btf_dump_opts opts;
} btf_dump_test_cases[] = {
- {"btf_dump: syntax", "btf_dump_test_case_syntax", {}},
- {"btf_dump: ordering", "btf_dump_test_case_ordering", {}},
- {"btf_dump: padding", "btf_dump_test_case_padding", {}},
- {"btf_dump: packing", "btf_dump_test_case_packing", {}},
- {"btf_dump: bitfields", "btf_dump_test_case_bitfields", {}},
- {"btf_dump: multidim", "btf_dump_test_case_multidim", {}},
- {"btf_dump: namespacing", "btf_dump_test_case_namespacing", {}},
+ {"btf_dump: syntax", "btf_dump_test_case_syntax", true, {}},
+ {"btf_dump: ordering", "btf_dump_test_case_ordering", false, {}},
+ {"btf_dump: padding", "btf_dump_test_case_padding", true, {}},
+ {"btf_dump: packing", "btf_dump_test_case_packing", true, {}},
+ {"btf_dump: bitfields", "btf_dump_test_case_bitfields", true, {}},
+ {"btf_dump: multidim", "btf_dump_test_case_multidim", false, {}},
+ {"btf_dump: namespacing", "btf_dump_test_case_namespacing", false, {}},
};
static int btf_dump_all_types(const struct btf *btf,
@@ -62,6 +63,18 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
goto done;
}
+ /* tests without t->known_ptr_sz have no "long" or "unsigned long" type,
+ * so it's impossible to determine the correct pointer size; for tests
+ * that do have one, it should be 8 regardless of host architecture,
+ * because the BPF target is always 64-bit
+ */
+ if (!t->known_ptr_sz) {
+ btf__set_pointer_size(btf, 8);
+ } else {
+ CHECK(btf__pointer_size(btf) != 8, "ptr_sz", "exp %d, got %zu\n",
+ 8, btf__pointer_size(btf));
+ }
+
snprintf(out_file, sizeof(out_file), "/tmp/%s.output.XXXXXX", t->file);
fd = mkstemp(out_file);
if (CHECK(fd < 0, "create_tmp", "failed to create file: %d\n", fd)) {
@@ -116,6 +129,109 @@ done:
return err;
}
+static char *dump_buf;
+static size_t dump_buf_sz;
+static FILE *dump_buf_file;
+
+void test_btf_dump_incremental(void)
+{
+ struct btf *btf = NULL;
+ struct btf_dump *d = NULL;
+ struct btf_dump_opts opts;
+ int id, err, i;
+
+ dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
+ if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
+ return;
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "new_empty"))
+ goto err_out;
+ opts.ctx = dump_buf_file;
+ d = btf_dump__new(btf, NULL, &opts, btf_dump_printf);
+ if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new"))
+ goto err_out;
+
+ /* First, generate BTF corresponding to the following C code:
+ *
+ * enum { VAL = 1 };
+ *
+ * struct s { int x; };
+ *
+ */
+ id = btf__add_enum(btf, NULL, 4);
+ ASSERT_EQ(id, 1, "enum_id");
+ err = btf__add_enum_value(btf, "VAL", 1);
+ ASSERT_OK(err, "enum_val_ok");
+
+ id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
+ ASSERT_EQ(id, 2, "int_id");
+
+ id = btf__add_struct(btf, "s", 4);
+ ASSERT_EQ(id, 3, "struct_id");
+ err = btf__add_field(btf, "x", 2, 0, 0);
+ ASSERT_OK(err, "field_ok");
+
+ for (i = 1; i <= btf__get_nr_types(btf); i++) {
+ err = btf_dump__dump_type(d, i);
+ ASSERT_OK(err, "dump_type_ok");
+ }
+
+ fflush(dump_buf_file);
+ dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
+ ASSERT_STREQ(dump_buf,
+"enum {\n"
+" VAL = 1,\n"
+"};\n"
+"\n"
+"struct s {\n"
+" int x;\n"
+"};\n\n", "c_dump1");
+
+ /* Now, after dumping original BTF, append another struct that embeds
+ * anonymous enum. It also has a name conflict with the first struct:
+ *
+ * struct s___2 {
+ * enum { VAL___2 = 1 } x;
+ * struct s s;
+ * };
+ *
+ * This tests that the btf_dump'er maintains its internal state properly.
+ * Note the VAL___2 enum value: we've already emitted that enum as a
+ * global anonymous enum, so btf_dump ensures that enum value names
+ * don't conflict.
+ *
+ */
+ fseek(dump_buf_file, 0, SEEK_SET);
+
+ id = btf__add_struct(btf, "s", 4);
+ ASSERT_EQ(id, 4, "struct_id");
+ err = btf__add_field(btf, "x", 1, 0, 0);
+ ASSERT_OK(err, "field_ok");
+ err = btf__add_field(btf, "s", 3, 32, 0);
+ ASSERT_OK(err, "field_ok");
+
+ for (i = 1; i <= btf__get_nr_types(btf); i++) {
+ err = btf_dump__dump_type(d, i);
+ ASSERT_OK(err, "dump_type_ok");
+ }
+
+ fflush(dump_buf_file);
+ dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
+ ASSERT_STREQ(dump_buf,
+"struct s___2 {\n"
+" enum {\n"
+" VAL___2 = 1,\n"
+" } x;\n"
+" struct s s;\n"
+"};\n\n" , "c_dump1");
+
+err_out:
+ fclose(dump_buf_file);
+ free(dump_buf);
+ btf_dump__free(d);
+ btf__free(btf);
+}
+
void test_btf_dump() {
int i;
@@ -127,4 +243,6 @@ void test_btf_dump() {
test_btf_dump_case(i, &btf_dump_test_cases[i]);
}
+ if (test__start_subtest("btf_dump: incremental"))
+ test_btf_dump_incremental();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
new file mode 100644
index 000000000000..8c52d72c876e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <string.h>
+#include <byteswap.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static int duration = 0;
+
+void test_btf_endian() {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ enum btf_endianness endian = BTF_LITTLE_ENDIAN;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ enum btf_endianness endian = BTF_BIG_ENDIAN;
+#else
+#error "Unrecognized __BYTE_ORDER"
+#endif
+ enum btf_endianness swap_endian = 1 - endian;
+ struct btf *btf = NULL, *swap_btf = NULL;
+ const void *raw_data, *swap_raw_data;
+ const struct btf_type *t;
+ const struct btf_header *hdr;
+ __u32 raw_sz, swap_raw_sz;
+ int var_id;
+
+ /* Load BTF in native endianness */
+ btf = btf__parse_elf("btf_dump_test_case_syntax.o", NULL);
+ if (!ASSERT_OK_PTR(btf, "parse_native_btf"))
+ goto err_out;
+
+ ASSERT_EQ(btf__endianness(btf), endian, "endian");
+ btf__set_endianness(btf, swap_endian);
+ ASSERT_EQ(btf__endianness(btf), swap_endian, "endian");
+
+ /* Get raw BTF data in non-native endianness... */
+ raw_data = btf__get_raw_data(btf, &raw_sz);
+ if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted"))
+ goto err_out;
+
+ /* ...and open it as a new BTF instance */
+ swap_btf = btf__new(raw_data, raw_sz);
+ if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf"))
+ goto err_out;
+
+ ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian");
+ ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types");
+
+ swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz);
+ if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data"))
+ goto err_out;
+
+ /* both raw data should be identical (with non-native endianness) */
+ ASSERT_OK(memcmp(raw_data, swap_raw_data, raw_sz), "mem_identical");
+
+ /* make sure that at least BTF header data is really swapped */
+ hdr = swap_raw_data;
+ ASSERT_EQ(bswap_16(hdr->magic), BTF_MAGIC, "btf_magic_swapped");
+ ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes");
+
+ /* swap it back to native endianness */
+ btf__set_endianness(swap_btf, endian);
+ swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz);
+ if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data"))
+ goto err_out;
+
+ /* now header should have native BTF_MAGIC */
+ hdr = swap_raw_data;
+ ASSERT_EQ(hdr->magic, BTF_MAGIC, "btf_magic_native");
+ ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes");
+
+ /* now modify original BTF */
+ var_id = btf__add_var(btf, "some_var", BTF_VAR_GLOBAL_ALLOCATED, 1);
+ CHECK(var_id <= 0, "var_id", "failed %d\n", var_id);
+
+ btf__free(swap_btf);
+ swap_btf = NULL;
+
+ btf__set_endianness(btf, swap_endian);
+ raw_data = btf__get_raw_data(btf, &raw_sz);
+ if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted"))
+ goto err_out;
+
+ /* and re-open swapped raw data again */
+ swap_btf = btf__new(raw_data, raw_sz);
+ if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf"))
+ goto err_out;
+
+ ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian");
+ ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types");
+
+ /* the type should appear as if it was stored in native endianness */
+ t = btf__type_by_id(swap_btf, var_id);
+ ASSERT_STREQ(btf__str_by_offset(swap_btf, t->name_off), "some_var", "var_name");
+ ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_linkage");
+ ASSERT_EQ(t->type, 1, "var_type");
+
+err_out:
+ btf__free(btf);
+ btf__free(swap_btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
new file mode 100644
index 000000000000..76ebe4c250f1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+
+#include "test_btf_map_in_map.skel.h"
+
+static int duration;
+
+static __u32 bpf_map_id(struct bpf_map *map)
+{
+ struct bpf_map_info info;
+ __u32 info_len = sizeof(info);
+ int err;
+
+ memset(&info, 0, info_len);
+ err = bpf_obj_get_info_by_fd(bpf_map__fd(map), &info, &info_len);
+ if (err)
+ return 0;
+ return info.id;
+}
+
+/*
+ * Trigger synchronize_rcu() in kernel.
+ *
+ * ARRAY_OF_MAPS/HASH_OF_MAPS lookup/update operations trigger synchronize_rcu()
+ * if looking up an existing non-NULL element or updating the map with a valid
+ * inner map FD. Use this fact to trigger synchronize_rcu(): create map-in-map,
+ * create a trivial ARRAY map, update map-in-map with ARRAY inner map. Then
+ * cleanup. At the end, at least one synchronize_rcu() would be called.
+ */
+static int kern_sync_rcu(void)
+{
+ int inner_map_fd, outer_map_fd, err, zero = 0;
+
+ inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 4, 1, 0);
+ if (CHECK(inner_map_fd < 0, "inner_map_create", "failed %d\n", -errno))
+ return -1;
+
+ outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL,
+ sizeof(int), inner_map_fd, 1, 0);
+ if (CHECK(outer_map_fd < 0, "outer_map_create", "failed %d\n", -errno)) {
+ close(inner_map_fd);
+ return -1;
+ }
+
+ err = bpf_map_update_elem(outer_map_fd, &zero, &inner_map_fd, 0);
+ if (err)
+ err = -errno;
+ CHECK(err, "outer_map_update", "failed %d\n", err);
+ close(inner_map_fd);
+ close(outer_map_fd);
+ return err;
+}
+
+static void test_lookup_update(void)
+{
+ int map1_fd, map2_fd, map3_fd, map4_fd, map5_fd, map1_id, map2_id;
+ int outer_arr_fd, outer_hash_fd, outer_arr_dyn_fd;
+ struct test_btf_map_in_map *skel;
+ int err, key = 0, val, i, fd;
+
+ skel = test_btf_map_in_map__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
+ return;
+
+ err = test_btf_map_in_map__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ map1_fd = bpf_map__fd(skel->maps.inner_map1);
+ map2_fd = bpf_map__fd(skel->maps.inner_map2);
+ map3_fd = bpf_map__fd(skel->maps.inner_map3);
+ map4_fd = bpf_map__fd(skel->maps.inner_map4);
+ map5_fd = bpf_map__fd(skel->maps.inner_map5);
+ outer_arr_dyn_fd = bpf_map__fd(skel->maps.outer_arr_dyn);
+ outer_arr_fd = bpf_map__fd(skel->maps.outer_arr);
+ outer_hash_fd = bpf_map__fd(skel->maps.outer_hash);
+
+ /* inner1 = input, inner2 = input + 1, inner3 = input + 2 */
+ bpf_map_update_elem(outer_arr_fd, &key, &map1_fd, 0);
+ bpf_map_update_elem(outer_hash_fd, &key, &map2_fd, 0);
+ bpf_map_update_elem(outer_arr_dyn_fd, &key, &map3_fd, 0);
+ skel->bss->input = 1;
+ usleep(1);
+ bpf_map_lookup_elem(map1_fd, &key, &val);
+ CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1);
+ bpf_map_lookup_elem(map2_fd, &key, &val);
+ CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2);
+ bpf_map_lookup_elem(map3_fd, &key, &val);
+ CHECK(val != 3, "inner3", "got %d != exp %d\n", val, 3);
+
+ /* inner2 = input, inner1 = input + 1, inner4 = input + 2 */
+ bpf_map_update_elem(outer_arr_fd, &key, &map2_fd, 0);
+ bpf_map_update_elem(outer_hash_fd, &key, &map1_fd, 0);
+ bpf_map_update_elem(outer_arr_dyn_fd, &key, &map4_fd, 0);
+ skel->bss->input = 3;
+ usleep(1);
+ bpf_map_lookup_elem(map1_fd, &key, &val);
+ CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4);
+ bpf_map_lookup_elem(map2_fd, &key, &val);
+ CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3);
+ bpf_map_lookup_elem(map4_fd, &key, &val);
+ CHECK(val != 5, "inner4", "got %d != exp %d\n", val, 5);
+
+ /* inner5 = input + 2 */
+ bpf_map_update_elem(outer_arr_dyn_fd, &key, &map5_fd, 0);
+ skel->bss->input = 5;
+ usleep(1);
+ bpf_map_lookup_elem(map5_fd, &key, &val);
+ CHECK(val != 7, "inner5", "got %d != exp %d\n", val, 7);
+
+ for (i = 0; i < 5; i++) {
+ val = i % 2 ? map1_fd : map2_fd;
+ err = bpf_map_update_elem(outer_hash_fd, &key, &val, 0);
+ if (CHECK_FAIL(err)) {
+ printf("failed to update hash_of_maps on iter #%d\n", i);
+ goto cleanup;
+ }
+ err = bpf_map_update_elem(outer_arr_fd, &key, &val, 0);
+ if (CHECK_FAIL(err)) {
+ printf("failed to update array_of_maps on iter #%d\n", i);
+ goto cleanup;
+ }
+ val = i % 2 ? map4_fd : map5_fd;
+ err = bpf_map_update_elem(outer_arr_dyn_fd, &key, &val, 0);
+ if (CHECK_FAIL(err)) {
+ printf("failed to update array_of_maps (dyn) on iter #%d\n", i);
+ goto cleanup;
+ }
+ }
+
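+ /* remember inner map IDs so we can check they are gone after destroy */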
+ map1_id = bpf_map_id(skel->maps.inner_map1);
+ map2_id = bpf_map_id(skel->maps.inner_map2);
+ CHECK(map1_id == 0, "map1_id", "failed to get ID 1\n");
+ CHECK(map2_id == 0, "map2_id", "failed to get ID 2\n");
+
+ test_btf_map_in_map__destroy(skel);
+ skel = NULL;
+
+ /* we need to either wait for or force synchronize_rcu(), before
+ * checking for "still exists" condition, otherwise map could still be
+ * resolvable by ID, causing false positives.
+ *
+ * Older kernels (5.8 and earlier) freed map only after two
+ * synchronize_rcu()s, so trigger two, to be entirely sure.
+ */
+ CHECK(kern_sync_rcu(), "sync_rcu", "failed\n");
+ CHECK(kern_sync_rcu(), "sync_rcu", "failed\n");
+
+ fd = bpf_map_get_fd_by_id(map1_id);
+ if (CHECK(fd >= 0, "map1_leak", "inner_map1 leaked!\n")) {
+ close(fd);
+ goto cleanup;
+ }
+ fd = bpf_map_get_fd_by_id(map2_id);
+ if (CHECK(fd >= 0, "map2_leak", "inner_map2 leaked!\n")) {
+ close(fd);
+ goto cleanup;
+ }
+
+cleanup:
+ test_btf_map_in_map__destroy(skel);
+}
+
+static void test_diff_size(void)
+{
+ struct test_btf_map_in_map *skel;
+ int err, inner_map_fd, zero = 0;
+
+ skel = test_btf_map_in_map__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
+ return;
+
+ inner_map_fd = bpf_map__fd(skel->maps.sockarr_sz2);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_sockarr), &zero,
+ &inner_map_fd, 0);
+ CHECK(err, "outer_sockarr inner map size check",
+ "cannot use a different size inner_map\n");
+
+ inner_map_fd = bpf_map__fd(skel->maps.inner_map_sz2);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &zero,
+ &inner_map_fd, 0);
+ CHECK(!err, "outer_arr inner map size check",
+ "incorrectly updated with a different size inner_map\n");
+
+ test_btf_map_in_map__destroy(skel);
+}
+
+void test_btf_map_in_map(void)
+{
+ if (test__start_subtest("lookup_update"))
+ test_lookup_update();
+
+ if (test__start_subtest("diff_size"))
+ test_diff_size();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
new file mode 100644
index 000000000000..86ccf37e26b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#define _GNU_SOURCE
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sched.h>
+#include <linux/compiler.h>
+#include <bpf/libbpf.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+#include "test_btf_skc_cls_ingress.skel.h"
+
+static struct test_btf_skc_cls_ingress *skel;
+struct sockaddr_in6 srv_sa6;
+static __u32 duration;
+
+#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress"
+
+static int write_sysctl(const char *sysctl, const char *value)
+{
+ int fd, err, len;
+
+ fd = open(sysctl, O_WRONLY);
+ if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
+ sysctl, strerror(errno), errno))
+ return -1;
+
+ len = strlen(value);
+ err = write(fd, value, len);
+ close(fd);
+ if (CHECK(err != len, "write sysctl",
+ "write(%s, %s, %d): err:%d %s (%d)\n",
+ sysctl, value, len, err, strerror(errno), errno))
+ return -1;
+
+ return 0;
+}
+
+static int prepare_netns(void)
+{
+ if (CHECK(unshare(CLONE_NEWNET), "create netns",
+ "unshare(CLONE_NEWNET): %s (%d)",
+ strerror(errno), errno))
+ return -1;
+
+ if (CHECK(system("ip link set dev lo up"),
+ "ip link set dev lo up", "failed\n"))
+ return -1;
+
+ if (CHECK(system("tc qdisc add dev lo clsact"),
+ "tc qdisc add dev lo clsact", "failed\n"))
+ return -1;
+
+ if (CHECK(system("tc filter add dev lo ingress bpf direct-action object-pinned " PROG_PIN_FILE),
+ "install tc cls-prog at ingress", "failed\n"))
+ return -1;
+
+ /* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the
+ * bpf_tcp_gen_syncookie() helper.
+ */
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_window_scaling", "1") ||
+ write_sysctl("/proc/sys/net/ipv4/tcp_timestamps", "1") ||
+ write_sysctl("/proc/sys/net/ipv4/tcp_sack", "1"))
+ return -1;
+
+ return 0;
+}
+
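+/* clear the prog-side state so each subtest starts from a clean slate */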
+static void reset_test(void)
+{
+ memset(&skel->bss->srv_sa6, 0, sizeof(skel->bss->srv_sa6));
+ skel->bss->listen_tp_sport = 0;
+ skel->bss->req_sk_sport = 0;
+ skel->bss->recv_cookie = 0;
+ skel->bss->gen_cookie = 0;
+ skel->bss->linum = 0;
+}
+
+static void print_err_line(void)
+{
+ if (skel->bss->linum)
+ printf("bpf prog error at line %u\n", skel->bss->linum);
+}
+
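+/* With tcp_syncookies set to 1, a single connect is expected to complete a
+ * normal 3-way handshake without generating or receiving a syncookie.
+ */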
+static void test_conn(void)
+{
+ int listen_fd = -1, cli_fd = -1, err;
+ socklen_t addrlen = sizeof(srv_sa6);
+ int srv_port;
+
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ return;
+
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (CHECK_FAIL(listen_fd == -1))
+ return;
+
+ err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
+ if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
+ errno))
+ goto done;
+ memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
+ srv_port = ntohs(srv_sa6.sin6_port);
+
+ cli_fd = connect_to_fd(listen_fd, 0);
+ if (CHECK_FAIL(cli_fd == -1))
+ goto done;
+
+ if (CHECK(skel->bss->listen_tp_sport != srv_port ||
+ skel->bss->req_sk_sport != srv_port,
+ "Unexpected sk src port",
+ "listen_tp_sport:%u req_sk_sport:%u expected:%u\n",
+ skel->bss->listen_tp_sport, skel->bss->req_sk_sport,
+ srv_port))
+ goto done;
+
+ if (CHECK(skel->bss->gen_cookie || skel->bss->recv_cookie,
+ "Unexpected syncookie states",
+ "gen_cookie:%u recv_cookie:%u\n",
+ skel->bss->gen_cookie, skel->bss->recv_cookie))
+ goto done;
+
+ CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n",
+ skel->bss->linum);
+
+done:
+ if (listen_fd != -1)
+ close(listen_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+}
+
+static void test_syncookie(void)
+{
+ int listen_fd = -1, cli_fd = -1, err;
+ socklen_t addrlen = sizeof(srv_sa6);
+ int srv_port;
+
+ /* Enforce syncookie mode */
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2"))
+ return;
+
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (CHECK_FAIL(listen_fd == -1))
+ return;
+
+ err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
+ if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
+ errno))
+ goto done;
+ memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
+ srv_port = ntohs(srv_sa6.sin6_port);
+
+ cli_fd = connect_to_fd(listen_fd, 0);
+ if (CHECK_FAIL(cli_fd == -1))
+ goto done;
+
+ if (CHECK(skel->bss->listen_tp_sport != srv_port,
+ "Unexpected tp src port",
+ "listen_tp_sport:%u expected:%u\n",
+ skel->bss->listen_tp_sport, srv_port))
+ goto done;
+
+ if (CHECK(skel->bss->req_sk_sport,
+ "Unexpected req_sk src port",
+ "req_sk_sport:%u expected:0\n",
+ skel->bss->req_sk_sport))
+ goto done;
+
+ if (CHECK(!skel->bss->gen_cookie ||
+ skel->bss->gen_cookie != skel->bss->recv_cookie,
+ "Unexpected syncookie states",
+ "gen_cookie:%u recv_cookie:%u\n",
+ skel->bss->gen_cookie, skel->bss->recv_cookie))
+ goto done;
+
+ CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n",
+ skel->bss->linum);
+
+done:
+ if (listen_fd != -1)
+ close(listen_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+}
+
+struct test {
+ const char *desc;
+ void (*run)(void);
+};
+
+#define DEF_TEST(name) { #name, test_##name }
+static struct test tests[] = {
+ DEF_TEST(conn),
+ DEF_TEST(syncookie),
+};
+
+void test_btf_skc_cls_ingress(void)
+{
+ int i, err;
+
+ skel = test_btf_skc_cls_ingress__open_and_load();
+ if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n"))
+ return;
+
+ err = bpf_program__pin(skel->progs.cls_ingress, PROG_PIN_FILE);
+ if (CHECK(err, "bpf_program__pin",
+ "cannot pin bpf prog to %s. err:%d\n", PROG_PIN_FILE, err)) {
+ test_btf_skc_cls_ingress__destroy(skel);
+ return;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (!test__start_subtest(tests[i].desc))
+ continue;
+
+ if (prepare_netns())
+ break;
+
+ tests[i].run();
+
+ print_err_line();
+ reset_test();
+ }
+
+ bpf_program__unpin(skel->progs.cls_ingress, PROG_PIN_FILE);
+ test_btf_skc_cls_ingress__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
new file mode 100644
index 000000000000..314e1e7c36df
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static int duration = 0;
+
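+/* Exercise the btf__add_*() writer APIs and verify the ID, name and encoding
+ * of each type as it is appended.
+ */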
+void test_btf_write(void)
+{
+ const struct btf_var_secinfo *vi;
+ const struct btf_type *t;
+ const struct btf_member *m;
+ const struct btf_enum *v;
+ const struct btf_param *p;
+ struct btf *btf;
+ int id, err, str_off;
+
+ btf = btf__new_empty();
+ if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf)))
+ return;
+
+ str_off = btf__find_str(btf, "int");
+ ASSERT_EQ(str_off, -ENOENT, "int_str_missing_off");
+
+ str_off = btf__add_str(btf, "int");
+ ASSERT_EQ(str_off, 1, "int_str_off");
+
+ str_off = btf__find_str(btf, "int");
+ ASSERT_EQ(str_off, 1, "int_str_found_off");
+
+ /* BTF_KIND_INT */
+ id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
+ ASSERT_EQ(id, 1, "int_id");
+
+ t = btf__type_by_id(btf, 1);
+ /* should re-use previously added "int" string */
+ ASSERT_EQ(t->name_off, str_off, "int_name_off");
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "int", "int_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_INT, "int_kind");
+ ASSERT_EQ(t->size, 4, "int_sz");
+ ASSERT_EQ(btf_int_encoding(t), BTF_INT_SIGNED, "int_enc");
+ ASSERT_EQ(btf_int_bits(t), 32, "int_bits");
+
+ /* invalid int size */
+ id = btf__add_int(btf, "bad sz int", 7, 0);
+ ASSERT_ERR(id, "int_bad_sz");
+ /* invalid encoding */
+ id = btf__add_int(btf, "bad enc int", 4, 123);
+ ASSERT_ERR(id, "int_bad_enc");
+ /* NULL name */
+ id = btf__add_int(btf, NULL, 4, 0);
+ ASSERT_ERR(id, "int_bad_null_name");
+ /* empty name */
+ id = btf__add_int(btf, "", 4, 0);
+ ASSERT_ERR(id, "int_bad_empty_name");
+
+ /* PTR/CONST/VOLATILE/RESTRICT */
+ id = btf__add_ptr(btf, 1);
+ ASSERT_EQ(id, 2, "ptr_id");
+ t = btf__type_by_id(btf, 2);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_kind");
+ ASSERT_EQ(t->type, 1, "ptr_type");
+
+ id = btf__add_const(btf, 5); /* points forward to restrict */
+ ASSERT_EQ(id, 3, "const_id");
+ t = btf__type_by_id(btf, 3);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_CONST, "const_kind");
+ ASSERT_EQ(t->type, 5, "const_type");
+
+ id = btf__add_volatile(btf, 3);
+ ASSERT_EQ(id, 4, "volatile_id");
+ t = btf__type_by_id(btf, 4);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_VOLATILE, "volatile_kind");
+ ASSERT_EQ(t->type, 3, "volatile_type");
+
+ id = btf__add_restrict(btf, 4);
+ ASSERT_EQ(id, 5, "restrict_id");
+ t = btf__type_by_id(btf, 5);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_RESTRICT, "restrict_kind");
+ ASSERT_EQ(t->type, 4, "restrict_type");
+
+ /* ARRAY */
+ id = btf__add_array(btf, 1, 2, 10); /* int *[10] */
+ ASSERT_EQ(id, 6, "array_id");
+ t = btf__type_by_id(btf, 6);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ARRAY, "array_kind");
+ ASSERT_EQ(btf_array(t)->index_type, 1, "array_index_type");
+ ASSERT_EQ(btf_array(t)->type, 2, "array_elem_type");
+ ASSERT_EQ(btf_array(t)->nelems, 10, "array_nelems");
+
+ /* STRUCT */
+ err = btf__add_field(btf, "field", 1, 0, 0);
+ ASSERT_ERR(err, "no_struct_field");
+ id = btf__add_struct(btf, "s1", 8);
+ ASSERT_EQ(id, 7, "struct_id");
+ err = btf__add_field(btf, "f1", 1, 0, 0);
+ ASSERT_OK(err, "f1_res");
+ err = btf__add_field(btf, "f2", 1, 32, 16);
+ ASSERT_OK(err, "f2_res");
+
+ t = btf__type_by_id(btf, 7);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "s1", "struct_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_STRUCT, "struct_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "struct_vlen");
+ ASSERT_EQ(btf_kflag(t), true, "struct_kflag");
+ ASSERT_EQ(t->size, 8, "struct_sz");
+ m = btf_members(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name");
+ ASSERT_EQ(m->type, 1, "f1_type");
+ ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off");
+ ASSERT_EQ(btf_member_bitfield_size(t, 0), 0, "f1_bit_sz");
+ m = btf_members(t) + 1;
+ ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f2", "f2_name");
+ ASSERT_EQ(m->type, 1, "f2_type");
+ ASSERT_EQ(btf_member_bit_offset(t, 1), 32, "f2_bit_off");
+ ASSERT_EQ(btf_member_bitfield_size(t, 1), 16, "f2_bit_sz");
+
+ /* UNION */
+ id = btf__add_union(btf, "u1", 8);
+ ASSERT_EQ(id, 8, "union_id");
+
+ /* invalid, non-zero offset */
+ err = btf__add_field(btf, "field", 1, 1, 0);
+ ASSERT_ERR(err, "no_struct_field");
+
+ err = btf__add_field(btf, "f1", 1, 0, 16);
+ ASSERT_OK(err, "f1_res");
+
+ t = btf__type_by_id(btf, 8);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "u1", "union_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_UNION, "union_kind");
+ ASSERT_EQ(btf_vlen(t), 1, "union_vlen");
+ ASSERT_EQ(btf_kflag(t), true, "union_kflag");
+ ASSERT_EQ(t->size, 8, "union_sz");
+ m = btf_members(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name");
+ ASSERT_EQ(m->type, 1, "f1_type");
+ ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off");
+ ASSERT_EQ(btf_member_bitfield_size(t, 0), 16, "f1_bit_sz");
+
+ /* ENUM */
+ id = btf__add_enum(btf, "e1", 4);
+ ASSERT_EQ(id, 9, "enum_id");
+ err = btf__add_enum_value(btf, "v1", 1);
+ ASSERT_OK(err, "v1_res");
+ err = btf__add_enum_value(btf, "v2", 2);
+ ASSERT_OK(err, "v2_res");
+
+ t = btf__type_by_id(btf, 9);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "enum_vlen");
+ ASSERT_EQ(t->size, 4, "enum_sz");
+ v = btf_enum(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v1", "v1_name");
+ ASSERT_EQ(v->val, 1, "v1_val");
+ v = btf_enum(t) + 1;
+ ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name");
+ ASSERT_EQ(v->val, 2, "v2_val");
+
+ /* FWDs */
+ id = btf__add_fwd(btf, "struct_fwd", BTF_FWD_STRUCT);
+ ASSERT_EQ(id, 10, "struct_fwd_id");
+ t = btf__type_by_id(btf, 10);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "struct_fwd", "fwd_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind");
+ ASSERT_EQ(btf_kflag(t), 0, "fwd_kflag");
+
+ id = btf__add_fwd(btf, "union_fwd", BTF_FWD_UNION);
+ ASSERT_EQ(id, 11, "union_fwd_id");
+ t = btf__type_by_id(btf, 11);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "union_fwd", "fwd_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind");
+ ASSERT_EQ(btf_kflag(t), 1, "fwd_kflag");
+
+ id = btf__add_fwd(btf, "enum_fwd", BTF_FWD_ENUM);
+ ASSERT_EQ(id, 12, "enum_fwd_id");
+ t = btf__type_by_id(btf, 12);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "enum_fwd", "fwd_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_fwd_kind");
+	ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_vlen");
+ ASSERT_EQ(t->size, 4, "enum_fwd_sz");
+
+ /* TYPEDEF */
+ id = btf__add_typedef(btf, "typedef1", 1);
+ ASSERT_EQ(id, 13, "typedef_fwd_id");
+ t = btf__type_by_id(btf, 13);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "typedef1", "typedef_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_TYPEDEF, "typedef_kind");
+ ASSERT_EQ(t->type, 1, "typedef_type");
+
+ /* FUNC & FUNC_PROTO */
+ id = btf__add_func(btf, "func1", BTF_FUNC_GLOBAL, 15);
+ ASSERT_EQ(id, 14, "func_id");
+ t = btf__type_by_id(btf, 14);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "func1", "func_name");
+ ASSERT_EQ(t->type, 15, "func_type");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC, "func_kind");
+ ASSERT_EQ(btf_vlen(t), BTF_FUNC_GLOBAL, "func_vlen");
+
+ id = btf__add_func_proto(btf, 1);
+ ASSERT_EQ(id, 15, "func_proto_id");
+ err = btf__add_func_param(btf, "p1", 1);
+ ASSERT_OK(err, "p1_res");
+ err = btf__add_func_param(btf, "p2", 2);
+ ASSERT_OK(err, "p2_res");
+
+ t = btf__type_by_id(btf, 15);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC_PROTO, "func_proto_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "func_proto_vlen");
+ ASSERT_EQ(t->type, 1, "func_proto_ret_type");
+ p = btf_params(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p1", "p1_name");
+ ASSERT_EQ(p->type, 1, "p1_type");
+ p = btf_params(t) + 1;
+ ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p2", "p2_name");
+ ASSERT_EQ(p->type, 2, "p2_type");
+
+ /* VAR */
+ id = btf__add_var(btf, "var1", BTF_VAR_GLOBAL_ALLOCATED, 1);
+ ASSERT_EQ(id, 16, "var_id");
+ t = btf__type_by_id(btf, 16);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "var1", "var_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_VAR, "var_kind");
+ ASSERT_EQ(t->type, 1, "var_type");
+	ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_linkage");
+
+ /* DATASECT */
+ id = btf__add_datasec(btf, "datasec1", 12);
+ ASSERT_EQ(id, 17, "datasec_id");
+ err = btf__add_datasec_var_info(btf, 1, 4, 8);
+ ASSERT_OK(err, "v1_res");
+
+ t = btf__type_by_id(btf, 17);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "datasec1", "datasec_name");
+ ASSERT_EQ(t->size, 12, "datasec_sz");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_DATASEC, "datasec_kind");
+ ASSERT_EQ(btf_vlen(t), 1, "datasec_vlen");
+ vi = btf_var_secinfos(t) + 0;
+ ASSERT_EQ(vi->type, 1, "v1_type");
+ ASSERT_EQ(vi->offset, 4, "v1_off");
+ ASSERT_EQ(vi->size, 8, "v1_sz");
+
+ btf__free(btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
new file mode 100644
index 000000000000..643dfa35419c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+#include "cg_storage_multi_egress_only.skel.h"
+#include "cg_storage_multi_isolated.skel.h"
+#include "cg_storage_multi_shared.skel.h"
+
+#define PARENT_CGROUP "/cgroup_storage"
+#define CHILD_CGROUP "/cgroup_storage/child"
+
+static int duration;
+
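+/* The assert helpers below follow CHECK() semantics: they return true on
+ * failure so that callers can bail out with goto.
+ */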
+static bool assert_storage(struct bpf_map *map, const void *key,
+ struct cgroup_value *expected)
+{
+ struct cgroup_value value;
+ int map_fd;
+
+ map_fd = bpf_map__fd(map);
+
+ if (CHECK(bpf_map_lookup_elem(map_fd, key, &value) < 0,
+ "map-lookup", "errno %d", errno))
+ return true;
+ if (CHECK(memcmp(&value, expected, sizeof(struct cgroup_value)),
+ "assert-storage", "storages differ"))
+ return true;
+
+ return false;
+}
+
+static bool assert_storage_noexist(struct bpf_map *map, const void *key)
+{
+ struct cgroup_value value;
+ int map_fd;
+
+ map_fd = bpf_map__fd(map);
+
+ if (CHECK(bpf_map_lookup_elem(map_fd, key, &value) == 0,
+ "map-lookup", "succeeded, expected ENOENT"))
+ return true;
+ if (CHECK(errno != ENOENT,
+ "map-lookup", "errno %d, expected ENOENT", errno))
+ return true;
+
+ return false;
+}
+
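+/* Join the given cgroup, then create a UDP client/server pair and send one
+ * datagram to trigger the attached egress/ingress programs. Returns true on
+ * failure.
+ */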
+static bool connect_send(const char *cgroup_path)
+{
+ bool res = true;
+ int server_fd = -1, client_fd = -1;
+
+ if (join_cgroup(cgroup_path))
+ goto out_clean;
+
+ server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
+ if (server_fd < 0)
+ goto out_clean;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (client_fd < 0)
+ goto out_clean;
+
+ if (send(client_fd, "message", strlen("message"), 0) < 0)
+ goto out_clean;
+
+ res = false;
+
+out_clean:
+ close(client_fd);
+ close(server_fd);
+ return res;
+}
+
+static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
+{
+ struct cg_storage_multi_egress_only *obj;
+ struct cgroup_value expected_cgroup_value;
+ struct bpf_cgroup_storage_key key;
+ struct bpf_link *parent_link = NULL, *child_link = NULL;
+ bool err;
+
+ key.attach_type = BPF_CGROUP_INET_EGRESS;
+
+ obj = cg_storage_multi_egress_only__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ /* Attach to parent cgroup, trigger packet from child.
+	 * Assert that there is only one run and that it uses the parent
+	 * cgroup's storage.
+	 * Also assert that the child cgroup's storage does not exist.
+ */
+ parent_link = bpf_program__attach_cgroup(obj->progs.egress,
+ parent_cgroup_fd);
+ if (CHECK(IS_ERR(parent_link), "parent-cg-attach",
+ "err %ld", PTR_ERR(parent_link)))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "first-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 1,
+ "first-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+ if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+ goto close_bpf_object;
+
+ /* Attach to parent and child cgroup, trigger packet from child.
+	 * Assert that there are two additional runs, one using the parent
+	 * cgroup's storage and one using the child cgroup's storage.
+ */
+ child_link = bpf_program__attach_cgroup(obj->progs.egress,
+ child_cgroup_fd);
+ if (CHECK(IS_ERR(child_link), "child-cg-attach",
+ "err %ld", PTR_ERR(child_link)))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "second-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 3,
+ "second-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+
+close_bpf_object:
+ if (!IS_ERR(parent_link))
+ bpf_link__destroy(parent_link);
+ if (!IS_ERR(child_link))
+ bpf_link__destroy(child_link);
+
+ cg_storage_multi_egress_only__destroy(obj);
+}
+
+static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
+{
+ struct cg_storage_multi_isolated *obj;
+ struct cgroup_value expected_cgroup_value;
+ struct bpf_cgroup_storage_key key;
+ struct bpf_link *parent_egress1_link = NULL, *parent_egress2_link = NULL;
+ struct bpf_link *child_egress1_link = NULL, *child_egress2_link = NULL;
+ struct bpf_link *parent_ingress_link = NULL, *child_ingress_link = NULL;
+ bool err;
+
+ obj = cg_storage_multi_isolated__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ /* Attach to parent cgroup, trigger packet from child.
+	 * Assert that there are three runs, two with parent cgroup egress and
+	 * one with parent cgroup ingress, stored in separate parent storages.
+	 * Also assert that the child cgroup's storages do not exist.
+ */
+ parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+ parent_cgroup_fd);
+ if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
+ "err %ld", PTR_ERR(parent_egress1_link)))
+ goto close_bpf_object;
+ parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+ parent_cgroup_fd);
+ if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
+ "err %ld", PTR_ERR(parent_egress2_link)))
+ goto close_bpf_object;
+ parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+ parent_cgroup_fd);
+ if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
+ "err %ld", PTR_ERR(parent_ingress_link)))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "first-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 3,
+ "first-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+ key.attach_type = BPF_CGROUP_INET_EGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.attach_type = BPF_CGROUP_INET_INGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 1 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+ key.attach_type = BPF_CGROUP_INET_EGRESS;
+ if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+ goto close_bpf_object;
+ key.attach_type = BPF_CGROUP_INET_INGRESS;
+ if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+ goto close_bpf_object;
+
+ /* Attach to parent and child cgroup, trigger packet from child.
+	 * Assert that there are six additional runs: parent cgroup egresses
+	 * and ingress, child cgroup egresses and ingress.
+	 * Assert that egress and ingress storages are separate.
+ */
+ child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+ child_cgroup_fd);
+ if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
+ "err %ld", PTR_ERR(child_egress1_link)))
+ goto close_bpf_object;
+ child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+ child_cgroup_fd);
+ if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
+ "err %ld", PTR_ERR(child_egress2_link)))
+ goto close_bpf_object;
+ child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+ child_cgroup_fd);
+ if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
+ "err %ld", PTR_ERR(child_ingress_link)))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "second-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 9,
+ "second-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+ key.attach_type = BPF_CGROUP_INET_EGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 4 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.attach_type = BPF_CGROUP_INET_INGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 2 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+ key.attach_type = BPF_CGROUP_INET_EGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.attach_type = BPF_CGROUP_INET_INGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 1 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+
+close_bpf_object:
+ if (!IS_ERR(parent_egress1_link))
+ bpf_link__destroy(parent_egress1_link);
+ if (!IS_ERR(parent_egress2_link))
+ bpf_link__destroy(parent_egress2_link);
+ if (!IS_ERR(parent_ingress_link))
+ bpf_link__destroy(parent_ingress_link);
+ if (!IS_ERR(child_egress1_link))
+ bpf_link__destroy(child_egress1_link);
+ if (!IS_ERR(child_egress2_link))
+ bpf_link__destroy(child_egress2_link);
+ if (!IS_ERR(child_ingress_link))
+ bpf_link__destroy(child_ingress_link);
+
+ cg_storage_multi_isolated__destroy(obj);
+}
+
+static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
+{
+ struct cg_storage_multi_shared *obj;
+ struct cgroup_value expected_cgroup_value;
+ __u64 key;
+ struct bpf_link *parent_egress1_link = NULL, *parent_egress2_link = NULL;
+ struct bpf_link *child_egress1_link = NULL, *child_egress2_link = NULL;
+ struct bpf_link *parent_ingress_link = NULL, *child_ingress_link = NULL;
+ bool err;
+
+ obj = cg_storage_multi_shared__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ /* Attach to parent cgroup, trigger packet from child.
+	 * Assert that there are three runs, two with parent cgroup egress and
+	 * one with parent cgroup ingress.
+	 * Also assert that the child cgroup's storage does not exist.
+ */
+ parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+ parent_cgroup_fd);
+ if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
+ "err %ld", PTR_ERR(parent_egress1_link)))
+ goto close_bpf_object;
+ parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+ parent_cgroup_fd);
+ if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
+ "err %ld", PTR_ERR(parent_egress2_link)))
+ goto close_bpf_object;
+ parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+ parent_cgroup_fd);
+ if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
+ "err %ld", PTR_ERR(parent_ingress_link)))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "first-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 3,
+ "first-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key = get_cgroup_id(PARENT_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) {
+ .egress_pkts = 2,
+ .ingress_pkts = 1,
+ };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key = get_cgroup_id(CHILD_CGROUP);
+ if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+ goto close_bpf_object;
+
+ /* Attach to parent and child cgroup, trigger packet from child.
+	 * Assert that there are six additional runs: parent cgroup egresses
+	 * and ingress, child cgroup egresses and ingress.
+ */
+ child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+ child_cgroup_fd);
+ if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
+ "err %ld", PTR_ERR(child_egress1_link)))
+ goto close_bpf_object;
+ child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+ child_cgroup_fd);
+ if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
+ "err %ld", PTR_ERR(child_egress2_link)))
+ goto close_bpf_object;
+ child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+ child_cgroup_fd);
+ if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
+ "err %ld", PTR_ERR(child_ingress_link)))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "second-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 9,
+ "second-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key = get_cgroup_id(PARENT_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) {
+ .egress_pkts = 4,
+ .ingress_pkts = 2,
+ };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key = get_cgroup_id(CHILD_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) {
+ .egress_pkts = 2,
+ .ingress_pkts = 1,
+ };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+
+close_bpf_object:
+ if (!IS_ERR(parent_egress1_link))
+ bpf_link__destroy(parent_egress1_link);
+ if (!IS_ERR(parent_egress2_link))
+ bpf_link__destroy(parent_egress2_link);
+ if (!IS_ERR(parent_ingress_link))
+ bpf_link__destroy(parent_ingress_link);
+ if (!IS_ERR(child_egress1_link))
+ bpf_link__destroy(child_egress1_link);
+ if (!IS_ERR(child_egress2_link))
+ bpf_link__destroy(child_egress2_link);
+ if (!IS_ERR(child_ingress_link))
+ bpf_link__destroy(child_ingress_link);
+
+ cg_storage_multi_shared__destroy(obj);
+}
+
+void test_cg_storage_multi(void)
+{
+ int parent_cgroup_fd = -1, child_cgroup_fd = -1;
+
+ parent_cgroup_fd = test__join_cgroup(PARENT_CGROUP);
+ if (CHECK(parent_cgroup_fd < 0, "cg-create-parent", "errno %d", errno))
+ goto close_cgroup_fd;
+ child_cgroup_fd = create_and_get_cgroup(CHILD_CGROUP);
+ if (CHECK(child_cgroup_fd < 0, "cg-create-child", "errno %d", errno))
+ goto close_cgroup_fd;
+
+ if (test__start_subtest("egress_only"))
+ test_egress_only(parent_cgroup_fd, child_cgroup_fd);
+
+ if (test__start_subtest("isolated"))
+ test_isolated(parent_cgroup_fd, child_cgroup_fd);
+
+ if (test__start_subtest("shared"))
+ test_shared(parent_cgroup_fd, child_cgroup_fd);
+
+close_cgroup_fd:
+ close(child_cgroup_fd);
+ close(parent_cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
index 139f8e82c7c6..b549fcfacc0b 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -230,6 +230,13 @@ void test_cgroup_attach_multi(void)
"prog_replace", "errno=%d\n", errno))
goto err;
+ /* replace program with itself */
+ attach_opts.replace_prog_fd = allow_prog[6];
+ if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+ BPF_CGROUP_INET_EGRESS, &attach_opts),
+ "prog_replace", "errno=%d\n", errno))
+ goto err;
+
value = 0;
CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
CHECK_FAIL(system(PING_CMD));
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
index 6e04f8d1d15b..4d9b514b3fd9 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -2,6 +2,7 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
+#include "testing_helpers.h"
#include "test_cgroup_link.skel.h"
static __u32 duration = 0;
@@ -37,7 +38,8 @@ void test_cgroup_link(void)
int last_cg = ARRAY_SIZE(cgs) - 1, cg_nr = ARRAY_SIZE(cgs);
DECLARE_LIBBPF_OPTS(bpf_link_update_opts, link_upd_opts);
struct bpf_link *links[ARRAY_SIZE(cgs)] = {}, *tmp_link;
- __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags;
+ __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags, prog_id;
+ struct bpf_link_info info;
int i = 0, err, prog_fd;
bool detach_legacy = false;
@@ -219,6 +221,22 @@ void test_cgroup_link(void)
/* BPF programs should still get called */
ping_and_check(0, cg_nr);
+ prog_id = link_info_prog_id(links[0], &info);
+ CHECK(prog_id == 0, "link_info", "failed\n");
+ CHECK(info.cgroup.cgroup_id == 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id);
+
+ err = bpf_link__detach(links[0]);
+ if (CHECK(err, "link_detach", "failed %d\n", err))
+ goto cleanup;
+
+ /* cgroup_id should be zero in link_info */
+ prog_id = link_info_prog_id(links[0], &info);
+ CHECK(prog_id == 0, "link_info", "failed\n");
+ CHECK(info.cgroup.cgroup_id != 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id);
+
+ /* First BPF program shouldn't be called anymore */
+ ping_and_check(0, cg_nr - 1);
+
/* leave cgroup and remove them, don't detach programs */
cleanup_cgroup_environment();
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
new file mode 100644
index 000000000000..464edc1c1708
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <test_progs.h>
+
+#include "network_helpers.h"
+#include "cgroup_skb_sk_lookup_kern.skel.h"
+
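+/* The out_sk client, created before joining the test cgroup, is expected to
+ * fail to connect, while a client created inside the cgroup should succeed.
+ */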
+static void run_lookup_test(__u16 *g_serv_port, int out_sk)
+{
+ int serv_sk = -1, in_sk = -1, serv_in_sk = -1, err;
+ struct sockaddr_in6 addr = {};
+ socklen_t addr_len = sizeof(addr);
+ __u32 duration = 0;
+
+ serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
+ return;
+
+ err = getsockname(serv_sk, (struct sockaddr *)&addr, &addr_len);
+ if (CHECK(err, "getsockname", "errno %d\n", errno))
+ goto cleanup;
+
+ *g_serv_port = addr.sin6_port;
+
+ /* Client outside of test cgroup should fail to connect by timeout. */
+ err = connect_fd_to_fd(out_sk, serv_sk, 1000);
+ if (CHECK(!err || errno != EINPROGRESS, "connect_fd_to_fd",
+ "unexpected result err %d errno %d\n", err, errno))
+ goto cleanup;
+
+ /* Client inside test cgroup should connect just fine. */
+ in_sk = connect_to_fd(serv_sk, 0);
+ if (CHECK(in_sk < 0, "connect_to_fd", "errno %d\n", errno))
+ goto cleanup;
+
+ serv_in_sk = accept(serv_sk, NULL, NULL);
+ if (CHECK(serv_in_sk < 0, "accept", "errno %d\n", errno))
+ goto cleanup;
+
+cleanup:
+ close(serv_in_sk);
+ close(in_sk);
+ close(serv_sk);
+}
+
+static void run_cgroup_bpf_test(const char *cg_path, int out_sk)
+{
+ struct cgroup_skb_sk_lookup_kern *skel;
+ struct bpf_link *link;
+ __u32 duration = 0;
+ int cgfd = -1;
+
+ skel = cgroup_skb_sk_lookup_kern__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
+ return;
+
+ cgfd = test__join_cgroup(cg_path);
+ if (CHECK(cgfd < 0, "cgroup_join", "cgroup setup failed\n"))
+ goto cleanup;
+
+ link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd);
+ if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link)))
+ goto cleanup;
+
+ run_lookup_test(&skel->bss->g_serv_port, out_sk);
+
+ bpf_link__destroy(link);
+
+cleanup:
+ close(cgfd);
+ cgroup_skb_sk_lookup_kern__destroy(skel);
+}
+
+void test_cgroup_skb_sk_lookup(void)
+{
+ const char *cg_path = "/foo";
+ int out_sk;
+
+	/* Create a socket before joining the testing cgroup so that its
+	 * cgroup id differs from that of the testing cgroup. Moving the
+	 * selftests process into the testing cgroup won't change the cgroup
+	 * id of an already created socket.
+ */
+ out_sk = socket(AF_INET6, SOCK_STREAM, 0);
+ if (CHECK_FAIL(out_sk < 0))
+ return;
+
+ run_cgroup_bpf_test(cg_path, out_sk);
+
+ close(out_sk);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
new file mode 100644
index 000000000000..9781d85cb223
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -0,0 +1,498 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <string.h>
+
+#include <linux/pkt_cls.h>
+
+#include <test_progs.h>
+
+#include "progs/test_cls_redirect.h"
+#include "test_cls_redirect.skel.h"
+#include "test_cls_redirect_subprogs.skel.h"
+
+#define ENCAP_IP INADDR_LOOPBACK
+#define ENCAP_PORT (1234)
+
+static int duration = 0;
+
+struct addr_port {
+ in_port_t port;
+ union {
+ struct in_addr in_addr;
+ struct in6_addr in6_addr;
+ };
+};
+
+struct tuple {
+ int family;
+ struct addr_port src;
+ struct addr_port dst;
+};
+
+static int start_server(const struct sockaddr *addr, socklen_t len, int type)
+{
+ int fd = socket(addr->sa_family, type, 0);
+ if (CHECK_FAIL(fd == -1))
+ return -1;
+ if (CHECK_FAIL(bind(fd, addr, len) == -1))
+ goto err;
+ if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1))
+ goto err;
+
+ return fd;
+
+err:
+ close(fd);
+ return -1;
+}
+
+static int connect_to_server(const struct sockaddr *addr, socklen_t len,
+ int type)
+{
+ int fd = socket(addr->sa_family, type, 0);
+ if (CHECK_FAIL(fd == -1))
+ return -1;
+ if (CHECK_FAIL(connect(fd, addr, len)))
+ goto err;
+
+ return fd;
+
+err:
+ close(fd);
+ return -1;
+}
+
+static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap)
+{
+ const struct sockaddr_in6 *in6;
+ const struct sockaddr_in *in;
+
+ switch (sa->sa_family) {
+ case AF_INET:
+ in = (const struct sockaddr_in *)sa;
+ ap->in_addr = in->sin_addr;
+ ap->port = in->sin_port;
+ return true;
+
+ case AF_INET6:
+ in6 = (const struct sockaddr_in6 *)sa;
+ ap->in6_addr = in6->sin6_addr;
+ ap->port = in6->sin6_port;
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type,
+ int *server, int *conn, struct tuple *tuple)
+{
+ struct sockaddr_storage ss;
+ socklen_t slen = sizeof(ss);
+ struct sockaddr *sa = (struct sockaddr *)&ss;
+
+ *server = start_server(addr, len, type);
+ if (*server < 0)
+ return false;
+
+ if (CHECK_FAIL(getsockname(*server, sa, &slen)))
+ goto close_server;
+
+ *conn = connect_to_server(sa, slen, type);
+ if (*conn < 0)
+ goto close_server;
+
+ /* We want to simulate packets arriving at conn, so we have to
+ * swap src and dst.
+ */
+ slen = sizeof(ss);
+ if (CHECK_FAIL(getsockname(*conn, sa, &slen)))
+ goto close_conn;
+
+ if (CHECK_FAIL(!fill_addr_port(sa, &tuple->dst)))
+ goto close_conn;
+
+ slen = sizeof(ss);
+ if (CHECK_FAIL(getpeername(*conn, sa, &slen)))
+ goto close_conn;
+
+ if (CHECK_FAIL(!fill_addr_port(sa, &tuple->src)))
+ goto close_conn;
+
+ tuple->family = ss.ss_family;
+ return true;
+
+close_conn:
+ close(*conn);
+ *conn = -1;
+close_server:
+ close(*server);
+ *server = -1;
+ return false;
+}
+
+static socklen_t prepare_addr(struct sockaddr_storage *addr, int family)
+{
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)addr;
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = family;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ return sizeof(*addr4);
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)addr;
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = family;
+ addr6->sin6_addr = in6addr_loopback;
+ return sizeof(*addr6);
+ default:
+ fprintf(stderr, "Invalid family %d", family);
+ return 0;
+ }
+}
+
+static bool was_decapsulated(struct bpf_prog_test_run_attr *tattr)
+{
+ return tattr->data_size_out < tattr->data_size_in;
+}
+
+enum type {
+ UDP,
+ TCP,
+ __NR_KIND,
+};
+
+enum hops {
+ NO_HOPS,
+ ONE_HOP,
+};
+
+enum flags {
+ NONE,
+ SYN,
+ ACK,
+};
+
+enum conn {
+ KNOWN_CONN,
+ UNKNOWN_CONN,
+};
+
+enum result {
+ ACCEPT,
+ FORWARD,
+};
+
+struct test_cfg {
+ enum type type;
+ enum result result;
+ enum conn conn;
+ enum hops hops;
+ enum flags flags;
+};
+
+static int test_str(void *buf, size_t len, const struct test_cfg *test,
+ int family)
+{
+ const char *family_str, *type, *conn, *hops, *result, *flags;
+
+ family_str = "IPv4";
+ if (family == AF_INET6)
+ family_str = "IPv6";
+
+ type = "TCP";
+ if (test->type == UDP)
+ type = "UDP";
+
+ conn = "known";
+ if (test->conn == UNKNOWN_CONN)
+ conn = "unknown";
+
+ hops = "no hops";
+ if (test->hops == ONE_HOP)
+ hops = "one hop";
+
+ result = "accept";
+ if (test->result == FORWARD)
+ result = "forward";
+
+ flags = "none";
+ if (test->flags == SYN)
+ flags = "SYN";
+ else if (test->flags == ACK)
+ flags = "ACK";
+
+ return snprintf(buf, len, "%s %s %s %s (%s, flags: %s)", family_str,
+ type, result, conn, hops, flags);
+}
+
+static struct test_cfg tests[] = {
+ { TCP, ACCEPT, UNKNOWN_CONN, NO_HOPS, SYN },
+ { TCP, ACCEPT, UNKNOWN_CONN, NO_HOPS, ACK },
+ { TCP, FORWARD, UNKNOWN_CONN, ONE_HOP, ACK },
+ { TCP, ACCEPT, KNOWN_CONN, ONE_HOP, ACK },
+ { UDP, ACCEPT, UNKNOWN_CONN, NO_HOPS, NONE },
+ { UDP, FORWARD, UNKNOWN_CONN, ONE_HOP, NONE },
+ { UDP, ACCEPT, KNOWN_CONN, ONE_HOP, NONE },
+};
+
+static void encap_init(encap_headers_t *encap, uint8_t hop_count, uint8_t proto)
+{
+ const uint8_t hlen =
+ (sizeof(struct guehdr) / sizeof(uint32_t)) + hop_count;
+ *encap = (encap_headers_t){
+ .eth = { .h_proto = htons(ETH_P_IP) },
+ .ip = {
+ .ihl = 5,
+ .version = 4,
+ .ttl = IPDEFTTL,
+ .protocol = IPPROTO_UDP,
+ .daddr = htonl(ENCAP_IP)
+ },
+ .udp = {
+ .dest = htons(ENCAP_PORT),
+ },
+ .gue = {
+ .hlen = hlen,
+ .proto_ctype = proto
+ },
+ .unigue = {
+ .hop_count = hop_count
+ },
+ };
+}
+
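+/* Build a GUE-encapsulated test packet for the given tuple and test config;
+ * returns the number of bytes written to buf.
+ */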
+static size_t build_input(const struct test_cfg *test, void *const buf,
+ const struct tuple *tuple)
+{
+ in_port_t sport = tuple->src.port;
+ encap_headers_t encap;
+ struct iphdr ip;
+ struct ipv6hdr ipv6;
+ struct tcphdr tcp;
+ struct udphdr udp;
+ struct in_addr next_hop;
+ uint8_t *p = buf;
+ int proto;
+
+ proto = IPPROTO_IPIP;
+ if (tuple->family == AF_INET6)
+ proto = IPPROTO_IPV6;
+
+ encap_init(&encap, test->hops == ONE_HOP ? 1 : 0, proto);
+ p = mempcpy(p, &encap, sizeof(encap));
+
+ if (test->hops == ONE_HOP) {
+ next_hop = (struct in_addr){ .s_addr = htonl(0x7f000002) };
+ p = mempcpy(p, &next_hop, sizeof(next_hop));
+ }
+
+ proto = IPPROTO_TCP;
+ if (test->type == UDP)
+ proto = IPPROTO_UDP;
+
+ switch (tuple->family) {
+ case AF_INET:
+ ip = (struct iphdr){
+ .ihl = 5,
+ .version = 4,
+ .ttl = IPDEFTTL,
+ .protocol = proto,
+ .saddr = tuple->src.in_addr.s_addr,
+ .daddr = tuple->dst.in_addr.s_addr,
+ };
+ p = mempcpy(p, &ip, sizeof(ip));
+ break;
+ case AF_INET6:
+ ipv6 = (struct ipv6hdr){
+ .version = 6,
+ .hop_limit = IPDEFTTL,
+ .nexthdr = proto,
+ .saddr = tuple->src.in6_addr,
+ .daddr = tuple->dst.in6_addr,
+ };
+ p = mempcpy(p, &ipv6, sizeof(ipv6));
+ break;
+ default:
+ return 0;
+ }
+
+ if (test->conn == UNKNOWN_CONN)
+ sport--;
+
+ switch (test->type) {
+ case TCP:
+ tcp = (struct tcphdr){
+ .source = sport,
+ .dest = tuple->dst.port,
+ };
+ if (test->flags == SYN)
+ tcp.syn = true;
+ if (test->flags == ACK)
+ tcp.ack = true;
+ p = mempcpy(p, &tcp, sizeof(tcp));
+ break;
+ case UDP:
+ udp = (struct udphdr){
+ .source = sport,
+ .dest = tuple->dst.port,
+ };
+ p = mempcpy(p, &udp, sizeof(udp));
+ break;
+ default:
+ return 0;
+ }
+
+ return (void *)p - buf;
+}
+
+static void close_fds(int *fds, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ if (fds[i] > 0)
+ close(fds[i]);
+}
+
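+/* Set up one UDP and one TCP connection per address family, run each test
+ * case through bpf_prog_test_run_xattr() and check whether the packet was
+ * decapsulated (ACCEPT) or passed through unchanged (FORWARD).
+ */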
+static void test_cls_redirect_common(struct bpf_program *prog)
+{
+ struct bpf_prog_test_run_attr tattr = {};
+ int families[] = { AF_INET, AF_INET6 };
+ struct sockaddr_storage ss;
+ struct sockaddr *addr;
+ socklen_t slen;
+ int i, j, err;
+ int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
+ int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
+ struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
+
+ addr = (struct sockaddr *)&ss;
+ for (i = 0; i < ARRAY_SIZE(families); i++) {
+ slen = prepare_addr(&ss, families[i]);
+ if (CHECK_FAIL(!slen))
+ goto cleanup;
+
+ if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_DGRAM,
+ &servers[UDP][i], &conns[UDP][i],
+ &tuples[UDP][i])))
+ goto cleanup;
+
+ if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_STREAM,
+ &servers[TCP][i], &conns[TCP][i],
+ &tuples[TCP][i])))
+ goto cleanup;
+ }
+
+ tattr.prog_fd = bpf_program__fd(prog);
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ struct test_cfg *test = &tests[i];
+
+ for (j = 0; j < ARRAY_SIZE(families); j++) {
+ struct tuple *tuple = &tuples[test->type][j];
+ char input[256];
+ char tmp[256];
+
+ test_str(tmp, sizeof(tmp), test, tuple->family);
+ if (!test__start_subtest(tmp))
+ continue;
+
+ tattr.data_out = tmp;
+ tattr.data_size_out = sizeof(tmp);
+
+ tattr.data_in = input;
+ tattr.data_size_in = build_input(test, input, tuple);
+ if (CHECK_FAIL(!tattr.data_size_in))
+ continue;
+
+ err = bpf_prog_test_run_xattr(&tattr);
+ if (CHECK_FAIL(err))
+ continue;
+
+ if (tattr.retval != TC_ACT_REDIRECT) {
+ PRINT_FAIL("expected TC_ACT_REDIRECT, got %d\n",
+ tattr.retval);
+ continue;
+ }
+
+ switch (test->result) {
+ case ACCEPT:
+ if (CHECK_FAIL(!was_decapsulated(&tattr)))
+ continue;
+ break;
+ case FORWARD:
+ if (CHECK_FAIL(was_decapsulated(&tattr)))
+ continue;
+ break;
+ default:
+ PRINT_FAIL("unknown result %d\n", test->result);
+ continue;
+ }
+ }
+ }
+
+cleanup:
+ close_fds((int *)servers, sizeof(servers) / sizeof(servers[0][0]));
+ close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0]));
+}
+
+static void test_cls_redirect_inlined(void)
+{
+ struct test_cls_redirect *skel;
+ int err;
+
+ skel = test_cls_redirect__open();
+ if (CHECK(!skel, "skel_open", "failed\n"))
+ return;
+
+ skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+ skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+ err = test_cls_redirect__load(skel);
+ if (CHECK(err, "skel_load", "failed: %d\n", err))
+ goto cleanup;
+
+ test_cls_redirect_common(skel->progs.cls_redirect);
+
+cleanup:
+ test_cls_redirect__destroy(skel);
+}
+
+static void test_cls_redirect_subprogs(void)
+{
+ struct test_cls_redirect_subprogs *skel;
+ int err;
+
+ skel = test_cls_redirect_subprogs__open();
+ if (CHECK(!skel, "skel_open", "failed\n"))
+ return;
+
+ skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+ skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+ err = test_cls_redirect_subprogs__load(skel);
+ if (CHECK(err, "skel_load", "failed: %d\n", err))
+ goto cleanup;
+
+ test_cls_redirect_common(skel->progs.cls_redirect);
+
+cleanup:
+ test_cls_redirect_subprogs__destroy(skel);
+}
+
+void test_cls_redirect(void)
+{
+ if (test__start_subtest("cls_redirect_inlined"))
+ test_cls_redirect_inlined();
+ if (test__start_subtest("cls_redirect_subprogs"))
+ test_cls_redirect_subprogs();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
new file mode 100644
index 000000000000..9229db2f5ca5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
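+/* Check that the local and peer ports reported by getsockname() and
+ * getpeername() match the values the BPF programs are expected to enforce.
+ */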
+static int verify_ports(int family, int fd,
+ __u16 expected_local, __u16 expected_peer)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ __u16 port;
+
+ if (getsockname(fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
+ return -1;
+ }
+
+ if (family == AF_INET)
+ port = ((struct sockaddr_in *)&addr)->sin_port;
+ else
+ port = ((struct sockaddr_in6 *)&addr)->sin6_port;
+
+ if (ntohs(port) != expected_local) {
+ log_err("Unexpected local port %d, expected %d", ntohs(port),
+ expected_local);
+ return -1;
+ }
+
+ if (getpeername(fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get peer addr");
+ return -1;
+ }
+
+ if (family == AF_INET)
+ port = ((struct sockaddr_in *)&addr)->sin_port;
+ else
+ port = ((struct sockaddr_in6 *)&addr)->sin6_port;
+
+ if (ntohs(port) != expected_peer) {
+ log_err("Unexpected peer port %d, expected %d", ntohs(port),
+ expected_peer);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int run_test(int cgroup_fd, int server_fd, int family, int type)
+{
+ bool v4 = family == AF_INET;
+ __u16 expected_local_port = v4 ? 22222 : 22223;
+ __u16 expected_peer_port = 60000;
+ struct bpf_prog_load_attr attr = {
+ .file = v4 ? "./connect_force_port4.o" :
+ "./connect_force_port6.o",
+ };
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int xlate_fd, fd, err;
+ __u32 duration = 0;
+
+ err = bpf_prog_load_xattr(&attr, &obj, &xlate_fd);
+ if (err) {
+ log_err("Failed to load BPF object");
+ return -1;
+ }
+
+ prog = bpf_object__find_program_by_title(obj, v4 ?
+ "cgroup/connect4" :
+ "cgroup/connect6");
+ if (CHECK(!prog, "find_prog", "connect prog not found\n")) {
+ err = -EIO;
+ goto close_bpf_object;
+ }
+
+ err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, v4 ?
+ BPF_CGROUP_INET4_CONNECT :
+ BPF_CGROUP_INET6_CONNECT, 0);
+ if (err) {
+ log_err("Failed to attach BPF program");
+ goto close_bpf_object;
+ }
+
+ prog = bpf_object__find_program_by_title(obj, v4 ?
+ "cgroup/getpeername4" :
+ "cgroup/getpeername6");
+ if (CHECK(!prog, "find_prog", "getpeername prog not found\n")) {
+ err = -EIO;
+ goto close_bpf_object;
+ }
+
+ err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, v4 ?
+ BPF_CGROUP_INET4_GETPEERNAME :
+ BPF_CGROUP_INET6_GETPEERNAME, 0);
+ if (err) {
+ log_err("Failed to attach BPF program");
+ goto close_bpf_object;
+ }
+
+ prog = bpf_object__find_program_by_title(obj, v4 ?
+ "cgroup/getsockname4" :
+ "cgroup/getsockname6");
+ if (CHECK(!prog, "find_prog", "getsockname prog not found\n")) {
+ err = -EIO;
+ goto close_bpf_object;
+ }
+
+ err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, v4 ?
+ BPF_CGROUP_INET4_GETSOCKNAME :
+ BPF_CGROUP_INET6_GETSOCKNAME, 0);
+ if (err) {
+ log_err("Failed to attach BPF program");
+ goto close_bpf_object;
+ }
+
+ fd = connect_to_fd(server_fd, 0);
+ if (fd < 0) {
+ err = -1;
+ goto close_bpf_object;
+ }
+
+ err = verify_ports(family, fd, expected_local_port,
+ expected_peer_port);
+ close(fd);
+
+close_bpf_object:
+ bpf_object__close(obj);
+ return err;
+}
+
+void test_connect_force_port(void)
+{
+ int server_fd, cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/connect_force_port");
+ if (CHECK_FAIL(cgroup_fd < 0))
+ return;
+
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 60123, 0);
+ if (CHECK_FAIL(server_fd < 0))
+ goto close_cgroup_fd;
+ CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM));
+ close(server_fd);
+
+ server_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 60124, 0);
+ if (CHECK_FAIL(server_fd < 0))
+ goto close_cgroup_fd;
+ CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM));
+ close(server_fd);
+
+ server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 60123, 0);
+ if (CHECK_FAIL(server_fd < 0))
+ goto close_cgroup_fd;
+ CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM));
+ close(server_fd);
+
+ server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 60124, 0);
+ if (CHECK_FAIL(server_fd < 0))
+ goto close_cgroup_fd;
+ CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM));
+ close(server_fd);
+
+close_cgroup_fd:
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
new file mode 100644
index 000000000000..981c251453d9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+/* Real layout and sizes according to the test's (32-bit) BTF; these need to
+ * be defined before the skeleton is included. */
+struct test_struct___real {
+ unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */
+ unsigned int val2;
+ unsigned long long val1;
+ unsigned short val3;
+ unsigned char val4;
+ unsigned char _pad;
+};
+
+#include "test_core_autosize.skel.h"
+
+static int duration = 0;
+
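+/* Output values written by the BPF programs; they are read back through the
+ * object's .bss map, so this layout must mirror the BPF-side globals.
+ */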
+static struct {
+ unsigned long long ptr_samesized;
+ unsigned long long val1_samesized;
+ unsigned long long val2_samesized;
+ unsigned long long val3_samesized;
+ unsigned long long val4_samesized;
+ struct test_struct___real output_samesized;
+
+ unsigned long long ptr_downsized;
+ unsigned long long val1_downsized;
+ unsigned long long val2_downsized;
+ unsigned long long val3_downsized;
+ unsigned long long val4_downsized;
+ struct test_struct___real output_downsized;
+
+ unsigned long long ptr_probed;
+ unsigned long long val1_probed;
+ unsigned long long val2_probed;
+ unsigned long long val3_probed;
+ unsigned long long val4_probed;
+
+ unsigned long long ptr_signed;
+ unsigned long long val1_signed;
+ unsigned long long val2_signed;
+ unsigned long long val3_signed;
+ unsigned long long val4_signed;
+ struct test_struct___real output_signed;
+} out;
+
+void test_core_autosize(void)
+{
+ char btf_file[] = "/tmp/core_autosize.btf.XXXXXX";
+ int err, fd = -1, zero = 0;
+ int char_id, short_id, int_id, long_long_id, void_ptr_id, id;
+ struct test_core_autosize* skel = NULL;
+ struct bpf_object_load_attr load_attr = {};
+ struct bpf_program *prog;
+ struct bpf_map *bss_map;
+ struct btf *btf = NULL;
+ size_t written;
+ const void *raw_data;
+ __u32 raw_sz;
+ FILE *f = NULL;
+
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "empty_btf"))
+ return;
+ /* Emit the following struct with 32-bit pointer size:
+ *
+ * struct test_struct {
+ * void *ptr;
+ * unsigned long val2;
+ * unsigned long long val1;
+ * unsigned short val3;
+ * unsigned char val4;
+ * char: 8;
+ * };
+ *
+ * This struct is going to be used as the "kernel BTF" for this test.
+	 * It's equivalent memory-layout-wise to test_struct___real above.
+ */
+
+ /* force 32-bit pointer size */
+ btf__set_pointer_size(btf, 4);
+
+ char_id = btf__add_int(btf, "unsigned char", 1, 0);
+ ASSERT_EQ(char_id, 1, "char_id");
+ short_id = btf__add_int(btf, "unsigned short", 2, 0);
+ ASSERT_EQ(short_id, 2, "short_id");
+ /* "long unsigned int" of 4 byte size tells BTF that sizeof(void *) == 4 */
+ int_id = btf__add_int(btf, "long unsigned int", 4, 0);
+ ASSERT_EQ(int_id, 3, "int_id");
+ long_long_id = btf__add_int(btf, "unsigned long long", 8, 0);
+ ASSERT_EQ(long_long_id, 4, "long_long_id");
+ void_ptr_id = btf__add_ptr(btf, 0);
+ ASSERT_EQ(void_ptr_id, 5, "void_ptr_id");
+
+ id = btf__add_struct(btf, "test_struct", 20 /* bytes */);
+ ASSERT_EQ(id, 6, "struct_id");
+ err = btf__add_field(btf, "ptr", void_ptr_id, 0, 0);
+ err = err ?: btf__add_field(btf, "val2", int_id, 32, 0);
+ err = err ?: btf__add_field(btf, "val1", long_long_id, 64, 0);
+ err = err ?: btf__add_field(btf, "val3", short_id, 128, 0);
+ err = err ?: btf__add_field(btf, "val4", char_id, 144, 0);
+ ASSERT_OK(err, "struct_fields");
+
+ fd = mkstemp(btf_file);
+ if (CHECK(fd < 0, "btf_tmp", "failed to create file: %d\n", fd))
+ goto cleanup;
+ f = fdopen(fd, "w");
+ if (!ASSERT_OK_PTR(f, "btf_fdopen"))
+ goto cleanup;
+
+ raw_data = btf__get_raw_data(btf, &raw_sz);
+ if (!ASSERT_OK_PTR(raw_data, "raw_data"))
+ goto cleanup;
+ written = fwrite(raw_data, 1, raw_sz, f);
+ if (CHECK(written != raw_sz, "btf_write", "written: %zu, errno: %d\n", written, errno))
+ goto cleanup;
+ fflush(f);
+ fclose(f);
+ f = NULL;
+ close(fd);
+ fd = -1;
+
+ /* open and load BPF program with custom BTF as the kernel BTF */
+ skel = test_core_autosize__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ /* disable handle_signed() for now */
+ prog = bpf_object__find_program_by_name(skel->obj, "handle_signed");
+ if (!ASSERT_OK_PTR(prog, "prog_find"))
+ goto cleanup;
+ bpf_program__set_autoload(prog, false);
+
+ load_attr.obj = skel->obj;
+ load_attr.target_btf_path = btf_file;
+ err = bpf_object__load_xattr(&load_attr);
+ if (!ASSERT_OK(err, "prog_load"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, "handle_samesize");
+ if (!ASSERT_OK_PTR(prog, "prog_find"))
+ goto cleanup;
+ skel->links.handle_samesize = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(skel->links.handle_samesize, "prog_attach"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, "handle_downsize");
+ if (!ASSERT_OK_PTR(prog, "prog_find"))
+ goto cleanup;
+ skel->links.handle_downsize = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(skel->links.handle_downsize, "prog_attach"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, "handle_probed");
+ if (!ASSERT_OK_PTR(prog, "prog_find"))
+ goto cleanup;
+ skel->links.handle_probed = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(skel->links.handle_probed, "prog_attach"))
+ goto cleanup;
+
+ usleep(1);
+
+ bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss");
+ if (!ASSERT_OK_PTR(bss_map, "bss_map_find"))
+ goto cleanup;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, (void *)&out);
+ if (!ASSERT_OK(err, "bss_lookup"))
+ goto cleanup;
+
+ ASSERT_EQ(out.ptr_samesized, 0x01020304, "ptr_samesized");
+ ASSERT_EQ(out.val1_samesized, 0x1020304050607080, "val1_samesized");
+ ASSERT_EQ(out.val2_samesized, 0x0a0b0c0d, "val2_samesized");
+ ASSERT_EQ(out.val3_samesized, 0xfeed, "val3_samesized");
+ ASSERT_EQ(out.val4_samesized, 0xb9, "val4_samesized");
+ ASSERT_EQ(out.output_samesized.ptr, 0x01020304, "ptr_samesized");
+ ASSERT_EQ(out.output_samesized.val1, 0x1020304050607080, "val1_samesized");
+ ASSERT_EQ(out.output_samesized.val2, 0x0a0b0c0d, "val2_samesized");
+ ASSERT_EQ(out.output_samesized.val3, 0xfeed, "val3_samesized");
+ ASSERT_EQ(out.output_samesized.val4, 0xb9, "val4_samesized");
+
+ ASSERT_EQ(out.ptr_downsized, 0x01020304, "ptr_downsized");
+ ASSERT_EQ(out.val1_downsized, 0x1020304050607080, "val1_downsized");
+ ASSERT_EQ(out.val2_downsized, 0x0a0b0c0d, "val2_downsized");
+ ASSERT_EQ(out.val3_downsized, 0xfeed, "val3_downsized");
+ ASSERT_EQ(out.val4_downsized, 0xb9, "val4_downsized");
+ ASSERT_EQ(out.output_downsized.ptr, 0x01020304, "ptr_downsized");
+ ASSERT_EQ(out.output_downsized.val1, 0x1020304050607080, "val1_downsized");
+ ASSERT_EQ(out.output_downsized.val2, 0x0a0b0c0d, "val2_downsized");
+ ASSERT_EQ(out.output_downsized.val3, 0xfeed, "val3_downsized");
+ ASSERT_EQ(out.output_downsized.val4, 0xb9, "val4_downsized");
+
+ ASSERT_EQ(out.ptr_probed, 0x01020304, "ptr_probed");
+ ASSERT_EQ(out.val1_probed, 0x1020304050607080, "val1_probed");
+ ASSERT_EQ(out.val2_probed, 0x0a0b0c0d, "val2_probed");
+ ASSERT_EQ(out.val3_probed, 0xfeed, "val3_probed");
+ ASSERT_EQ(out.val4_probed, 0xb9, "val4_probed");
+
+ test_core_autosize__destroy(skel);
+ skel = NULL;
+
+ /* now re-load with handle_signed() enabled, it should fail loading */
+ skel = test_core_autosize__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ load_attr.obj = skel->obj;
+ load_attr.target_btf_path = btf_file;
+ err = bpf_object__load_xattr(&load_attr);
+ if (!ASSERT_ERR(err, "bad_prog_load"))
+ goto cleanup;
+
+cleanup:
+ if (f)
+ fclose(f);
+ if (fd >= 0)
+ close(fd);
+ remove(btf_file);
+ btf__free(btf);
+ test_core_autosize__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_extern.c b/tools/testing/selftests/bpf/prog_tests/core_extern.c
index b093787e9448..1931a158510e 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_extern.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_extern.c
@@ -159,8 +159,8 @@ void test_core_extern(void)
exp = (uint64_t *)&t->data;
for (j = 0; j < n; j++) {
CHECK(got[j] != exp[j], "check_res",
- "result #%d: expected %lx, but got %lx\n",
- j, exp[j], got[j]);
+ "result #%d: expected %llx, but got %llx\n",
+ j, (__u64)exp[j], (__u64)got[j]);
}
cleanup:
test_core_extern__destroy(skel);
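
The %llx change above matters because uint64_t does not map to the same underlying type on all ABIs: it is unsigned long on LP64 but unsigned long long on ILP32, so %lx is only correct on 64-bit userspace. A minimal illustration of the portable pattern the hunk adopts (plain C, nothing test-specific):

#include <stdio.h>
#include <stdint.h>

static void print_hex64(uint64_t v)
{
	/* cast to unsigned long long so %llx is correct on both
	 * 32-bit and 64-bit userspace */
	printf("%llx\n", (unsigned long long)v);
}
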
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 31e177adbdf1..30e40ff4b0d8 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -3,6 +3,9 @@
#include "progs/core_reloc_types.h"
#include <sys/mman.h>
#include <sys/syscall.h>
+#include <bpf/btf.h>
+
+static int duration = 0;
#define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name)
@@ -177,14 +180,13 @@
.fails = true, \
}
-#define EXISTENCE_CASE_COMMON(name) \
+#define FIELD_EXISTS_CASE_COMMON(name) \
.case_name = #name, \
.bpf_obj_file = "test_core_reloc_existence.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o", \
- .relaxed_core_relocs = true
+ .btf_src_file = "btf__core_reloc_" #name ".o" \
-#define EXISTENCE_ERR_CASE(name) { \
- EXISTENCE_CASE_COMMON(name), \
+#define FIELD_EXISTS_ERR_CASE(name) { \
+ FIELD_EXISTS_CASE_COMMON(name), \
.fails = true, \
}
@@ -237,8 +239,8 @@
.union_sz = sizeof(((type *)0)->union_field), \
.arr_sz = sizeof(((type *)0)->arr_field), \
.arr_elem_sz = sizeof(((type *)0)->arr_field[0]), \
- .ptr_sz = sizeof(((type *)0)->ptr_field), \
- .enum_sz = sizeof(((type *)0)->enum_field), \
+ .ptr_sz = 8, /* always 8-byte pointer for BPF */ \
+ .enum_sz = sizeof(((type *)0)->enum_field), \
}
#define SIZE_CASE(name) { \
@@ -253,6 +255,61 @@
.fails = true, \
}
+#define TYPE_BASED_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_type_based.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o" \
+
+#define TYPE_BASED_CASE(name, ...) { \
+ TYPE_BASED_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_type_based_output) \
+ __VA_ARGS__, \
+ .output_len = sizeof(struct core_reloc_type_based_output), \
+}
+
+#define TYPE_BASED_ERR_CASE(name) { \
+ TYPE_BASED_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+#define TYPE_ID_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_type_id.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o" \
+
+#define TYPE_ID_CASE(name, setup_fn) { \
+ TYPE_ID_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_type_id_output) {}, \
+ .output_len = sizeof(struct core_reloc_type_id_output), \
+ .setup = setup_fn, \
+}
+
+#define TYPE_ID_ERR_CASE(name) { \
+ TYPE_ID_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+#define ENUMVAL_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_enumval.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".o" \
+
+#define ENUMVAL_CASE(name, ...) { \
+ ENUMVAL_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_enumval_output) \
+ __VA_ARGS__, \
+ .output_len = sizeof(struct core_reloc_enumval_output), \
+}
+
+#define ENUMVAL_ERR_CASE(name) { \
+ ENUMVAL_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+struct core_reloc_test_case;
+
+typedef int (*setup_test_fn)(struct core_reloc_test_case *test);
+
struct core_reloc_test_case {
const char *case_name;
const char *bpf_obj_file;
@@ -264,8 +321,136 @@ struct core_reloc_test_case {
bool fails;
bool relaxed_core_relocs;
bool direct_raw_tp;
+ setup_test_fn setup;
};
+static int find_btf_type(const struct btf *btf, const char *name, __u32 kind)
+{
+ int id;
+
+ id = btf__find_by_name_kind(btf, name, kind);
+ if (CHECK(id <= 0, "find_type_id", "failed to find '%s', kind %d: %d\n", name, kind, id))
+ return -1;
+
+ return id;
+}
+
+static int setup_type_id_case_local(struct core_reloc_test_case *test)
+{
+ struct core_reloc_type_id_output *exp = (void *)test->output;
+ struct btf *local_btf = btf__parse(test->bpf_obj_file, NULL);
+ struct btf *targ_btf = btf__parse(test->btf_src_file, NULL);
+ const struct btf_type *t;
+ const char *name;
+ int i;
+
+ if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) ||
+ CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) {
+ btf__free(local_btf);
+ btf__free(targ_btf);
+ return -EINVAL;
+ }
+
+ exp->local_anon_struct = -1;
+ exp->local_anon_union = -1;
+ exp->local_anon_enum = -1;
+ exp->local_anon_func_proto_ptr = -1;
+ exp->local_anon_void_ptr = -1;
+ exp->local_anon_arr = -1;
+
+ for (i = 1; i <= btf__get_nr_types(local_btf); i++) {
+ t = btf__type_by_id(local_btf, i);
+ /* we are interested only in anonymous types */
+ if (t->name_off)
+ continue;
+
+ if (btf_is_struct(t) && btf_vlen(t) &&
+ (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) &&
+ strcmp(name, "marker_field") == 0) {
+ exp->local_anon_struct = i;
+ } else if (btf_is_union(t) && btf_vlen(t) &&
+ (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) &&
+ strcmp(name, "marker_field") == 0) {
+ exp->local_anon_union = i;
+ } else if (btf_is_enum(t) && btf_vlen(t) &&
+ (name = btf__name_by_offset(local_btf, btf_enum(t)[0].name_off)) &&
+ strcmp(name, "MARKER_ENUM_VAL") == 0) {
+ exp->local_anon_enum = i;
+ } else if (btf_is_ptr(t) && (t = btf__type_by_id(local_btf, t->type))) {
+ if (btf_is_func_proto(t) && (t = btf__type_by_id(local_btf, t->type)) &&
+ btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) &&
+ strcmp(name, "_Bool") == 0) {
+ /* ptr -> func_proto -> _Bool */
+ exp->local_anon_func_proto_ptr = i;
+ } else if (btf_is_void(t)) {
+ /* ptr -> void */
+ exp->local_anon_void_ptr = i;
+ }
+ } else if (btf_is_array(t) && (t = btf__type_by_id(local_btf, btf_array(t)->type)) &&
+ btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) &&
+ strcmp(name, "_Bool") == 0) {
+ /* _Bool[] */
+ exp->local_anon_arr = i;
+ }
+ }
+
+ exp->local_struct = find_btf_type(local_btf, "a_struct", BTF_KIND_STRUCT);
+ exp->local_union = find_btf_type(local_btf, "a_union", BTF_KIND_UNION);
+ exp->local_enum = find_btf_type(local_btf, "an_enum", BTF_KIND_ENUM);
+ exp->local_int = find_btf_type(local_btf, "int", BTF_KIND_INT);
+ exp->local_struct_typedef = find_btf_type(local_btf, "named_struct_typedef", BTF_KIND_TYPEDEF);
+ exp->local_func_proto_typedef = find_btf_type(local_btf, "func_proto_typedef", BTF_KIND_TYPEDEF);
+ exp->local_arr_typedef = find_btf_type(local_btf, "arr_typedef", BTF_KIND_TYPEDEF);
+
+ btf__free(local_btf);
+ btf__free(targ_btf);
+ return 0;
+}
+
+static int setup_type_id_case_success(struct core_reloc_test_case *test)
+{
+ struct core_reloc_type_id_output *exp = (void *)test->output;
+ struct btf *targ_btf;
+ int err;
+
+ err = setup_type_id_case_local(test);
+ if (err)
+ return err;
+
+ targ_btf = btf__parse(test->btf_src_file, NULL);
+
+ exp->targ_struct = find_btf_type(targ_btf, "a_struct", BTF_KIND_STRUCT);
+ exp->targ_union = find_btf_type(targ_btf, "a_union", BTF_KIND_UNION);
+ exp->targ_enum = find_btf_type(targ_btf, "an_enum", BTF_KIND_ENUM);
+ exp->targ_int = find_btf_type(targ_btf, "int", BTF_KIND_INT);
+ exp->targ_struct_typedef = find_btf_type(targ_btf, "named_struct_typedef", BTF_KIND_TYPEDEF);
+ exp->targ_func_proto_typedef = find_btf_type(targ_btf, "func_proto_typedef", BTF_KIND_TYPEDEF);
+ exp->targ_arr_typedef = find_btf_type(targ_btf, "arr_typedef", BTF_KIND_TYPEDEF);
+
+ btf__free(targ_btf);
+ return 0;
+}
+
+static int setup_type_id_case_failure(struct core_reloc_test_case *test)
+{
+ struct core_reloc_type_id_output *exp = (void *)test->output;
+ int err;
+
+ err = setup_type_id_case_local(test);
+ if (err)
+ return err;
+
+ exp->targ_struct = 0;
+ exp->targ_union = 0;
+ exp->targ_enum = 0;
+ exp->targ_int = 0;
+ exp->targ_struct_typedef = 0;
+ exp->targ_func_proto_typedef = 0;
+ exp->targ_arr_typedef = 0;
+
+ return 0;
+}
+
static struct core_reloc_test_case test_cases[] = {
/* validate we can find kernel image and use its BTF for relocs */
{
@@ -364,7 +549,7 @@ static struct core_reloc_test_case test_cases[] = {
/* validate field existence checks */
{
- EXISTENCE_CASE_COMMON(existence),
+ FIELD_EXISTS_CASE_COMMON(existence),
.input = STRUCT_TO_CHAR_PTR(core_reloc_existence) {
.a = 1,
.b = 2,
@@ -388,11 +573,11 @@ static struct core_reloc_test_case test_cases[] = {
.output_len = sizeof(struct core_reloc_existence_output),
},
{
- EXISTENCE_CASE_COMMON(existence___minimal),
+ FIELD_EXISTS_CASE_COMMON(existence___minimal),
.input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) {
.a = 42,
},
- .input_len = sizeof(struct core_reloc_existence),
+ .input_len = sizeof(struct core_reloc_existence___minimal),
.output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) {
.a_exists = 1,
.b_exists = 0,
@@ -408,12 +593,12 @@ static struct core_reloc_test_case test_cases[] = {
.output_len = sizeof(struct core_reloc_existence_output),
},
- EXISTENCE_ERR_CASE(existence__err_int_sz),
- EXISTENCE_ERR_CASE(existence__err_int_type),
- EXISTENCE_ERR_CASE(existence__err_int_kind),
- EXISTENCE_ERR_CASE(existence__err_arr_kind),
- EXISTENCE_ERR_CASE(existence__err_arr_value_type),
- EXISTENCE_ERR_CASE(existence__err_struct_type),
+ FIELD_EXISTS_ERR_CASE(existence__err_int_sz),
+ FIELD_EXISTS_ERR_CASE(existence__err_int_type),
+ FIELD_EXISTS_ERR_CASE(existence__err_int_kind),
+ FIELD_EXISTS_ERR_CASE(existence__err_arr_kind),
+ FIELD_EXISTS_ERR_CASE(existence__err_arr_value_type),
+ FIELD_EXISTS_ERR_CASE(existence__err_struct_type),
/* bitfield relocation checks */
BITFIELDS_CASE(bitfields, {
@@ -432,31 +617,137 @@ static struct core_reloc_test_case test_cases[] = {
.sb4 = -1,
.sb20 = -0x17654321,
.u32 = 0xBEEF,
- .s32 = -0x3FEDCBA987654321,
+ .s32 = -0x3FEDCBA987654321LL,
}),
BITFIELDS_CASE(bitfields___bitfield_vs_int, {
- .ub1 = 0xFEDCBA9876543210,
+ .ub1 = 0xFEDCBA9876543210LL,
.ub2 = 0xA6,
- .ub7 = -0x7EDCBA987654321,
- .sb4 = -0x6123456789ABCDE,
- .sb20 = 0xD00D,
+ .ub7 = -0x7EDCBA987654321LL,
+ .sb4 = -0x6123456789ABCDELL,
+ .sb20 = 0xD00DLL,
.u32 = -0x76543,
- .s32 = 0x0ADEADBEEFBADB0B,
+ .s32 = 0x0ADEADBEEFBADB0BLL,
}),
BITFIELDS_CASE(bitfields___just_big_enough, {
- .ub1 = 0xF,
- .ub2 = 0x0812345678FEDCBA,
+ .ub1 = 0xFLL,
+ .ub2 = 0x0812345678FEDCBALL,
}),
BITFIELDS_ERR_CASE(bitfields___err_too_big_bitfield),
/* size relocation checks */
SIZE_CASE(size),
SIZE_CASE(size___diff_sz),
+ SIZE_ERR_CASE(size___err_ambiguous),
+
+ /* validate type existence and size relocations */
+ TYPE_BASED_CASE(type_based, {
+ .struct_exists = 1,
+ .union_exists = 1,
+ .enum_exists = 1,
+ .typedef_named_struct_exists = 1,
+ .typedef_anon_struct_exists = 1,
+ .typedef_struct_ptr_exists = 1,
+ .typedef_int_exists = 1,
+ .typedef_enum_exists = 1,
+ .typedef_void_ptr_exists = 1,
+ .typedef_func_proto_exists = 1,
+ .typedef_arr_exists = 1,
+ .struct_sz = sizeof(struct a_struct),
+ .union_sz = sizeof(union a_union),
+ .enum_sz = sizeof(enum an_enum),
+ .typedef_named_struct_sz = sizeof(named_struct_typedef),
+ .typedef_anon_struct_sz = sizeof(anon_struct_typedef),
+ .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef),
+ .typedef_int_sz = sizeof(int_typedef),
+ .typedef_enum_sz = sizeof(enum_typedef),
+ .typedef_void_ptr_sz = sizeof(void_ptr_typedef),
+ .typedef_func_proto_sz = sizeof(func_proto_typedef),
+ .typedef_arr_sz = sizeof(arr_typedef),
+ }),
+ TYPE_BASED_CASE(type_based___all_missing, {
+ /* all zeros */
+ }),
+ TYPE_BASED_CASE(type_based___diff_sz, {
+ .struct_exists = 1,
+ .union_exists = 1,
+ .enum_exists = 1,
+ .typedef_named_struct_exists = 1,
+ .typedef_anon_struct_exists = 1,
+ .typedef_struct_ptr_exists = 1,
+ .typedef_int_exists = 1,
+ .typedef_enum_exists = 1,
+ .typedef_void_ptr_exists = 1,
+ .typedef_func_proto_exists = 1,
+ .typedef_arr_exists = 1,
+ .struct_sz = sizeof(struct a_struct___diff_sz),
+ .union_sz = sizeof(union a_union___diff_sz),
+ .enum_sz = sizeof(enum an_enum___diff_sz),
+ .typedef_named_struct_sz = sizeof(named_struct_typedef___diff_sz),
+ .typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff_sz),
+ .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff_sz),
+ .typedef_int_sz = sizeof(int_typedef___diff_sz),
+ .typedef_enum_sz = sizeof(enum_typedef___diff_sz),
+ .typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff_sz),
+ .typedef_func_proto_sz = sizeof(func_proto_typedef___diff_sz),
+ .typedef_arr_sz = sizeof(arr_typedef___diff_sz),
+ }),
+ TYPE_BASED_CASE(type_based___incompat, {
+ .enum_exists = 1,
+ .enum_sz = sizeof(enum an_enum),
+ }),
+ TYPE_BASED_CASE(type_based___fn_wrong_args, {
+ .struct_exists = 1,
+ .struct_sz = sizeof(struct a_struct),
+ }),
+
+ /* BTF_TYPE_ID_LOCAL/BTF_TYPE_ID_TARGET tests */
+ TYPE_ID_CASE(type_id, setup_type_id_case_success),
+ TYPE_ID_CASE(type_id___missing_targets, setup_type_id_case_failure),
+
+ /* Enumerator value existence and value relocations */
+ ENUMVAL_CASE(enumval, {
+ .named_val1_exists = true,
+ .named_val2_exists = true,
+ .named_val3_exists = true,
+ .anon_val1_exists = true,
+ .anon_val2_exists = true,
+ .anon_val3_exists = true,
+ .named_val1 = 1,
+ .named_val2 = 2,
+ .anon_val1 = 0x10,
+ .anon_val2 = 0x20,
+ }),
+ ENUMVAL_CASE(enumval___diff, {
+ .named_val1_exists = true,
+ .named_val2_exists = true,
+ .named_val3_exists = true,
+ .anon_val1_exists = true,
+ .anon_val2_exists = true,
+ .anon_val3_exists = true,
+ .named_val1 = 101,
+ .named_val2 = 202,
+ .anon_val1 = 0x11,
+ .anon_val2 = 0x22,
+ }),
+ ENUMVAL_CASE(enumval___val3_missing, {
+ .named_val1_exists = true,
+ .named_val2_exists = true,
+ .named_val3_exists = false,
+ .anon_val1_exists = true,
+ .anon_val2_exists = true,
+ .anon_val3_exists = false,
+ .named_val1 = 111,
+ .named_val2 = 222,
+ .anon_val1 = 0x111,
+ .anon_val2 = 0x222,
+ }),
+ ENUMVAL_ERR_CASE(enumval___err_missing),
};
struct data {
char in[256];
char out[256];
+ bool skip;
uint64_t my_pid_tgid;
};
@@ -472,7 +763,7 @@ void test_core_reloc(void)
struct bpf_object_load_attr load_attr = {};
struct core_reloc_test_case *test_case;
const char *tp_name, *probe_name;
- int err, duration = 0, i, equal;
+ int err, i, equal;
struct bpf_link *link = NULL;
struct bpf_map *data_map;
struct bpf_program *prog;
@@ -488,11 +779,13 @@ void test_core_reloc(void)
if (!test__start_subtest(test_case->case_name))
continue;
- DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
- .relaxed_core_relocs = test_case->relaxed_core_relocs,
- );
+ if (test_case->setup) {
+ err = test_case->setup(test_case);
+ if (CHECK(err, "test_setup", "test #%d setup failed: %d\n", i, err))
+ continue;
+ }
- obj = bpf_object__open_file(test_case->bpf_obj_file, &opts);
+ obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
test_case->bpf_obj_file, PTR_ERR(obj)))
continue;
@@ -515,15 +808,10 @@ void test_core_reloc(void)
load_attr.log_level = 0;
load_attr.target_btf_path = test_case->btf_src_file;
err = bpf_object__load_xattr(&load_attr);
- if (test_case->fails) {
- CHECK(!err, "obj_load_fail",
- "should fail to load prog '%s'\n", probe_name);
+ if (err) {
+ if (!test_case->fails)
+ CHECK(false, "obj_load", "failed to load prog '%s': %d\n", probe_name, err);
goto cleanup;
- } else {
- if (CHECK(err, "obj_load",
- "failed to load prog '%s': %d\n",
- probe_name, err))
- goto cleanup;
}
data_map = bpf_object__find_map_by_name(obj, "test_cor.bss");
@@ -551,6 +839,16 @@ void test_core_reloc(void)
/* trigger test run */
usleep(1);
+ if (data->skip) {
+ test__skip();
+ goto cleanup;
+ }
+
+ if (test_case->fails) {
+ CHECK(false, "obj_load_fail", "should fail to load prog '%s'\n", probe_name);
+ goto cleanup;
+ }
+
equal = memcmp(data->out, test_case->output,
test_case->output_len) == 0;
if (CHECK(!equal, "check_result",
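
The TYPE_BASED/TYPE_ID/ENUMVAL cases added above exercise libbpf's type- and enum-level CO-RE relocation macros from bpf_core_read.h. A minimal sketch of the BPF-side usage under that assumption; the types here are illustrative stand-ins, not the actual progs/*.c sources:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>

struct a_struct { int x; };        /* mirrors the test's local types */
enum an_enum { AN_ENUM_VAL = 1 };

__u64 type_exists, type_sz, local_id, enum_val;

SEC("raw_tp/sys_enter")
int handler(void *ctx)
{
	/* each macro is recorded as a CO-RE relocation and resolved
	 * against the target BTF at load time */
	type_exists = bpf_core_type_exists(struct a_struct);
	type_sz     = bpf_core_type_size(struct a_struct);
	local_id    = bpf_core_type_id_local(struct a_struct);
	enum_val    = bpf_core_enum_value(enum an_enum, AN_ENUM_VAL);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
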
diff --git a/tools/testing/selftests/bpf/prog_tests/core_retro.c b/tools/testing/selftests/bpf/prog_tests/core_retro.c
new file mode 100644
index 000000000000..6acb0e94d4d7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_retro.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include "test_core_retro.skel.h"
+
+void test_core_retro(void)
+{
+ int err, zero = 0, res, duration = 0, my_pid = getpid();
+ struct test_core_retro *skel;
+
+ /* load program */
+ skel = test_core_retro__open_and_load();
+ if (CHECK(!skel, "skel_load", "skeleton open/load failed\n"))
+ goto out_close;
+
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.exp_tgid_map), &zero, &my_pid, 0);
+ if (CHECK(err, "map_update", "failed to set expected PID: %d\n", errno))
+ goto out_close;
+
+ /* attach probe */
+ err = test_core_retro__attach(skel);
+ if (CHECK(err, "attach_kprobe", "err %d\n", err))
+ goto out_close;
+
+ /* trigger */
+ usleep(1);
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.results), &zero, &res);
+ if (CHECK(err, "map_lookup", "failed to lookup result: %d\n", errno))
+ goto out_close;
+
+ CHECK(res != my_pid, "pid_check", "got %d != exp %d\n", res, my_pid);
+
+out_close:
+ test_core_retro__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
new file mode 100644
index 000000000000..0a577a248d34
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <sys/stat.h>
+#include <linux/sched.h>
+#include <sys/syscall.h>
+
+#define MAX_PATH_LEN 128
+#define MAX_FILES 7
+
+#include "test_d_path.skel.h"
+
+static int duration;
+
+static struct {
+ __u32 cnt;
+ char paths[MAX_FILES][MAX_PATH_LEN];
+} src;
+
+static int set_pathname(int fd, pid_t pid)
+{
+ char buf[MAX_PATH_LEN];
+
+ snprintf(buf, MAX_PATH_LEN, "/proc/%d/fd/%d", pid, fd);
+ return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN);
+}
+
+static int trigger_fstat_events(pid_t pid)
+{
+ int sockfd = -1, procfd = -1, devfd = -1;
+ int localfd = -1, indicatorfd = -1;
+ int pipefd[2] = { -1, -1 };
+ struct stat fileStat;
+ int ret = -1;
+
+ /* unmountable pseudo-filesystems */
+ if (CHECK(pipe(pipefd) < 0, "trigger", "pipe failed\n"))
+ return ret;
+ /* unmountable pseudo-filesystems */
+ sockfd = socket(AF_INET, SOCK_STREAM, 0);
+ if (CHECK(sockfd < 0, "trigger", "socket failed\n"))
+ goto out_close;
+ /* mountable pseudo-filesystems */
+ procfd = open("/proc/self/comm", O_RDONLY);
+ if (CHECK(procfd < 0, "trigger", "open /proc/self/comm failed\n"))
+ goto out_close;
+ devfd = open("/dev/urandom", O_RDONLY);
+ if (CHECK(devfd < 0, "trigger", "open /dev/urandom failed\n"))
+ goto out_close;
+ localfd = open("/tmp/d_path_loadgen.txt", O_CREAT | O_RDONLY, 0644);
+ if (CHECK(localfd < 0, "trigger", "open /tmp/d_path_loadgen.txt failed\n"))
+ goto out_close;
+ /* bpf_d_path will return path with (deleted) */
+ remove("/tmp/d_path_loadgen.txt");
+ indicatorfd = open("/tmp/", O_PATH);
+ if (CHECK(indicatorfd < 0, "trigger", "open /tmp/ failed\n"))
+ goto out_close;
+
+ ret = set_pathname(pipefd[0], pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[0]\n"))
+ goto out_close;
+ ret = set_pathname(pipefd[1], pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[1]\n"))
+ goto out_close;
+ ret = set_pathname(sockfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for socket\n"))
+ goto out_close;
+ ret = set_pathname(procfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for proc\n"))
+ goto out_close;
+ ret = set_pathname(devfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for dev\n"))
+ goto out_close;
+ ret = set_pathname(localfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for file\n"))
+ goto out_close;
+ ret = set_pathname(indicatorfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for dir\n"))
+ goto out_close;
+
+ /* triggers vfs_getattr */
+ fstat(pipefd[0], &fileStat);
+ fstat(pipefd[1], &fileStat);
+ fstat(sockfd, &fileStat);
+ fstat(procfd, &fileStat);
+ fstat(devfd, &fileStat);
+ fstat(localfd, &fileStat);
+ fstat(indicatorfd, &fileStat);
+
+out_close:
+ /* triggers filp_close */
+ close(pipefd[0]);
+ close(pipefd[1]);
+ close(sockfd);
+ close(procfd);
+ close(devfd);
+ close(localfd);
+ close(indicatorfd);
+ return ret;
+}
+
+void test_d_path(void)
+{
+ struct test_d_path__bss *bss;
+ struct test_d_path *skel;
+ int err;
+
+ skel = test_d_path__open_and_load();
+ if (CHECK(!skel, "setup", "d_path skeleton failed\n"))
+ goto cleanup;
+
+ err = test_d_path__attach(skel);
+ if (CHECK(err, "setup", "attach failed: %d\n", err))
+ goto cleanup;
+
+ bss = skel->bss;
+ bss->my_pid = getpid();
+
+ err = trigger_fstat_events(bss->my_pid);
+ if (err < 0)
+ goto cleanup;
+
+ if (CHECK(!bss->called_stat,
+ "stat",
+ "trampoline for security_inode_getattr was not called\n"))
+ goto cleanup;
+
+ if (CHECK(!bss->called_close,
+ "close",
+ "trampoline for filp_close was not called\n"))
+ goto cleanup;
+
+ for (int i = 0; i < MAX_FILES; i++) {
+ CHECK(strncmp(src.paths[i], bss->paths_stat[i], MAX_PATH_LEN),
+ "check",
+ "failed to get stat path[%d]: %s vs %s\n",
+ i, src.paths[i], bss->paths_stat[i]);
+ CHECK(strncmp(src.paths[i], bss->paths_close[i], MAX_PATH_LEN),
+ "check",
+ "failed to get close path[%d]: %s vs %s\n",
+ i, src.paths[i], bss->paths_close[i]);
+ /* The d_path helper returns size plus NUL char, hence + 1 */
+ CHECK(bss->rets_stat[i] != strlen(bss->paths_stat[i]) + 1,
+ "check",
+ "failed to match stat return [%d]: %d vs %zd [%s]\n",
+ i, bss->rets_stat[i], strlen(bss->paths_stat[i]) + 1,
+ bss->paths_stat[i]);
+ CHECK(bss->rets_close[i] != strlen(bss->paths_close[i]) + 1,
+ "check",
+ "failed to match close return [%d]: %d vs %zd [%s]\n",
+ i, bss->rets_close[i], strlen(bss->paths_close[i]) + 1,
+ bss->paths_close[i]);
+ }
+
+cleanup:
+ test_d_path__destroy(skel);
+}
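
The userspace checks above imply a BPF counterpart that records bpf_d_path() results per file. A plausible sketch of that side, inferred from the checks (the real source is progs/test_d_path.c; hook and field names are assumptions, and only the bpf_d_path() call with its length-including-NUL return value is the point):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#define MAX_PATH_LEN 128
#define MAX_FILES 7

int my_pid;
__u32 cnt_close;
bool called_close;
char paths_close[MAX_FILES][MAX_PATH_LEN];
int rets_close[MAX_FILES];

SEC("fentry/filp_close")
int BPF_PROG(prog_close, struct file *file, void *id)
{
	int pid = bpf_get_current_pid_tgid() >> 32;
	__u32 i = cnt_close;

	if (pid != my_pid || i >= MAX_FILES)
		return 0;

	called_close = true;
	/* bpf_d_path() returns the path length including the NUL byte,
	 * matching the "+ 1" in the userspace checks above */
	rets_close[i] = bpf_d_path(&file->f_path, paths_close[i],
				   MAX_PATH_LEN);
	cnt_close = i + 1;
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
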
diff --git a/tools/testing/selftests/bpf/prog_tests/enable_stats.c b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
new file mode 100644
index 000000000000..2cb2085917e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "test_enable_stats.skel.h"
+
+void test_enable_stats(void)
+{
+ struct test_enable_stats *skel;
+ int stats_fd, err, prog_fd;
+ struct bpf_prog_info info;
+ __u32 info_len = sizeof(info);
+ int duration = 0;
+
+ skel = test_enable_stats__open_and_load();
+ if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+ return;
+
+ stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+ if (CHECK(stats_fd < 0, "get_stats_fd", "failed %d\n", errno)) {
+ test_enable_stats__destroy(skel);
+ return;
+ }
+
+ err = test_enable_stats__attach(skel);
+ if (CHECK(err, "attach_raw_tp", "err %d\n", err))
+ goto cleanup;
+
+ test_enable_stats__detach(skel);
+
+ prog_fd = bpf_program__fd(skel->progs.test_enable_stats);
+ memset(&info, 0, info_len);
+ err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ if (CHECK(err, "get_prog_info",
+ "failed to get bpf_prog_info for fd %d\n", prog_fd))
+ goto cleanup;
+ if (CHECK(info.run_time_ns == 0, "check_stats_enabled",
+ "failed to enable run_time_ns stats\n"))
+ goto cleanup;
+
+ CHECK(info.run_cnt != skel->bss->count, "check_run_cnt_valid",
+ "invalid run_cnt stats\n");
+
+cleanup:
+ test_enable_stats__destroy(skel);
+ close(stats_fd);
+}
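
For reference, the stats lifetime this test relies on: run_time_ns/run_cnt accounting is active only while at least one BPF_ENABLE_STATS fd is held open. A minimal sketch of that pattern using standard libbpf calls (sample_run_time_ns is a hypothetical helper name):

#include <unistd.h>
#include <string.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

static __u64 sample_run_time_ns(int prog_fd)
{
	struct bpf_prog_info info;
	__u32 len = sizeof(info);
	int stats_fd;

	stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
	if (stats_fd < 0)
		return 0;

	/* ... trigger the program of interest here; stats accumulate
	 * while stats_fd (or any other such fd) stays open ... */

	memset(&info, 0, sizeof(info));
	if (bpf_obj_get_info_by_fd(prog_fd, &info, &len))
		info.run_time_ns = 0;

	close(stats_fd); /* accounting stops when the last fd closes */
	return info.run_time_ns;
}
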
diff --git a/tools/testing/selftests/bpf/prog_tests/endian.c b/tools/testing/selftests/bpf/prog_tests/endian.c
new file mode 100644
index 000000000000..1a11612ace6c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/endian.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include "test_endian.skel.h"
+
+static int duration;
+
+#define IN16 0x1234
+#define IN32 0x12345678U
+#define IN64 0x123456789abcdef0ULL
+
+#define OUT16 0x3412
+#define OUT32 0x78563412U
+#define OUT64 0xf0debc9a78563412ULL
+
+void test_endian(void)
+{
+ struct test_endian* skel;
+ struct test_endian__bss *bss;
+ int err;
+
+ skel = test_endian__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+ bss = skel->bss;
+
+ bss->in16 = IN16;
+ bss->in32 = IN32;
+ bss->in64 = IN64;
+
+ err = test_endian__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ usleep(1);
+
+ CHECK(bss->out16 != OUT16, "out16", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->out16, (__u64)OUT16);
+ CHECK(bss->out32 != OUT32, "out32", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->out32, (__u64)OUT32);
+ CHECK(bss->out64 != OUT64, "out16", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->out64, (__u64)OUT64);
+
+ CHECK(bss->const16 != OUT16, "const16", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->const16, (__u64)OUT16);
+ CHECK(bss->const32 != OUT32, "const32", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->const32, (__u64)OUT32);
+ CHECK(bss->const64 != OUT64, "const64", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->const64, (__u64)OUT64);
+cleanup:
+ test_endian__destroy(skel);
+}
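
The OUT16/OUT32/OUT64 expectations above assume the BPF side performs the byte swaps. A plausible sketch of that side, assuming progs/test_endian.c uses the compiler bswap builtins for the runtime values and bpf_endian.h for the compile-time constants (variable names match the bss fields the test reads):

#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

__u16 in16, out16, const16;
__u32 in32, out32, const32;
__u64 in64, out64, const64;

SEC("raw_tp/sys_enter")
int handler(void *ctx)
{
	/* runtime byte swaps of the inputs set from userspace */
	out16 = __builtin_bswap16(in16);
	out32 = __builtin_bswap32(in32);
	out64 = __builtin_bswap64(in64);
	/* compile-time swaps via bpf_endian.h */
	const16 = bpf_htons(0x1234);
	const32 = bpf_htonl(0x12345678U);
	const64 = bpf_cpu_to_be64(0x123456789abcdef0ULL);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
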
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 83493bd5745c..109d0345a2be 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -36,7 +36,7 @@ void test_fentry_fexit(void)
fentry_res = (__u64 *)fentry_skel->bss;
fexit_res = (__u64 *)fexit_skel->bss;
printf("%lld\n", fentry_skel->bss->test1_result);
- for (i = 0; i < 6; i++) {
+ for (i = 0; i < 8; i++) {
CHECK(fentry_res[i] != 1, "result",
"fentry_test%d failed err %lld\n", i + 1, fentry_res[i]);
CHECK(fexit_res[i] != 1, "result",
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index c2642517e1d8..5c0448910426 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -1,36 +1,80 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
+#include <network_helpers.h>
+#include <bpf/btf.h>
+
+typedef int (*test_cb)(struct bpf_object *obj);
+
+static int check_data_map(struct bpf_object *obj, int prog_cnt, bool reset)
+{
+ struct bpf_map *data_map = NULL, *map;
+ __u64 *result = NULL;
+ const int zero = 0;
+ __u32 duration = 0;
+ int ret = -1, i;
+
+ result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64));
+ if (CHECK(!result, "alloc_memory", "failed to alloc memory"))
+ return -ENOMEM;
+
+ bpf_object__for_each_map(map, obj)
+ if (bpf_map__is_internal(map)) {
+ data_map = map;
+ break;
+ }
+ if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+ goto out;
+
+ ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result);
+ if (CHECK(ret, "get_result",
+ "failed to get output data: %d\n", ret))
+ goto out;
+
+ for (i = 0; i < prog_cnt; i++) {
+ if (CHECK(result[i] != 1, "result",
+ "fexit_bpf2bpf result[%d] failed err %llu\n",
+ i, result[i]))
+ goto out;
+ result[i] = 0;
+ }
+ if (reset) {
+ ret = bpf_map_update_elem(bpf_map__fd(data_map), &zero, result, 0);
+ if (CHECK(ret, "reset_result", "failed to reset result\n"))
+ goto out;
+ }
+
+ ret = 0;
+out:
+ free(result);
+ return ret;
+}
static void test_fexit_bpf2bpf_common(const char *obj_file,
const char *target_obj_file,
int prog_cnt,
const char **prog_name,
- bool run_prog)
+ bool run_prog,
+ test_cb cb)
{
- struct bpf_object *obj = NULL, *pkt_obj;
- int err, pkt_fd, i;
- struct bpf_link **link = NULL;
+ struct bpf_object *obj = NULL, *tgt_obj;
struct bpf_program **prog = NULL;
+ struct bpf_link **link = NULL;
__u32 duration = 0, retval;
- struct bpf_map *data_map;
- const int zero = 0;
- u64 *result = NULL;
+ int err, tgt_fd, i;
err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
- &pkt_obj, &pkt_fd);
+ &tgt_obj, &tgt_fd);
if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
target_obj_file, err, errno))
return;
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
- .attach_prog_fd = pkt_fd,
+ .attach_prog_fd = tgt_fd,
);
link = calloc(sizeof(struct bpf_link *), prog_cnt);
prog = calloc(sizeof(struct bpf_program *), prog_cnt);
- result = malloc((prog_cnt + 32 /* spare */) * sizeof(u64));
- if (CHECK(!link || !prog || !result, "alloc_memory",
- "failed to alloc memory"))
+ if (CHECK(!link || !prog, "alloc_memory", "failed to alloc memory"))
goto close_prog;
obj = bpf_object__open_file(obj_file, &opts);
@@ -52,39 +96,33 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
goto close_prog;
}
- if (!run_prog)
- goto close_prog;
+ if (cb) {
+ err = cb(obj);
+ if (err)
+ goto close_prog;
+ }
- data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss");
- if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+ if (!run_prog)
goto close_prog;
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
NULL, NULL, &retval, &duration);
CHECK(err || retval, "ipv6",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
- err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result);
- if (CHECK(err, "get_result",
- "failed to get output data: %d\n", err))
+ if (check_data_map(obj, prog_cnt, false))
goto close_prog;
- for (i = 0; i < prog_cnt; i++)
- if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %ld\n",
- result[i]))
- goto close_prog;
-
close_prog:
for (i = 0; i < prog_cnt; i++)
if (!IS_ERR_OR_NULL(link[i]))
bpf_link__destroy(link[i]);
if (!IS_ERR_OR_NULL(obj))
bpf_object__close(obj);
- bpf_object__close(pkt_obj);
+ bpf_object__close(tgt_obj);
free(link);
free(prog);
- free(result);
}
static void test_target_no_callees(void)
@@ -95,7 +133,7 @@ static void test_target_no_callees(void)
test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.o",
"./test_pkt_md_access.o",
ARRAY_SIZE(prog_name),
- prog_name, true);
+ prog_name, true, NULL);
}
static void test_target_yes_callees(void)
@@ -109,7 +147,7 @@ static void test_target_yes_callees(void)
test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
"./test_pkt_access.o",
ARRAY_SIZE(prog_name),
- prog_name, true);
+ prog_name, true, NULL);
}
static void test_func_replace(void)
@@ -122,11 +160,12 @@ static void test_func_replace(void)
"freplace/get_skb_len",
"freplace/get_skb_ifindex",
"freplace/get_constant",
+ "freplace/test_pkt_write_access_subprog",
};
test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
"./test_pkt_access.o",
ARRAY_SIZE(prog_name),
- prog_name, true);
+ prog_name, true, NULL);
}
static void test_func_replace_verify(void)
@@ -137,13 +176,198 @@ static void test_func_replace_verify(void)
test_fexit_bpf2bpf_common("./freplace_connect4.o",
"./connect4_prog.o",
ARRAY_SIZE(prog_name),
- prog_name, false);
+ prog_name, false, NULL);
+}
+
+static int test_second_attach(struct bpf_object *obj)
+{
+ const char *prog_name = "freplace/get_constant";
+ const char *tgt_name = prog_name + 9; /* cut off freplace/ */
+ const char *tgt_obj_file = "./test_pkt_access.o";
+ struct bpf_program *prog = NULL;
+ struct bpf_object *tgt_obj;
+ __u32 duration = 0, retval;
+ struct bpf_link *link;
+ int err = 0, tgt_fd;
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name))
+ return -ENOENT;
+
+ err = bpf_prog_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC,
+ &tgt_obj, &tgt_fd);
+ if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n",
+ tgt_obj_file, err, errno))
+ return err;
+
+ link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name);
+ if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd))
+ goto out;
+
+ err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ NULL, NULL, &retval, &duration);
+ if (CHECK(err || retval, "ipv6",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration))
+ goto out;
+
+ err = check_data_map(obj, 1, true);
+ if (err)
+ goto out;
+
+out:
+ bpf_link__destroy(link);
+ bpf_object__close(tgt_obj);
+ return err;
+}
+
+static void test_func_replace_multi(void)
+{
+ const char *prog_name[] = {
+ "freplace/get_constant",
+ };
+ test_fexit_bpf2bpf_common("./freplace_get_constant.o",
+ "./test_pkt_access.o",
+ ARRAY_SIZE(prog_name),
+ prog_name, true, test_second_attach);
+}
+
+static void test_fmod_ret_freplace(void)
+{
+ struct bpf_object *freplace_obj = NULL, *pkt_obj, *fmod_obj = NULL;
+ const char *freplace_name = "./freplace_get_constant.o";
+ const char *fmod_ret_name = "./fmod_ret_freplace.o";
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+ const char *tgt_name = "./test_pkt_access.o";
+ struct bpf_link *freplace_link = NULL;
+ struct bpf_program *prog;
+ __u32 duration = 0;
+ int err, pkt_fd;
+
+ err = bpf_prog_load(tgt_name, BPF_PROG_TYPE_UNSPEC,
+ &pkt_obj, &pkt_fd);
+ /* the target prog should load fine */
+ if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
+ tgt_name, err, errno))
+ return;
+ opts.attach_prog_fd = pkt_fd;
+
+ freplace_obj = bpf_object__open_file(freplace_name, &opts);
+ if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open",
+ "failed to open %s: %ld\n", freplace_name,
+ PTR_ERR(freplace_obj)))
+ goto out;
+
+ err = bpf_object__load(freplace_obj);
+ if (CHECK(err, "freplace_obj_load", "err %d\n", err))
+ goto out;
+
+ prog = bpf_program__next(NULL, freplace_obj);
+ freplace_link = bpf_program__attach_trace(prog);
+ if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n"))
+ goto out;
+
+ opts.attach_prog_fd = bpf_program__fd(prog);
+ fmod_obj = bpf_object__open_file(fmod_ret_name, &opts);
+ if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open",
+ "failed to open %s: %ld\n", fmod_ret_name,
+ PTR_ERR(fmod_obj)))
+ goto out;
+
+ err = bpf_object__load(fmod_obj);
+ if (CHECK(!err, "fmod_obj_load", "loading fmod_ret should fail\n"))
+ goto out;
+
+out:
+ bpf_link__destroy(freplace_link);
+ bpf_object__close(freplace_obj);
+ bpf_object__close(fmod_obj);
+ bpf_object__close(pkt_obj);
+}
+
+static void test_func_sockmap_update(void)
+{
+ const char *prog_name[] = {
+ "freplace/cls_redirect",
+ };
+ test_fexit_bpf2bpf_common("./freplace_cls_redirect.o",
+ "./test_cls_redirect.o",
+ ARRAY_SIZE(prog_name),
+ prog_name, false, NULL);
+}
+
+static void test_obj_load_failure_common(const char *obj_file,
+ const char *target_obj_file)
+{
+ /*
+ * standalone test that asserts failure to load a freplace prog;
+ * the specific reason for failure is documented at each call site.
+ */
+ struct bpf_object *obj = NULL, *pkt_obj;
+ int err, pkt_fd;
+ __u32 duration = 0;
+
+ err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
+ &pkt_obj, &pkt_fd);
+ /* the target prog should load fine */
+ if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
+ target_obj_file, err, errno))
+ return;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+ .attach_prog_fd = pkt_fd,
+ );
+
+ obj = bpf_object__open_file(obj_file, &opts);
+ if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
+ "failed to open %s: %ld\n", obj_file,
+ PTR_ERR(obj)))
+ goto close_prog;
+
+ /* It should fail to load the program */
+ err = bpf_object__load(obj);
+ if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err))
+ goto close_prog;
+
+close_prog:
+ if (!IS_ERR_OR_NULL(obj))
+ bpf_object__close(obj);
+ bpf_object__close(pkt_obj);
+}
+
+static void test_func_replace_return_code(void)
+{
+ /* test invalid return code in the replaced program */
+ test_obj_load_failure_common("./freplace_connect_v4_prog.o",
+ "./connect4_prog.o");
+}
+
+static void test_func_map_prog_compatibility(void)
+{
+ /* test with spin lock map value in the replaced program */
+ test_obj_load_failure_common("./freplace_attach_probe.o",
+ "./test_attach_probe.o");
}
void test_fexit_bpf2bpf(void)
{
- test_target_no_callees();
- test_target_yes_callees();
- test_func_replace();
- test_func_replace_verify();
+ if (test__start_subtest("target_no_callees"))
+ test_target_no_callees();
+ if (test__start_subtest("target_yes_callees"))
+ test_target_yes_callees();
+ if (test__start_subtest("func_replace"))
+ test_func_replace();
+ if (test__start_subtest("func_replace_verify"))
+ test_func_replace_verify();
+ if (test__start_subtest("func_sockmap_update"))
+ test_func_sockmap_update();
+ if (test__start_subtest("func_replace_return_code"))
+ test_func_replace_return_code();
+ if (test__start_subtest("func_map_prog_compatibility"))
+ test_func_map_prog_compatibility();
+ if (test__start_subtest("func_replace_multi"))
+ test_func_replace_multi();
+ if (test__start_subtest("fmod_ret_freplace"))
+ test_fmod_ret_freplace();
}
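
The new test_second_attach() path demonstrates re-targeting an already loaded freplace program at a second target. A minimal sketch of that call pattern, assuming only bpf_program__attach_freplace() and libbpf_get_error() (attach_second_target is a hypothetical helper name):

#include <bpf/libbpf.h>

static struct bpf_link *attach_second_target(struct bpf_program *prog,
					     int tgt_fd,
					     const char *tgt_name)
{
	struct bpf_link *link;

	/* prog was opened/loaded against its first target via
	 * opts.attach_prog_fd; this call attaches the same program
	 * to another target program/function */
	link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name);
	if (libbpf_get_error(link))
		return NULL;
	return link;
}
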
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 92563898867c..cd6dc80edf18 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -1,10 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
#include <error.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <sys/uio.h>
+#include "bpf_flow.skel.h"
+
#ifndef IP_MF
#define IP_MF 0x2000
#endif
@@ -100,6 +103,7 @@ struct test {
#define VLAN_HLEN 4
+static __u32 duration;
struct test tests[] = {
{
.name = "ipv4",
@@ -443,17 +447,130 @@ static int ifup(const char *ifname)
return 0;
}
+static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
+{
+ int i, err, map_fd, prog_fd;
+ struct bpf_program *prog;
+ char prog_name[32];
+
+ map_fd = bpf_map__fd(prog_array);
+ if (map_fd < 0)
+ return -1;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "flow_dissector/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (!prog)
+ return -1;
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0)
+ return -1;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (err)
+ return -1;
+ }
+ return 0;
+}
+
+static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
+{
+ int i, err, keys_fd;
+
+ keys_fd = bpf_map__fd(keys);
+ if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ /* Keep in sync with 'flags' from eth_get_headlen. */
+ __u32 eth_get_headlen_flags =
+ BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
+ struct bpf_prog_test_run_attr tattr = {};
+ struct bpf_flow_keys flow_keys = {};
+ __u32 key = (__u32)(tests[i].keys.sport) << 16 |
+ tests[i].keys.dport;
+
+ /* For skb-less case we can't pass input flags; run
+ * only the tests that have a matching set of flags.
+ */
+
+ if (tests[i].flags != eth_get_headlen_flags)
+ continue;
+
+ err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
+ CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
+
+ err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
+ CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
+
+ CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
+ CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
+
+ err = bpf_map_delete_elem(keys_fd, &key);
+ CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
+ }
+}
+
+static void test_skb_less_prog_attach(struct bpf_flow *skel, int tap_fd)
+{
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(skel->progs._dissect);
+ if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
+ return;
+
+ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno))
+ return;
+
+ run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+
+ err = bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+ CHECK(err, "bpf_prog_detach2", "err %d errno %d\n", err, errno);
+}
+
+static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
+{
+ struct bpf_link *link;
+ int err, net_fd;
+
+ net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK(net_fd < 0, "open(/proc/self/ns/net)", "err %d\n", errno))
+ return;
+
+ link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
+ if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link)))
+ goto out_close;
+
+ run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+
+ err = bpf_link__destroy(link);
+ CHECK(err, "bpf_link__destroy", "err %d\n", err);
+out_close:
+ close(net_fd);
+}
+
void test_flow_dissector(void)
{
int i, err, prog_fd, keys_fd = -1, tap_fd;
- struct bpf_object *obj;
- __u32 duration = 0;
+ struct bpf_flow *skel;
- err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector",
- "jmp_table", "last_dissection", &prog_fd, &keys_fd);
- if (CHECK_FAIL(err))
+ skel = bpf_flow__open_and_load();
+ if (CHECK(!skel, "skel", "failed to open/load skeleton\n"))
return;
+ prog_fd = bpf_program__fd(skel->progs._dissect);
+ if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
+ goto out_destroy_skel;
+ keys_fd = bpf_map__fd(skel->maps.last_dissection);
+ if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
+ goto out_destroy_skel;
+ err = init_prog_array(skel->obj, skel->maps.jmp_table);
+ if (CHECK(err, "init_prog_array", "err %d\n", err))
+ goto out_destroy_skel;
+
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct bpf_flow_keys flow_keys;
struct bpf_prog_test_run_attr tattr = {
@@ -474,7 +591,7 @@ void test_flow_dissector(void)
CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) ||
err || tattr.retval != 1,
tests[i].name,
- "err %d errno %d retval %d duration %d size %u/%lu\n",
+ "err %d errno %d retval %d duration %d size %u/%zu\n",
err, errno, tattr.retval, tattr.duration,
tattr.data_size_out, sizeof(flow_keys));
CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
@@ -486,43 +603,17 @@ void test_flow_dissector(void)
* via BPF map in this case.
*/
- err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
- CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno);
-
tap_fd = create_tap("tap0");
CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno);
err = ifup("tap0");
CHECK(err, "ifup", "err %d errno %d\n", err, errno);
- for (i = 0; i < ARRAY_SIZE(tests); i++) {
- /* Keep in sync with 'flags' from eth_get_headlen. */
- __u32 eth_get_headlen_flags =
- BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
- struct bpf_prog_test_run_attr tattr = {};
- struct bpf_flow_keys flow_keys = {};
- __u32 key = (__u32)(tests[i].keys.sport) << 16 |
- tests[i].keys.dport;
-
- /* For skb-less case we can't pass input flags; run
- * only the tests that have a matching set of flags.
- */
-
- if (tests[i].flags != eth_get_headlen_flags)
- continue;
-
- err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
- CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
-
- err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
- CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
-
- CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
- CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
-
- err = bpf_map_delete_elem(keys_fd, &key);
- CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
- }
+ /* Test direct prog attachment */
+ test_skb_less_prog_attach(skel, tap_fd);
+ /* Test indirect prog attachment via link */
+ test_skb_less_link_create(skel, tap_fd);
- bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR);
- bpf_object__close(obj);
+ close(tap_fd);
+out_destroy_skel:
+ bpf_flow__destroy(skel);
}
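
test_skb_less_link_create() above switches from the legacy bpf_prog_attach(BPF_FLOW_DISSECTOR) path to a netns-scoped BPF link. A minimal sketch of that attachment pattern (attach_flow_dissector is a hypothetical helper; the link holds its own netns reference, so the fd can be closed immediately):

#include <fcntl.h>
#include <unistd.h>
#include <bpf/libbpf.h>

static struct bpf_link *attach_flow_dissector(struct bpf_program *prog)
{
	struct bpf_link *link;
	int net_fd;

	net_fd = open("/proc/self/ns/net", O_RDONLY);
	if (net_fd < 0)
		return NULL;

	link = bpf_program__attach_netns(prog, net_fd);
	close(net_fd); /* the link pins the netns on its own */

	return libbpf_get_error(link) ? NULL : link;
}
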
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
index dc5ef155ec28..0e8a4d2f023d 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
void test_flow_dissector_load_bytes(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
index 1f51ba66b98b..172c586b6996 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
@@ -1,9 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Test that the flow_dissector program can be updated with a single
- * syscall by attaching a new program that replaces the existing one.
- *
- * Corner case - the same program cannot be attached twice.
+ * Tests for attaching, detaching, and replacing flow_dissector BPF program.
*/
#define _GNU_SOURCE
@@ -11,6 +8,7 @@
#include <fcntl.h>
#include <sched.h>
#include <stdbool.h>
+#include <sys/stat.h>
#include <unistd.h>
#include <linux/bpf.h>
@@ -18,21 +16,30 @@
#include "test_progs.h"
-static bool is_attached(int netns)
+static int init_net = -1;
+
+static __u32 query_attached_prog_id(int netns)
{
- __u32 cnt;
+ __u32 prog_ids[1] = {};
+ __u32 prog_cnt = ARRAY_SIZE(prog_ids);
int err;
- err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL, NULL, &cnt);
+ err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL,
+ prog_ids, &prog_cnt);
if (CHECK_FAIL(err)) {
perror("bpf_prog_query");
- return true; /* fail-safe */
+ return 0;
}
- return cnt > 0;
+ return prog_cnt == 1 ? prog_ids[0] : 0;
+}
+
+static bool prog_is_attached(int netns)
+{
+ return query_attached_prog_id(netns) > 0;
}
-static int load_prog(void)
+static int load_prog(enum bpf_prog_type type)
{
struct bpf_insn prog[] = {
BPF_MOV64_IMM(BPF_REG_0, BPF_OK),
@@ -40,61 +47,593 @@ static int load_prog(void)
};
int fd;
- fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
- ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+ fd = bpf_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
if (CHECK_FAIL(fd < 0))
perror("bpf_load_program");
return fd;
}
-static void do_flow_dissector_reattach(void)
+static __u32 query_prog_id(int prog)
{
- int prog_fd[2] = { -1, -1 };
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
int err;
- prog_fd[0] = load_prog();
- if (prog_fd[0] < 0)
- return;
+ err = bpf_obj_get_info_by_fd(prog, &info, &info_len);
+ if (CHECK_FAIL(err || info_len != sizeof(info))) {
+ perror("bpf_obj_get_info_by_fd");
+ return 0;
+ }
- prog_fd[1] = load_prog();
- if (prog_fd[1] < 0)
- goto out_close;
+ return info.id;
+}
+
+static int unshare_net(int old_net)
+{
+ int err, new_net;
- err = bpf_prog_attach(prog_fd[0], 0, BPF_FLOW_DISSECTOR, 0);
+ err = unshare(CLONE_NEWNET);
if (CHECK_FAIL(err)) {
- perror("bpf_prog_attach-0");
- goto out_close;
+ perror("unshare(CLONE_NEWNET)");
+ return -1;
+ }
+ new_net = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK_FAIL(new_net < 0)) {
+ perror("open(/proc/self/ns/net)");
+ setns(old_net, CLONE_NEWNET);
+ return -1;
+ }
+ return new_net;
+}
+
+static void test_prog_attach_prog_attach(int netns, int prog1, int prog2)
+{
+ int err;
+
+ err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach(prog1)");
+ return;
}
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
/* Expect success when attaching a different program */
- err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0);
+ err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0);
if (CHECK_FAIL(err)) {
- perror("bpf_prog_attach-1");
+ perror("bpf_prog_attach(prog2) #1");
goto out_detach;
}
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
/* Expect failure when attaching the same program twice */
- err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0);
+ err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0);
if (CHECK_FAIL(!err || errno != EINVAL))
- perror("bpf_prog_attach-2");
+ perror("bpf_prog_attach(prog2) #2");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
out_detach:
- err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
+ err = bpf_prog_detach2(prog2, 0, BPF_FLOW_DISSECTOR);
+ if (CHECK_FAIL(err))
+ perror("bpf_prog_detach");
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_link_create(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ int link1, link2;
+
+ link1 = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link1 < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect failure creating link when another link exists */
+ errno = 0;
+ link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link2 != -1 || errno != E2BIG))
+ perror("bpf_prog_attach(prog2) expected E2BIG");
+ if (link2 != -1)
+ close(link2);
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(link1);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_prog_attach_link_create(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ int err, link;
+
+ err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect failure creating link when prog attached */
+ errno = 0;
+ link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link != -1 || errno != EEXIST))
+ perror("bpf_link_create(prog2) expected EEXIST");
+ if (link != -1)
+ close(link);
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ err = bpf_prog_detach2(prog1, 0, BPF_FLOW_DISSECTOR);
if (CHECK_FAIL(err))
perror("bpf_prog_detach");
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_prog_attach(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ int err, link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect failure attaching prog when link exists */
+ errno = 0;
+ err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK_FAIL(!err || errno != EEXIST))
+ perror("bpf_prog_attach(prog2) expected EEXIST");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_prog_detach(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ int err, link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect failure detaching prog when link exists */
+ errno = 0;
+ err = bpf_prog_detach2(prog1, 0, BPF_FLOW_DISSECTOR);
+ if (CHECK_FAIL(!err || errno != EINVAL))
+ perror("bpf_prog_detach expected EINVAL");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_prog_attach_detach_query(int netns, int prog1, int prog2)
+{
+ int err;
+
+ err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ err = bpf_prog_detach2(prog1, 0, BPF_FLOW_DISSECTOR);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_detach");
+ return;
+ }
+
+ /* Expect no prog attached after successful detach */
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_close_query(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ int link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(link);
+ /* Expect no prog attached after closing last link FD */
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_no_old_prog(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect success replacing the prog when old prog not specified */
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(err))
+ perror("bpf_link_update");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
+
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_replace_old_prog(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect update with BPF_F_REPLACE and old prog specified to succeed */
+ update_opts.flags = BPF_F_REPLACE;
+ update_opts.old_prog_fd = prog1;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(err))
+ perror("bpf_link_update");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_same_prog(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect success updating the prog with the same one */
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog1, &update_opts);
+ if (CHECK_FAIL(err))
+ perror("bpf_link_update");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_invalid_opts(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect update to fail w/ old prog FD but w/o F_REPLACE */
+ errno = 0;
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = prog1;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(!err || errno != EINVAL)) {
+ perror("bpf_link_update expected EINVAL");
+ goto out_close;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect update to fail on old prog FD mismatch */
+ errno = 0;
+ update_opts.flags = BPF_F_REPLACE;
+ update_opts.old_prog_fd = prog2;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(!err || errno != EPERM)) {
+ perror("bpf_link_update expected EPERM");
+ goto out_close;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect update to fail for invalid old prog FD */
+ errno = 0;
+ update_opts.flags = BPF_F_REPLACE;
+ update_opts.old_prog_fd = -1;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(!err || errno != EBADF)) {
+ perror("bpf_link_update expected EBADF");
+ goto out_close;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect update to fail with invalid flags */
+ errno = 0;
+ update_opts.flags = BPF_F_ALLOW_MULTI;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(!err || errno != EINVAL))
+ perror("bpf_link_update expected EINVAL");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+out_close:
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_invalid_prog(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link, prog3;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect failure when new prog FD is not valid */
+ errno = 0;
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, -1, &update_opts);
+ if (CHECK_FAIL(!err || errno != EBADF)) {
+ perror("bpf_link_update expected EINVAL");
+ goto out_close_link;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ prog3 = load_prog(BPF_PROG_TYPE_SOCKET_FILTER);
+ if (prog3 < 0)
+ goto out_close_link;
+
+ /* Expect failure when new prog FD type doesn't match */
+ errno = 0;
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog3, &update_opts);
+ if (CHECK_FAIL(!err || errno != EINVAL))
+ perror("bpf_link_update expected EINVAL");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(prog3);
+out_close_link:
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_netns_gone(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link, old_net;
+
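+ /* Create a throwaway netns so it can be torn down while the link still exists */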
+ old_net = netns;
+ netns = unshare_net(old_net);
+ if (netns < 0)
+ return;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(netns);
+ err = setns(old_net, CLONE_NEWNET);
+ if (CHECK_FAIL(err)) {
+ perror("setns(CLONE_NEWNET)");
+ close(link);
+ return;
+ }
+
+ /* Expect failure when netns destroyed */
+ errno = 0;
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(!err || errno != ENOLINK))
+ perror("bpf_link_update");
+
+ close(link);
+}
+
+static void test_link_get_info(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ struct bpf_link_info info = {};
+ struct stat netns_stat = {};
+ __u32 info_len, link_id;
+ int err, link, old_net;
+
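+ /* Attach the link in a separate netns so its inode can be checked and the netns later destroyed */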
+ old_net = netns;
+ netns = unshare_net(old_net);
+ if (netns < 0)
+ return;
+
+ err = fstat(netns, &netns_stat);
+ if (CHECK_FAIL(err)) {
+ perror("stat(netns)");
+ goto out_resetns;
+ }
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ goto out_resetns;
+ }
+
+ info_len = sizeof(info);
+ err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_obj_get_info");
+ goto out_unlink;
+ }
+ CHECK_FAIL(info_len != sizeof(info));
+
+ /* Expect link info to be sane and match prog and netns details */
+ CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS);
+ CHECK_FAIL(info.id == 0);
+ CHECK_FAIL(info.prog_id != query_prog_id(prog1));
+ CHECK_FAIL(info.netns.netns_ino != netns_stat.st_ino);
+ CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR);
+
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_link_update(prog2)");
+ goto out_unlink;
+ }
+
+ link_id = info.id;
+ info_len = sizeof(info);
+ err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_obj_get_info");
+ goto out_unlink;
+ }
+ CHECK_FAIL(info_len != sizeof(info));
+
+ /* Expect no info change after update except in prog id */
+ CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS);
+ CHECK_FAIL(info.id != link_id);
+ CHECK_FAIL(info.prog_id != query_prog_id(prog2));
+ CHECK_FAIL(info.netns.netns_ino != netns_stat.st_ino);
+ CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR);
+
+ /* Leave the netns the link is attached to and close the last FD to it */
+ err = setns(old_net, CLONE_NEWNET);
+ if (CHECK_FAIL(err)) {
+ perror("setns(NEWNET)");
+ goto out_unlink;
+ }
+ close(netns);
+ old_net = -1;
+ netns = -1;
+
+ info_len = sizeof(info);
+ err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_obj_get_info");
+ goto out_unlink;
+ }
+ CHECK_FAIL(info_len != sizeof(info));
+
+ /* Expect netns_ino to change to 0 */
+ CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS);
+ CHECK_FAIL(info.id != link_id);
+ CHECK_FAIL(info.prog_id != query_prog_id(prog2));
+ CHECK_FAIL(info.netns.netns_ino != 0);
+ CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR);
+
+out_unlink:
+ close(link);
+out_resetns:
+ if (old_net != -1)
+ setns(old_net, CLONE_NEWNET);
+ if (netns != -1)
+ close(netns);
+}
+
+static void run_tests(int netns)
+{
+ struct test {
+ const char *test_name;
+ void (*test_func)(int netns, int prog1, int prog2);
+ } tests[] = {
+ { "prog attach, prog attach",
+ test_prog_attach_prog_attach },
+ { "link create, link create",
+ test_link_create_link_create },
+ { "prog attach, link create",
+ test_prog_attach_link_create },
+ { "link create, prog attach",
+ test_link_create_prog_attach },
+ { "link create, prog detach",
+ test_link_create_prog_detach },
+ { "prog attach, detach, query",
+ test_prog_attach_detach_query },
+ { "link create, close, query",
+ test_link_create_close_query },
+ { "link update no old prog",
+ test_link_update_no_old_prog },
+ { "link update with replace old prog",
+ test_link_update_replace_old_prog },
+ { "link update with same prog",
+ test_link_update_same_prog },
+ { "link update invalid opts",
+ test_link_update_invalid_opts },
+ { "link update invalid prog",
+ test_link_update_invalid_prog },
+ { "link update netns gone",
+ test_link_update_netns_gone },
+ { "link get info",
+ test_link_get_info },
+ };
+ int i, progs[2] = { -1, -1 };
+ char test_name[80];
+
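+ /* Load two flow dissector programs shared by all subtests */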
+ for (i = 0; i < ARRAY_SIZE(progs); i++) {
+ progs[i] = load_prog(BPF_PROG_TYPE_FLOW_DISSECTOR);
+ if (progs[i] < 0)
+ goto out_close;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ snprintf(test_name, sizeof(test_name),
+ "flow dissector %s%s",
+ tests[i].test_name,
+ netns == init_net ? " (init_net)" : "");
+ if (test__start_subtest(test_name))
+ tests[i].test_func(netns, progs[0], progs[1]);
+ }
out_close:
- close(prog_fd[1]);
- close(prog_fd[0]);
+ for (i = 0; i < ARRAY_SIZE(progs); i++) {
+ if (progs[i] != -1)
+ CHECK_FAIL(close(progs[i]));
+ }
}
void test_flow_dissector_reattach(void)
{
- int init_net, self_net, err;
+ int err, new_net, saved_net;
- self_net = open("/proc/self/ns/net", O_RDONLY);
- if (CHECK_FAIL(self_net < 0)) {
+ saved_net = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK_FAIL(saved_net < 0)) {
perror("open(/proc/self/ns/net");
return;
}
@@ -111,30 +650,29 @@ void test_flow_dissector_reattach(void)
goto out_close;
}
- if (is_attached(init_net)) {
+ if (prog_is_attached(init_net)) {
test__skip();
printf("Can't test with flow dissector attached to init_net\n");
goto out_setns;
}
/* First run tests in root network namespace */
- do_flow_dissector_reattach();
+ run_tests(init_net);
/* Then repeat tests in a non-root namespace */
- err = unshare(CLONE_NEWNET);
- if (CHECK_FAIL(err)) {
- perror("unshare(CLONE_NEWNET)");
+ new_net = unshare_net(init_net);
+ if (new_net < 0)
goto out_setns;
- }
- do_flow_dissector_reattach();
+ run_tests(new_net);
+ close(new_net);
out_setns:
/* Move back to netns we started in. */
- err = setns(self_net, CLONE_NEWNET);
+ err = setns(saved_net, CLONE_NEWNET);
if (CHECK_FAIL(err))
perror("setns(/proc/self/ns/net)");
out_close:
close(init_net);
- close(self_net);
+ close(saved_net);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
new file mode 100644
index 000000000000..d884b2ed5bc5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <test_progs.h>
+#include "test_stacktrace_build_id.skel.h"
+
+void test_get_stackid_cannot_attach(void)
+{
+ struct perf_event_attr attr = {
+ /* .type = PERF_TYPE_SOFTWARE, */
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .precise_ip = 1,
+ .sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK,
+ .branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_CALL_STACK,
+ .sample_period = 5000,
+ .size = sizeof(struct perf_event_attr),
+ };
+ struct test_stacktrace_build_id *skel;
+ __u32 duration = 0;
+ int pmu_fd, err;
+
+ skel = test_stacktrace_build_id__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+ return;
+
+ /* override program type */
+ bpf_program__set_perf_event(skel->progs.oncpu);
+
+ err = test_stacktrace_build_id__load(skel);
+ if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
+ goto cleanup;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (pmu_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) {
+ printf("%s:SKIP:cannot open PERF_COUNT_HW_CPU_CYCLES with precise_ip > 0\n",
+ __func__);
+ test__skip();
+ goto cleanup;
+ }
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto cleanup;
+
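+ /* attach without PERF_SAMPLE_CALLCHAIN in sample_type, expect failure */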
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_no_callchain",
+ "should have failed\n");
+ close(pmu_fd);
+
+ /* add PERF_SAMPLE_CALLCHAIN, attach should succeed */
+ attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto cleanup;
+
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event_callchain",
+ "err: %ld\n", PTR_ERR(skel->links.oncpu));
+ close(pmu_fd);
+
+ /* add exclude_callchain_kernel, attach should fail */
+ attr.exclude_callchain_kernel = 1;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto cleanup;
+
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_exclude_callchain_kernel",
+ "should have failed\n");
+ close(pmu_fd);
+
+cleanup:
+ test_stacktrace_build_id__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index c680926fce73..9efa7e50eab2 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -1,10 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
static void test_global_data_number(struct bpf_object *obj, __u32 duration)
{
int i, err, map_fd;
- uint64_t num;
+ __u64 num;
map_fd = bpf_find_map(__func__, obj, "result_number");
if (CHECK_FAIL(map_fd < 0))
@@ -13,7 +14,7 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration)
struct {
char *name;
uint32_t key;
- uint64_t num;
+ __u64 num;
} tests[] = {
{ "relocate .bss reference", 0, 0 },
{ "relocate .data reference", 1, 42 },
@@ -31,7 +32,7 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration)
for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
err = bpf_map_lookup_elem(map_fd, &tests[i].key, &num);
CHECK(err || num != tests[i].num, tests[i].name,
- "err %d result %lx expected %lx\n",
+ "err %d result %llx expected %llx\n",
err, num, tests[i].num);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
index 3bdaa5a40744..ee46b11f1f9a 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -12,7 +12,8 @@ void test_global_data_init(void)
size_t sz;
obj = bpf_object__open_file(file, NULL);
- if (CHECK_FAIL(!obj))
+ err = libbpf_get_error(obj);
+ if (CHECK_FAIL(err))
return;
map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
diff --git a/tools/testing/selftests/bpf/test_hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c
index c490e012c23f..428d488830c6 100644
--- a/tools/testing/selftests/bpf/test_hashmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c
@@ -5,26 +5,17 @@
*
* Copyright (c) 2019 Facebook
*/
-#include <stdio.h>
-#include <errno.h>
-#include <linux/err.h>
+#include "test_progs.h"
#include "bpf/hashmap.h"
-#define CHECK(condition, format...) ({ \
- int __ret = !!(condition); \
- if (__ret) { \
- fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \
- fprintf(stderr, format); \
- } \
- __ret; \
-})
+static int duration = 0;
-size_t hash_fn(const void *k, void *ctx)
+static size_t hash_fn(const void *k, void *ctx)
{
return (long)k;
}
-bool equal_fn(const void *a, const void *b, void *ctx)
+static bool equal_fn(const void *a, const void *b, void *ctx)
{
return (long)a == (long)b;
}
@@ -49,53 +40,55 @@ static inline size_t exp_cap(size_t sz)
#define ELEM_CNT 62
-int test_hashmap_generic(void)
+static void test_hashmap_generic(void)
{
struct hashmap_entry *entry, *tmp;
int err, bkt, found_cnt, i;
long long found_msk;
struct hashmap *map;
- fprintf(stderr, "%s: ", __func__);
-
map = hashmap__new(hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
- return 1;
+ if (CHECK(IS_ERR(map), "hashmap__new",
+ "failed to create map: %ld\n", PTR_ERR(map)))
+ return;
for (i = 0; i < ELEM_CNT; i++) {
const void *oldk, *k = (const void *)(long)i;
void *oldv, *v = (void *)(long)(1024 + i);
err = hashmap__update(map, k, v, &oldk, &oldv);
- if (CHECK(err != -ENOENT, "unexpected result: %d\n", err))
- return 1;
+ if (CHECK(err != -ENOENT, "hashmap__update",
+ "unexpected result: %d\n", err))
+ goto cleanup;
if (i % 2) {
err = hashmap__add(map, k, v);
} else {
err = hashmap__set(map, k, v, &oldk, &oldv);
- if (CHECK(oldk != NULL || oldv != NULL,
+ if (CHECK(oldk != NULL || oldv != NULL, "check_kv",
"unexpected k/v: %p=%p\n", oldk, oldv))
- return 1;
+ goto cleanup;
}
- if (CHECK(err, "failed to add k/v %ld = %ld: %d\n",
+ if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n",
(long)k, (long)v, err))
- return 1;
+ goto cleanup;
- if (CHECK(!hashmap__find(map, k, &oldv),
+ if (CHECK(!hashmap__find(map, k, &oldv), "elem_find",
"failed to find key %ld\n", (long)k))
- return 1;
- if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv))
- return 1;
+ goto cleanup;
+ if (CHECK(oldv != v, "elem_val",
+ "found value is wrong: %ld\n", (long)oldv))
+ goto cleanup;
}
- if (CHECK(hashmap__size(map) != ELEM_CNT,
+ if (CHECK(hashmap__size(map) != ELEM_CNT, "hashmap__size",
"invalid map size: %zu\n", hashmap__size(map)))
- return 1;
+ goto cleanup;
if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+ "hashmap_cap",
"unexpected map capacity: %zu\n", hashmap__capacity(map)))
- return 1;
+ goto cleanup;
found_msk = 0;
hashmap__for_each_entry(map, entry, bkt) {
@@ -103,42 +96,47 @@ int test_hashmap_generic(void)
long v = (long)entry->value;
found_msk |= 1ULL << k;
- if (CHECK(v - k != 1024, "invalid k/v pair: %ld = %ld\n", k, v))
- return 1;
+ if (CHECK(v - k != 1024, "check_kv",
+ "invalid k/v pair: %ld = %ld\n", k, v))
+ goto cleanup;
}
- if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1,
+ if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1, "elem_cnt",
"not all keys iterated: %llx\n", found_msk))
- return 1;
+ goto cleanup;
for (i = 0; i < ELEM_CNT; i++) {
const void *oldk, *k = (const void *)(long)i;
void *oldv, *v = (void *)(long)(256 + i);
err = hashmap__add(map, k, v);
- if (CHECK(err != -EEXIST, "unexpected add result: %d\n", err))
- return 1;
+ if (CHECK(err != -EEXIST, "hashmap__add",
+ "unexpected add result: %d\n", err))
+ goto cleanup;
if (i % 2)
err = hashmap__update(map, k, v, &oldk, &oldv);
else
err = hashmap__set(map, k, v, &oldk, &oldv);
- if (CHECK(err, "failed to update k/v %ld = %ld: %d\n",
- (long)k, (long)v, err))
- return 1;
- if (CHECK(!hashmap__find(map, k, &oldv),
+ if (CHECK(err, "elem_upd",
+ "failed to update k/v %ld = %ld: %d\n",
+ (long)k, (long)v, err))
+ goto cleanup;
+ if (CHECK(!hashmap__find(map, k, &oldv), "elem_find",
"failed to find key %ld\n", (long)k))
- return 1;
- if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv))
- return 1;
+ goto cleanup;
+ if (CHECK(oldv != v, "elem_val",
+ "found value is wrong: %ld\n", (long)oldv))
+ goto cleanup;
}
- if (CHECK(hashmap__size(map) != ELEM_CNT,
+ if (CHECK(hashmap__size(map) != ELEM_CNT, "hashmap__size",
"invalid updated map size: %zu\n", hashmap__size(map)))
- return 1;
+ goto cleanup;
if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+ "hashmap__capacity",
"unexpected map capacity: %zu\n", hashmap__capacity(map)))
- return 1;
+ goto cleanup;
found_msk = 0;
hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
@@ -146,20 +144,21 @@ int test_hashmap_generic(void)
long v = (long)entry->value;
found_msk |= 1ULL << k;
- if (CHECK(v - k != 256,
+ if (CHECK(v - k != 256, "elem_check",
"invalid updated k/v pair: %ld = %ld\n", k, v))
- return 1;
+ goto cleanup;
}
- if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1,
+ if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1, "elem_cnt",
"not all keys iterated after update: %llx\n", found_msk))
- return 1;
+ goto cleanup;
found_cnt = 0;
hashmap__for_each_key_entry(map, entry, (void *)0) {
found_cnt++;
}
- if (CHECK(!found_cnt, "didn't find any entries for key 0\n"))
- return 1;
+ if (CHECK(!found_cnt, "found_cnt",
+ "didn't find any entries for key 0\n"))
+ goto cleanup;
found_msk = 0;
found_cnt = 0;
@@ -173,30 +172,31 @@ int test_hashmap_generic(void)
found_cnt++;
found_msk |= 1ULL << (long)k;
- if (CHECK(!hashmap__delete(map, k, &oldk, &oldv),
+ if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del",
"failed to delete k/v %ld = %ld\n",
(long)k, (long)v))
- return 1;
- if (CHECK(oldk != k || oldv != v,
+ goto cleanup;
+ if (CHECK(oldk != k || oldv != v, "check_old",
"invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n",
(long)k, (long)v, (long)oldk, (long)oldv))
- return 1;
- if (CHECK(hashmap__delete(map, k, &oldk, &oldv),
+ goto cleanup;
+ if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del",
"unexpectedly deleted k/v %ld = %ld\n",
(long)oldk, (long)oldv))
- return 1;
+ goto cleanup;
}
- if (CHECK(!found_cnt || !found_msk,
+ if (CHECK(!found_cnt || !found_msk, "found_entries",
"didn't delete any key entries\n"))
- return 1;
- if (CHECK(hashmap__size(map) != ELEM_CNT - found_cnt,
+ goto cleanup;
+ if (CHECK(hashmap__size(map) != ELEM_CNT - found_cnt, "elem_cnt",
"invalid updated map size (already deleted: %d): %zu\n",
found_cnt, hashmap__size(map)))
- return 1;
+ goto cleanup;
if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+ "hashmap__capacity",
"unexpected map capacity: %zu\n", hashmap__capacity(map)))
- return 1;
+ goto cleanup;
hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
const void *oldk, *k;
@@ -208,53 +208,56 @@ int test_hashmap_generic(void)
found_cnt++;
found_msk |= 1ULL << (long)k;
- if (CHECK(!hashmap__delete(map, k, &oldk, &oldv),
+ if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del",
"failed to delete k/v %ld = %ld\n",
(long)k, (long)v))
- return 1;
- if (CHECK(oldk != k || oldv != v,
+ goto cleanup;
+ if (CHECK(oldk != k || oldv != v, "elem_check",
"invalid old k/v: expect %ld = %ld, got %ld = %ld\n",
(long)k, (long)v, (long)oldk, (long)oldv))
- return 1;
- if (CHECK(hashmap__delete(map, k, &oldk, &oldv),
+ goto cleanup;
+ if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del",
"unexpectedly deleted k/v %ld = %ld\n",
(long)k, (long)v))
- return 1;
+ goto cleanup;
}
if (CHECK(found_cnt != ELEM_CNT || found_msk != (1ULL << ELEM_CNT) - 1,
+ "found_cnt",
"not all keys were deleted: found_cnt:%d, found_msk:%llx\n",
found_cnt, found_msk))
- return 1;
- if (CHECK(hashmap__size(map) != 0,
+ goto cleanup;
+ if (CHECK(hashmap__size(map) != 0, "hashmap__size",
"invalid updated map size (already deleted: %d): %zu\n",
found_cnt, hashmap__size(map)))
- return 1;
+ goto cleanup;
found_cnt = 0;
hashmap__for_each_entry(map, entry, bkt) {
- CHECK(false, "unexpected map entries left: %ld = %ld\n",
- (long)entry->key, (long)entry->value);
- return 1;
+ CHECK(false, "elem_exists",
+ "unexpected map entries left: %ld = %ld\n",
+ (long)entry->key, (long)entry->value);
+ goto cleanup;
}
- hashmap__free(map);
+ hashmap__clear(map);
hashmap__for_each_entry(map, entry, bkt) {
- CHECK(false, "unexpected map entries left: %ld = %ld\n",
- (long)entry->key, (long)entry->value);
- return 1;
+ CHECK(false, "elem_exists",
+ "unexpected map entries left: %ld = %ld\n",
+ (long)entry->key, (long)entry->value);
+ goto cleanup;
}
- fprintf(stderr, "OK\n");
- return 0;
+cleanup:
+ hashmap__free(map);
}
-size_t collision_hash_fn(const void *k, void *ctx)
+static size_t collision_hash_fn(const void *k, void *ctx)
{
return 0;
}
-int test_hashmap_multimap(void)
+static void test_hashmap_multimap(void)
{
void *k1 = (void *)0, *k2 = (void *)1;
struct hashmap_entry *entry;
@@ -262,121 +265,116 @@ int test_hashmap_multimap(void)
long found_msk;
int err, bkt;
- fprintf(stderr, "%s: ", __func__);
-
/* force collisions */
map = hashmap__new(collision_hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
- return 1;
-
+ if (CHECK(IS_ERR(map), "hashmap__new",
+ "failed to create map: %ld\n", PTR_ERR(map)))
+ return;
/* set up multimap:
* [0] -> 1, 2, 4;
* [1] -> 8, 16, 32;
*/
err = hashmap__append(map, k1, (void *)1);
- if (CHECK(err, "failed to add k/v: %d\n", err))
- return 1;
+ if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+ goto cleanup;
err = hashmap__append(map, k1, (void *)2);
- if (CHECK(err, "failed to add k/v: %d\n", err))
- return 1;
+ if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+ goto cleanup;
err = hashmap__append(map, k1, (void *)4);
- if (CHECK(err, "failed to add k/v: %d\n", err))
- return 1;
+ if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+ goto cleanup;
err = hashmap__append(map, k2, (void *)8);
- if (CHECK(err, "failed to add k/v: %d\n", err))
- return 1;
+ if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+ goto cleanup;
err = hashmap__append(map, k2, (void *)16);
- if (CHECK(err, "failed to add k/v: %d\n", err))
- return 1;
+ if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+ goto cleanup;
err = hashmap__append(map, k2, (void *)32);
- if (CHECK(err, "failed to add k/v: %d\n", err))
- return 1;
+ if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+ goto cleanup;
- if (CHECK(hashmap__size(map) != 6,
+ if (CHECK(hashmap__size(map) != 6, "hashmap_size",
"invalid map size: %zu\n", hashmap__size(map)))
- return 1;
+ goto cleanup;
/* verify global iteration still works and sees all values */
found_msk = 0;
hashmap__for_each_entry(map, entry, bkt) {
found_msk |= (long)entry->value;
}
- if (CHECK(found_msk != (1 << 6) - 1,
+ if (CHECK(found_msk != (1 << 6) - 1, "found_msk",
"not all keys iterated: %lx\n", found_msk))
- return 1;
+ goto cleanup;
/* iterate values for key 1 */
found_msk = 0;
hashmap__for_each_key_entry(map, entry, k1) {
found_msk |= (long)entry->value;
}
- if (CHECK(found_msk != (1 | 2 | 4),
+ if (CHECK(found_msk != (1 | 2 | 4), "found_msk",
"invalid k1 values: %lx\n", found_msk))
- return 1;
+ goto cleanup;
/* iterate values for key 2 */
found_msk = 0;
hashmap__for_each_key_entry(map, entry, k2) {
found_msk |= (long)entry->value;
}
- if (CHECK(found_msk != (8 | 16 | 32),
+ if (CHECK(found_msk != (8 | 16 | 32), "found_msk",
"invalid k2 values: %lx\n", found_msk))
- return 1;
+ goto cleanup;
- fprintf(stderr, "OK\n");
- return 0;
+cleanup:
+ hashmap__free(map);
}
-int test_hashmap_empty()
+static void test_hashmap_empty()
{
struct hashmap_entry *entry;
int bkt;
struct hashmap *map;
void *k = (void *)0;
- fprintf(stderr, "%s: ", __func__);
-
/* force collisions */
map = hashmap__new(hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
- return 1;
+ if (CHECK(IS_ERR(map), "hashmap__new",
+ "failed to create map: %ld\n", PTR_ERR(map)))
+ goto cleanup;
- if (CHECK(hashmap__size(map) != 0,
+ if (CHECK(hashmap__size(map) != 0, "hashmap__size",
"invalid map size: %zu\n", hashmap__size(map)))
- return 1;
- if (CHECK(hashmap__capacity(map) != 0,
+ goto cleanup;
+ if (CHECK(hashmap__capacity(map) != 0, "hashmap__capacity",
"invalid map capacity: %zu\n", hashmap__capacity(map)))
- return 1;
- if (CHECK(hashmap__find(map, k, NULL), "unexpected find\n"))
- return 1;
- if (CHECK(hashmap__delete(map, k, NULL, NULL), "unexpected delete\n"))
- return 1;
+ goto cleanup;
+ if (CHECK(hashmap__find(map, k, NULL), "elem_find",
+ "unexpected find\n"))
+ goto cleanup;
+ if (CHECK(hashmap__delete(map, k, NULL, NULL), "elem_del",
+ "unexpected delete\n"))
+ goto cleanup;
hashmap__for_each_entry(map, entry, bkt) {
- CHECK(false, "unexpected iterated entry\n");
- return 1;
+ CHECK(false, "elem_found", "unexpected iterated entry\n");
+ goto cleanup;
}
hashmap__for_each_key_entry(map, entry, k) {
- CHECK(false, "unexpected key entry\n");
- return 1;
+ CHECK(false, "key_found", "unexpected key entry\n");
+ goto cleanup;
}
- fprintf(stderr, "OK\n");
- return 0;
+cleanup:
+ hashmap__free(map);
}
-int main(int argc, char **argv)
+void test_hashmap()
{
- bool failed = false;
-
- if (test_hashmap_generic())
- failed = true;
- if (test_hashmap_multimap())
- failed = true;
- if (test_hashmap_empty())
- failed = true;
-
- return failed;
+ if (test__start_subtest("generic"))
+ test_hashmap_generic();
+ if (test__start_subtest("multimap"))
+ test_hashmap_multimap();
+ if (test__start_subtest("empty"))
+ test_hashmap_empty();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index 7507c8f689bc..42c3a3103c26 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
struct meta {
int ifindex;
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c
new file mode 100644
index 000000000000..b295969b263b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <test_progs.h>
+#include "test_ksyms.skel.h"
+#include <sys/stat.h>
+
+static int duration;
+
+void test_ksyms(void)
+{
+ const char *btf_path = "/sys/kernel/btf/vmlinux";
+ struct test_ksyms *skel;
+ struct test_ksyms__data *data;
+ __u64 link_fops_addr, per_cpu_start_addr;
+ struct stat st;
+ __u64 btf_size;
+ int err;
+
+ err = kallsyms_find("bpf_link_fops", &link_fops_addr);
+ if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ return;
+ if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_link_fops' not found\n"))
+ return;
+
+ err = kallsyms_find("__per_cpu_start", &per_cpu_start_addr);
+ if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ return;
+ if (CHECK(err == -ENOENT, "ksym_find", "symbol 'per_cpu_start' not found\n"))
+ return;
+
+ if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno))
+ return;
+ btf_size = st.st_size;
+
+ skel = test_ksyms__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
+ return;
+
+ err = test_ksyms__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ data = skel->data;
+ CHECK(data->out__bpf_link_fops != link_fops_addr, "bpf_link_fops",
+ "got 0x%llx, exp 0x%llx\n",
+ data->out__bpf_link_fops, link_fops_addr);
+ CHECK(data->out__bpf_link_fops1 != 0, "bpf_link_fops1",
+ "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0);
+ CHECK(data->out__btf_size != btf_size, "btf_size",
+ "got %llu, exp %llu\n", data->out__btf_size, btf_size);
+ CHECK(data->out__per_cpu_start != per_cpu_start_addr, "__per_cpu_start",
+ "got %llu, exp %llu\n", data->out__per_cpu_start,
+ per_cpu_start_addr);
+
+cleanup:
+ test_ksyms__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
new file mode 100644
index 000000000000..b58b775d19f3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Google */
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "test_ksyms_btf.skel.h"
+#include "test_ksyms_btf_null_check.skel.h"
+
+static int duration;
+
+static void test_basic(void)
+{
+ __u64 runqueues_addr, bpf_prog_active_addr;
+ __u32 this_rq_cpu;
+ int this_bpf_prog_active;
+ struct test_ksyms_btf *skel = NULL;
+ struct test_ksyms_btf__data *data;
+ int err;
+
+ err = kallsyms_find("runqueues", &runqueues_addr);
+ if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ return;
+ if (CHECK(err == -ENOENT, "ksym_find", "symbol 'runqueues' not found\n"))
+ return;
+
+ err = kallsyms_find("bpf_prog_active", &bpf_prog_active_addr);
+ if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ return;
+ if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_prog_active' not found\n"))
+ return;
+
+ skel = test_ksyms_btf__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
+ goto cleanup;
+
+ err = test_ksyms_btf__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ data = skel->data;
+ CHECK(data->out__runqueues_addr != runqueues_addr, "runqueues_addr",
+ "got %llu, exp %llu\n",
+ (unsigned long long)data->out__runqueues_addr,
+ (unsigned long long)runqueues_addr);
+ CHECK(data->out__bpf_prog_active_addr != bpf_prog_active_addr, "bpf_prog_active_addr",
+ "got %llu, exp %llu\n",
+ (unsigned long long)data->out__bpf_prog_active_addr,
+ (unsigned long long)bpf_prog_active_addr);
+
+ CHECK(data->out__rq_cpu == -1, "rq_cpu",
+ "got %u, exp != -1\n", data->out__rq_cpu);
+ CHECK(data->out__bpf_prog_active < 0, "bpf_prog_active",
+ "got %d, exp >= 0\n", data->out__bpf_prog_active);
+ CHECK(data->out__cpu_0_rq_cpu != 0, "cpu_rq(0)->cpu",
+ "got %u, exp 0\n", data->out__cpu_0_rq_cpu);
+
+ this_rq_cpu = data->out__this_rq_cpu;
+ CHECK(this_rq_cpu != data->out__rq_cpu, "this_rq_cpu",
+ "got %u, exp %u\n", this_rq_cpu, data->out__rq_cpu);
+
+ this_bpf_prog_active = data->out__this_bpf_prog_active;
+ CHECK(this_bpf_prog_active != data->out__bpf_prog_active, "this_bpf_prog_active",
+ "got %d, exp %d\n", this_bpf_prog_active,
+ data->out__bpf_prog_active);
+
+cleanup:
+ test_ksyms_btf__destroy(skel);
+}
+
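+/* Expect the load to fail: the test program dereferences a ksym without a NULL check */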
+static void test_null_check(void)
+{
+ struct test_ksyms_btf_null_check *skel;
+
+ skel = test_ksyms_btf_null_check__open_and_load();
+ CHECK(skel, "skel_open", "unexpected load of a prog missing null check\n");
+
+ test_ksyms_btf_null_check__destroy(skel);
+}
+
+void test_ksyms_btf(void)
+{
+ int percpu_datasec;
+ struct btf *btf;
+
+ btf = libbpf_find_kernel_btf();
+ if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
+ PTR_ERR(btf)))
+ return;
+
+ percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
+ BTF_KIND_DATASEC);
+ btf__free(btf);
+ if (percpu_datasec < 0) {
+ printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n",
+ __func__);
+ test__skip();
+ return;
+ }
+
+ if (test__start_subtest("basic"))
+ test_basic();
+
+ if (test__start_subtest("null_check"))
+ test_null_check();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index eaf64595be88..8073105548ff 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
static void test_l4lb(const char *file)
{
@@ -79,9 +80,8 @@ out:
void test_l4lb_all(void)
{
- const char *file1 = "./test_l4lb.o";
- const char *file2 = "./test_l4lb_noinline.o";
-
- test_l4lb(file1);
- test_l4lb(file2);
+ if (test__start_subtest("l4lb_inline"))
+ test_l4lb("test_l4lb.o");
+ if (test__start_subtest("l4lb_noinline"))
+ test_l4lb("test_l4lb_noinline.o");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
new file mode 100644
index 000000000000..5a2a689dbb68
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+void test_load_bytes_relative(void)
+{
+ int server_fd, cgroup_fd, prog_fd, map_fd, client_fd;
+ int err;
+ struct bpf_object *obj;
+ struct bpf_program *prog;
+ struct bpf_map *test_result;
+ __u32 duration = 0;
+
+ __u32 map_key = 0;
+ __u32 map_value = 0;
+
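+ /* Attach a cgroup egress program, make a TCP connection, then expect it to have set test_result[0] to 1 */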
+ cgroup_fd = test__join_cgroup("/load_bytes_relative");
+ if (CHECK_FAIL(cgroup_fd < 0))
+ return;
+
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
+ if (CHECK_FAIL(server_fd < 0))
+ goto close_cgroup_fd;
+
+ err = bpf_prog_load("./load_bytes_relative.o", BPF_PROG_TYPE_CGROUP_SKB,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ goto close_server_fd;
+
+ test_result = bpf_object__find_map_by_name(obj, "test_result");
+ if (CHECK_FAIL(!test_result))
+ goto close_bpf_object;
+
+ map_fd = bpf_map__fd(test_result);
+ if (map_fd < 0)
+ goto close_bpf_object;
+
+ prog = bpf_object__find_program_by_name(obj, "load_bytes_relative");
+ if (CHECK_FAIL(!prog))
+ goto close_bpf_object;
+
+ err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI);
+ if (CHECK_FAIL(err))
+ goto close_bpf_object;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (CHECK_FAIL(client_fd < 0))
+ goto close_bpf_object;
+ close(client_fd);
+
+ err = bpf_map_lookup_elem(map_fd, &map_key, &map_value);
+ if (CHECK_FAIL(err))
+ goto close_bpf_object;
+
+ CHECK(map_value != 1, "bpf", "bpf program returned failure");
+
+close_bpf_object:
+ bpf_object__close(obj);
+
+close_server_fd:
+ close(server_fd);
+
+close_cgroup_fd:
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index 8f91f1881d11..ce17b1ed8709 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -1,5 +1,19 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
+
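+/* Repeatedly run the program under test from a dedicated thread to exercise the map lock */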
+static void *spin_lock_thread(void *arg)
+{
+ __u32 duration, retval;
+ int err, prog_fd = *(u32 *) arg;
+
+ err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+ pthread_exit(arg);
+}
static void *parallel_map_access(void *arg)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
new file mode 100644
index 000000000000..c230a573c373
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "map_ptr_kern.skel.h"
+
+void test_map_ptr(void)
+{
+ struct map_ptr_kern *skel;
+ __u32 duration = 0, retval;
+ char buf[128];
+ int err;
+
+ skel = map_ptr_kern__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
+ return;
+
+ err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4,
+ sizeof(pkt_v4), buf, NULL, &retval, NULL);
+
+ if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno))
+ goto cleanup;
+
+ if (CHECK(!retval, "retval", "retval=%d map_type=%u line=%u\n", retval,
+ skel->bss->g_map_type, skel->bss->g_line))
+ goto cleanup;
+
+cleanup:
+ map_ptr_kern__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/metadata.c b/tools/testing/selftests/bpf/prog_tests/metadata.c
new file mode 100644
index 000000000000..2c53eade88e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/metadata.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "metadata_unused.skel.h"
+#include "metadata_used.skel.h"
+
+static int duration;
+
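+/* Return 0 if the map behind map_fd appears in the program's map_ids, -ENOENT otherwise */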
+static int prog_holds_map(int prog_fd, int map_fd)
+{
+ struct bpf_prog_info prog_info = {};
+ struct bpf_prog_info map_info = {};
+ __u32 prog_info_len;
+ __u32 map_info_len;
+ __u32 *map_ids;
+ int nr_maps;
+ int ret;
+ int i;
+
+ map_info_len = sizeof(map_info);
+ ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
+ if (ret)
+ return -errno;
+
+ prog_info_len = sizeof(prog_info);
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ if (ret)
+ return -errno;
+
+ map_ids = calloc(prog_info.nr_map_ids, sizeof(__u32));
+ if (!map_ids)
+ return -ENOMEM;
+
+ nr_maps = prog_info.nr_map_ids;
+ memset(&prog_info, 0, sizeof(prog_info));
+ prog_info.nr_map_ids = nr_maps;
+ prog_info.map_ids = ptr_to_u64(map_ids);
+ prog_info_len = sizeof(prog_info);
+
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ if (ret) {
+ ret = -errno;
+ goto free_map_ids;
+ }
+
+ ret = -ENOENT;
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ if (map_ids[i] == map_info.id) {
+ ret = 0;
+ break;
+ }
+ }
+
+free_map_ids:
+ free(map_ids);
+ return ret;
+}
+
+static void test_metadata_unused(void)
+{
+ struct metadata_unused *obj;
+ int err;
+
+ obj = metadata_unused__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ err = prog_holds_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata));
+ if (CHECK(err, "prog-holds-rodata", "errno: %d", err))
+ return;
+
+ /* Assert that we can access the metadata in skel and the values are
+ * what we expect.
+ */
+ if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "foo",
+ sizeof(obj->rodata->bpf_metadata_a)),
+ "bpf_metadata_a", "expected \"foo\", value differ"))
+ goto close_bpf_object;
+ if (CHECK(obj->rodata->bpf_metadata_b != 1, "bpf_metadata_b",
+ "expected 1, got %d", obj->rodata->bpf_metadata_b))
+ goto close_bpf_object;
+
+ /* Assert that binding metadata map to prog again succeeds. */
+ err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata), NULL);
+ CHECK(err, "rebind_map", "errno %d, expected 0", errno);
+
+close_bpf_object:
+ metadata_unused__destroy(obj);
+}
+
+static void test_metadata_used(void)
+{
+ struct metadata_used *obj;
+ int err;
+
+ obj = metadata_used__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ err = prog_holds_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata));
+ if (CHECK(err, "prog-holds-rodata", "errno: %d", err))
+ return;
+
+ /* Assert that we can access the metadata in skel and the values are
+ * what we expect.
+ */
+ if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "bar",
+ sizeof(obj->rodata->bpf_metadata_a)),
+ "metadata_a", "expected \"bar\", value differ"))
+ goto close_bpf_object;
+ if (CHECK(obj->rodata->bpf_metadata_b != 2, "metadata_b",
+ "expected 2, got %d", obj->rodata->bpf_metadata_b))
+ goto close_bpf_object;
+
+ /* Assert that binding metadata map to prog again succeeds. */
+ err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata), NULL);
+ CHECK(err, "rebind_map", "errno %d, expected 0", errno);
+
+close_bpf_object:
+ metadata_used__destroy(obj);
+}
+
+void test_metadata(void)
+{
+ if (test__start_subtest("unused"))
+ test_metadata_unused();
+
+ if (test__start_subtest("used"))
+ test_metadata_used();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
index 43d0b5578f46..9c3c5c0f068f 100644
--- a/tools/testing/selftests/bpf/prog_tests/mmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -21,7 +21,7 @@ void test_mmap(void)
const long page_size = sysconf(_SC_PAGE_SIZE);
int err, duration = 0, i, data_map_fd, data_map_id, tmp_fd, rdmap_fd;
struct bpf_map *data_map, *bss_map;
- void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2;
+ void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp0, *tmp1, *tmp2;
struct test_mmap__bss *bss_data;
struct bpf_map_info map_info;
__u32 map_info_sz = sizeof(map_info);
@@ -183,16 +183,23 @@ void test_mmap(void)
/* check some more advanced mmap() manipulations */
+ tmp0 = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS,
+ -1, 0);
+ if (CHECK(tmp0 == MAP_FAILED, "adv_mmap0", "errno %d\n", errno))
+ goto cleanup;
+
/* map all but last page: pages 1-3 mapped */
- tmp1 = mmap(NULL, 3 * page_size, PROT_READ, MAP_SHARED,
+ tmp1 = mmap(tmp0, 3 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED,
data_map_fd, 0);
- if (CHECK(tmp1 == MAP_FAILED, "adv_mmap1", "errno %d\n", errno))
+ if (CHECK(tmp0 != tmp1, "adv_mmap1", "tmp0: %p, tmp1: %p\n", tmp0, tmp1)) {
+ munmap(tmp0, 4 * page_size);
goto cleanup;
+ }
/* unmap second page: pages 1, 3 mapped */
err = munmap(tmp1 + page_size, page_size);
if (CHECK(err, "adv_mmap2", "errno %d\n", errno)) {
- munmap(tmp1, map_sz);
+ munmap(tmp1, 4 * page_size);
goto cleanup;
}
@@ -201,7 +208,7 @@ void test_mmap(void)
MAP_SHARED | MAP_FIXED, data_map_fd, 0);
if (CHECK(tmp2 == MAP_FAILED, "adv_mmap3", "errno %d\n", errno)) {
munmap(tmp1, page_size);
- munmap(tmp1 + 2*page_size, page_size);
+ munmap(tmp1 + 2*page_size, 2 * page_size);
goto cleanup;
}
CHECK(tmp1 + page_size != tmp2, "adv_mmap4",
@@ -211,7 +218,7 @@ void test_mmap(void)
tmp2 = mmap(tmp1, 4 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED,
data_map_fd, 0);
if (CHECK(tmp2 == MAP_FAILED, "adv_mmap5", "errno %d\n", errno)) {
- munmap(tmp1, 3 * page_size); /* unmap page 1 */
+ munmap(tmp1, 4 * page_size); /* unmap page 1 */
goto cleanup;
}
CHECK(tmp1 != tmp2, "adv_mmap6", "tmp1: %p, tmp2: %p\n", tmp1, tmp2);
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index 542240e16564..e74dc501b27f 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -80,9 +80,6 @@ void test_ns_current_pid_tgid(void)
"User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid))
goto cleanup;
cleanup:
- if (!link) {
- bpf_link__destroy(link);
- link = NULL;
- }
+ bpf_link__destroy(link);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c b/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c
new file mode 100644
index 000000000000..673d38395253
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+#include <linux/bpf.h>
+#include "test_pe_preserve_elems.skel.h"
+
+static int duration;
+
+static void test_one_map(struct bpf_map *map, struct bpf_program *prog,
+ bool has_share_pe)
+{
+ int err, key = 0, pfd = -1, mfd = bpf_map__fd(map);
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct perf_event_attr attr = {
+ .size = sizeof(struct perf_event_attr),
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_CPU_CLOCK,
+ };
+
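+ /* Open a software perf event and store its FD in the map entry under test */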
+ pfd = syscall(__NR_perf_event_open, &attr, 0 /* pid */,
+ -1 /* any cpu */, -1 /* group id */, 0 /* flags */);
+ if (CHECK(pfd < 0, "perf_event_open", "failed\n"))
+ return;
+
+ err = bpf_map_update_elem(mfd, &key, &pfd, BPF_ANY);
+ close(pfd);
+ if (CHECK(err < 0, "bpf_map_update_elem", "failed\n"))
+ return;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+ if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n"))
+ return;
+ if (CHECK(opts.retval != 0, "bpf_perf_event_read_value",
+ "failed with %d\n", opts.retval))
+ return;
+
+ /* closing mfd, prog still holds a reference on map */
+ close(mfd);
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+ if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n"))
+ return;
+
+ if (has_share_pe) {
+ CHECK(opts.retval != 0, "bpf_perf_event_read_value",
+ "failed with %d\n", opts.retval);
+ } else {
+ CHECK(opts.retval != -ENOENT, "bpf_perf_event_read_value",
+ "should have failed with %d, but got %d\n", -ENOENT,
+ opts.retval);
+ }
+}
+
+void test_pe_preserve_elems(void)
+{
+ struct test_pe_preserve_elems *skel;
+
+ skel = test_pe_preserve_elems__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ test_one_map(skel->maps.array_1, skel->progs.read_array_1, false);
+ test_one_map(skel->maps.array_2, skel->progs.read_array_2, true);
+
+ test_pe_preserve_elems__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index 1450ea2dd4cc..ca9f0895ec84 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -4,8 +4,16 @@
#include <sched.h>
#include <sys/socket.h>
#include <test_progs.h>
+#include "test_perf_buffer.skel.h"
#include "bpf/libbpf_internal.h"
+static int duration;
+
+/* AddressSanitizer sometimes crashes due to data dereference below, due to
+ * this being mmap()'ed memory. Disable instrumentation with
+ * no_sanitize_address attribute
+ */
+__attribute__((no_sanitize_address))
static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
int cpu_data = *(int *)data, duration = 0;
@@ -18,18 +26,31 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
CPU_SET(cpu, cpu_seen);
}
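+/* Pin the current thread to the given CPU and call usleep() so the kprobe fires there */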
+int trigger_on_cpu(int cpu)
+{
+ cpu_set_t cpu_set;
+ int err;
+
+ CPU_ZERO(&cpu_set);
+ CPU_SET(cpu, &cpu_set);
+
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n", cpu, err))
+ return err;
+
+ usleep(1);
+
+ return 0;
+}
+
void test_perf_buffer(void)
{
- int err, prog_fd, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0;
- const char *prog_name = "kprobe/sys_nanosleep";
- const char *file = "./test_perf_buffer.o";
+ int err, on_len, nr_on_cpus = 0, nr_cpus, i;
struct perf_buffer_opts pb_opts = {};
- struct bpf_map *perf_buf_map;
- cpu_set_t cpu_set, cpu_seen;
- struct bpf_program *prog;
- struct bpf_object *obj;
+ struct test_perf_buffer *skel;
+ cpu_set_t cpu_seen;
struct perf_buffer *pb;
- struct bpf_link *link;
+ int last_fd = -1, fd;
bool *online;
nr_cpus = libbpf_num_possible_cpus();
@@ -46,33 +67,24 @@ void test_perf_buffer(void)
nr_on_cpus++;
/* load program */
- err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
- if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) {
- obj = NULL;
- goto out_close;
- }
-
- prog = bpf_object__find_program_by_title(obj, prog_name);
- if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
+ skel = test_perf_buffer__open_and_load();
+ if (CHECK(!skel, "skel_load", "skeleton open/load failed\n"))
goto out_close;
- /* load map */
- perf_buf_map = bpf_object__find_map_by_name(obj, "perf_buf_map");
- if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
- goto out_close;
-
- /* attach kprobe */
- link = bpf_program__attach_kprobe(prog, false /* retprobe */,
- SYS_NANOSLEEP_KPROBE_NAME);
- if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+ /* attach probe */
+ err = test_perf_buffer__attach(skel);
+ if (CHECK(err, "attach_kprobe", "err %d\n", err))
goto out_close;
/* set up perf buffer */
pb_opts.sample_cb = on_sample;
pb_opts.ctx = &cpu_seen;
- pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
+ pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts);
if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
- goto out_detach;
+ goto out_close;
+
+ CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd",
+ "bad fd: %d\n", perf_buffer__epoll_fd(pb));
/* trigger kprobe on every CPU */
CPU_ZERO(&cpu_seen);
@@ -82,16 +94,8 @@ void test_perf_buffer(void)
continue;
}
- CPU_ZERO(&cpu_set);
- CPU_SET(i, &cpu_set);
-
- err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set),
- &cpu_set);
- if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n",
- i, err))
- goto out_detach;
-
- usleep(1);
+ if (trigger_on_cpu(i))
+ goto out_close;
}
/* read perf buffer */
@@ -103,11 +107,37 @@ void test_perf_buffer(void)
"expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen)))
goto out_free_pb;
+ if (CHECK(perf_buffer__buffer_cnt(pb) != nr_cpus, "buf_cnt",
+ "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_cpus))
+ goto out_close;
+
+ for (i = 0; i < nr_cpus; i++) {
+ if (i >= on_len || !online[i])
+ continue;
+
+ fd = perf_buffer__buffer_fd(pb, i);
+ CHECK(fd < 0 || last_fd == fd, "fd_check", "last fd %d == fd %d\n", last_fd, fd);
+ last_fd = fd;
+
+ err = perf_buffer__consume_buffer(pb, i);
+ if (CHECK(err, "drain_buf", "cpu %d, err %d\n", i, err))
+ goto out_close;
+
+ CPU_CLR(i, &cpu_seen);
+ if (trigger_on_cpu(i))
+ goto out_close;
+
+ err = perf_buffer__consume_buffer(pb, i);
+ if (CHECK(err, "consume_buf", "cpu %d, err %d\n", i, err))
+ goto out_close;
+
+ if (CHECK(!CPU_ISSET(i, &cpu_seen), "cpu_seen", "cpu %d not seen\n", i))
+ goto out_close;
+ }
+
out_free_pb:
perf_buffer__free(pb);
-out_detach:
- bpf_link__destroy(link);
out_close:
- bpf_object__close(obj);
+ test_perf_buffer__destroy(skel);
free(online);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
new file mode 100644
index 000000000000..72c3690844fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <test_progs.h>
+#include "perf_event_stackmap.skel.h"
+
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
+noinline int func_1(void)
+{
+ static int val = 1;
+
+ val += 1;
+
+ usleep(100);
+ return val;
+}
+
+noinline int func_2(void)
+{
+ return func_1();
+}
+
+noinline int func_3(void)
+{
+ return func_2();
+}
+
+noinline int func_4(void)
+{
+ return func_3();
+}
+
+noinline int func_5(void)
+{
+ return func_4();
+}
+
+noinline int func_6(void)
+{
+ int i, val = 1;
+
+ for (i = 0; i < 100; i++)
+ val += func_5();
+
+ return val;
+}
+
+void test_perf_event_stackmap(void)
+{
+ struct perf_event_attr attr = {
+ /* .type = PERF_TYPE_SOFTWARE, */
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .precise_ip = 2,
+ .sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK |
+ PERF_SAMPLE_CALLCHAIN,
+ .branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_CALL_STACK,
+ .sample_period = 5000,
+ .size = sizeof(struct perf_event_attr),
+ };
+ struct perf_event_stackmap *skel;
+ __u32 duration = 0;
+ cpu_set_t cpu_set;
+ int pmu_fd, err;
+
+ skel = perf_event_stackmap__open();
+
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+ return;
+
+ err = perf_event_stackmap__load(skel);
+ if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
+ goto cleanup;
+
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ if (CHECK(err, "set_affinity", "err %d, errno %d\n", err, errno))
+ goto cleanup;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (pmu_fd < 0) {
+ printf("%s:SKIP:cpu doesn't support the event\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
+ "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+ close(pmu_fd);
+ goto cleanup;
+ }
+
+ /* create kernel and user stack traces for testing */
+ func_6();
+
+ CHECK(skel->data->stackid_kernel != 2, "get_stackid_kernel", "failed\n");
+ CHECK(skel->data->stackid_user != 2, "get_stackid_user", "failed\n");
+ CHECK(skel->data->stack_kernel != 2, "get_stack_kernel", "failed\n");
+ CHECK(skel->data->stack_user != 2, "get_stack_user", "failed\n");
+
+cleanup:
+ perf_event_stackmap__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c
index 041952524c55..fcf54b3a1dd0 100644
--- a/tools/testing/selftests/bpf/prog_tests/pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/pinning.c
@@ -37,7 +37,7 @@ void test_pinning(void)
struct stat statbuf = {};
struct bpf_object *obj;
struct bpf_map *map;
- int err;
+ int err, map_fd;
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
.pin_root_path = custpath,
);
@@ -213,6 +213,53 @@ void test_pinning(void)
if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
goto out;
+ /* remove the custom pin path to re-test it with reuse fd below */
+ err = unlink(custpinpath);
+ if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno))
+ goto out;
+
+ err = rmdir(custpath);
+ if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno))
+ goto out;
+
+ bpf_object__close(obj);
+
+ /* test pinning at custom path with reuse fd */
+ obj = bpf_object__open_file(file, NULL);
+ err = libbpf_get_error(obj);
+ if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) {
+ obj = NULL;
+ goto out;
+ }
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32),
+ sizeof(__u64), 1, 0);
+ if (CHECK(map_fd < 0, "create pinmap manually", "fd %d\n", map_fd))
+ goto out;
+
+ map = bpf_object__find_map_by_name(obj, "pinmap");
+ if (CHECK(!map, "find map", "NULL map"))
+ goto close_map_fd;
+
+ err = bpf_map__reuse_fd(map, map_fd);
+ if (CHECK(err, "reuse pinmap fd", "err %d errno %d\n", err, errno))
+ goto close_map_fd;
+
+ err = bpf_map__set_pin_path(map, custpinpath);
+ if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
+ goto close_map_fd;
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "custom load", "err %d errno %d\n", err, errno))
+ goto close_map_fd;
+
+ /* check that pinmap was pinned at the custom path */
+ err = stat(custpinpath, &statbuf);
+ if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
+ goto close_map_fd;
+
+close_map_fd:
+ close(map_fd);
out:
unlink(pinpath);
unlink(nopinpath);
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
index a2537dfa899c..44b514fabccd 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
void test_pkt_access(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
index 5f7aea605019..939015cd6dba 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
void test_pkt_md_access(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
index 5dd89b941f53..935a294f049a 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
void test_prog_run_xattr(void)
{
@@ -27,7 +28,7 @@ void test_prog_run_xattr(void)
"err %d errno %d retval %d\n", err, errno, tattr.retval);
CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out",
- "incorrect output size, want %lu have %u\n",
+ "incorrect output size, want %zu have %u\n",
sizeof(pkt_v4), tattr.data_size_out);
CHECK_ATTR(buf[5] != 0, "overflow",
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index faccc66f4e39..f47e7b1cb32c 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
enum {
QUEUE,
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
new file mode 100644
index 000000000000..c5fb191874ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+#include <linux/bpf.h>
+#include "bpf/libbpf_internal.h"
+#include "test_raw_tp_test_run.skel.h"
+
+static int duration;
+
+void test_raw_tp_test_run(void)
+{
+ struct bpf_prog_test_run_attr test_attr = {};
+ int comm_fd = -1, err, nr_online, i, prog_fd;
+ __u64 args[2] = {0x1234ULL, 0x5678ULL};
+ int expected_retval = 0x1234 + 0x5678;
+ struct test_raw_tp_test_run *skel;
+ char buf[] = "new_name";
+ bool *online = NULL;
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ .flags = BPF_F_TEST_RUN_ON_CPU,
+ );
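+
+ /* The BPF side (progs/test_raw_tp_test_run.c) is not part of this hunk.
+ * Judging from expected_retval and the on_cpu checks below, the rename
+ * program is roughly the following sketch (details are assumptions):
+ *
+ *	SEC("raw_tp/task_rename")
+ *	int BPF_PROG(rename, struct task_struct *task, const char *comm)
+ *	{
+ *		count++;
+ *		// record the CPU only for the synthetic test_run context
+ *		if ((unsigned long)task == 0x1234)
+ *			on_cpu = bpf_get_smp_processor_id();
+ *		return (long)task + (long)comm;
+ *	}
+ *
+ * so a test run with ctx_in = {0x1234, 0x5678} should return their sum.
+ */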
+
+ err = parse_cpu_mask_file("/sys/devices/system/cpu/online", &online,
+ &nr_online);
+ if (CHECK(err, "parse_cpu_mask_file", "err %d\n", err))
+ return;
+
+ skel = test_raw_tp_test_run__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ goto cleanup;
+
+ err = test_raw_tp_test_run__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ comm_fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+ if (CHECK(comm_fd < 0, "open /proc/self/comm", "err %d\n", errno))
+ goto cleanup;
+
+ err = write(comm_fd, buf, sizeof(buf));
+ CHECK(err < 0, "task rename", "err %d\n", errno);
+
+ CHECK(skel->bss->count == 0, "check_count", "didn't increase\n");
+ CHECK(skel->data->on_cpu != 0xffffffff, "check_on_cpu", "got wrong value\n");
+
+ prog_fd = bpf_program__fd(skel->progs.rename);
+ test_attr.prog_fd = prog_fd;
+ test_attr.ctx_in = args;
+ test_attr.ctx_size_in = sizeof(__u64);
+
+ err = bpf_prog_test_run_xattr(&test_attr);
+ CHECK(err == 0, "test_run", "should fail for too small ctx\n");
+
+ test_attr.ctx_size_in = sizeof(args);
+ err = bpf_prog_test_run_xattr(&test_attr);
+ CHECK(err < 0, "test_run", "err %d\n", errno);
+ CHECK(test_attr.retval != expected_retval, "check_retval",
+ "expect 0x%x, got 0x%x\n", expected_retval, test_attr.retval);
+
+ for (i = 0; i < nr_online; i++) {
+ if (!online[i])
+ continue;
+
+ opts.cpu = i;
+ opts.retval = 0;
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ CHECK(err < 0, "test_run_opts", "err %d\n", errno);
+ CHECK(skel->data->on_cpu != i, "check_on_cpu",
+ "expect %d got %d\n", i, skel->data->on_cpu);
+ CHECK(opts.retval != expected_retval,
+ "check_retval", "expect 0x%x, got 0x%x\n",
+ expected_retval, opts.retval);
+ }
+
+ /* invalid cpu ID should fail with ENXIO */
+ opts.cpu = 0xffffffff;
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ CHECK(err != -1 || errno != ENXIO,
+ "test_run_opts_fail",
+ "should failed with ENXIO\n");
+
+ /* non-zero cpu w/o BPF_F_TEST_RUN_ON_CPU should fail with EINVAL */
+ opts.cpu = 1;
+ opts.flags = 0;
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ CHECK(err != -1 || errno != EINVAL,
+ "test_run_opts_fail",
+ "should failed with EINVAL\n");
+
+cleanup:
+ close(comm_fd);
+ test_raw_tp_test_run__destroy(skel);
+ free(online);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index fc0d7f4f02cf..ac1ee10cffd8 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -27,7 +27,7 @@ void test_reference_tracking(void)
const char *title;
/* Ignore .text sections */
- title = bpf_program__title(prog, false);
+ title = bpf_program__section_name(prog);
if (strstr(title, ".text") != NULL)
continue;
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
new file mode 100644
index 000000000000..6ace5e9efec1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/err.h>
+#include <string.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
+#include <linux/kernel.h>
+#define CONFIG_DEBUG_INFO_BTF
+#include <linux/btf_ids.h>
+#include "test_progs.h"
+
+static int duration;
+
+struct symbol {
+ const char *name;
+ int type;
+ int id;
+};
+
+struct symbol test_symbols[] = {
+ { "unused", BTF_KIND_UNKN, 0 },
+ { "S", BTF_KIND_TYPEDEF, -1 },
+ { "T", BTF_KIND_TYPEDEF, -1 },
+ { "U", BTF_KIND_TYPEDEF, -1 },
+ { "S", BTF_KIND_STRUCT, -1 },
+ { "U", BTF_KIND_UNION, -1 },
+ { "func", BTF_KIND_FUNC, -1 },
+};
+
+/* Align the .BTF_ids section to 4 bytes */
+asm (
+".pushsection " BTF_IDS_SECTION " ,\"a\"; \n"
+".balign 4, 0; \n"
+".popsection; \n");
+
+BTF_ID_LIST(test_list_local)
+BTF_ID_UNUSED
+BTF_ID(typedef, S)
+BTF_ID(typedef, T)
+BTF_ID(typedef, U)
+BTF_ID(struct, S)
+BTF_ID(union, U)
+BTF_ID(func, func)
+
+extern __u32 test_list_global[];
+BTF_ID_LIST_GLOBAL(test_list_global)
+BTF_ID_UNUSED
+BTF_ID(typedef, S)
+BTF_ID(typedef, T)
+BTF_ID(typedef, U)
+BTF_ID(struct, S)
+BTF_ID(union, U)
+BTF_ID(func, func)
+
+BTF_SET_START(test_set)
+BTF_ID(typedef, S)
+BTF_ID(typedef, T)
+BTF_ID(typedef, U)
+BTF_ID(struct, S)
+BTF_ID(union, U)
+BTF_ID(func, func)
+BTF_SET_END(test_set)
+
+static int
+__resolve_symbol(struct btf *btf, int type_id)
+{
+ const struct btf_type *type;
+ const char *str;
+ unsigned int i;
+
+ type = btf__type_by_id(btf, type_id);
+ if (!type) {
+ PRINT_FAIL("Failed to get type for ID %d\n", type_id);
+ return -1;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(test_symbols); i++) {
+ if (test_symbols[i].id != -1)
+ continue;
+
+ if (BTF_INFO_KIND(type->info) != test_symbols[i].type)
+ continue;
+
+ str = btf__name_by_offset(btf, type->name_off);
+ if (!str) {
+ PRINT_FAIL("Failed to get name for BTF ID %d\n", type_id);
+ return -1;
+ }
+
+ if (!strcmp(str, test_symbols[i].name))
+ test_symbols[i].id = type_id;
+ }
+
+ return 0;
+}
+
+static int resolve_symbols(void)
+{
+ struct btf *btf;
+ int type_id;
+ __u32 nr;
+
+ btf = btf__parse_elf("btf_data.o", NULL);
+ if (CHECK(libbpf_get_error(btf), "resolve",
+ "Failed to load BTF from btf_data.o\n"))
+ return -1;
+
+ nr = btf__get_nr_types(btf);
+
+ for (type_id = 1; type_id <= nr; type_id++) {
+ if (__resolve_symbol(btf, type_id))
+ break;
+ }
+
+ btf__free(btf);
+ return 0;
+}
+
+int test_resolve_btfids(void)
+{
+ __u32 *test_list, *test_lists[] = { test_list_local, test_list_global };
+ unsigned int i, j;
+ int ret = 0;
+
+ if (resolve_symbols())
+ return -1;
+
+ /* Check BTF_ID_LIST(test_list_local) and
+ * BTF_ID_LIST_GLOBAL(test_list_global) IDs
+ */
+ for (j = 0; j < ARRAY_SIZE(test_lists); j++) {
+ test_list = test_lists[j];
+ for (i = 0; i < ARRAY_SIZE(test_symbols); i++) {
+ ret = CHECK(test_list[i] != test_symbols[i].id,
+ "id_check",
+ "wrong ID for %s (%d != %d)\n",
+ test_symbols[i].name,
+ test_list[i], test_symbols[i].id);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* Check BTF_SET_START(test_set) IDs */
+ for (i = 0; i < test_set.cnt; i++) {
+ bool found = false;
+
+ for (j = 0; j < ARRAY_SIZE(test_symbols); j++) {
+ if (test_symbols[j].id != test_set.ids[i])
+ continue;
+ found = true;
+ break;
+ }
+
+ ret = CHECK(!found, "id_check",
+ "ID %d not found in test_symbols\n",
+ test_set.ids[i]);
+ if (ret)
+ break;
+
+ if (i > 0) {
+ ret = CHECK(test_set.ids[i - 1] > test_set.ids[i],
+ "sort_check",
+ "test_set is not sorted\n");
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
new file mode 100644
index 000000000000..c1650548433c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <sys/epoll.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/sysinfo.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include "test_ringbuf.skel.h"
+
+#define EDONE 7777
+
+static int duration = 0;
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+static int sample_cnt;
+
+static void atomic_inc(int *cnt)
+{
+ __atomic_add_fetch(cnt, 1, __ATOMIC_SEQ_CST);
+}
+
+static int atomic_xchg(int *cnt, int val)
+{
+ return __atomic_exchange_n(cnt, val, __ATOMIC_SEQ_CST);
+}
+
+static int process_sample(void *ctx, void *data, size_t len)
+{
+ struct sample *s = data;
+
+ atomic_inc(&sample_cnt);
+
+ switch (s->seq) {
+ case 0:
+ CHECK(s->value != 333, "sample1_value", "exp %ld, got %ld\n",
+ 333L, s->value);
+ return 0;
+ case 1:
+ CHECK(s->value != 777, "sample2_value", "exp %ld, got %ld\n",
+ 777L, s->value);
+ return -EDONE;
+ default:
+ /* we don't care about the rest */
+ return 0;
+ }
+}
+
+static struct test_ringbuf *skel;
+static struct ring_buffer *ringbuf;
+
+static void trigger_samples()
+{
+ skel->bss->dropped = 0;
+ skel->bss->total = 0;
+ skel->bss->discarded = 0;
+
+ /* trigger exactly two samples */
+ skel->bss->value = 333;
+ syscall(__NR_getpgid);
+ skel->bss->value = 777;
+ syscall(__NR_getpgid);
+}
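+
+/* The producer (progs/test_ringbuf.c) is not part of this hunk. On each
+ * getpgid() from the selected pid it uses the ringbuf API roughly as in
+ * this sketch; the branch choosing submit vs. discard is an assumption,
+ * only the counters and the wakeup flags matter for the checks below:
+ *
+ *	sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
+ *	if (!sample) {
+ *		__sync_fetch_and_add(&dropped, 1);
+ *		return 0;
+ *	}
+ *	sample->seq = total;
+ *	sample->value = value;
+ *	__sync_fetch_and_add(&total, 1);
+ *	bpf_ringbuf_submit(sample, flags);	// or bpf_ringbuf_discard(sample, flags)
+ *
+ * where flags is 0 (adaptive), BPF_RB_NO_WAKEUP or BPF_RB_FORCE_WAKEUP,
+ * mirroring skel->bss->flags set by this test.
+ */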
+
+static void *poll_thread(void *input)
+{
+ long timeout = (long)input;
+
+ return (void *)(long)ring_buffer__poll(ringbuf, timeout);
+}
+
+void test_ringbuf(void)
+{
+ const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
+ pthread_t thread;
+ long bg_ret = -1;
+ int err, cnt;
+
+ skel = test_ringbuf__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ return;
+
+ /* only trigger BPF program for current process */
+ skel->bss->pid = getpid();
+
+ ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf),
+ process_sample, NULL, NULL);
+ if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
+ goto cleanup;
+
+ err = test_ringbuf__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
+ goto cleanup;
+
+ trigger_samples();
+
+ /* 2 submitted + 1 discarded records */
+ CHECK(skel->bss->avail_data != 3 * rec_sz,
+ "err_avail_size", "exp %ld, got %ld\n",
+ 3L * rec_sz, skel->bss->avail_data);
+ CHECK(skel->bss->ring_size != 4096,
+ "err_ring_size", "exp %ld, got %ld\n",
+ 4096L, skel->bss->ring_size);
+ CHECK(skel->bss->cons_pos != 0,
+ "err_cons_pos", "exp %ld, got %ld\n",
+ 0L, skel->bss->cons_pos);
+ CHECK(skel->bss->prod_pos != 3 * rec_sz,
+ "err_prod_pos", "exp %ld, got %ld\n",
+ 3L * rec_sz, skel->bss->prod_pos);
+
+ /* poll for samples */
+ err = ring_buffer__poll(ringbuf, -1);
+
+ /* -EDONE is used as an indicator that we are done */
+ if (CHECK(err != -EDONE, "err_done", "done err: %d\n", err))
+ goto cleanup;
+ cnt = atomic_xchg(&sample_cnt, 0);
+ CHECK(cnt != 2, "cnt", "exp %d samples, got %d\n", 2, cnt);
+
+ /* we expect extra polling to return nothing */
+ err = ring_buffer__poll(ringbuf, 0);
+ if (CHECK(err != 0, "extra_samples", "poll result: %d\n", err))
+ goto cleanup;
+ cnt = atomic_xchg(&sample_cnt, 0);
+ CHECK(cnt != 0, "cnt", "exp %d samples, got %d\n", 0, cnt);
+
+ CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+ 0L, skel->bss->dropped);
+ CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+ 2L, skel->bss->total);
+ CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
+ 1L, skel->bss->discarded);
+
+ /* now validate consumer position is updated and returned */
+ trigger_samples();
+ CHECK(skel->bss->cons_pos != 3 * rec_sz,
+ "err_cons_pos", "exp %ld, got %ld\n",
+ 3L * rec_sz, skel->bss->cons_pos);
+ err = ring_buffer__poll(ringbuf, -1);
+ CHECK(err <= 0, "poll_err", "err %d\n", err);
+ cnt = atomic_xchg(&sample_cnt, 0);
+ CHECK(cnt != 2, "cnt", "exp %d samples, got %d\n", 2, cnt);
+
+ /* start poll in background w/ long timeout */
+ err = pthread_create(&thread, NULL, poll_thread, (void *)(long)10000);
+ if (CHECK(err, "bg_poll", "pthread_create failed: %d\n", err))
+ goto cleanup;
+
+ /* turn off notifications now */
+ skel->bss->flags = BPF_RB_NO_WAKEUP;
+
+ /* give background thread a bit of a time */
+ usleep(50000);
+ trigger_samples();
+ /* sleeping arbitrarily is bad, but no better way to know that
+ * epoll_wait() **DID NOT** unblock in background thread
+ */
+ usleep(50000);
+ /* background poll should still be blocked */
+ err = pthread_tryjoin_np(thread, (void **)&bg_ret);
+ if (CHECK(err != EBUSY, "try_join", "err %d\n", err))
+ goto cleanup;
+
+ /* BPF side did everything right */
+ CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+ 0L, skel->bss->dropped);
+ CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+ 2L, skel->bss->total);
+ CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
+ 1L, skel->bss->discarded);
+ cnt = atomic_xchg(&sample_cnt, 0);
+ CHECK(cnt != 0, "cnt", "exp %d samples, got %d\n", 0, cnt);
+
+ /* clear flags to return to "adaptive" notification mode */
+ skel->bss->flags = 0;
+
+ /* produce new samples, no notification should be triggered, because
+ * consumer is now behind
+ */
+ trigger_samples();
+
+ /* background poll should still be blocked */
+ err = pthread_tryjoin_np(thread, (void **)&bg_ret);
+ if (CHECK(err != EBUSY, "try_join", "err %d\n", err))
+ goto cleanup;
+
+ /* still no samples, because consumer is behind */
+ cnt = atomic_xchg(&sample_cnt, 0);
+ CHECK(cnt != 0, "cnt", "exp %d samples, got %d\n", 0, cnt);
+
+ skel->bss->dropped = 0;
+ skel->bss->total = 0;
+ skel->bss->discarded = 0;
+
+ skel->bss->value = 333;
+ syscall(__NR_getpgid);
+ /* now force notifications */
+ skel->bss->flags = BPF_RB_FORCE_WAKEUP;
+ skel->bss->value = 777;
+ syscall(__NR_getpgid);
+
+ /* now we should get a pending notification */
+ usleep(50000);
+ err = pthread_tryjoin_np(thread, (void **)&bg_ret);
+ if (CHECK(err, "join_bg", "err %d\n", err))
+ goto cleanup;
+
+ if (CHECK(bg_ret != 1, "bg_ret", "epoll_wait result: %ld\n", bg_ret))
+ goto cleanup;
+
+ /* 3 rounds, 2 samples each */
+ cnt = atomic_xchg(&sample_cnt, 0);
+ CHECK(cnt != 6, "cnt", "exp %d samples, got %d\n", 6, cnt);
+
+ /* BPF side did everything right */
+ CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+ 0L, skel->bss->dropped);
+ CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+ 2L, skel->bss->total);
+ CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
+ 1L, skel->bss->discarded);
+
+ test_ringbuf__detach(skel);
+cleanup:
+ ring_buffer__free(ringbuf);
+ test_ringbuf__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
new file mode 100644
index 000000000000..78e450609803
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <sys/epoll.h>
+#include "test_ringbuf_multi.skel.h"
+
+static int duration = 0;
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+static int process_sample(void *ctx, void *data, size_t len)
+{
+ int ring = (unsigned long)ctx;
+ struct sample *s = data;
+
+ switch (s->seq) {
+ case 0:
+ CHECK(ring != 1, "sample1_ring", "exp %d, got %d\n", 1, ring);
+ CHECK(s->value != 333, "sample1_value", "exp %ld, got %ld\n",
+ 333L, s->value);
+ break;
+ case 1:
+ CHECK(ring != 2, "sample2_ring", "exp %d, got %d\n", 2, ring);
+ CHECK(s->value != 777, "sample2_value", "exp %ld, got %ld\n",
+ 777L, s->value);
+ break;
+ default:
+ CHECK(true, "extra_sample", "unexpected sample seq %d, val %ld\n",
+ s->seq, s->value);
+ return -1;
+ }
+
+ return 0;
+}
+
+void test_ringbuf_multi(void)
+{
+ struct test_ringbuf_multi *skel;
+ struct ring_buffer *ringbuf;
+ int err;
+
+ skel = test_ringbuf_multi__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ return;
+
+ /* only trigger BPF program for current process */
+ skel->bss->pid = getpid();
+
+ ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf1),
+ process_sample, (void *)(long)1, NULL);
+ if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
+ goto cleanup;
+
+ err = ring_buffer__add(ringbuf, bpf_map__fd(skel->maps.ringbuf2),
+ process_sample, (void *)(long)2);
+ if (CHECK(err, "ringbuf_add", "failed to add another ring\n"))
+ goto cleanup;
+
+ err = test_ringbuf_multi__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger few samples, some will be skipped */
+ skel->bss->target_ring = 0;
+ skel->bss->value = 333;
+ syscall(__NR_getpgid);
+
+ /* skipped, no ringbuf in slot 1 */
+ skel->bss->target_ring = 1;
+ skel->bss->value = 555;
+ syscall(__NR_getpgid);
+
+ skel->bss->target_ring = 2;
+ skel->bss->value = 777;
+ syscall(__NR_getpgid);
+
+ /* poll for samples, should get 2 ringbufs back */
+ err = ring_buffer__poll(ringbuf, -1);
+ if (CHECK(err != 4, "poll_res", "expected 4 records, got %d\n", err))
+ goto cleanup;
+
+ /* expect extra polling to return nothing */
+ err = ring_buffer__poll(ringbuf, 0);
+ if (CHECK(err < 0, "extra_samples", "poll result: %d\n", err))
+ goto cleanup;
+
+ CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+ 0L, skel->bss->dropped);
+ CHECK(skel->bss->skipped != 1, "err_skipped", "exp %ld, got %ld\n",
+ 1L, skel->bss->skipped);
+ CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+ 2L, skel->bss->total);
+
+cleanup:
+ ring_buffer__free(ringbuf);
+ test_ringbuf_multi__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c
index 713167449c98..8b571890c57e 100644
--- a/tools/testing/selftests/bpf/prog_tests/section_names.c
+++ b/tools/testing/selftests/bpf/prog_tests/section_names.c
@@ -35,7 +35,7 @@ static struct sec_name_test tests[] = {
{-EINVAL, 0},
},
{"raw_tp/", {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0}, {-EINVAL, 0} },
- {"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} },
+ {"xdp", {0, BPF_PROG_TYPE_XDP, BPF_XDP}, {0, BPF_XDP} },
{"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} },
{"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} },
{"lwt_out", {0, BPF_PROG_TYPE_LWT_OUT, 0}, {-EINVAL, 0} },
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 504abb7bfb95..7043e6ded0e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -48,21 +48,19 @@ static void test_send_signal_common(struct perf_event_attr *attr,
close(pipe_p2c[1]); /* close write */
/* notify parent signal handler is installed */
- write(pipe_c2p[1], buf, 1);
+ CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
/* make sure parent enabled bpf program to send_signal */
- read(pipe_p2c[0], buf, 1);
+ CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
/* wait a little for signal handler */
sleep(1);
- if (sigusr1_received)
- write(pipe_c2p[1], "2", 1);
- else
- write(pipe_c2p[1], "0", 1);
+ buf[0] = sigusr1_received ? '2' : '0';
+ CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
/* wait for parent notification and exit */
- read(pipe_p2c[0], buf, 1);
+ CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
close(pipe_c2p[1]);
close(pipe_p2c[0]);
@@ -99,7 +97,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
}
/* wait until child signal handler installed */
- read(pipe_c2p[0], buf, 1);
+ CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
/* trigger the bpf send_signal */
skel->bss->pid = pid;
@@ -107,7 +105,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
skel->bss->signal_thread = signal_thread;
/* notify child that bpf program can send_signal now */
- write(pipe_p2c[1], buf, 1);
+ CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
/* wait for result */
err = read(pipe_c2p[0], buf, 1);
@@ -121,7 +119,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
CHECK(buf[0] != '2', test_name, "incorrect result\n");
/* notify child safe to exit */
- write(pipe_p2c[1], buf, 1);
+ CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
disable_pmu:
close(pmu_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
index 996e808f43a2..dfcbddcbe4d3 100644
--- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c
+++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
static void sigalrm_handler(int s) {}
static struct sigaction sigalrm_action = {
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index d572e1a2c297..3a469099f30d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -20,6 +20,7 @@
#define CONNECT_PORT 4321
#define TEST_DADDR (0xC0A80203)
#define NS_SELF "/proc/self/ns/net"
+#define SERVER_MAP_PATH "/sys/fs/bpf/tc/globals/server_map"
static const struct timeval timeo_sec = { .tv_sec = 3 };
static const size_t timeo_optlen = sizeof(timeo_sec);
@@ -48,7 +49,7 @@ configure_stack(void)
sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf",
"direct-action object-file ./test_sk_assign.o",
"section classifier/sk_assign_test",
- (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "");
+ (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose");
if (CHECK(system(tc_cmd), "BPF load failed;",
"run with -vv for more info\n"))
return false;
@@ -264,8 +265,10 @@ void test_sk_assign(void)
TEST("ipv6 udp port redir", AF_INET6, SOCK_DGRAM, false),
TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true),
};
- int server = -1;
+ __s64 server = -1;
+ int server_map;
int self_net;
+ int i;
self_net = open(NS_SELF, O_RDONLY);
if (CHECK_FAIL(self_net < 0)) {
@@ -278,9 +281,17 @@ void test_sk_assign(void)
goto cleanup;
}
- for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) {
+ server_map = bpf_obj_get(SERVER_MAP_PATH);
+ if (CHECK_FAIL(server_map < 0)) {
+ perror("Unable to open " SERVER_MAP_PATH);
+ goto cleanup;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) {
struct test_sk_cfg *test = &tests[i];
const struct sockaddr *addr;
+ const int zero = 0;
+ int err;
if (!test__start_subtest(test->name))
continue;
@@ -288,7 +299,13 @@ void test_sk_assign(void)
addr = (const struct sockaddr *)test->addr;
server = start_server(addr, test->len, test->type);
if (server == -1)
- goto cleanup;
+ goto close;
+
+ err = bpf_map_update_elem(server_map, &zero, &server, BPF_ANY);
+ if (CHECK_FAIL(err)) {
+ perror("Unable to update server_map");
+ goto close;
+ }
/* connect to unbound ports */
prepare_addr(test->addr, test->family, CONNECT_PORT,
@@ -302,7 +319,10 @@ void test_sk_assign(void)
close:
close(server);
+ close(server_map);
cleanup:
+ if (CHECK_FAIL(unlink(SERVER_MAP_PATH)))
+ perror("Unable to unlink " SERVER_MAP_PATH);
if (CHECK_FAIL(setns(self_net, CLONE_NEWNET)))
perror("Failed to setns("NS_SELF")");
close(self_net);
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
new file mode 100644
index 000000000000..9ff0412e1fd3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -0,0 +1,1330 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+/*
+ * Test BPF attach point for INET socket lookup (BPF_SK_LOOKUP).
+ *
+ * Tests exercise:
+ * - attaching/detaching/querying programs to BPF_SK_LOOKUP hook,
+ * - redirecting socket lookup to a socket selected by BPF program,
+ * - failing a socket lookup on BPF program's request,
+ * - error scenarios for selecting a socket from BPF program,
+ * - accessing BPF program context,
+ * - attaching and running multiple BPF programs.
+ *
+ * Tests run in a dedicated network namespace.
+ */
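+
+/* The lookup programs referenced below (lookup_pass, lookup_drop, redir_port,
+ * select_sock_a, ...) live in progs/test_sk_lookup.c and are not part of this
+ * hunk. A redirecting program is roughly this sketch (map and key names are
+ * assumptions):
+ *
+ *	SEC("sk_lookup")
+ *	int redir_port(struct bpf_sk_lookup *ctx)
+ *	{
+ *		struct bpf_sock *sk;
+ *		int err;
+ *
+ *		if (ctx->local_port != EXT_PORT)
+ *			return SK_PASS;
+ *
+ *		sk = bpf_map_lookup_elem(&redir_map, &server_a);
+ *		if (!sk)
+ *			return SK_PASS;
+ *
+ *		err = bpf_sk_assign(ctx, sk, 0);
+ *		bpf_sk_release(sk);
+ *		return err ? SK_DROP : SK_PASS;
+ *	}
+ */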
+
+#define _GNU_SOURCE
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+#include "test_progs.h"
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "testing_helpers.h"
+#include "test_sk_lookup.skel.h"
+
+/* External (address, port) pairs the client sends packets to. */
+#define EXT_IP4 "127.0.0.1"
+#define EXT_IP6 "fd00::1"
+#define EXT_PORT 7007
+
+/* Internal (address, port) pairs the server listens/receives at. */
+#define INT_IP4 "127.0.0.2"
+#define INT_IP4_V6 "::ffff:127.0.0.2"
+#define INT_IP6 "fd00::2"
+#define INT_PORT 8008
+
+#define IO_TIMEOUT_SEC 3
+
+enum server {
+ SERVER_A = 0,
+ SERVER_B = 1,
+ MAX_SERVERS,
+};
+
+enum {
+ PROG1 = 0,
+ PROG2,
+};
+
+struct inet_addr {
+ const char *ip;
+ unsigned short port;
+};
+
+struct test {
+ const char *desc;
+ struct bpf_program *lookup_prog;
+ struct bpf_program *reuseport_prog;
+ struct bpf_map *sock_map;
+ int sotype;
+ struct inet_addr connect_to;
+ struct inet_addr listen_at;
+ enum server accept_on;
+ bool reuseport_has_conns; /* Add a connected socket to reuseport group */
+};
+
+static __u32 duration; /* for CHECK macro */
+
+static bool is_ipv6(const char *ip)
+{
+ return !!strchr(ip, ':');
+}
+
+static int attach_reuseport(int sock_fd, struct bpf_program *reuseport_prog)
+{
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(reuseport_prog);
+ if (prog_fd < 0) {
+ errno = -prog_fd;
+ return -1;
+ }
+
+ err = setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
+ &prog_fd, sizeof(prog_fd));
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static socklen_t inetaddr_len(const struct sockaddr_storage *addr)
+{
+ return (addr->ss_family == AF_INET ? sizeof(struct sockaddr_in) :
+ addr->ss_family == AF_INET6 ? sizeof(struct sockaddr_in6) : 0);
+}
+
+static int make_socket(int sotype, const char *ip, int port,
+ struct sockaddr_storage *addr)
+{
+ struct timeval timeo = { .tv_sec = IO_TIMEOUT_SEC };
+ int err, family, fd;
+
+ family = is_ipv6(ip) ? AF_INET6 : AF_INET;
+ err = make_sockaddr(family, ip, port, addr, NULL);
+ if (CHECK(err, "make_address", "failed\n"))
+ return -1;
+
+ fd = socket(addr->ss_family, sotype, 0);
+ if (CHECK(fd < 0, "socket", "failed\n")) {
+ log_err("failed to make socket");
+ return -1;
+ }
+
+ err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
+ if (CHECK(err, "setsockopt(SO_SNDTIMEO)", "failed\n")) {
+ log_err("failed to set SNDTIMEO");
+ close(fd);
+ return -1;
+ }
+
+ err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
+ if (CHECK(err, "setsockopt(SO_RCVTIMEO)", "failed\n")) {
+ log_err("failed to set RCVTIMEO");
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+static int make_server(int sotype, const char *ip, int port,
+ struct bpf_program *reuseport_prog)
+{
+ struct sockaddr_storage addr = {0};
+ const int one = 1;
+ int err, fd = -1;
+
+ fd = make_socket(sotype, ip, port, &addr);
+ if (fd < 0)
+ return -1;
+
+ /* Enabled for UDPv6 sockets for IPv4-mapped IPv6 to work. */
+ if (sotype == SOCK_DGRAM) {
+ err = setsockopt(fd, SOL_IP, IP_RECVORIGDSTADDR, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(IP_RECVORIGDSTADDR)", "failed\n")) {
+ log_err("failed to enable IP_RECVORIGDSTADDR");
+ goto fail;
+ }
+ }
+
+ if (sotype == SOCK_DGRAM && addr.ss_family == AF_INET6) {
+ err = setsockopt(fd, SOL_IPV6, IPV6_RECVORIGDSTADDR, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(IPV6_RECVORIGDSTADDR)", "failed\n")) {
+ log_err("failed to enable IPV6_RECVORIGDSTADDR");
+ goto fail;
+ }
+ }
+
+ if (sotype == SOCK_STREAM) {
+ err = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(SO_REUSEADDR)", "failed\n")) {
+ log_err("failed to enable SO_REUSEADDR");
+ goto fail;
+ }
+ }
+
+ if (reuseport_prog) {
+ err = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(SO_REUSEPORT)", "failed\n")) {
+ log_err("failed to enable SO_REUSEPORT");
+ goto fail;
+ }
+ }
+
+ err = bind(fd, (void *)&addr, inetaddr_len(&addr));
+ if (CHECK(err, "bind", "failed\n")) {
+ log_err("failed to bind listen socket");
+ goto fail;
+ }
+
+ if (sotype == SOCK_STREAM) {
+ err = listen(fd, SOMAXCONN);
+ if (CHECK(err, "make_server", "listen")) {
+ log_err("failed to listen on port %d", port);
+ goto fail;
+ }
+ }
+
+ /* Late attach reuseport prog so we can have one init path */
+ if (reuseport_prog) {
+ err = attach_reuseport(fd, reuseport_prog);
+ if (CHECK(err, "attach_reuseport", "failed\n")) {
+ log_err("failed to attach reuseport prog");
+ goto fail;
+ }
+ }
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static int make_client(int sotype, const char *ip, int port)
+{
+ struct sockaddr_storage addr = {0};
+ int err, fd;
+
+ fd = make_socket(sotype, ip, port, &addr);
+ if (fd < 0)
+ return -1;
+
+ err = connect(fd, (void *)&addr, inetaddr_len(&addr));
+ if (CHECK(err, "make_client", "connect")) {
+ log_err("failed to connect client socket");
+ goto fail;
+ }
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static int send_byte(int fd)
+{
+ ssize_t n;
+
+ errno = 0;
+ n = send(fd, "a", 1, 0);
+ if (CHECK(n <= 0, "send_byte", "send")) {
+ log_err("failed/partial send");
+ return -1;
+ }
+ return 0;
+}
+
+static int recv_byte(int fd)
+{
+ char buf[1];
+ ssize_t n;
+
+ n = recv(fd, buf, sizeof(buf), 0);
+ if (CHECK(n <= 0, "recv_byte", "recv")) {
+ log_err("failed/partial recv");
+ return -1;
+ }
+ return 0;
+}
+
+static int tcp_recv_send(int server_fd)
+{
+ char buf[1];
+ int ret, fd;
+ ssize_t n;
+
+ fd = accept(server_fd, NULL, NULL);
+ if (CHECK(fd < 0, "accept", "failed\n")) {
+ log_err("failed to accept");
+ return -1;
+ }
+
+ n = recv(fd, buf, sizeof(buf), 0);
+ if (CHECK(n <= 0, "recv", "failed\n")) {
+ log_err("failed/partial recv");
+ ret = -1;
+ goto close;
+ }
+
+ n = send(fd, buf, n, 0);
+ if (CHECK(n <= 0, "send", "failed\n")) {
+ log_err("failed/partial send");
+ ret = -1;
+ goto close;
+ }
+
+ ret = 0;
+close:
+ close(fd);
+ return ret;
+}
+
+static void v4_to_v6(struct sockaddr_storage *ss)
+{
+ struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)ss;
+ struct sockaddr_in v4 = *(struct sockaddr_in *)ss;
+
+ v6->sin6_family = AF_INET6;
+ v6->sin6_port = v4.sin_port;
+ v6->sin6_addr.s6_addr[10] = 0xff;
+ v6->sin6_addr.s6_addr[11] = 0xff;
+ memcpy(&v6->sin6_addr.s6_addr[12], &v4.sin_addr.s_addr, 4);
+ memset(&v6->sin6_addr.s6_addr[0], 0, 10);
+}
+
+static int udp_recv_send(int server_fd)
+{
+ char cmsg_buf[CMSG_SPACE(sizeof(struct sockaddr_storage))];
+ struct sockaddr_storage _src_addr = { 0 };
+ struct sockaddr_storage *src_addr = &_src_addr;
+ struct sockaddr_storage *dst_addr = NULL;
+ struct msghdr msg = { 0 };
+ struct iovec iov = { 0 };
+ struct cmsghdr *cm;
+ char buf[1];
+ int ret, fd;
+ ssize_t n;
+
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+
+ msg.msg_name = src_addr;
+ msg.msg_namelen = sizeof(*src_addr);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ errno = 0;
+ n = recvmsg(server_fd, &msg, 0);
+ if (CHECK(n <= 0, "recvmsg", "failed\n")) {
+ log_err("failed to receive");
+ return -1;
+ }
+ if (CHECK(msg.msg_flags & MSG_CTRUNC, "recvmsg", "truncated cmsg\n"))
+ return -1;
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+ if ((cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_ORIGDSTADDR) ||
+ (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_ORIGDSTADDR)) {
+ dst_addr = (struct sockaddr_storage *)CMSG_DATA(cm);
+ break;
+ }
+ log_err("warning: ignored cmsg at level %d type %d",
+ cm->cmsg_level, cm->cmsg_type);
+ }
+ if (CHECK(!dst_addr, "recvmsg", "missing ORIGDSTADDR\n"))
+ return -1;
+
+ /* Server socket bound to IPv4-mapped IPv6 address */
+ if (src_addr->ss_family == AF_INET6 &&
+ dst_addr->ss_family == AF_INET) {
+ v4_to_v6(dst_addr);
+ }
+
+ /* Reply from original destination address. */
+ fd = socket(dst_addr->ss_family, SOCK_DGRAM, 0);
+ if (CHECK(fd < 0, "socket", "failed\n")) {
+ log_err("failed to create tx socket");
+ return -1;
+ }
+
+ ret = bind(fd, (struct sockaddr *)dst_addr, sizeof(*dst_addr));
+ if (CHECK(ret, "bind", "failed\n")) {
+ log_err("failed to bind tx socket");
+ goto out;
+ }
+
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ n = sendmsg(fd, &msg, 0);
+ if (CHECK(n <= 0, "sendmsg", "failed\n")) {
+ log_err("failed to send echo reply");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ close(fd);
+ return ret;
+}
+
+static int tcp_echo_test(int client_fd, int server_fd)
+{
+ int err;
+
+ err = send_byte(client_fd);
+ if (err)
+ return -1;
+ err = tcp_recv_send(server_fd);
+ if (err)
+ return -1;
+ err = recv_byte(client_fd);
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static int udp_echo_test(int client_fd, int server_fd)
+{
+ int err;
+
+ err = send_byte(client_fd);
+ if (err)
+ return -1;
+ err = udp_recv_send(server_fd);
+ if (err)
+ return -1;
+ err = recv_byte(client_fd);
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static struct bpf_link *attach_lookup_prog(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+ int net_fd;
+
+ net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK(net_fd < 0, "open", "failed\n")) {
+ log_err("failed to open /proc/self/ns/net");
+ return NULL;
+ }
+
+ link = bpf_program__attach_netns(prog, net_fd);
+ if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) {
+ errno = -PTR_ERR(link);
+ log_err("failed to attach program '%s' to netns",
+ bpf_program__name(prog));
+ link = NULL;
+ }
+
+ close(net_fd);
+ return link;
+}
+
+static int update_lookup_map(struct bpf_map *map, int index, int sock_fd)
+{
+ int err, map_fd;
+ uint64_t value;
+
+ map_fd = bpf_map__fd(map);
+ if (CHECK(map_fd < 0, "bpf_map__fd", "failed\n")) {
+ errno = -map_fd;
+ log_err("failed to get map FD");
+ return -1;
+ }
+
+ value = (uint64_t)sock_fd;
+ err = bpf_map_update_elem(map_fd, &index, &value, BPF_NOEXIST);
+ if (CHECK(err, "bpf_map_update_elem", "failed\n")) {
+ log_err("failed to update redir_map @ %d", index);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void query_lookup_prog(struct test_sk_lookup *skel)
+{
+ struct bpf_link *link[3] = {};
+ struct bpf_link_info info;
+ __u32 attach_flags = 0;
+ __u32 prog_ids[3] = {};
+ __u32 prog_cnt = 3;
+ __u32 prog_id;
+ int net_fd;
+ int err;
+
+ net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK(net_fd < 0, "open", "failed\n")) {
+ log_err("failed to open /proc/self/ns/net");
+ return;
+ }
+
+ link[0] = attach_lookup_prog(skel->progs.lookup_pass);
+ if (!link[0])
+ goto close;
+ link[1] = attach_lookup_prog(skel->progs.lookup_pass);
+ if (!link[1])
+ goto detach;
+ link[2] = attach_lookup_prog(skel->progs.lookup_drop);
+ if (!link[2])
+ goto detach;
+
+ err = bpf_prog_query(net_fd, BPF_SK_LOOKUP, 0 /* query flags */,
+ &attach_flags, prog_ids, &prog_cnt);
+ if (CHECK(err, "bpf_prog_query", "failed\n")) {
+ log_err("failed to query lookup prog");
+ goto detach;
+ }
+
+ errno = 0;
+ if (CHECK(attach_flags != 0, "bpf_prog_query",
+ "wrong attach_flags on query: %u", attach_flags))
+ goto detach;
+ if (CHECK(prog_cnt != 3, "bpf_prog_query",
+ "wrong program count on query: %u", prog_cnt))
+ goto detach;
+ prog_id = link_info_prog_id(link[0], &info);
+ CHECK(prog_ids[0] != prog_id, "bpf_prog_query",
+ "invalid program #0 id on query: %u != %u\n",
+ prog_ids[0], prog_id);
+ CHECK(info.netns.netns_ino == 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+ prog_id = link_info_prog_id(link[1], &info);
+ CHECK(prog_ids[1] != prog_id, "bpf_prog_query",
+ "invalid program #1 id on query: %u != %u\n",
+ prog_ids[1], prog_id);
+ CHECK(info.netns.netns_ino == 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+ prog_id = link_info_prog_id(link[2], &info);
+ CHECK(prog_ids[2] != prog_id, "bpf_prog_query",
+ "invalid program #2 id on query: %u != %u\n",
+ prog_ids[2], prog_id);
+ CHECK(info.netns.netns_ino == 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+
+ err = bpf_link__detach(link[0]);
+ if (CHECK(err, "link_detach", "failed %d\n", err))
+ goto detach;
+
+ /* prog id is still there, but netns_ino is zeroed out */
+ prog_id = link_info_prog_id(link[0], &info);
+ CHECK(prog_ids[0] != prog_id, "bpf_prog_query",
+ "invalid program #0 id on query: %u != %u\n",
+ prog_ids[0], prog_id);
+ CHECK(info.netns.netns_ino != 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+
+detach:
+ if (link[2])
+ bpf_link__destroy(link[2]);
+ if (link[1])
+ bpf_link__destroy(link[1]);
+ if (link[0])
+ bpf_link__destroy(link[0]);
+close:
+ close(net_fd);
+}
+
+static void run_lookup_prog(const struct test *t)
+{
+ int server_fds[MAX_SERVERS] = { [0 ... MAX_SERVERS - 1] = -1 };
+ int client_fd, reuse_conn_fd = -1;
+ struct bpf_link *lookup_link;
+ int i, err;
+
+ lookup_link = attach_lookup_prog(t->lookup_prog);
+ if (!lookup_link)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+ server_fds[i] = make_server(t->sotype, t->listen_at.ip,
+ t->listen_at.port,
+ t->reuseport_prog);
+ if (server_fds[i] < 0)
+ goto close;
+
+ err = update_lookup_map(t->sock_map, i, server_fds[i]);
+ if (err)
+ goto close;
+
+ /* want just one server for non-reuseport test */
+ if (!t->reuseport_prog)
+ break;
+ }
+
+ /* Regular UDP socket lookup with reuseport behaves
+ * differently when reuseport group contains connected
+ * sockets. Check that adding a connected UDP socket to the
+ * reuseport group does not affect how reuseport works with
+ * BPF socket lookup.
+ */
+ if (t->reuseport_has_conns) {
+ struct sockaddr_storage addr = {};
+ socklen_t len = sizeof(addr);
+
+ /* Add an extra socket to reuseport group */
+ reuse_conn_fd = make_server(t->sotype, t->listen_at.ip,
+ t->listen_at.port,
+ t->reuseport_prog);
+ if (reuse_conn_fd < 0)
+ goto close;
+
+ /* Connect the extra socket to itself */
+ err = getsockname(reuse_conn_fd, (void *)&addr, &len);
+ if (CHECK(err, "getsockname", "errno %d\n", errno))
+ goto close;
+ err = connect(reuse_conn_fd, (void *)&addr, len);
+ if (CHECK(err, "connect", "errno %d\n", errno))
+ goto close;
+ }
+
+ client_fd = make_client(t->sotype, t->connect_to.ip, t->connect_to.port);
+ if (client_fd < 0)
+ goto close;
+
+ if (t->sotype == SOCK_STREAM)
+ tcp_echo_test(client_fd, server_fds[t->accept_on]);
+ else
+ udp_echo_test(client_fd, server_fds[t->accept_on]);
+
+ close(client_fd);
+close:
+ if (reuse_conn_fd != -1)
+ close(reuse_conn_fd);
+ for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+ if (server_fds[i] != -1)
+ close(server_fds[i]);
+ }
+ bpf_link__destroy(lookup_link);
+}
+
+static void test_redirect_lookup(struct test_sk_lookup *skel)
+{
+ const struct test tests[] = {
+ {
+ .desc = "TCP IPv4 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv4 redir addr",
+ .lookup_prog = skel->progs.redir_ip4,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "TCP IPv4 redir with reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "TCP IPv4 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ {
+ .desc = "TCP IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 redir addr",
+ .lookup_prog = skel->progs.redir_ip6,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, EXT_PORT },
+ },
+ {
+ .desc = "TCP IPv4->IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4_V6, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 redir with reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "TCP IPv6 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ {
+ .desc = "UDP IPv4 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, INT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 redir addr",
+ .lookup_prog = skel->progs.redir_ip4,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 redir with reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "UDP IPv4 redir and reuseport with conns",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_B,
+ .reuseport_has_conns = true,
+ },
+ {
+ .desc = "UDP IPv4 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ {
+ .desc = "UDP IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, INT_PORT },
+ },
+ {
+ .desc = "UDP IPv6 redir addr",
+ .lookup_prog = skel->progs.redir_ip6,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv4->IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .listen_at = { INT_IP4_V6, INT_PORT },
+ .connect_to = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv6 redir and reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "UDP IPv6 redir and reuseport with conns",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_B,
+ .reuseport_has_conns = true,
+ },
+ {
+ .desc = "UDP IPv6 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ };
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ if (test__start_subtest(t->desc))
+ run_lookup_prog(t);
+ }
+}
+
+static void drop_on_lookup(const struct test *t)
+{
+ struct sockaddr_storage dst = {};
+ int client_fd, server_fd, err;
+ struct bpf_link *lookup_link;
+ ssize_t n;
+
+ lookup_link = attach_lookup_prog(t->lookup_prog);
+ if (!lookup_link)
+ return;
+
+ server_fd = make_server(t->sotype, t->listen_at.ip, t->listen_at.port,
+ t->reuseport_prog);
+ if (server_fd < 0)
+ goto detach;
+
+ client_fd = make_socket(t->sotype, t->connect_to.ip,
+ t->connect_to.port, &dst);
+ if (client_fd < 0)
+ goto close_srv;
+
+ err = connect(client_fd, (void *)&dst, inetaddr_len(&dst));
+ if (t->sotype == SOCK_DGRAM) {
+ err = send_byte(client_fd);
+ if (err)
+ goto close_all;
+
+ /* Read out asynchronous error */
+ n = recv(client_fd, NULL, 0, 0);
+ err = n == -1;
+ }
+ if (CHECK(!err || errno != ECONNREFUSED, "connect",
+ "unexpected success or error\n"))
+ log_err("expected ECONNREFUSED on connect");
+
+close_all:
+ close(client_fd);
+close_srv:
+ close(server_fd);
+detach:
+ bpf_link__destroy(lookup_link);
+}
+
+static void test_drop_on_lookup(struct test_sk_lookup *skel)
+{
+ const struct test tests[] = {
+ {
+ .desc = "TCP IPv4 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv6 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, INT_PORT },
+ },
+ };
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ if (test__start_subtest(t->desc))
+ drop_on_lookup(t);
+ }
+}
+
+static void drop_on_reuseport(const struct test *t)
+{
+ struct sockaddr_storage dst = { 0 };
+ int client, server1, server2, err;
+ struct bpf_link *lookup_link;
+ ssize_t n;
+
+ lookup_link = attach_lookup_prog(t->lookup_prog);
+ if (!lookup_link)
+ return;
+
+ server1 = make_server(t->sotype, t->listen_at.ip, t->listen_at.port,
+ t->reuseport_prog);
+ if (server1 < 0)
+ goto detach;
+
+ err = update_lookup_map(t->sock_map, SERVER_A, server1);
+ if (err)
+ goto detach;
+
+ /* second server on destination address we should never reach */
+ server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port,
+ NULL /* reuseport prog */);
+ if (server2 < 0)
+ goto close_srv1;
+
+ client = make_socket(t->sotype, t->connect_to.ip,
+ t->connect_to.port, &dst);
+ if (client < 0)
+ goto close_srv2;
+
+ err = connect(client, (void *)&dst, inetaddr_len(&dst));
+ if (t->sotype == SOCK_DGRAM) {
+ err = send_byte(client);
+ if (err)
+ goto close_all;
+
+ /* Read out asynchronous error */
+ n = recv(client, NULL, 0, 0);
+ err = n == -1;
+ }
+ if (CHECK(!err || errno != ECONNREFUSED, "connect",
+ "unexpected success or error\n"))
+ log_err("expected ECONNREFUSED on connect");
+
+close_all:
+ close(client);
+close_srv2:
+ close(server2);
+close_srv1:
+ close(server1);
+detach:
+ bpf_link__destroy(lookup_link);
+}
+
+static void test_drop_on_reuseport(struct test_sk_lookup *skel)
+{
+ const struct test tests[] = {
+ {
+ .desc = "TCP IPv4 drop on reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.reuseport_drop,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 drop on reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.reuseport_drop,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 drop on reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.reuseport_drop,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 drop on reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.reuseport_drop,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ },
+ };
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ if (test__start_subtest(t->desc))
+ drop_on_reuseport(t);
+ }
+}
+
+static void run_sk_assign(struct test_sk_lookup *skel,
+ struct bpf_program *lookup_prog,
+ const char *listen_ip, const char *connect_ip)
+{
+ int client_fd, peer_fd, server_fds[MAX_SERVERS] = { [0 ... MAX_SERVERS - 1] = -1 };
+ struct bpf_link *lookup_link;
+ int i, err;
+
+ lookup_link = attach_lookup_prog(lookup_prog);
+ if (!lookup_link)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+ server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL);
+ if (server_fds[i] < 0)
+ goto close_servers;
+
+ err = update_lookup_map(skel->maps.redir_map, i,
+ server_fds[i]);
+ if (err)
+ goto close_servers;
+ }
+
+ client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT);
+ if (client_fd < 0)
+ goto close_servers;
+
+ peer_fd = accept(server_fds[SERVER_B], NULL, NULL);
+ if (CHECK(peer_fd < 0, "accept", "failed\n"))
+ goto close_client;
+
+ close(peer_fd);
+close_client:
+ close(client_fd);
+close_servers:
+ for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+ if (server_fds[i] != -1)
+ close(server_fds[i]);
+ }
+ bpf_link__destroy(lookup_link);
+}
+
+static void run_sk_assign_v4(struct test_sk_lookup *skel,
+ struct bpf_program *lookup_prog)
+{
+ run_sk_assign(skel, lookup_prog, INT_IP4, EXT_IP4);
+}
+
+static void run_sk_assign_v6(struct test_sk_lookup *skel,
+ struct bpf_program *lookup_prog)
+{
+ run_sk_assign(skel, lookup_prog, INT_IP6, EXT_IP6);
+}
+
+static void run_sk_assign_connected(struct test_sk_lookup *skel,
+ int sotype)
+{
+ int err, client_fd, connected_fd, server_fd;
+ struct bpf_link *lookup_link;
+
+ server_fd = make_server(sotype, EXT_IP4, EXT_PORT, NULL);
+ if (server_fd < 0)
+ return;
+
+ connected_fd = make_client(sotype, EXT_IP4, EXT_PORT);
+ if (connected_fd < 0)
+ goto out_close_server;
+
+ /* Put a connected socket in redirect map */
+ err = update_lookup_map(skel->maps.redir_map, SERVER_A, connected_fd);
+ if (err)
+ goto out_close_connected;
+
+ lookup_link = attach_lookup_prog(skel->progs.sk_assign_esocknosupport);
+ if (!lookup_link)
+ goto out_close_connected;
+
+ /* Try to redirect TCP SYN / UDP packet to a connected socket */
+ client_fd = make_client(sotype, EXT_IP4, EXT_PORT);
+ if (client_fd < 0)
+ goto out_unlink_prog;
+ if (sotype == SOCK_DGRAM) {
+ send_byte(client_fd);
+ recv_byte(server_fd);
+ }
+
+ close(client_fd);
+out_unlink_prog:
+ bpf_link__destroy(lookup_link);
+out_close_connected:
+ close(connected_fd);
+out_close_server:
+ close(server_fd);
+}
+
+static void test_sk_assign_helper(struct test_sk_lookup *skel)
+{
+ if (test__start_subtest("sk_assign returns EEXIST"))
+ run_sk_assign_v4(skel, skel->progs.sk_assign_eexist);
+ if (test__start_subtest("sk_assign honors F_REPLACE"))
+ run_sk_assign_v4(skel, skel->progs.sk_assign_replace_flag);
+ if (test__start_subtest("sk_assign accepts NULL socket"))
+ run_sk_assign_v4(skel, skel->progs.sk_assign_null);
+ if (test__start_subtest("access ctx->sk"))
+ run_sk_assign_v4(skel, skel->progs.access_ctx_sk);
+ if (test__start_subtest("narrow access to ctx v4"))
+ run_sk_assign_v4(skel, skel->progs.ctx_narrow_access);
+ if (test__start_subtest("narrow access to ctx v6"))
+ run_sk_assign_v6(skel, skel->progs.ctx_narrow_access);
+ if (test__start_subtest("sk_assign rejects TCP established"))
+ run_sk_assign_connected(skel, SOCK_STREAM);
+ if (test__start_subtest("sk_assign rejects UDP connected"))
+ run_sk_assign_connected(skel, SOCK_DGRAM);
+}
+
+struct test_multi_prog {
+ const char *desc;
+ struct bpf_program *prog1;
+ struct bpf_program *prog2;
+ struct bpf_map *redir_map;
+ struct bpf_map *run_map;
+ int expect_errno;
+ struct inet_addr listen_at;
+};
+
+static void run_multi_prog_lookup(const struct test_multi_prog *t)
+{
+ struct sockaddr_storage dst = {};
+ int map_fd, server_fd, client_fd;
+ struct bpf_link *link1, *link2;
+ int prog_idx, done, err;
+
+ map_fd = bpf_map__fd(t->run_map);
+
+ done = 0;
+ prog_idx = PROG1;
+ err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY);
+ if (CHECK(err, "bpf_map_update_elem", "failed\n"))
+ return;
+ prog_idx = PROG2;
+ err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY);
+ if (CHECK(err, "bpf_map_update_elem", "failed\n"))
+ return;
+
+ link1 = attach_lookup_prog(t->prog1);
+ if (!link1)
+ return;
+ link2 = attach_lookup_prog(t->prog2);
+ if (!link2)
+ goto out_unlink1;
+
+ server_fd = make_server(SOCK_STREAM, t->listen_at.ip,
+ t->listen_at.port, NULL);
+ if (server_fd < 0)
+ goto out_unlink2;
+
+ err = update_lookup_map(t->redir_map, SERVER_A, server_fd);
+ if (err)
+ goto out_close_server;
+
+ client_fd = make_socket(SOCK_STREAM, EXT_IP4, EXT_PORT, &dst);
+ if (client_fd < 0)
+ goto out_close_server;
+
+ err = connect(client_fd, (void *)&dst, inetaddr_len(&dst));
+ if (CHECK(err && !t->expect_errno, "connect",
+ "unexpected error %d\n", errno))
+ goto out_close_client;
+ if (CHECK(err && t->expect_errno && errno != t->expect_errno,
+ "connect", "unexpected error %d\n", errno))
+ goto out_close_client;
+
+ done = 0;
+ prog_idx = PROG1;
+ err = bpf_map_lookup_elem(map_fd, &prog_idx, &done);
+ CHECK(err, "bpf_map_lookup_elem", "failed\n");
+ CHECK(!done, "bpf_map_lookup_elem", "PROG1 !done\n");
+
+ done = 0;
+ prog_idx = PROG2;
+ err = bpf_map_lookup_elem(map_fd, &prog_idx, &done);
+ CHECK(err, "bpf_map_lookup_elem", "failed\n");
+ CHECK(!done, "bpf_map_lookup_elem", "PROG2 !done\n");
+
+out_close_client:
+ close(client_fd);
+out_close_server:
+ close(server_fd);
+out_unlink2:
+ bpf_link__destroy(link2);
+out_unlink1:
+ bpf_link__destroy(link1);
+}
+
+static void test_multi_prog_lookup(struct test_sk_lookup *skel)
+{
+ struct test_multi_prog tests[] = {
+ {
+ .desc = "multi prog - pass, pass",
+ .prog1 = skel->progs.multi_prog_pass1,
+ .prog2 = skel->progs.multi_prog_pass2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "multi prog - drop, drop",
+ .prog1 = skel->progs.multi_prog_drop1,
+ .prog2 = skel->progs.multi_prog_drop2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ .expect_errno = ECONNREFUSED,
+ },
+ {
+ .desc = "multi prog - pass, drop",
+ .prog1 = skel->progs.multi_prog_pass1,
+ .prog2 = skel->progs.multi_prog_drop2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ .expect_errno = ECONNREFUSED,
+ },
+ {
+ .desc = "multi prog - drop, pass",
+ .prog1 = skel->progs.multi_prog_drop1,
+ .prog2 = skel->progs.multi_prog_pass2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ .expect_errno = ECONNREFUSED,
+ },
+ {
+ .desc = "multi prog - pass, redir",
+ .prog1 = skel->progs.multi_prog_pass1,
+ .prog2 = skel->progs.multi_prog_redir2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - redir, pass",
+ .prog1 = skel->progs.multi_prog_redir1,
+ .prog2 = skel->progs.multi_prog_pass2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - drop, redir",
+ .prog1 = skel->progs.multi_prog_drop1,
+ .prog2 = skel->progs.multi_prog_redir2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - redir, drop",
+ .prog1 = skel->progs.multi_prog_redir1,
+ .prog2 = skel->progs.multi_prog_drop2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - redir, redir",
+ .prog1 = skel->progs.multi_prog_redir1,
+ .prog2 = skel->progs.multi_prog_redir2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ };
+ struct test_multi_prog *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ t->redir_map = skel->maps.redir_map;
+ t->run_map = skel->maps.run_map;
+ if (test__start_subtest(t->desc))
+ run_multi_prog_lookup(t);
+ }
+}
+
+static void run_tests(struct test_sk_lookup *skel)
+{
+ if (test__start_subtest("query lookup prog"))
+ query_lookup_prog(skel);
+ test_redirect_lookup(skel);
+ test_drop_on_lookup(skel);
+ test_drop_on_reuseport(skel);
+ test_sk_assign_helper(skel);
+ test_multi_prog_lookup(skel);
+}
+
+static int switch_netns(void)
+{
+ static const char * const setup_script[] = {
+ "ip -6 addr add dev lo " EXT_IP6 "/128",
+ "ip -6 addr add dev lo " INT_IP6 "/128",
+ "ip link set dev lo up",
+ NULL,
+ };
+ const char * const *cmd;
+ int err;
+
+ err = unshare(CLONE_NEWNET);
+ if (CHECK(err, "unshare", "failed\n")) {
+ log_err("unshare(CLONE_NEWNET)");
+ return -1;
+ }
+
+ for (cmd = setup_script; *cmd; cmd++) {
+ err = system(*cmd);
+ if (CHECK(err, "system", "failed\n")) {
+ log_err("system(%s)", *cmd);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+void test_sk_lookup(void)
+{
+ struct test_sk_lookup *skel;
+ int err;
+
+ err = switch_netns();
+ if (err)
+ return;
+
+ skel = test_sk_lookup__open_and_load();
+ if (CHECK(!skel, "skel open_and_load", "failed\n"))
+ return;
+
+ run_tests(skel);
+
+ test_sk_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index 4538bd08203f..fafeddaad6a9 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
void test_skb_ctx(void)
{
@@ -10,6 +11,7 @@ void test_skb_ctx(void)
.cb[3] = 4,
.cb[4] = 5,
.priority = 6,
+ .ifindex = 1,
.tstamp = 7,
.wire_len = 100,
.gso_segs = 8,
@@ -79,7 +81,7 @@ void test_skb_ctx(void)
CHECK_ATTR(tattr.ctx_size_out != sizeof(skb),
"ctx_size_out",
- "incorrect output size, want %lu have %u\n",
+ "incorrect output size, want %zu have %u\n",
sizeof(skb), tattr.ctx_size_out);
for (i = 0; i < 5; i++)
@@ -91,6 +93,10 @@ void test_skb_ctx(void)
"ctx_out_priority",
"skb->priority == %d, expected %d\n",
skb.priority, 7);
+ CHECK_ATTR(skb.ifindex != 1,
+ "ctx_out_ifindex",
+ "skb->ifindex == %d, expected %d\n",
+ skb.ifindex, 1);
CHECK_ATTR(skb.tstamp != 8,
"ctx_out_tstamp",
"skb->tstamp == %lld, expected %d\n",
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
new file mode 100644
index 000000000000..f302ad84a298
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
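+/* Load test_skb_helpers.o as a SCHED_CLS program and do a single test run
+ * with a __sk_buff context; only the success of the run itself is checked.
+ */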
+void test_skb_helpers(void)
+{
+ struct __sk_buff skb = {
+ .wire_len = 100,
+ .gso_segs = 8,
+ .gso_size = 10,
+ };
+ struct bpf_prog_test_run_attr tattr = {
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ .ctx_out = &skb,
+ .ctx_size_out = sizeof(skb),
+ };
+ struct bpf_object *obj;
+ int err;
+
+ err = bpf_prog_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &tattr.prog_fd);
+ if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ return;
+ err = bpf_prog_test_run_xattr(&tattr);
+ CHECK_ATTR(err, "len", "err %d errno %d\n", err, errno);
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
index 9264a2736018..fe87b77af459 100644
--- a/tools/testing/selftests/bpf/prog_tests/skeleton.c
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -15,6 +15,8 @@ void test_skeleton(void)
int duration = 0, err;
struct test_skeleton* skel;
struct test_skeleton__bss *bss;
+ struct test_skeleton__data *data;
+ struct test_skeleton__rodata *rodata;
struct test_skeleton__kconfig *kcfg;
skel = test_skeleton__open();
@@ -24,13 +26,45 @@ void test_skeleton(void)
if (CHECK(skel->kconfig, "skel_kconfig", "kconfig is mmaped()!\n"))
goto cleanup;
+ bss = skel->bss;
+ data = skel->data;
+ rodata = skel->rodata;
+
+ /* validate values are pre-initialized correctly */
+ CHECK(data->in1 != -1, "in1", "got %d != exp %d\n", data->in1, -1);
+ CHECK(data->out1 != -1, "out1", "got %d != exp %d\n", data->out1, -1);
+ CHECK(data->in2 != -1, "in2", "got %lld != exp %lld\n", data->in2, -1LL);
+ CHECK(data->out2 != -1, "out2", "got %lld != exp %lld\n", data->out2, -1LL);
+
+ CHECK(bss->in3 != 0, "in3", "got %d != exp %d\n", bss->in3, 0);
+ CHECK(bss->out3 != 0, "out3", "got %d != exp %d\n", bss->out3, 0);
+ CHECK(bss->in4 != 0, "in4", "got %lld != exp %lld\n", bss->in4, 0LL);
+ CHECK(bss->out4 != 0, "out4", "got %lld != exp %lld\n", bss->out4, 0LL);
+
+ CHECK(rodata->in.in6 != 0, "in6", "got %d != exp %d\n", rodata->in.in6, 0);
+ CHECK(bss->out6 != 0, "out6", "got %d != exp %d\n", bss->out6, 0);
+
+ /* validate we can pre-setup global variables, even in .bss */
+ data->in1 = 10;
+ data->in2 = 11;
+ bss->in3 = 12;
+ bss->in4 = 13;
+ rodata->in.in6 = 14;
+
err = test_skeleton__load(skel);
if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
goto cleanup;
- bss = skel->bss;
- bss->in1 = 1;
- bss->in2 = 2;
+ /* validate pre-setup values are still there */
+ CHECK(data->in1 != 10, "in1", "got %d != exp %d\n", data->in1, 10);
+ CHECK(data->in2 != 11, "in2", "got %lld != exp %lld\n", data->in2, 11LL);
+ CHECK(bss->in3 != 12, "in3", "got %d != exp %d\n", bss->in3, 12);
+ CHECK(bss->in4 != 13, "in4", "got %lld != exp %lld\n", bss->in4, 13LL);
+ CHECK(rodata->in.in6 != 14, "in6", "got %d != exp %d\n", rodata->in.in6, 14);
+
+ /* now set new values and attach to get them into outX variables */
+ data->in1 = 1;
+ data->in2 = 2;
bss->in3 = 3;
bss->in4 = 4;
bss->in5.a = 5;
@@ -44,14 +78,15 @@ void test_skeleton(void)
/* trigger tracepoint */
usleep(1);
- CHECK(bss->out1 != 1, "res1", "got %d != exp %d\n", bss->out1, 1);
- CHECK(bss->out2 != 2, "res2", "got %lld != exp %d\n", bss->out2, 2);
+ CHECK(data->out1 != 1, "res1", "got %d != exp %d\n", data->out1, 1);
+ CHECK(data->out2 != 2, "res2", "got %lld != exp %d\n", data->out2, 2);
CHECK(bss->out3 != 3, "res3", "got %d != exp %d\n", (int)bss->out3, 3);
CHECK(bss->out4 != 4, "res4", "got %lld != exp %d\n", bss->out4, 4);
CHECK(bss->handler_out5.a != 5, "res5", "got %d != exp %d\n",
bss->handler_out5.a, 5);
CHECK(bss->handler_out5.b != 6, "res6", "got %lld != exp %d\n",
bss->handler_out5.b, 6);
+ CHECK(bss->out6 != 14, "res7", "got %d != exp %d\n", bss->out6, 14);
CHECK(bss->bpf_syscall != kcfg->CONFIG_BPF_SYSCALL, "ext1",
"got %d != exp %d\n", bss->bpf_syscall, kcfg->CONFIG_BPF_SYSCALL);
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
new file mode 100644
index 000000000000..686b40f11a45
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <linux/btf.h>
+#include "netif_receive_skb.skel.h"
+
+/* Demonstrate that bpf_snprintf_btf succeeds and that various data types
+ * are formatted correctly.
+ */
+void test_snprintf_btf(void)
+{
+ struct netif_receive_skb *skel;
+ struct netif_receive_skb__bss *bss;
+ int err, duration = 0;
+
+ skel = netif_receive_skb__open();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ err = netif_receive_skb__load(skel);
+ if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+ goto cleanup;
+
+ bss = skel->bss;
+
+ err = netif_receive_skb__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* generate receive event */
+ err = system("ping -c 1 127.0.0.1 > /dev/null");
+ if (CHECK(err, "system", "ping failed: %d\n", err))
+ goto cleanup;
+
+ if (bss->skip) {
+ printf("%s:SKIP:no __builtin_btf_type_id\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+
+ /*
+ * Make sure netif_receive_skb program was triggered
+ * and it set expected return values from bpf_trace_printk()s
+ * and all tests ran.
+ */
+ if (CHECK(bss->ret <= 0,
+ "bpf_snprintf_btf: got return value",
+ "ret <= 0 %ld test %d\n", bss->ret, bss->ran_subtests))
+ goto cleanup;
+
+	if (CHECK(bss->ran_subtests == 0, "check if subtests ran",
+		  "no subtests ran, did BPF program run?\n"))
+ goto cleanup;
+
+ if (CHECK(bss->num_subtests != bss->ran_subtests,
+ "check all subtests ran",
+ "only ran %d of %d tests\n", bss->num_subtests,
+ bss->ran_subtests))
+ goto cleanup;
+
+cleanup:
+ netif_receive_skb__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
new file mode 100644
index 000000000000..af87118e748e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <linux/compiler.h>
+
+#include "network_helpers.h"
+#include "cgroup_helpers.h"
+#include "test_progs.h"
+#include "bpf_rlimit.h"
+#include "test_sock_fields.skel.h"
+
+enum bpf_linum_array_idx {
+ EGRESS_LINUM_IDX,
+ INGRESS_LINUM_IDX,
+ __NR_BPF_LINUM_ARRAY_IDX,
+};
+
+struct bpf_spinlock_cnt {
+ struct bpf_spin_lock lock;
+ __u32 cnt;
+};
+
+#define PARENT_CGROUP "/test-bpf-sock-fields"
+#define CHILD_CGROUP "/test-bpf-sock-fields/child"
+#define DATA "Hello BPF!"
+#define DATA_LEN sizeof(DATA)
+
+static struct sockaddr_in6 srv_sa6, cli_sa6;
+static int sk_pkt_out_cnt10_fd;
+static struct test_sock_fields *skel;
+static int sk_pkt_out_cnt_fd;
+static __u64 parent_cg_id;
+static __u64 child_cg_id;
+static int linum_map_fd;
+static __u32 duration;
+
+static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
+static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
+
+static void print_sk(const struct bpf_sock *sk, const char *prefix)
+{
+ char src_ip4[24], dst_ip4[24];
+ char src_ip6[64], dst_ip6[64];
+
+ inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
+ inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
+ inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
+ inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
+
+ printf("%s: state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
+ "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
+ "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
+ prefix,
+ sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
+ sk->mark, sk->priority,
+ sk->src_ip4, src_ip4,
+ sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
+ src_ip6, sk->src_port,
+ sk->dst_ip4, dst_ip4,
+ sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
+ dst_ip6, ntohs(sk->dst_port));
+}
+
+static void print_tp(const struct bpf_tcp_sock *tp, const char *prefix)
+{
+	printf("%s: snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
+	       "snd_nxt:%u snd_una:%u mss_cache:%u ecn_flags:%u "
+ "rate_delivered:%u rate_interval_us:%u packets_out:%u "
+ "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
+ "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
+ "bytes_received:%llu bytes_acked:%llu\n",
+ prefix,
+ tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
+ tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
+ tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
+ tp->packets_out, tp->retrans_out, tp->total_retrans,
+ tp->segs_in, tp->data_segs_in, tp->segs_out,
+ tp->data_segs_out, tp->lost_out, tp->sacked_out,
+ tp->bytes_received, tp->bytes_acked);
+}
+
+static void check_result(void)
+{
+ struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
+ struct bpf_sock srv_sk, cli_sk, listen_sk;
+ __u32 ingress_linum, egress_linum;
+ int err;
+
+ err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
+ &egress_linum);
+ CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+ "err:%d errno:%d\n", err, errno);
+
+ err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
+ &ingress_linum);
+ CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+ "err:%d errno:%d\n", err, errno);
+
+ memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
+ memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
+ memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
+ memcpy(&cli_tp, &skel->bss->cli_tp, sizeof(cli_tp));
+ memcpy(&listen_sk, &skel->bss->listen_sk, sizeof(listen_sk));
+ memcpy(&listen_tp, &skel->bss->listen_tp, sizeof(listen_tp));
+
+ print_sk(&listen_sk, "listen_sk");
+ print_sk(&srv_sk, "srv_sk");
+ print_sk(&cli_sk, "cli_sk");
+ print_tp(&listen_tp, "listen_tp");
+ print_tp(&srv_tp, "srv_tp");
+ print_tp(&cli_tp, "cli_tp");
+
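+	/* state 10 is BPF_TCP_LISTEN; a listening socket must have no peer
+	 * (dst) address or port set.
+	 */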
+ CHECK(listen_sk.state != 10 ||
+ listen_sk.family != AF_INET6 ||
+ listen_sk.protocol != IPPROTO_TCP ||
+ memcmp(listen_sk.src_ip6, &in6addr_loopback,
+ sizeof(listen_sk.src_ip6)) ||
+ listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
+ listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
+ listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
+ listen_sk.dst_port,
+ "listen_sk",
+ "Unexpected. Check listen_sk output. ingress_linum:%u\n",
+ ingress_linum);
+
+ CHECK(srv_sk.state == 10 ||
+ !srv_sk.state ||
+ srv_sk.family != AF_INET6 ||
+ srv_sk.protocol != IPPROTO_TCP ||
+ memcmp(srv_sk.src_ip6, &in6addr_loopback,
+ sizeof(srv_sk.src_ip6)) ||
+ memcmp(srv_sk.dst_ip6, &in6addr_loopback,
+ sizeof(srv_sk.dst_ip6)) ||
+ srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
+ srv_sk.dst_port != cli_sa6.sin6_port,
+ "srv_sk", "Unexpected. Check srv_sk output. egress_linum:%u\n",
+ egress_linum);
+
+ CHECK(!skel->bss->lsndtime, "srv_tp", "Unexpected lsndtime:0\n");
+
+ CHECK(cli_sk.state == 10 ||
+ !cli_sk.state ||
+ cli_sk.family != AF_INET6 ||
+ cli_sk.protocol != IPPROTO_TCP ||
+ memcmp(cli_sk.src_ip6, &in6addr_loopback,
+ sizeof(cli_sk.src_ip6)) ||
+ memcmp(cli_sk.dst_ip6, &in6addr_loopback,
+ sizeof(cli_sk.dst_ip6)) ||
+ cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
+ cli_sk.dst_port != srv_sa6.sin6_port,
+ "cli_sk", "Unexpected. Check cli_sk output. egress_linum:%u\n",
+ egress_linum);
+
+ CHECK(listen_tp.data_segs_out ||
+ listen_tp.data_segs_in ||
+ listen_tp.total_retrans ||
+ listen_tp.bytes_acked,
+ "listen_tp",
+ "Unexpected. Check listen_tp output. ingress_linum:%u\n",
+ ingress_linum);
+
+ CHECK(srv_tp.data_segs_out != 2 ||
+ srv_tp.data_segs_in ||
+ srv_tp.snd_cwnd != 10 ||
+ srv_tp.total_retrans ||
+ srv_tp.bytes_acked < 2 * DATA_LEN,
+ "srv_tp", "Unexpected. Check srv_tp output. egress_linum:%u\n",
+ egress_linum);
+
+ CHECK(cli_tp.data_segs_out ||
+ cli_tp.data_segs_in != 2 ||
+ cli_tp.snd_cwnd != 10 ||
+ cli_tp.total_retrans ||
+ cli_tp.bytes_received < 2 * DATA_LEN,
+ "cli_tp", "Unexpected. Check cli_tp output. egress_linum:%u\n",
+ egress_linum);
+
+ CHECK(skel->bss->parent_cg_id != parent_cg_id,
+ "parent_cg_id", "%zu != %zu\n",
+ (size_t)skel->bss->parent_cg_id, (size_t)parent_cg_id);
+
+ CHECK(skel->bss->child_cg_id != child_cg_id,
+ "child_cg_id", "%zu != %zu\n",
+ (size_t)skel->bss->child_cg_id, (size_t)child_cg_id);
+}
+
+static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
+{
+ struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {};
+ int err;
+
+ pkt_out_cnt.cnt = ~0;
+ pkt_out_cnt10.cnt = ~0;
+ err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt);
+ if (!err)
+ err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd,
+ &pkt_out_cnt10);
+
+	/* The bpf prog only counts fullsocks, and the passive
+	 * connection does not become a fullsock until the 3WHS has
+	 * finished, so the bpf prog has only counted the two data
+	 * packets out.
+ */
+ CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 2 ||
+ pkt_out_cnt10.cnt < 0xeB9F + 20,
+ "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
+ "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
+ err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
+
+ pkt_out_cnt.cnt = ~0;
+ pkt_out_cnt10.cnt = ~0;
+ err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt);
+ if (!err)
+ err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd,
+ &pkt_out_cnt10);
+	/* The active connection is a fullsock from the beginning:
+	 * 1 SYN and 1 ACK during the 3WHS,
+	 * 2 ACKs on the data packets.
+ *
+ * The bpf_prog initialized it to 0xeB9F.
+ */
+ CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 4 ||
+ pkt_out_cnt10.cnt < 0xeB9F + 40,
+ "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
+ "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
+ err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
+}
+
+static int init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
+{
+ struct bpf_spinlock_cnt scnt = {};
+ int err;
+
+ scnt.cnt = pkt_out_cnt;
+ err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt,
+ BPF_NOEXIST);
+ if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
+ "err:%d errno:%d\n", err, errno))
+ return err;
+
+ err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt,
+ BPF_NOEXIST);
+ if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
+ "err:%d errno:%d\n", err, errno))
+ return err;
+
+ return 0;
+}
+
+static void test(void)
+{
+ int listen_fd = -1, cli_fd = -1, accept_fd = -1, err, i;
+ socklen_t addrlen = sizeof(struct sockaddr_in6);
+ char buf[DATA_LEN];
+
+ /* Prepare listen_fd */
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ /* start_server() has logged the error details */
+ if (CHECK_FAIL(listen_fd == -1))
+ goto done;
+
+ err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
+ if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
+ errno))
+ goto done;
+ memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
+
+ cli_fd = connect_to_fd(listen_fd, 0);
+ if (CHECK_FAIL(cli_fd == -1))
+ goto done;
+
+ err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
+ if (CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d\n",
+ err, errno))
+ goto done;
+
+ accept_fd = accept(listen_fd, NULL, NULL);
+ if (CHECK(accept_fd == -1, "accept(listen_fd)",
+ "accept_fd:%d errno:%d\n",
+ accept_fd, errno))
+ goto done;
+
+ if (init_sk_storage(accept_fd, 0xeB9F))
+ goto done;
+
+ for (i = 0; i < 2; i++) {
+ /* Send some data from accept_fd to cli_fd.
+ * MSG_EOR to stop kernel from coalescing two pkts.
+ */
+ err = send(accept_fd, DATA, DATA_LEN, MSG_EOR);
+ if (CHECK(err != DATA_LEN, "send(accept_fd)",
+ "err:%d errno:%d\n", err, errno))
+ goto done;
+
+ err = recv(cli_fd, buf, DATA_LEN, 0);
+ if (CHECK(err != DATA_LEN, "recv(cli_fd)", "err:%d errno:%d\n",
+ err, errno))
+ goto done;
+ }
+
+ shutdown(cli_fd, SHUT_WR);
+ err = recv(accept_fd, buf, 1, 0);
+ if (CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n",
+ err, errno))
+ goto done;
+ shutdown(accept_fd, SHUT_WR);
+ err = recv(cli_fd, buf, 1, 0);
+ if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n",
+ err, errno))
+ goto done;
+ check_sk_pkt_out_cnt(accept_fd, cli_fd);
+ check_result();
+
+done:
+ if (accept_fd != -1)
+ close(accept_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+ if (listen_fd != -1)
+ close(listen_fd);
+}
+
+void test_sock_fields(void)
+{
+ struct bpf_link *egress_link = NULL, *ingress_link = NULL;
+ int parent_cg_fd = -1, child_cg_fd = -1;
+
+ /* Create a cgroup, get fd, and join it */
+ parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
+ if (CHECK_FAIL(parent_cg_fd < 0))
+ return;
+ parent_cg_id = get_cgroup_id(PARENT_CGROUP);
+ if (CHECK_FAIL(!parent_cg_id))
+ goto done;
+
+ child_cg_fd = test__join_cgroup(CHILD_CGROUP);
+ if (CHECK_FAIL(child_cg_fd < 0))
+ goto done;
+ child_cg_id = get_cgroup_id(CHILD_CGROUP);
+ if (CHECK_FAIL(!child_cg_id))
+ goto done;
+
+ skel = test_sock_fields__open_and_load();
+ if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
+ goto done;
+
+ egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
+ child_cg_fd);
+ if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n",
+ PTR_ERR(egress_link)))
+ goto done;
+
+ ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
+ child_cg_fd);
+ if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n",
+ PTR_ERR(ingress_link)))
+ goto done;
+
+ linum_map_fd = bpf_map__fd(skel->maps.linum_map);
+ sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
+ sk_pkt_out_cnt10_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt10);
+
+ test();
+
+done:
+ bpf_link__destroy(egress_link);
+ bpf_link__destroy(ingress_link);
+ test_sock_fields__destroy(skel);
+ if (child_cg_fd != -1)
+ close(child_cg_fd);
+ if (parent_cg_fd != -1)
+ close(parent_cg_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index aa43e0bd210c..85f73261fab0 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -1,7 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Cloudflare
+#include <error.h>
#include "test_progs.h"
+#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_update.skel.h"
+#include "test_sockmap_invalid_update.skel.h"
+#include "bpf_iter_sockmap.skel.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
@@ -43,6 +48,37 @@ error:
return -1;
}
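+/* Walk both maps index by index and compare socket cookies: an entry that is
+ * absent from src must also be absent from dst.
+ */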
+static void compare_cookies(struct bpf_map *src, struct bpf_map *dst)
+{
+ __u32 i, max_entries = bpf_map__max_entries(src);
+ int err, duration = 0, src_fd, dst_fd;
+
+ src_fd = bpf_map__fd(src);
+ dst_fd = bpf_map__fd(dst);
+
+ for (i = 0; i < max_entries; i++) {
+ __u64 src_cookie, dst_cookie;
+
+ err = bpf_map_lookup_elem(src_fd, &i, &src_cookie);
+ if (err && errno == ENOENT) {
+ err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
+ CHECK(!err, "map_lookup_elem(dst)", "element %u not deleted\n", i);
+ CHECK(err && errno != ENOENT, "map_lookup_elem(dst)", "%s\n",
+ strerror(errno));
+ continue;
+ }
+ if (CHECK(err, "lookup_elem(src)", "%s\n", strerror(errno)))
+ continue;
+
+ err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
+ if (CHECK(err, "lookup_elem(dst)", "%s\n", strerror(errno)))
+ continue;
+
+ CHECK(dst_cookie != src_cookie, "cookie mismatch",
+ "%llu != %llu (pos %u)\n", dst_cookie, src_cookie, i);
+ }
+}
+
/* Create a map, populate it with one socket, and free the map. */
static void test_sockmap_create_update_free(enum bpf_map_type map_type)
{
@@ -70,10 +106,198 @@ out:
close(s);
}
+static void test_skmsg_helpers(enum bpf_map_type map_type)
+{
+ struct test_skmsg_load_helpers *skel;
+ int err, map, verdict;
+
+ skel = test_skmsg_load_helpers__open_and_load();
+ if (CHECK_FAIL(!skel)) {
+ perror("test_skmsg_load_helpers__open_and_load");
+ return;
+ }
+
+ verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+ map = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach");
+ goto out;
+ }
+
+ err = bpf_prog_detach2(verdict, map, BPF_SK_MSG_VERDICT);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_detach2");
+ goto out;
+ }
+out:
+ test_skmsg_load_helpers__destroy(skel);
+}
+
+static void test_sockmap_update(enum bpf_map_type map_type)
+{
+ struct bpf_prog_test_run_attr tattr;
+ int err, prog, src, duration = 0;
+ struct test_sockmap_update *skel;
+ struct bpf_map *dst_map;
+ const __u32 zero = 0;
+ char dummy[14] = {0};
+ __s64 sk;
+
+ sk = connected_socket_v4();
+ if (CHECK(sk == -1, "connected_socket_v4", "cannot connect\n"))
+ return;
+
+ skel = test_sockmap_update__open_and_load();
+ if (CHECK(!skel, "open_and_load", "cannot load skeleton\n"))
+ goto close_sk;
+
+ prog = bpf_program__fd(skel->progs.copy_sock_map);
+ src = bpf_map__fd(skel->maps.src);
+ if (map_type == BPF_MAP_TYPE_SOCKMAP)
+ dst_map = skel->maps.dst_sock_map;
+ else
+ dst_map = skel->maps.dst_sock_hash;
+
+ err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST);
+ if (CHECK(err, "update_elem(src)", "errno=%u\n", errno))
+ goto out;
+
+ tattr = (struct bpf_prog_test_run_attr){
+ .prog_fd = prog,
+ .repeat = 1,
+ .data_in = dummy,
+ .data_size_in = sizeof(dummy),
+ };
+
+ err = bpf_prog_test_run_xattr(&tattr);
+ if (CHECK_ATTR(err || !tattr.retval, "bpf_prog_test_run",
+ "errno=%u retval=%u\n", errno, tattr.retval))
+ goto out;
+
+ compare_cookies(skel->maps.src, dst_map);
+
+out:
+ test_sockmap_update__destroy(skel);
+close_sk:
+ close(sk);
+}
+
+static void test_sockmap_invalid_update(void)
+{
+ struct test_sockmap_invalid_update *skel;
+ int duration = 0;
+
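+	/* open_and_load() is expected to fail here: the program updates a
+	 * sockmap from an unsafe context, so the verifier should reject it
+	 * and the skeleton pointer stays NULL.
+	 */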
+ skel = test_sockmap_invalid_update__open_and_load();
+ if (CHECK(skel, "open_and_load", "verifier accepted map_update\n"))
+ test_sockmap_invalid_update__destroy(skel);
+}
+
+static void test_sockmap_copy(enum bpf_map_type map_type)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ int err, len, src_fd, iter_fd, duration = 0;
+ union bpf_iter_link_info linfo = {};
+ __u32 i, num_sockets, num_elems;
+ struct bpf_iter_sockmap *skel;
+ __s64 *sock_fd = NULL;
+ struct bpf_link *link;
+ struct bpf_map *src;
+ char buf[64];
+
+ skel = bpf_iter_sockmap__open_and_load();
+ if (CHECK(!skel, "bpf_iter_sockmap__open_and_load", "skeleton open_and_load failed\n"))
+ return;
+
+ if (map_type == BPF_MAP_TYPE_SOCKMAP) {
+ src = skel->maps.sockmap;
+ num_elems = bpf_map__max_entries(src);
+ num_sockets = num_elems - 1;
+ } else {
+ src = skel->maps.sockhash;
+ num_elems = bpf_map__max_entries(src) - 1;
+ num_sockets = num_elems;
+ }
+
+ sock_fd = calloc(num_sockets, sizeof(*sock_fd));
+ if (CHECK(!sock_fd, "calloc(sock_fd)", "failed to allocate\n"))
+ goto out;
+
+ for (i = 0; i < num_sockets; i++)
+ sock_fd[i] = -1;
+
+ src_fd = bpf_map__fd(src);
+
+ for (i = 0; i < num_sockets; i++) {
+ sock_fd[i] = connected_socket_v4();
+ if (CHECK(sock_fd[i] == -1, "connected_socket_v4", "cannot connect\n"))
+ goto out;
+
+ err = bpf_map_update_elem(src_fd, &i, &sock_fd[i], BPF_NOEXIST);
+ if (CHECK(err, "map_update", "failed: %s\n", strerror(errno)))
+ goto out;
+ }
+
+ linfo.map.map_fd = src_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.copy, &opts);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+	/* consume the whole iterator output; this drives the copy program over every element */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ if (CHECK(len < 0, "read", "failed: %s\n", strerror(errno)))
+ goto close_iter;
+
+ /* test results */
+ if (CHECK(skel->bss->elems != num_elems, "elems", "got %u expected %u\n",
+ skel->bss->elems, num_elems))
+ goto close_iter;
+
+ if (CHECK(skel->bss->socks != num_sockets, "socks", "got %u expected %u\n",
+ skel->bss->socks, num_sockets))
+ goto close_iter;
+
+ compare_cookies(src, skel->maps.dst);
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ for (i = 0; sock_fd && i < num_sockets; i++)
+ if (sock_fd[i] >= 0)
+ close(sock_fd[i]);
+ if (sock_fd)
+ free(sock_fd);
+ bpf_iter_sockmap__destroy(skel);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash create_update_free"))
test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap sk_msg load helpers"))
+ test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP);
+ if (test__start_subtest("sockhash sk_msg load helpers"))
+ test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap update"))
+ test_sockmap_update(BPF_MAP_TYPE_SOCKMAP);
+ if (test__start_subtest("sockhash update"))
+ test_sockmap_update(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap update in unsafe context"))
+ test_sockmap_invalid_update();
+ if (test__start_subtest("sockmap copy"))
+ test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP);
+ if (test__start_subtest("sockhash copy"))
+ test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index 8547ecbdc61f..ec281b0363b8 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -193,11 +193,10 @@ static void run_test(int cgroup_fd)
if (CHECK_FAIL(server_fd < 0))
goto close_bpf_object;
+ pthread_mutex_lock(&server_started_mtx);
if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
(void *)&server_fd)))
goto close_server_fd;
-
- pthread_mutex_lock(&server_started_mtx);
pthread_cond_wait(&server_started, &server_started_mtx);
pthread_mutex_unlock(&server_started_mtx);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index 2061a6beac0f..b25c9c45c148 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -13,6 +13,7 @@ static int getsetsockopt(void)
char cc[16]; /* TCP_CA_NAME_MAX */
} buf = {};
socklen_t optlen;
+ char *big_buf = NULL;
fd = socket(AF_INET, SOCK_STREAM, 0);
if (fd < 0) {
@@ -22,24 +23,31 @@ static int getsetsockopt(void)
/* IP_TOS - BPF bypass */
- buf.u8[0] = 0x08;
- err = setsockopt(fd, SOL_IP, IP_TOS, &buf, 1);
+ optlen = getpagesize() * 2;
+ big_buf = calloc(1, optlen);
+ if (!big_buf) {
+ log_err("Couldn't allocate two pages");
+ goto err;
+ }
+
+ *(int *)big_buf = 0x08;
+ err = setsockopt(fd, SOL_IP, IP_TOS, big_buf, optlen);
if (err) {
log_err("Failed to call setsockopt(IP_TOS)");
goto err;
}
- buf.u8[0] = 0x00;
+ memset(big_buf, 0, optlen);
optlen = 1;
- err = getsockopt(fd, SOL_IP, IP_TOS, &buf, &optlen);
+ err = getsockopt(fd, SOL_IP, IP_TOS, big_buf, &optlen);
if (err) {
log_err("Failed to call getsockopt(IP_TOS)");
goto err;
}
- if (buf.u8[0] != 0x08) {
- log_err("Unexpected getsockopt(IP_TOS) buf[0] 0x%02x != 0x08",
- buf.u8[0]);
+ if (*big_buf != 0x08) {
+ log_err("Unexpected getsockopt(IP_TOS) optval 0x%x != 0x08",
+ (int)*big_buf);
goto err;
}
@@ -78,6 +86,28 @@ static int getsetsockopt(void)
goto err;
}
+ /* IP_FREEBIND - BPF can't access optval past PAGE_SIZE */
+
+ optlen = getpagesize() * 2;
+ memset(big_buf, 0, optlen);
+
+ err = setsockopt(fd, SOL_IP, IP_FREEBIND, big_buf, optlen);
+ if (err != 0) {
+ log_err("Failed to call setsockopt, ret=%d", err);
+ goto err;
+ }
+
+ err = getsockopt(fd, SOL_IP, IP_FREEBIND, big_buf, &optlen);
+ if (err != 0) {
+ log_err("Failed to call getsockopt, ret=%d", err);
+ goto err;
+ }
+
+ if (optlen != 1 || *(__u8 *)big_buf != 0x55) {
+ log_err("Unexpected IP_FREEBIND getsockopt, optlen=%d, optval=0x%x",
+ optlen, *(__u8 *)big_buf);
+ }
+
/* SO_SNDBUF is overwritten */
buf.u32 = 0x01010101;
@@ -124,9 +154,11 @@ static int getsetsockopt(void)
goto err;
}
+ free(big_buf);
close(fd);
return 0;
err:
+ free(big_buf);
close(fd);
return -1;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
index 1ae00cd3174e..7577a77a4c4c 100644
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -1,5 +1,19 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
+
+static void *spin_lock_thread(void *arg)
+{
+ __u32 duration, retval;
+ int err, prog_fd = *(u32 *) arg;
+
+ err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+ pthread_exit(arg);
+}
void test_spinlock(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index f002e3090d92..11a769e18f5d 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -6,11 +6,13 @@ static __u64 read_perf_max_sample_freq(void)
{
__u64 sample_freq = 5000; /* fallback to 5000 on error */
FILE *f;
+ __u32 duration = 0;
f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r");
if (f == NULL)
return sample_freq;
- fscanf(f, "%llu", &sample_freq);
+	CHECK(fscanf(f, "%llu", &sample_freq) != 1, "Get max sample rate",
+	      "returning default value 5000, err %d\n", -errno);
fclose(f);
return sample_freq;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs.c b/tools/testing/selftests/bpf/prog_tests/subprogs.c
new file mode 100644
index 000000000000..a00abf58c037
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/subprogs.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <time.h>
+#include "test_subprogs.skel.h"
+
+static int duration;
+
+void test_subprogs(void)
+{
+ struct test_subprogs *skel;
+ int err;
+
+ skel = test_subprogs__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ err = test_subprogs__attach(skel);
+ if (CHECK(err, "skel_attach", "failed to attach skeleton: %d\n", err))
+ goto cleanup;
+
+ usleep(1);
+
+ CHECK(skel->bss->res1 != 12, "res1", "got %d, exp %d\n", skel->bss->res1, 12);
+ CHECK(skel->bss->res2 != 17, "res2", "got %d, exp %d\n", skel->bss->res2, 17);
+ CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19);
+ CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36);
+
+cleanup:
+ test_subprogs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index bb8fe646dd9f..ee27d68d2a1c 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
/* test_tailcall_1 checks basic functionality by patching multiple locations
* in a single program for a single tail call slot with nop->jmp, jmp->nop
@@ -472,6 +473,329 @@ out:
bpf_object__close(obj);
}
+/* test_tailcall_bpf2bpf_1 checks that tail calls work correctly when combined
+ * with BPF subprograms.
+ */
+static void test_tailcall_bpf2bpf_1(void)
+{
+ int err, map_fd, prog_fd, main_fd, i;
+ struct bpf_map *prog_array;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char prog_name[32];
+
+ err = bpf_prog_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ /* nop -> jmp */
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ 0, &retval, &duration);
+ CHECK(err || retval != 1, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+
+ /* jmp -> nop, call subprog that will do tailcall */
+ i = 1;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ 0, &retval, &duration);
+ CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ /* make sure that subprog can access ctx and entry prog that
+ * called this subprog can properly return
+ */
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ 0, &retval, &duration);
+ CHECK(err || retval != sizeof(pkt_v4) * 2,
+ "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_2 checks that the count value of the tail call limit
+ * enforcement matches expectations when the tail call is preceded by a
+ * bpf2bpf call.
+ */
+static void test_tailcall_bpf2bpf_2(void)
+{
+ int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char buff[128] = {};
+
+ err = bpf_prog_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier/0");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+	data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+	if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+		goto out;
+
+	data_fd = bpf_map__fd(data_map);
+	if (CHECK_FAIL(data_fd < 0))
+		goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n",
+ err, errno, val);
+
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_3 checks that a non-trivial amount of stack (up to
+ * 256 bytes) can be used within bpf subprograms that have tail calls
+ * in them.
+ */
+static void test_tailcall_bpf2bpf_3(void)
+{
+ int err, map_fd, prog_fd, main_fd, i;
+ struct bpf_map *prog_array;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char prog_name[32];
+
+ err = bpf_prog_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != sizeof(pkt_v4) * 3,
+ "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ i = 1;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != sizeof(pkt_v4),
+ "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != sizeof(pkt_v4) * 2,
+ "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_4 checks that the tailcall counter is correctly
+ * preserved across tail calls combined with bpf2bpf calls. To make sure the
+ * tailcall counter behaves correctly, the bpf program goes through the
+ * following flow:
+ *
+ * entry -> entry_subprog -> tailcall0 -> bpf_func0 -> subprog0 ->
+ * -> tailcall1 -> bpf_func1 -> subprog1 -> tailcall2 -> bpf_func2 ->
+ * subprog2 [here bump global counter] --------^
+ *
+ * We go through the first two tail calls and start counting from subprog2,
+ * where the loop begins. At the end of the test, make sure that the global
+ * counter equals 31: the tailcall counter includes the first two tail calls,
+ * whereas the global counter is incremented only in the loop shown above.
+ */
+static void test_tailcall_bpf2bpf_4(void)
+{
+ int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char prog_name[32];
+
+ err = bpf_prog_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+	data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+	if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+		goto out;
+
+	data_fd = bpf_map__fd(data_map);
+	if (CHECK_FAIL(data_fd < 0))
+		goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ CHECK(err || val != 31, "tailcall count", "err %d errno %d count %d\n",
+ err, errno, val);
+
+out:
+ bpf_object__close(obj);
+}
+
void test_tailcalls(void)
{
if (test__start_subtest("tailcall_1"))
@@ -484,4 +808,12 @@ void test_tailcalls(void)
test_tailcall_4();
if (test__start_subtest("tailcall_5"))
test_tailcall_5();
+ if (test__start_subtest("tailcall_bpf2bpf_1"))
+ test_tailcall_bpf2bpf_1();
+ if (test__start_subtest("tailcall_bpf2bpf_2"))
+ test_tailcall_bpf2bpf_2();
+ if (test__start_subtest("tailcall_bpf2bpf_3"))
+ test_tailcall_bpf2bpf_3();
+ if (test__start_subtest("tailcall_bpf2bpf_4"))
+ test_tailcall_bpf2bpf_4();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
new file mode 100644
index 000000000000..c85174cdcb77
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -0,0 +1,610 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <linux/compiler.h>
+
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "test_tcp_hdr_options.h"
+#include "test_tcp_hdr_options.skel.h"
+#include "test_misc_tcp_hdr_options.skel.h"
+
+#define LO_ADDR6 "::1"
+#define CG_NAME "/tcpbpf-hdr-opt-test"
+
+struct bpf_test_option exp_passive_estab_in;
+struct bpf_test_option exp_active_estab_in;
+struct bpf_test_option exp_passive_fin_in;
+struct bpf_test_option exp_active_fin_in;
+struct hdr_stg exp_passive_hdr_stg;
+struct hdr_stg exp_active_hdr_stg = { .active = true, };
+
+static struct test_misc_tcp_hdr_options *misc_skel;
+static struct test_tcp_hdr_options *skel;
+static int lport_linum_map_fd;
+static int hdr_stg_map_fd;
+static __u32 duration;
+static int cg_fd;
+
+struct sk_fds {
+ int srv_fd;
+ int passive_fd;
+ int active_fd;
+ int passive_lport;
+ int active_lport;
+};
+
+static int create_netns(void)
+{
+	if (CHECK(unshare(CLONE_NEWNET), "create netns",
+		  "unshare(CLONE_NEWNET): %s (%d)\n",
+ strerror(errno), errno))
+ return -1;
+
+ if (CHECK(system("ip link set dev lo up"), "run ip cmd",
+ "failed to bring lo link up\n"))
+ return -1;
+
+ return 0;
+}
+
+static int write_sysctl(const char *sysctl, const char *value)
+{
+ int fd, err, len;
+
+ fd = open(sysctl, O_WRONLY);
+ if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
+ sysctl, strerror(errno), errno))
+ return -1;
+
+ len = strlen(value);
+ err = write(fd, value, len);
+ close(fd);
+ if (CHECK(err != len, "write sysctl",
+ "write(%s, %s): err:%d %s (%d)\n",
+ sysctl, value, err, strerror(errno), errno))
+ return -1;
+
+ return 0;
+}
+
+static void print_hdr_stg(const struct hdr_stg *hdr_stg, const char *prefix)
+{
+ fprintf(stderr, "%s{active:%u, resend_syn:%u, syncookie:%u, fastopen:%u}\n",
+ prefix ? : "", hdr_stg->active, hdr_stg->resend_syn,
+ hdr_stg->syncookie, hdr_stg->fastopen);
+}
+
+static void print_option(const struct bpf_test_option *opt, const char *prefix)
+{
+ fprintf(stderr, "%s{flags:0x%x, max_delack_ms:%u, rand:0x%x}\n",
+ prefix ? : "", opt->flags, opt->max_delack_ms, opt->rand);
+}
+
+static void sk_fds_close(struct sk_fds *sk_fds)
+{
+ close(sk_fds->srv_fd);
+ close(sk_fds->passive_fd);
+ close(sk_fds->active_fd);
+}
+
+static int sk_fds_shutdown(struct sk_fds *sk_fds)
+{
+ int ret, abyte;
+
+ shutdown(sk_fds->active_fd, SHUT_WR);
+ ret = read(sk_fds->passive_fd, &abyte, sizeof(abyte));
+ if (CHECK(ret != 0, "read-after-shutdown(passive_fd):",
+ "ret:%d %s (%d)\n",
+ ret, strerror(errno), errno))
+ return -1;
+
+ shutdown(sk_fds->passive_fd, SHUT_WR);
+ ret = read(sk_fds->active_fd, &abyte, sizeof(abyte));
+ if (CHECK(ret != 0, "read-after-shutdown(active_fd):",
+ "ret:%d %s (%d)\n",
+ ret, strerror(errno), errno))
+ return -1;
+
+ return 0;
+}
+
+static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open)
+{
+ const char fast[] = "FAST!!!";
+ struct sockaddr_in6 addr6;
+ socklen_t len;
+
+ sk_fds->srv_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
+ if (CHECK(sk_fds->srv_fd == -1, "start_server", "%s (%d)\n",
+ strerror(errno), errno))
+ goto error;
+
+ if (fast_open)
+ sk_fds->active_fd = fastopen_connect(sk_fds->srv_fd, fast,
+ sizeof(fast), 0);
+ else
+ sk_fds->active_fd = connect_to_fd(sk_fds->srv_fd, 0);
+
+ if (CHECK_FAIL(sk_fds->active_fd == -1)) {
+ close(sk_fds->srv_fd);
+ goto error;
+ }
+
+ len = sizeof(addr6);
+ if (CHECK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6,
+ &len), "getsockname(srv_fd)", "%s (%d)\n",
+ strerror(errno), errno))
+ goto error_close;
+ sk_fds->passive_lport = ntohs(addr6.sin6_port);
+
+ len = sizeof(addr6);
+ if (CHECK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6,
+ &len), "getsockname(active_fd)", "%s (%d)\n",
+ strerror(errno), errno))
+ goto error_close;
+ sk_fds->active_lport = ntohs(addr6.sin6_port);
+
+ sk_fds->passive_fd = accept(sk_fds->srv_fd, NULL, 0);
+ if (CHECK(sk_fds->passive_fd == -1, "accept(srv_fd)", "%s (%d)\n",
+ strerror(errno), errno))
+ goto error_close;
+
+ if (fast_open) {
+ char bytes_in[sizeof(fast)];
+ int ret;
+
+ ret = read(sk_fds->passive_fd, bytes_in, sizeof(bytes_in));
+		if (CHECK(ret != sizeof(fast), "read fastopen syn data",
+			  "expected=%zu actual=%d\n", sizeof(fast), ret)) {
+ close(sk_fds->passive_fd);
+ goto error_close;
+ }
+ }
+
+ return 0;
+
+error_close:
+ close(sk_fds->active_fd);
+ close(sk_fds->srv_fd);
+
+error:
+ memset(sk_fds, -1, sizeof(*sk_fds));
+ return -1;
+}
+
+static int check_hdr_opt(const struct bpf_test_option *exp,
+ const struct bpf_test_option *act,
+ const char *hdr_desc)
+{
+ if (CHECK(memcmp(exp, act, sizeof(*exp)),
+ "expected-vs-actual", "unexpected %s\n", hdr_desc)) {
+ print_option(exp, "expected: ");
+ print_option(act, " actual: ");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int check_hdr_stg(const struct hdr_stg *exp, int fd,
+ const char *stg_desc)
+{
+ struct hdr_stg act;
+
+ if (CHECK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act),
+ "map_lookup(hdr_stg_map_fd)", "%s %s (%d)\n",
+ stg_desc, strerror(errno), errno))
+ return -1;
+
+ if (CHECK(memcmp(exp, &act, sizeof(*exp)),
+ "expected-vs-actual", "unexpected %s\n", stg_desc)) {
+ print_hdr_stg(exp, "expected: ");
+ print_hdr_stg(&act, " actual: ");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int check_error_linum(const struct sk_fds *sk_fds)
+{
+ unsigned int nr_errors = 0;
+ struct linum_err linum_err;
+ int lport;
+
+ lport = sk_fds->passive_lport;
+ if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) {
+ fprintf(stderr,
+ "bpf prog error out at lport:passive(%d), linum:%u err:%d\n",
+ lport, linum_err.linum, linum_err.err);
+ nr_errors++;
+ }
+
+ lport = sk_fds->active_lport;
+ if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) {
+ fprintf(stderr,
+ "bpf prog error out at lport:active(%d), linum:%u err:%d\n",
+ lport, linum_err.linum, linum_err.err);
+ nr_errors++;
+ }
+
+ return nr_errors;
+}
+
+static void check_hdr_and_close_fds(struct sk_fds *sk_fds)
+{
+ const __u32 expected_inherit_cb_flags =
+ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_STATE_CB_FLAG;
+
+ if (sk_fds_shutdown(sk_fds))
+ goto check_linum;
+
+ if (CHECK(expected_inherit_cb_flags != skel->bss->inherit_cb_flags,
+ "Unexpected inherit_cb_flags", "0x%x != 0x%x\n",
+ skel->bss->inherit_cb_flags, expected_inherit_cb_flags))
+ goto check_linum;
+
+ if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd,
+ "passive_hdr_stg"))
+ goto check_linum;
+
+ if (check_hdr_stg(&exp_active_hdr_stg, sk_fds->active_fd,
+ "active_hdr_stg"))
+ goto check_linum;
+
+ if (check_hdr_opt(&exp_passive_estab_in, &skel->bss->passive_estab_in,
+ "passive_estab_in"))
+ goto check_linum;
+
+ if (check_hdr_opt(&exp_active_estab_in, &skel->bss->active_estab_in,
+ "active_estab_in"))
+ goto check_linum;
+
+ if (check_hdr_opt(&exp_passive_fin_in, &skel->bss->passive_fin_in,
+ "passive_fin_in"))
+ goto check_linum;
+
+ check_hdr_opt(&exp_active_fin_in, &skel->bss->active_fin_in,
+ "active_fin_in");
+
+check_linum:
+ CHECK_FAIL(check_error_linum(sk_fds));
+ sk_fds_close(sk_fds);
+}
+
+static void prepare_out(void)
+{
+ skel->bss->active_syn_out = exp_passive_estab_in;
+ skel->bss->passive_synack_out = exp_active_estab_in;
+
+ skel->bss->active_fin_out = exp_passive_fin_in;
+ skel->bss->passive_fin_out = exp_active_fin_in;
+}
+
+static void reset_test(void)
+{
+ size_t optsize = sizeof(struct bpf_test_option);
+ int lport, err;
+
+ memset(&skel->bss->passive_synack_out, 0, optsize);
+ memset(&skel->bss->passive_fin_out, 0, optsize);
+
+ memset(&skel->bss->passive_estab_in, 0, optsize);
+ memset(&skel->bss->passive_fin_in, 0, optsize);
+
+ memset(&skel->bss->active_syn_out, 0, optsize);
+ memset(&skel->bss->active_fin_out, 0, optsize);
+
+ memset(&skel->bss->active_estab_in, 0, optsize);
+ memset(&skel->bss->active_fin_in, 0, optsize);
+
+ skel->bss->inherit_cb_flags = 0;
+
+ skel->data->test_kind = TCPOPT_EXP;
+ skel->data->test_magic = 0xeB9F;
+
+ memset(&exp_passive_estab_in, 0, optsize);
+ memset(&exp_active_estab_in, 0, optsize);
+ memset(&exp_passive_fin_in, 0, optsize);
+ memset(&exp_active_fin_in, 0, optsize);
+
+ memset(&exp_passive_hdr_stg, 0, sizeof(exp_passive_hdr_stg));
+ memset(&exp_active_hdr_stg, 0, sizeof(exp_active_hdr_stg));
+ exp_active_hdr_stg.active = true;
+
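+	/* Drain lport_linum_map so error line numbers recorded by a previous
+	 * subtest do not leak into the next one.
+	 */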
+ err = bpf_map_get_next_key(lport_linum_map_fd, NULL, &lport);
+ while (!err) {
+ bpf_map_delete_elem(lport_linum_map_fd, &lport);
+ err = bpf_map_get_next_key(lport_linum_map_fd, &lport, &lport);
+ }
+}
+
+static void fastopen_estab(void)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_passive_estab_in.rand = 0xfa;
+ exp_passive_estab_in.max_delack_ms = 11;
+
+ exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_active_estab_in.rand = 0xce;
+ exp_active_estab_in.max_delack_ms = 22;
+
+ exp_passive_hdr_stg.fastopen = true;
+
+ prepare_out();
+
+ /* Allow fastopen without fastopen cookie */
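+	/* 1543 == 0x607: enable client and server fastopen and allow it
+	 * without requiring a fastopen cookie or the TCP_FASTOPEN sockopt
+	 * on the listener.
+	 */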
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_fastopen", "1543"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+ PTR_ERR(link)))
+ return;
+
+ if (sk_fds_connect(&sk_fds, true)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void syncookie_estab(void)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_passive_estab_in.rand = 0xfa;
+ exp_passive_estab_in.max_delack_ms = 11;
+
+ exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS |
+ OPTION_F_RESEND;
+ exp_active_estab_in.rand = 0xce;
+ exp_active_estab_in.max_delack_ms = 22;
+
+ exp_passive_hdr_stg.syncookie = true;
+	exp_active_hdr_stg.resend_syn = true;
+
+ prepare_out();
+
+ /* Clear the RESEND to ensure the bpf prog can learn
+ * want_cookie and set the RESEND by itself.
+ */
+ skel->bss->passive_synack_out.flags &= ~OPTION_F_RESEND;
+
+ /* Enforce syncookie mode */
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+ PTR_ERR(link)))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void fin(void)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_fin_in.flags = OPTION_F_RAND;
+ exp_passive_fin_in.rand = 0xfa;
+
+ exp_active_fin_in.flags = OPTION_F_RAND;
+ exp_active_fin_in.rand = 0xce;
+
+ prepare_out();
+
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+ PTR_ERR(link)))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void __simple_estab(bool exprm)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_passive_estab_in.rand = 0xfa;
+ exp_passive_estab_in.max_delack_ms = 11;
+
+ exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_active_estab_in.rand = 0xce;
+ exp_active_estab_in.max_delack_ms = 22;
+
+ prepare_out();
+
+ if (!exprm) {
+ skel->data->test_kind = 0xB9;
+ skel->data->test_magic = 0;
+ }
+
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+ PTR_ERR(link)))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void no_exprm_estab(void)
+{
+ __simple_estab(false);
+}
+
+static void simple_estab(void)
+{
+ __simple_estab(true);
+}
+
+static void misc(void)
+{
+ const char send_msg[] = "MISC!!!";
+ char recv_msg[sizeof(send_msg)];
+ const unsigned int nr_data = 2;
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+ int i, ret;
+
+ lport_linum_map_fd = bpf_map__fd(misc_skel->maps.lport_linum_map);
+
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ return;
+
+ link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd);
+ if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n",
+ PTR_ERR(link)))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ for (i = 0; i < nr_data; i++) {
+ /* MSG_EOR to ensure skb will not be combined */
+ ret = send(sk_fds.active_fd, send_msg, sizeof(send_msg),
+ MSG_EOR);
+ if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n",
+ ret))
+ goto check_linum;
+
+ ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg));
+ if (CHECK(ret != sizeof(send_msg), "read(msg)", "ret:%d\n",
+ ret))
+ goto check_linum;
+ }
+
+ if (sk_fds_shutdown(&sk_fds))
+ goto check_linum;
+
+ CHECK(misc_skel->bss->nr_syn != 1, "unexpected nr_syn",
+ "expected (1) != actual (%u)\n",
+ misc_skel->bss->nr_syn);
+
+ CHECK(misc_skel->bss->nr_data != nr_data, "unexpected nr_data",
+ "expected (%u) != actual (%u)\n",
+ nr_data, misc_skel->bss->nr_data);
+
+ /* The last ACK may have been delayed, so it is either 1 or 2. */
+ CHECK(misc_skel->bss->nr_pure_ack != 1 &&
+ misc_skel->bss->nr_pure_ack != 2,
+ "unexpected nr_pure_ack",
+ "expected (1 or 2) != actual (%u)\n",
+ misc_skel->bss->nr_pure_ack);
+
+ CHECK(misc_skel->bss->nr_fin != 1, "unexpected nr_fin",
+ "expected (1) != actual (%u)\n",
+ misc_skel->bss->nr_fin);
+
+check_linum:
+ CHECK_FAIL(check_error_linum(&sk_fds));
+ sk_fds_close(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+struct test {
+ const char *desc;
+ void (*run)(void);
+};
+
+#define DEF_TEST(name) { #name, name }
+static struct test tests[] = {
+ DEF_TEST(simple_estab),
+ DEF_TEST(no_exprm_estab),
+ DEF_TEST(syncookie_estab),
+ DEF_TEST(fastopen_estab),
+ DEF_TEST(fin),
+ DEF_TEST(misc),
+};
+
+void test_tcp_hdr_options(void)
+{
+ int i;
+
+ skel = test_tcp_hdr_options__open_and_load();
+ if (CHECK(!skel, "open and load skel", "failed"))
+ return;
+
+ misc_skel = test_misc_tcp_hdr_options__open_and_load();
+ if (CHECK(!misc_skel, "open and load misc test skel", "failed"))
+ goto skel_destroy;
+
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (CHECK_FAIL(cg_fd < 0))
+ goto skel_destroy;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (!test__start_subtest(tests[i].desc))
+ continue;
+
+ if (create_netns())
+ break;
+
+ tests[i].run();
+
+ reset_test();
+ }
+
+ close(cg_fd);
+skel_destroy:
+ test_misc_tcp_hdr_options__destroy(misc_skel);
+ test_tcp_hdr_options__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index e56b52ab41da..d207e968e6b1 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "cgroup_helpers.h"
+#include "network_helpers.h"
struct tcp_rtt_storage {
__u32 invoked;
@@ -87,34 +88,6 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
return err;
}
-static int connect_to_server(int server_fd)
-{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int fd;
-
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if (fd < 0) {
- log_err("Failed to create client socket");
- return -1;
- }
-
- if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
- log_err("Failed to get server addr");
- goto out;
- }
-
- if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
- log_err("Fail to connect to server");
- goto out;
- }
-
- return fd;
-
-out:
- close(fd);
- return -1;
-}
static int run_test(int cgroup_fd, int server_fd)
{
@@ -145,7 +118,7 @@ static int run_test(int cgroup_fd, int server_fd)
goto close_bpf_object;
}
- client_fd = connect_to_server(server_fd);
+ client_fd = connect_to_fd(server_fd, 0);
if (client_fd < 0) {
err = -1;
goto close_bpf_object;
@@ -180,103 +153,22 @@ close_bpf_object:
return err;
}
-static int start_server(void)
-{
- struct sockaddr_in addr = {
- .sin_family = AF_INET,
- .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
- };
- int fd;
-
- fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
- if (fd < 0) {
- log_err("Failed to create server socket");
- return -1;
- }
-
- if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
- log_err("Failed to bind socket");
- close(fd);
- return -1;
- }
-
- return fd;
-}
-
-static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
-static volatile bool server_done = false;
-
-static void *server_thread(void *arg)
-{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int fd = *(int *)arg;
- int client_fd;
- int err;
-
- err = listen(fd, 1);
-
- pthread_mutex_lock(&server_started_mtx);
- pthread_cond_signal(&server_started);
- pthread_mutex_unlock(&server_started_mtx);
-
- if (CHECK_FAIL(err < 0)) {
- perror("Failed to listed on socket");
- return ERR_PTR(err);
- }
-
- while (true) {
- client_fd = accept(fd, (struct sockaddr *)&addr, &len);
- if (client_fd == -1 && errno == EAGAIN) {
- usleep(50);
- continue;
- }
- break;
- }
- if (CHECK_FAIL(client_fd < 0)) {
- perror("Failed to accept client");
- return ERR_PTR(err);
- }
-
- while (!server_done)
- usleep(50);
-
- close(client_fd);
-
- return NULL;
-}
-
void test_tcp_rtt(void)
{
int server_fd, cgroup_fd;
- pthread_t tid;
- void *server_res;
cgroup_fd = test__join_cgroup("/tcp_rtt");
if (CHECK_FAIL(cgroup_fd < 0))
return;
- server_fd = start_server();
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
- if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
- (void *)&server_fd)))
- goto close_server_fd;
-
- pthread_mutex_lock(&server_started_mtx);
- pthread_cond_wait(&server_started, &server_started_mtx);
- pthread_mutex_unlock(&server_started_mtx);
-
CHECK_FAIL(run_test(cgroup_fd, server_fd));
- server_done = true;
- CHECK_FAIL(pthread_join(tid, &server_res));
- CHECK_FAIL(IS_ERR(server_res));
-
-close_server_fd:
close(server_fd);
+
close_cgroup_fd:
close(cgroup_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
new file mode 100644
index 000000000000..172c999e523c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <test_progs.h>
+
+#define TDIR "/sys/kernel/debug"
+
+static int read_iter(char *file)
+{
+ /* 1024 bytes should be enough for the string "iter" to show up contiguously in one read at some point */
+ char buf[1024];
+ int fd, len;
+
+ fd = open(file, 0);
+ if (fd < 0)
+ return -1;
+ while ((len = read(fd, buf, sizeof(buf) - 1)) > 0) {
+ buf[len] = '\0';
+ if (strstr(buf, "iter")) {
+ close(fd);
+ return 0;
+ }
+ }
+ close(fd);
+ return -1;
+}
+
+static int fn(void)
+{
+ int err, duration = 0;
+
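+ /* Run the steps below in a private mount namespace so the tmpfs and
+ * bpffs mounts do not leak into the host's mount table.
+ */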
+ err = unshare(CLONE_NEWNS);
+ if (CHECK(err, "unshare", "failed: %d\n", errno))
+ goto out;
+
+ err = mount("", "/", "", MS_REC | MS_PRIVATE, NULL);
+ if (CHECK(err, "mount /", "failed: %d\n", errno))
+ goto out;
+
+ err = umount(TDIR);
+ if (CHECK(err, "umount " TDIR, "failed: %d\n", errno))
+ goto out;
+
+ err = mount("none", TDIR, "tmpfs", 0, NULL);
+ if (CHECK(err, "mount", "mount root failed: %d\n", errno))
+ goto out;
+
+ err = mkdir(TDIR "/fs1", 0777);
+ if (CHECK(err, "mkdir "TDIR"/fs1", "failed: %d\n", errno))
+ goto out;
+ err = mkdir(TDIR "/fs2", 0777);
+ if (CHECK(err, "mkdir "TDIR"/fs2", "failed: %d\n", errno))
+ goto out;
+
+ err = mount("bpf", TDIR "/fs1", "bpf", 0, NULL);
+ if (CHECK(err, "mount bpffs "TDIR"/fs1", "failed: %d\n", errno))
+ goto out;
+ err = mount("bpf", TDIR "/fs2", "bpf", 0, NULL);
+ if (CHECK(err, "mount bpffs " TDIR "/fs2", "failed: %d\n", errno))
+ goto out;
+
+ err = read_iter(TDIR "/fs1/maps.debug");
+ if (CHECK(err, "reading " TDIR "/fs1/maps.debug", "failed\n"))
+ goto out;
+ err = read_iter(TDIR "/fs2/progs.debug");
+ if (CHECK(err, "reading " TDIR "/fs2/progs.debug", "failed\n"))
+ goto out;
+out:
+ umount(TDIR "/fs1");
+ umount(TDIR "/fs2");
+ rmdir(TDIR "/fs1");
+ rmdir(TDIR "/fs2");
+ umount(TDIR);
+ exit(err);
+}
+
+void test_test_bpffs(void)
+{
+ int err, duration = 0, status = 0;
+ pid_t pid;
+
+ pid = fork();
+ if (CHECK(pid == -1, "fork", "fork failed %d", errno))
+ return;
+ if (pid == 0)
+ fn();
+ err = waitpid(pid, &status, 0);
+ if (CHECK(err == -1 && errno != ECHILD, "waitpid", "failed %d", errno))
+ return;
+ if (CHECK(WEXITSTATUS(status), "bpffs test", "failed %d", WEXITSTATUS(status)))
+ return;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index 25b068591e9a..193002b14d7f 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -19,7 +19,7 @@ static int libbpf_debug_print(enum libbpf_print_level level,
log_buf = va_arg(args, char *);
if (!log_buf)
goto out;
- if (strstr(log_buf, err_str) == 0)
+ if (err_str && strstr(log_buf, err_str) == 0)
found = true;
out:
printf(format, log_buf);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
new file mode 100644
index 000000000000..91cd6f357246
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <linux/limits.h>
+
+#include "local_storage.skel.h"
+#include "network_helpers.h"
+
+int create_and_unlink_file(void)
+{
+ char fname[PATH_MAX] = "/tmp/fileXXXXXX";
+ int fd;
+
+ fd = mkstemp(fname);
+ if (fd < 0)
+ return fd;
+
+ close(fd);
+ unlink(fname);
+ return 0;
+}
+
+void test_test_local_storage(void)
+{
+ struct local_storage *skel = NULL;
+ int err, duration = 0, serv_sk = -1;
+
+ skel = local_storage__open_and_load();
+ if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
+ goto close_prog;
+
+ err = local_storage__attach(skel);
+ if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
+ goto close_prog;
+
+ skel->bss->monitored_pid = getpid();
+
+ err = create_and_unlink_file();
+ if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ CHECK(skel->data->inode_storage_result != 0, "inode_storage_result",
+ "inode_local_storage not set\n");
+
+ serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
+ goto close_prog;
+
+ CHECK(skel->data->sk_storage_result != 0, "sk_storage_result",
+ "sk_local_storage not set\n");
+
+ close(serv_sk);
+
+close_prog:
+ local_storage__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index b17eb2045c1d..6ab29226c99b 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -10,6 +10,7 @@
#include <unistd.h>
#include <malloc.h>
#include <stdlib.h>
+#include <unistd.h>
#include "lsm.skel.h"
@@ -55,6 +56,7 @@ void test_test_lsm(void)
{
struct lsm *skel = NULL;
int err, duration = 0;
+ int buf = 1234;
skel = lsm__open_and_load();
if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
@@ -81,6 +83,13 @@ void test_test_lsm(void)
CHECK(skel->bss->mprotect_count != 1, "mprotect_count",
"mprotect_count = %d\n", skel->bss->mprotect_count);
+ syscall(__NR_setdomainname, &buf, -2L);
+ syscall(__NR_setdomainname, 0, -3L);
+ syscall(__NR_setdomainname, ~0L, -4L);
+
+ CHECK(skel->bss->copy_test != 3, "copy_test",
+ "copy_test = %d\n", skel->bss->copy_test);
+
close_prog:
lsm__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
index 465b371a561d..9966685866fd 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -96,7 +96,6 @@ void test_test_overhead(void)
if (CHECK(!fexit_prog, "find_probe",
"prog '%s' not found\n", fexit_name))
goto cleanup;
-
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
goto cleanup;
@@ -142,6 +141,7 @@ void test_test_overhead(void)
goto cleanup;
test_run("fexit");
bpf_link__destroy(link);
+
cleanup:
prctl(PR_SET_NAME, comm, 0L, 0L, 0L);
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_profiler.c b/tools/testing/selftests/bpf/prog_tests/test_profiler.c
new file mode 100644
index 000000000000..4ca275101ee0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_profiler.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include "progs/profiler.h"
+#include "profiler1.skel.h"
+#include "profiler2.skel.h"
+#include "profiler3.skel.h"
+
+static int sanity_run(struct bpf_program *prog)
+{
+ struct bpf_prog_test_run_attr test_attr = {};
+ __u64 args[] = {1, 2, 3};
+ __u32 duration = 0;
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(prog);
+ test_attr.prog_fd = prog_fd;
+ test_attr.ctx_in = args;
+ test_attr.ctx_size_in = sizeof(args);
+ err = bpf_prog_test_run_xattr(&test_attr);
+ if (CHECK(err || test_attr.retval, "test_run",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, test_attr.retval, duration))
+ return -1;
+ return 0;
+}
+
+void test_test_profiler(void)
+{
+ struct profiler1 *profiler1_skel = NULL;
+ struct profiler2 *profiler2_skel = NULL;
+ struct profiler3 *profiler3_skel = NULL;
+ __u32 duration = 0;
+ int err;
+
+ profiler1_skel = profiler1__open_and_load();
+ if (CHECK(!profiler1_skel, "profiler1_skel_load", "profiler1 skeleton failed\n"))
+ goto cleanup;
+
+ err = profiler1__attach(profiler1_skel);
+ if (CHECK(err, "profiler1_attach", "profiler1 attach failed: %d\n", err))
+ goto cleanup;
+
+ if (sanity_run(profiler1_skel->progs.raw_tracepoint__sched_process_exec))
+ goto cleanup;
+
+ profiler2_skel = profiler2__open_and_load();
+ if (CHECK(!profiler2_skel, "profiler2_skel_load", "profiler2 skeleton failed\n"))
+ goto cleanup;
+
+ err = profiler2__attach(profiler2_skel);
+ if (CHECK(err, "profiler2_attach", "profiler2 attach failed: %d\n", err))
+ goto cleanup;
+
+ if (sanity_run(profiler2_skel->progs.raw_tracepoint__sched_process_exec))
+ goto cleanup;
+
+ profiler3_skel = profiler3__open_and_load();
+ if (CHECK(!profiler3_skel, "profiler3_skel_load", "profiler3 skeleton failed\n"))
+ goto cleanup;
+
+ err = profiler3__attach(profiler3_skel);
+ if (CHECK(err, "profiler3_attach", "profiler3 attach failed: %d\n", err))
+ goto cleanup;
+
+ if (sanity_run(profiler3_skel->progs.raw_tracepoint__sched_process_exec))
+ goto cleanup;
+cleanup:
+ profiler1__destroy(profiler1_skel);
+ profiler2__destroy(profiler2_skel);
+ profiler3__destroy(profiler3_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_ext.c b/tools/testing/selftests/bpf/prog_tests/trace_ext.c
new file mode 100644
index 000000000000..924441d4362d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/trace_ext.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <sys/stat.h>
+#include <linux/sched.h>
+#include <sys/syscall.h>
+
+#include "test_pkt_md_access.skel.h"
+#include "test_trace_ext.skel.h"
+#include "test_trace_ext_tracing.skel.h"
+
+static __u32 duration;
+
+void test_trace_ext(void)
+{
+ struct test_pkt_md_access *skel_pkt = NULL;
+ struct test_trace_ext_tracing *skel_trace = NULL;
+ struct test_trace_ext_tracing__bss *bss_trace;
+ struct test_trace_ext *skel_ext = NULL;
+ struct test_trace_ext__bss *bss_ext;
+ int err, pkt_fd, ext_fd;
+ struct bpf_program *prog;
+ char buf[100];
+ __u32 retval;
+ __u64 len;
+
+ /* open/load/attach test_pkt_md_access */
+ skel_pkt = test_pkt_md_access__open_and_load();
+ if (CHECK(!skel_pkt, "setup", "classifier/test_pkt_md_access open failed\n"))
+ goto cleanup;
+
+ err = test_pkt_md_access__attach(skel_pkt);
+ if (CHECK(err, "setup", "classifier/test_pkt_md_access attach failed: %d\n", err))
+ goto cleanup;
+
+ prog = skel_pkt->progs.test_pkt_md_access;
+ pkt_fd = bpf_program__fd(prog);
+
+ /* open extension */
+ skel_ext = test_trace_ext__open();
+ if (CHECK(!skel_ext, "setup", "freplace/test_pkt_md_access open failed\n"))
+ goto cleanup;
+
+ /* set extension's attach target - test_pkt_md_access */
+ prog = skel_ext->progs.test_pkt_md_access_new;
+ bpf_program__set_attach_target(prog, pkt_fd, "test_pkt_md_access");
+
+ /* load/attach extension */
+ err = test_trace_ext__load(skel_ext);
+ if (CHECK(err, "setup", "freplace/test_pkt_md_access load failed\n")) {
+ libbpf_strerror(err, buf, sizeof(buf));
+ fprintf(stderr, "%s\n", buf);
+ goto cleanup;
+ }
+
+ err = test_trace_ext__attach(skel_ext);
+ if (CHECK(err, "setup", "freplace/test_pkt_md_access attach failed: %d\n", err))
+ goto cleanup;
+
+ prog = skel_ext->progs.test_pkt_md_access_new;
+ ext_fd = bpf_program__fd(prog);
+
+ /* open tracing */
+ skel_trace = test_trace_ext_tracing__open();
+ if (CHECK(!skel_trace, "setup", "tracing/test_pkt_md_access_new open failed\n"))
+ goto cleanup;
+
+ /* set tracing's attach target - fentry */
+ prog = skel_trace->progs.fentry;
+ bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new");
+
+ /* set tracing's attach target - fexit */
+ prog = skel_trace->progs.fexit;
+ bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new");
+
+ /* load/attach tracing */
+ err = test_trace_ext_tracing__load(skel_trace);
+ if (CHECK(err, "setup", "tracing/test_pkt_md_access_new load failed\n")) {
+ libbpf_strerror(err, buf, sizeof(buf));
+ fprintf(stderr, "%s\n", buf);
+ goto cleanup;
+ }
+
+ err = test_trace_ext_tracing__attach(skel_trace);
+ if (CHECK(err, "setup", "tracing/test_pkt_md_access_new attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger the test */
+ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "run", "err %d errno %d retval %d\n", err, errno, retval);
+
+ bss_ext = skel_ext->bss;
+ bss_trace = skel_trace->bss;
+
+ len = bss_ext->ext_called;
+
+ CHECK(bss_ext->ext_called == 0,
+ "check", "failed to trigger freplace/test_pkt_md_access\n");
+ CHECK(bss_trace->fentry_called != len,
+ "check", "failed to trigger fentry/test_pkt_md_access_new\n");
+ CHECK(bss_trace->fexit_called != len,
+ "check", "failed to trigger fexit/test_pkt_md_access_new\n");
+
+cleanup:
+ test_trace_ext_tracing__destroy(skel_trace);
+ test_trace_ext__destroy(skel_ext);
+ test_pkt_md_access__destroy(skel_pkt);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
new file mode 100644
index 000000000000..39b0decb1bb2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+
+#include "trace_printk.skel.h"
+
+#define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe"
+#define SEARCHMSG "testing,testing"
+
+void test_trace_printk(void)
+{
+ int err, iter = 0, duration = 0, found = 0;
+ struct trace_printk__bss *bss;
+ struct trace_printk *skel;
+ char *buf = NULL;
+ FILE *fp = NULL;
+ size_t buflen;
+
+ skel = trace_printk__open();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ err = trace_printk__load(skel);
+ if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+ goto cleanup;
+
+ bss = skel->bss;
+
+ err = trace_printk__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ fp = fopen(TRACEBUF, "r");
+ if (CHECK(fp == NULL, "could not open trace buffer",
+ "error %d opening %s", errno, TRACEBUF))
+ goto cleanup;
+
+ /* We do not want to wait forever if this test fails... */
+ fcntl(fileno(fp), F_SETFL, O_NONBLOCK);
+
+ /* wait for tracepoint to trigger */
+ usleep(1);
+ trace_printk__detach(skel);
+
+ if (CHECK(bss->trace_printk_ran == 0,
+ "bpf_trace_printk never ran",
+ "ran == %d", bss->trace_printk_ran))
+ goto cleanup;
+
+ if (CHECK(bss->trace_printk_ret <= 0,
+ "bpf_trace_printk returned <= 0 value",
+ "got %d", bss->trace_printk_ret))
+ goto cleanup;
+
+ /* verify our search string is in the trace buffer */
+ while (getline(&buf, &buflen, fp) >= 0 || errno == EAGAIN) {
+ if (strstr(buf, SEARCHMSG) != NULL)
+ found++;
+ if (found == bss->trace_printk_ran)
+ break;
+ if (++iter > 1000)
+ break;
+ }
+
+ if (CHECK(!found, "message from bpf_trace_printk not found",
+ "no instance of %s in %s", SEARCHMSG, TRACEBUF))
+ goto cleanup;
+
+cleanup:
+ trace_printk__destroy(skel);
+ free(buf);
+ if (fp)
+ fclose(fp);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/udp_limit.c b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
new file mode 100644
index 000000000000..2aba09d4d01b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "udp_limit.skel.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+static int duration;
+
+void test_udp_limit(void)
+{
+ struct udp_limit *skel;
+ int fd1 = -1, fd2 = -1;
+ int cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/udp_limit");
+ if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
+ return;
+
+ skel = udp_limit__open_and_load();
+ if (CHECK(!skel, "skel-load", "errno %d", errno))
+ goto close_cgroup_fd;
+
+ skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd);
+ skel->links.sock_release = bpf_program__attach_cgroup(skel->progs.sock_release, cgroup_fd);
+ if (CHECK(IS_ERR(skel->links.sock) || IS_ERR(skel->links.sock_release),
+ "cg-attach", "sock %ld sock_release %ld",
+ PTR_ERR(skel->links.sock),
+ PTR_ERR(skel->links.sock_release)))
+ goto close_skeleton;
+
+ /* BPF program enforces a single UDP socket per cgroup,
+ * verify that.
+ */
+ fd1 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (CHECK(fd1 < 0, "fd1", "errno %d", errno))
+ goto close_skeleton;
+
+ fd2 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (CHECK(fd2 >= 0, "fd2", "errno %d", errno))
+ goto close_skeleton;
+
+ /* We can reopen again after close. */
+ close(fd1);
+ fd1 = -1;
+
+ fd1 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (CHECK(fd1 < 0, "fd1-again", "errno %d", errno))
+ goto close_skeleton;
+
+ /* Make sure the program was invoked the expected
+ * number of times:
+ * - open fd1 - BPF_CGROUP_INET_SOCK_CREATE
+ * - attempt to open fd2 - BPF_CGROUP_INET_SOCK_CREATE
+ * - close fd1 - BPF_CGROUP_INET_SOCK_RELEASE
+ * - open fd1 again - BPF_CGROUP_INET_SOCK_CREATE
+ */
+ if (CHECK(skel->bss->invocations != 4, "bss-invocations",
+ "invocations=%d", skel->bss->invocations))
+ goto close_skeleton;
+
+ /* We should still have a single socket in use */
+ if (CHECK(skel->bss->in_use != 1, "bss-in_use",
+ "in_use=%d", skel->bss->in_use))
+ goto close_skeleton;
+
+close_skeleton:
+ if (fd1 >= 0)
+ close(fd1);
+ if (fd2 >= 0)
+ close(fd2);
+ udp_limit__destroy(skel);
+close_cgroup_fd:
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c
new file mode 100644
index 000000000000..dd324b4933db
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/varlen.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_varlen.skel.h"
+
+#define CHECK_VAL(got, exp) \
+ CHECK((got) != (exp), "check", "got %ld != exp %ld\n", \
+ (long)(got), (long)(exp))
+
+void test_varlen(void)
+{
+ int duration = 0, err;
+ struct test_varlen* skel;
+ struct test_varlen__bss *bss;
+ struct test_varlen__data *data;
+ const char str1[] = "Hello, ";
+ const char str2[] = "World!";
+ const char exp_str[] = "Hello, \0World!\0";
+ const int size1 = sizeof(str1);
+ const int size2 = sizeof(str2);
+
+ skel = test_varlen__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+ bss = skel->bss;
+ data = skel->data;
+
+ err = test_varlen__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ bss->test_pid = getpid();
+
+ /* trigger everything */
+ memcpy(bss->buf_in1, str1, size1);
+ memcpy(bss->buf_in2, str2, size2);
+ bss->capture = true;
+ usleep(1);
+ bss->capture = false;
+
+ CHECK_VAL(bss->payload1_len1, size1);
+ CHECK_VAL(bss->payload1_len2, size2);
+ CHECK_VAL(bss->total1, size1 + size2);
+ CHECK(memcmp(bss->payload1, exp_str, size1 + size2), "content_check",
+ "doesn't match!\n");
+
+ CHECK_VAL(data->payload2_len1, size1);
+ CHECK_VAL(data->payload2_len2, size2);
+ CHECK_VAL(data->total2, size1 + size2);
+ CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check",
+ "doesn't match!\n");
+
+ CHECK_VAL(data->payload3_len1, size1);
+ CHECK_VAL(data->payload3_len2, size2);
+ CHECK_VAL(data->total3, size1 + size2);
+ CHECK(memcmp(data->payload3, exp_str, size1 + size2), "content_check",
+ "doesn't match!\n");
+
+ CHECK_VAL(data->payload4_len1, size1);
+ CHECK_VAL(data->payload4_len2, size2);
+ CHECK_VAL(data->total4, size1 + size2);
+ CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check",
+ "doesn't match!\n");
+cleanup:
+ test_varlen__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c
index dcb5ecac778e..48921ff74850 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
void test_xdp(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 3744196d7cba..d5c98f2cb12f 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -1,13 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
-void test_xdp_adjust_tail(void)
+void test_xdp_adjust_tail_shrink(void)
{
- const char *file = "./test_adjust_tail.o";
+ const char *file = "./test_xdp_adjust_tail_shrink.o";
+ __u32 duration, retval, size, expect_sz;
struct bpf_object *obj;
- char buf[128];
- __u32 duration, retval, size;
int err, prog_fd;
+ char buf[128];
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
@@ -20,10 +21,121 @@ void test_xdp_adjust_tail(void)
"ipv4", "err %d errno %d retval %d size %d\n",
err, errno, retval, size);
+ expect_sz = sizeof(pkt_v6) - 20; /* Test shrink with 20 bytes */
err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_TX || size != 54,
- "ipv6", "err %d errno %d retval %d size %d\n",
+ CHECK(err || retval != XDP_TX || size != expect_sz,
+ "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
+ err, errno, retval, size, expect_sz);
+ bpf_object__close(obj);
+}
+
+void test_xdp_adjust_tail_grow(void)
+{
+ const char *file = "./test_xdp_adjust_tail_grow.o";
+ struct bpf_object *obj;
+ char buf[4096]; /* avoid segfault: large buf to hold grow results */
+ __u32 duration, retval, size, expect_sz;
+ int err, prog_fd;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+ CHECK(err || retval != XDP_DROP,
+ "ipv4", "err %d errno %d retval %d size %d\n",
err, errno, retval, size);
+
+ expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */,
+ buf, &size, &retval, &duration);
+ CHECK(err || retval != XDP_TX || size != expect_sz,
+ "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
+ err, errno, retval, size, expect_sz);
+
+ bpf_object__close(obj);
+}
+
+void test_xdp_adjust_tail_grow2(void)
+{
+ const char *file = "./test_xdp_adjust_tail_grow.o";
+ char buf[4096]; /* avoid segfault: large buf to hold grow results */
+ int tailroom = 320; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
+ struct bpf_object *obj;
+ int err, cnt, i;
+ int max_grow;
+
+ struct bpf_prog_test_run_attr tattr = {
+ .repeat = 1,
+ .data_in = &buf,
+ .data_out = &buf,
+ .data_size_in = 0, /* Per test */
+ .data_size_out = 0, /* Per test */
+ };
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
+ if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ return;
+
+ /* Test case-64 */
+ memset(buf, 1, sizeof(buf));
+ tattr.data_size_in = 64; /* Determine test case via pkt size */
+ tattr.data_size_out = 128; /* Limit copy_size */
+ /* Kernel side alloc packet memory area that is zero init */
+ err = bpf_prog_test_run_xattr(&tattr);
+
+ CHECK_ATTR(errno != ENOSPC /* Due to the copy_size limit in bpf_test_finish */
+ || tattr.retval != XDP_TX
+ || tattr.data_size_out != 192, /* Expected grow size */
+ "case-64",
+ "err %d errno %d retval %d size %d\n",
+ err, errno, tattr.retval, tattr.data_size_out);
+
+ /* Extra checks for data contents */
+ CHECK_ATTR(tattr.data_size_out != 192
+ || buf[0] != 1 || buf[63] != 1 /* 0-63 memset to 1 */
+ || buf[64] != 0 || buf[127] != 0 /* 64-127 memset to 0 */
+ || buf[128] != 1 || buf[191] != 1, /* 128-191 memset to 1 */
+ "case-64-data",
+ "err %d errno %d retval %d size %d\n",
+ err, errno, tattr.retval, tattr.data_size_out);
+
+ /* Test case-128 */
+ memset(buf, 2, sizeof(buf));
+ tattr.data_size_in = 128; /* Determine test case via pkt size */
+ tattr.data_size_out = sizeof(buf); /* Copy everything */
+ err = bpf_prog_test_run_xattr(&tattr);
+
+ max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */
+ CHECK_ATTR(err
+ || tattr.retval != XDP_TX
+ || tattr.data_size_out != max_grow,/* Expect max grow size */
+ "case-128",
+ "err %d errno %d retval %d size %d expect-size %d\n",
+ err, errno, tattr.retval, tattr.data_size_out, max_grow);
+
+ /* Extra checks for data content: Count grow size, will contain zeros */
+ for (i = 0, cnt = 0; i < sizeof(buf); i++) {
+ if (buf[i] == 0)
+ cnt++;
+ }
+ CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */
+ || tattr.data_size_out != max_grow, /* Total grow size */
+ "case-128-data",
+ "err %d errno %d retval %d size %d grow-size %d\n",
+ err, errno, tattr.retval, tattr.data_size_out, cnt);
+
bpf_object__close(obj);
}
+
+void test_xdp_adjust_tail(void)
+{
+ if (test__start_subtest("xdp_adjust_tail_shrink"))
+ test_xdp_adjust_tail_shrink();
+ if (test__start_subtest("xdp_adjust_tail_grow"))
+ test_xdp_adjust_tail_grow();
+ if (test__start_subtest("xdp_adjust_tail_grow2"))
+ test_xdp_adjust_tail_grow2();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index a0f688c37023..2c6c570b21f8 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
#include <net/if.h>
#include "test_xdp.skel.h"
#include "test_xdp_bpf2bpf.skel.h"
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
new file mode 100644
index 000000000000..0176573fe4e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/bpf.h>
+#include <linux/if_link.h>
+#include <test_progs.h>
+
+#include "test_xdp_with_cpumap_helpers.skel.h"
+
+#define IFINDEX_LO 1
+
+void test_xdp_with_cpumap_helpers(void)
+{
+ struct test_xdp_with_cpumap_helpers *skel;
+ struct bpf_prog_info info = {};
+ struct bpf_cpumap_val val = {
+ .qsize = 192,
+ };
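+ /* qsize is the per-CPU ring size used by the cpumap kthread; 192 is an arbitrary value for this test */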
+ __u32 duration = 0, idx = 0;
+ __u32 len = sizeof(info);
+ int err, prog_fd, map_fd;
+
+ skel = test_xdp_with_cpumap_helpers__open_and_load();
+ if (CHECK_FAIL(!skel)) {
+ perror("test_xdp_with_cpumap_helpers__open_and_load");
+ return;
+ }
+
+ /* a program that references a CPUMAP whose entries carry programs
+ * can not be attached as generic XDP
+ */
+ prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+ CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP",
+ "should have failed\n");
+
+ prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
+ map_fd = bpf_map__fd(skel->maps.cpu_map);
+ err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
+ if (CHECK_FAIL(err))
+ goto out_close;
+
+ val.bpf_prog.fd = prog_fd;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ CHECK(err, "Add program to cpumap entry", "err %d errno %d\n",
+ err, errno);
+
+ err = bpf_map_lookup_elem(map_fd, &idx, &val);
+ CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno);
+ CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry",
+ "expected %u read %u\n", info.id, val.bpf_prog.id);
+
+ /* can not attach BPF_XDP_CPUMAP program to a device */
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+ CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program",
+ "should have failed\n");
+
+ val.qsize = 192;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry",
+ "should have failed\n");
+
+out_close:
+ test_xdp_with_cpumap_helpers__destroy(skel);
+}
+
+void test_xdp_cpumap_attach(void)
+{
+ if (test__start_subtest("cpumap_with_progs"))
+ test_xdp_with_cpumap_helpers();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
new file mode 100644
index 000000000000..88ef3ec8ac4c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/bpf.h>
+#include <linux/if_link.h>
+#include <test_progs.h>
+
+#include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_with_devmap_helpers.skel.h"
+
+#define IFINDEX_LO 1
+
+void test_xdp_with_devmap_helpers(void)
+{
+ struct test_xdp_with_devmap_helpers *skel;
+ struct bpf_prog_info info = {};
+ struct bpf_devmap_val val = {
+ .ifindex = IFINDEX_LO,
+ };
+ __u32 len = sizeof(info);
+ __u32 duration = 0, idx = 0;
+ int err, dm_fd, map_fd;
+
+
+ skel = test_xdp_with_devmap_helpers__open_and_load();
+ if (CHECK_FAIL(!skel)) {
+ perror("test_xdp_with_devmap_helpers__open_and_load");
+ return;
+ }
+
+ /* a program that references a DEVMAP whose entries carry programs
+ * can not be attached as generic XDP
+ */
+ dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+ CHECK(err == 0, "Generic attach of program with 8-byte devmap",
+ "should have failed\n");
+
+ dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
+ map_fd = bpf_map__fd(skel->maps.dm_ports);
+ err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
+ if (CHECK_FAIL(err))
+ goto out_close;
+
+ val.bpf_prog.fd = dm_fd;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ CHECK(err, "Add program to devmap entry",
+ "err %d errno %d\n", err, errno);
+
+ err = bpf_map_lookup_elem(map_fd, &idx, &val);
+ CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno);
+ CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry",
+ "expected %u read %u\n", info.id, val.bpf_prog.id);
+
+ /* can not attach BPF_XDP_DEVMAP program to a device */
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+ CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program",
+ "should have failed\n");
+
+ val.ifindex = 1;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry",
+ "should have failed\n");
+
+out_close:
+ test_xdp_with_devmap_helpers__destroy(skel);
+}
+
+void test_neg_xdp_devmap_helpers(void)
+{
+ struct test_xdp_devmap_helpers *skel;
+ __u32 duration = 0;
+
+ skel = test_xdp_devmap_helpers__open_and_load();
+ if (CHECK(skel,
+ "Load of XDP program accessing egress ifindex without attach type",
+ "should have failed\n")) {
+ test_xdp_devmap_helpers__destroy(skel);
+ }
+}
+
+
+void test_xdp_devmap_attach(void)
+{
+ if (test__start_subtest("DEVMAP with programs in entries"))
+ test_xdp_with_devmap_helpers();
+
+ if (test__start_subtest("Verifier check of DEVMAP programs"))
+ test_neg_xdp_devmap_helpers();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
new file mode 100644
index 000000000000..6f814999b395
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <uapi/linux/if_link.h>
+#include <test_progs.h>
+#include "test_xdp_link.skel.h"
+
+#define IFINDEX_LO 1
+
+void test_xdp_link(void)
+{
+ __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err;
+ DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1);
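+ /* old_fd == -1 with XDP_FLAGS_REPLACE: the attach only succeeds if no program is currently attached */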
+ struct test_xdp_link *skel1 = NULL, *skel2 = NULL;
+ struct bpf_link_info link_info;
+ struct bpf_prog_info prog_info;
+ struct bpf_link *link;
+ __u32 link_info_len = sizeof(link_info);
+ __u32 prog_info_len = sizeof(prog_info);
+
+ skel1 = test_xdp_link__open_and_load();
+ if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n"))
+ goto cleanup;
+ prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler);
+
+ skel2 = test_xdp_link__open_and_load();
+ if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n"))
+ goto cleanup;
+ prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler);
+
+ memset(&prog_info, 0, sizeof(prog_info));
+ err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len);
+ if (CHECK(err, "fd_info1", "failed %d\n", -errno))
+ goto cleanup;
+ id1 = prog_info.id;
+
+ memset(&prog_info, 0, sizeof(prog_info));
+ err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len);
+ if (CHECK(err, "fd_info2", "failed %d\n", -errno))
+ goto cleanup;
+ id2 = prog_info.id;
+
+ /* set initial prog attachment */
+ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
+ if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err))
+ goto cleanup;
+
+ /* validate prog ID */
+ err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ CHECK(err || id0 != id1, "id1_check",
+ "loaded prog id %u != id1 %u, err %d", id0, id1, err);
+
+ /* BPF link is not allowed to replace prog attachment */
+ link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
+ if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+ bpf_link__destroy(link);
+ /* best-effort detach prog */
+ opts.old_fd = prog_fd1;
+ bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
+ goto cleanup;
+ }
+
+ /* detach BPF program */
+ opts.old_fd = prog_fd1;
+ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
+ if (CHECK(err, "prog_detach", "failed %d\n", err))
+ goto cleanup;
+
+ /* now BPF link should attach successfully */
+ link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
+ if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+ goto cleanup;
+ skel1->links.xdp_handler = link;
+
+ /* validate prog ID */
+ err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ if (CHECK(err || id0 != id1, "id1_check",
+ "loaded prog id %u != id1 %u, err %d", id0, id1, err))
+ goto cleanup;
+
+ /* BPF prog attach is not allowed to replace BPF link */
+ opts.old_fd = prog_fd1;
+ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
+ if (CHECK(!err, "prog_attach_fail", "unexpected success\n"))
+ goto cleanup;
+
+ /* Can't force-update when BPF link is active */
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0);
+ if (CHECK(!err, "prog_update_fail", "unexpected success\n"))
+ goto cleanup;
+
+ /* Can't force-detach when BPF link is active */
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+ if (CHECK(!err, "prog_detach_fail", "unexpected success\n"))
+ goto cleanup;
+
+ /* BPF link is not allowed to replace another BPF link */
+ link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
+ if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ bpf_link__destroy(skel1->links.xdp_handler);
+ skel1->links.xdp_handler = NULL;
+
+ /* new link attach should succeed */
+ link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
+ if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+ goto cleanup;
+ skel2->links.xdp_handler = link;
+
+ err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ if (CHECK(err || id0 != id2, "id2_check",
+ "loaded prog id %u != id2 %u, err %d", id0, id1, err))
+ goto cleanup;
+
+ /* updating program under active BPF link works as expected */
+ err = bpf_link__update_program(link, skel1->progs.xdp_handler);
+ if (CHECK(err, "link_upd", "failed: %d\n", err))
+ goto cleanup;
+
+ memset(&link_info, 0, sizeof(link_info));
+ err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
+ if (CHECK(err, "link_info", "failed: %d\n", err))
+ goto cleanup;
+
+ CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type",
+ "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP);
+ CHECK(link_info.prog_id != id1, "link_prog_id",
+ "got %u != exp %u\n", link_info.prog_id, id1);
+ CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex",
+ "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO);
+
+ err = bpf_link__detach(link);
+ if (CHECK(err, "link_detach", "failed %d\n", err))
+ goto cleanup;
+
+ memset(&link_info, 0, sizeof(link_info));
+ err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
+ if (CHECK(err, "link_info", "failed: %d\n", err))
+ goto cleanup;
+ CHECK(link_info.prog_id != id1, "link_prog_id",
+ "got %u != exp %u\n", link_info.prog_id, id1);
+ /* ifindex should be zeroed out */
+ CHECK(link_info.xdp.ifindex != 0, "link_ifindex",
+ "got %u != exp %u\n", link_info.xdp.ifindex, 0);
+
+cleanup:
+ test_xdp_link__destroy(skel1);
+ test_xdp_link__destroy(skel2);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
index c9404e6b226e..0281095de266 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
@@ -1,10 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_xdp_noinline.skel.h"
void test_xdp_noinline(void)
{
- const char *file = "./test_xdp_noinline.o";
unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct test_xdp_noinline *skel;
struct vip key = {.protocol = 6};
struct vip_meta {
__u32 flags;
@@ -23,59 +25,43 @@ void test_xdp_noinline(void)
__u8 flags;
} real_def = {.dst = MAGIC_VAL};
__u32 ch_key = 11, real_num = 3;
- __u32 duration, retval, size;
- int err, i, prog_fd, map_fd;
+ __u32 duration = 0, retval, size;
+ int err, i;
__u64 bytes = 0, pkts = 0;
- struct bpf_object *obj;
char buf[128];
u32 *magic = (u32 *)buf;
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (CHECK_FAIL(err))
+ skel = test_xdp_noinline__open_and_load();
+ if (CHECK(!skel, "skel_open_and_load", "failed\n"))
return;
- map_fd = bpf_find_map(__func__, obj, "vip_map");
- if (map_fd < 0)
- goto out;
- bpf_map_update_elem(map_fd, &key, &value, 0);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.vip_map), &key, &value, 0);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.ch_rings), &ch_key, &real_num, 0);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &real_num, &real_def, 0);
- map_fd = bpf_find_map(__func__, obj, "ch_rings");
- if (map_fd < 0)
- goto out;
- bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
-
- map_fd = bpf_find_map(__func__, obj, "reals");
- if (map_fd < 0)
- goto out;
- bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
-
- err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+ err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v4),
+ NUM_ITER, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
CHECK(err || retval != 1 || size != 54 ||
*magic != MAGIC_VAL, "ipv4",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
- err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+ err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v6),
+ NUM_ITER, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
CHECK(err || retval != 1 || size != 74 ||
*magic != MAGIC_VAL, "ipv6",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
- map_fd = bpf_find_map(__func__, obj, "stats");
- if (map_fd < 0)
- goto out;
- bpf_map_lookup_elem(map_fd, &stats_key, stats);
+ bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), &stats_key, stats);
for (i = 0; i < nr_cpus; i++) {
bytes += stats[i].bytes;
pkts += stats[i].pkts;
}
- if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 ||
- pkts != NUM_ITER * 2)) {
- printf("test_xdp_noinline:FAIL:stats %lld %lld\n",
- bytes, pkts);
- }
-out:
- bpf_object__close(obj);
+ CHECK(bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2,
+ "stats", "bytes %lld pkts %lld\n",
+ (unsigned long long)bytes, (unsigned long long)pkts);
+ test_xdp_noinline__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index 7897c8f4d363..6939bfd8690f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -15,6 +15,8 @@
*/
#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "GPL";
@@ -480,10 +482,9 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay)
if (hystart_detect & HYSTART_DELAY) {
/* obtain the minimum delay of more than sampling packets */
+ if (ca->curr_rtt > delay)
+ ca->curr_rtt = delay;
if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
- if (ca->curr_rtt > delay)
- ca->curr_rtt = delay;
-
ca->sample_cnt++;
} else {
if (ca->curr_rtt > ca->delay_min +
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 3fb4260570b1..4dc1a967776a 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -9,6 +9,8 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_tcp_helpers.h"
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 9941f0ba471e..5a65f6b51377 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -20,20 +20,20 @@
#include <bpf/bpf_endian.h>
int _version SEC("version") = 1;
-#define PROG(F) SEC(#F) int bpf_func_##F
+#define PROG(F) PROG_(F, _##F)
+#define PROG_(NUM, NAME) SEC("flow_dissector/"#NUM) int bpf_func##NAME
/* These are the identifiers of the BPF programs that will be used in tail
* calls. Name is limited to 16 characters, with the terminating character and
* bpf_func_ above, we have only 6 to work with, anything after will be cropped.
*/
-enum {
- IP,
- IPV6,
- IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */
- IPV6FR, /* Fragmentation IPv6 Extension Header */
- MPLS,
- VLAN,
-};
+#define IP 0
+#define IPV6 1
+#define IPV6OP 2 /* Destination/Hop-by-Hop Options IPv6 Ext. Header */
+#define IPV6FR 3 /* Fragmentation IPv6 Extension Header */
+#define MPLS 4
+#define VLAN 5
+#define MAX_PROG 6
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
@@ -59,7 +59,7 @@ struct frag_hdr {
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
- __uint(max_entries, 8);
+ __uint(max_entries, MAX_PROG);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
@@ -118,18 +118,18 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
switch (proto) {
case bpf_htons(ETH_P_IP):
- bpf_tail_call(skb, &jmp_table, IP);
+ bpf_tail_call_static(skb, &jmp_table, IP);
break;
case bpf_htons(ETH_P_IPV6):
- bpf_tail_call(skb, &jmp_table, IPV6);
+ bpf_tail_call_static(skb, &jmp_table, IPV6);
break;
case bpf_htons(ETH_P_MPLS_MC):
case bpf_htons(ETH_P_MPLS_UC):
- bpf_tail_call(skb, &jmp_table, MPLS);
+ bpf_tail_call_static(skb, &jmp_table, MPLS);
break;
case bpf_htons(ETH_P_8021Q):
case bpf_htons(ETH_P_8021AD):
- bpf_tail_call(skb, &jmp_table, VLAN);
+ bpf_tail_call_static(skb, &jmp_table, VLAN);
break;
default:
/* Protocol not supported */
@@ -246,10 +246,10 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
switch (nexthdr) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
- bpf_tail_call(skb, &jmp_table, IPV6OP);
+ bpf_tail_call_static(skb, &jmp_table, IPV6OP);
break;
case IPPROTO_FRAGMENT:
- bpf_tail_call(skb, &jmp_table, IPV6FR);
+ bpf_tail_call_static(skb, &jmp_table, IPV6FR);
break;
default:
return parse_ip_proto(skb, nexthdr);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
new file mode 100644
index 000000000000..6a1255465fd6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
+#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
+#define bpf_iter__netlink bpf_iter__netlink___not_used
+#define bpf_iter__task bpf_iter__task___not_used
+#define bpf_iter__task_file bpf_iter__task_file___not_used
+#define bpf_iter__tcp bpf_iter__tcp___not_used
+#define tcp6_sock tcp6_sock___not_used
+#define bpf_iter__udp bpf_iter__udp___not_used
+#define udp6_sock udp6_sock___not_used
+#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
+#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
+#define bpf_iter__sockmap bpf_iter__sockmap___not_used
+#define btf_ptr btf_ptr___not_used
+#define BTF_F_COMPACT BTF_F_COMPACT___not_used
+#define BTF_F_NONAME BTF_F_NONAME___not_used
+#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used
+#define BTF_F_ZERO BTF_F_ZERO___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__bpf_map
+#undef bpf_iter__ipv6_route
+#undef bpf_iter__netlink
+#undef bpf_iter__task
+#undef bpf_iter__task_file
+#undef bpf_iter__tcp
+#undef tcp6_sock
+#undef bpf_iter__udp
+#undef udp6_sock
+#undef bpf_iter__bpf_map_elem
+#undef bpf_iter__bpf_sk_storage_map
+#undef bpf_iter__sockmap
+#undef btf_ptr
+#undef BTF_F_COMPACT
+#undef BTF_F_NONAME
+#undef BTF_F_PTR_RAW
+#undef BTF_F_ZERO
+
+struct bpf_iter_meta {
+ struct seq_file *seq;
+ __u64 session_id;
+ __u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__ipv6_route {
+ struct bpf_iter_meta *meta;
+ struct fib6_info *rt;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__netlink {
+ struct bpf_iter_meta *meta;
+ struct netlink_sock *sk;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+ struct bpf_iter_meta *meta;
+ struct task_struct *task;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task_file {
+ struct bpf_iter_meta *meta;
+ struct task_struct *task;
+ __u32 fd;
+ struct file *file;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__tcp {
+ struct bpf_iter_meta *meta;
+ struct sock_common *sk_common;
+ uid_t uid;
+} __attribute__((preserve_access_index));
+
+struct tcp6_sock {
+ struct tcp_sock tcp;
+ struct ipv6_pinfo inet6;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__udp {
+ struct bpf_iter_meta *meta;
+ struct udp_sock *udp_sk;
+ uid_t uid __attribute__((aligned(8)));
+ int bucket __attribute__((aligned(8)));
+} __attribute__((preserve_access_index));
+
+struct udp6_sock {
+ struct udp_sock udp;
+ struct ipv6_pinfo inet6;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map_elem {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+ void *key;
+ void *value;
+};
+
+struct bpf_iter__bpf_sk_storage_map {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+ struct sock *sk;
+ void *value;
+};
+
+struct bpf_iter__sockmap {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+ void *key;
+ struct sock *sk;
+};
+
+struct btf_ptr {
+ void *ptr;
+ __u32 type_id;
+ __u32 flags;
+};
+
+enum {
+ BTF_F_COMPACT = (1ULL << 0),
+ BTF_F_NONAME = (1ULL << 1),
+ BTF_F_PTR_RAW = (1ULL << 2),
+ BTF_F_ZERO = (1ULL << 3),
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
new file mode 100644
index 000000000000..6286023fd62b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+ int a;
+ int b;
+ int c;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} arraymap1 SEC(".maps");
+
+__u32 key_sum = 0;
+__u64 val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+ __u32 *key = ctx->key;
+ __u64 *val = ctx->value;
+
+ if (key == (void *)0 || val == (void *)0)
+ return 0;
+
+ bpf_seq_write(ctx->meta->seq, key, sizeof(__u32));
+ bpf_seq_write(ctx->meta->seq, val, sizeof(__u64));
+ key_sum += *key;
+ val_sum += *val;
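+ /* writing through ctx->value updates the element in the array map itself */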
+ *val = *key;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
new file mode 100644
index 000000000000..6dfce3fd68bc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+ int a;
+ int b;
+ int c;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, struct key_t);
+ __type(value, __u64);
+} hashmap1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, __u64);
+ __type(value, __u64);
+} hashmap2 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, struct key_t);
+ __type(value, __u32);
+} hashmap3 SEC(".maps");
+
+/* will be set before the prog runs */
+bool in_test_mode = 0;
+
+/* will collect results during prog run */
+__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0;
+__u64 val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ __u32 seq_num = ctx->meta->seq_num;
+ struct bpf_map *map = ctx->map;
+ struct key_t *key = ctx->key;
+ struct key_t tmp_key;
+ __u64 *val = ctx->value;
+ __u64 tmp_val = 0;
+ int ret;
+
+ if (in_test_mode) {
+ /* test mode is used by selftests to
+ * test functionality of bpf_hash_map iter.
+ *
+ * the above hashmap1 will have correct size
+ * and will be accepted, hashmap2 and hashmap3
+ * should be rejected due to smaller key/value
+ * size.
+ */
+ if (key == (void *)0 || val == (void *)0)
+ return 0;
+
+ /* update the value and then delete the <key, value> pair.
+ * it should not impact the existing 'val' which is still
+ * accessible under rcu.
+ */
+ __builtin_memcpy(&tmp_key, key, sizeof(struct key_t));
+ ret = bpf_map_update_elem(&hashmap1, &tmp_key, &tmp_val, 0);
+ if (ret)
+ return 0;
+ ret = bpf_map_delete_elem(&hashmap1, &tmp_key);
+ if (ret)
+ return 0;
+
+ key_sum_a += key->a;
+ key_sum_b += key->b;
+ key_sum_c += key->c;
+ val_sum += *val;
+ return 0;
+ }
+
+	/* In non-test mode, the map is prepared with the
+	 * bpftool command sequence below:
+	 *   bpftool map create /sys/fs/bpf/m1 type hash \
+	 *     key 12 value 8 entries 3 name map1
+	 *   bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 1 \
+	 *     value 0 0 0 1 0 0 0 1
+	 *   bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 2 \
+	 *     value 0 0 0 1 0 0 0 2
+	 * The iterator is then pinned with:
+	 *   bpftool iter pin ./bpf_iter_bpf_hash_map.o /sys/fs/bpf/p1 \
+	 *     map id 77
+	 * Reading the pinned file then produces the output below:
+	 *   map dump starts
+	 *   77: (1000000 0 2000000) (200000001000000)
+	 *   77: (1000000 0 1000000) (100000001000000)
+	 *   map dump ends
+	 */
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "map dump starts\n");
+
+ if (key == (void *)0 || val == (void *)0) {
+ BPF_SEQ_PRINTF(seq, "map dump ends\n");
+ return 0;
+ }
+
+ BPF_SEQ_PRINTF(seq, "%d: (%x %d %x) (%llx)\n", map->id,
+ key->a, key->b, key->c, *val);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
new file mode 100644
index 000000000000..08651b23edba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/bpf_map")
+int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ __u64 seq_num = ctx->meta->seq_num;
+ struct bpf_map *map = ctx->map;
+
+ if (map == (void *)0) {
+ BPF_SEQ_PRINTF(seq, " %%%%%% END %%%%%%\n");
+ return 0;
+ }
+
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " id refcnt usercnt locked_vm\n");
+
+ BPF_SEQ_PRINTF(seq, "%8u %8ld %8ld %10lu\n", map->id, map->refcnt.counter,
+ map->usercnt.counter,
+ map->memory.user->locked_vm.counter);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
new file mode 100644
index 000000000000..85fa710fad90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+ int a;
+ int b;
+ int c;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u32);
+} arraymap1 SEC(".maps");
+
+/* will be set before the prog run */
+volatile const __u32 num_cpus = 0;
+
+__u32 key_sum = 0, val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_percpu_array_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+ __u32 *key = ctx->key;
+ void *pptr = ctx->value;
+ __u32 step;
+ int i;
+
+ if (key == (void *)0 || pptr == (void *)0)
+ return 0;
+
+ key_sum += *key;
+
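+	/* ctx->value points at an array of per-CPU values; each value is
+	 * assumed to be padded to 8 bytes by the iterator infrastructure,
+	 * hence the fixed step of 8 even for a 4-byte value type.
+	 */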
+ step = 8;
+ for (i = 0; i < num_cpus; i++) {
+ val_sum += *(__u32 *)pptr;
+ pptr += step;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c
new file mode 100644
index 000000000000..feaaa2b89c57
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+ int a;
+ int b;
+ int c;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 3);
+ __type(key, struct key_t);
+ __type(value, __u32);
+} hashmap1 SEC(".maps");
+
+/* will be set before the prog run */
+volatile const __u32 num_cpus = 0;
+
+/* will collect results during prog run */
+__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0;
+__u32 val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_percpu_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+ struct key_t *key = ctx->key;
+ void *pptr = ctx->value;
+ __u32 step;
+ int i;
+
+ if (key == (void *)0 || pptr == (void *)0)
+ return 0;
+
+ key_sum_a += key->a;
+ key_sum_b += key->b;
+ key_sum_c += key->c;
+
+ step = 8;
+ for (i = 0; i < num_cpus; i++) {
+ val_sum += *(__u32 *)pptr;
+ pptr += step;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
new file mode 100644
index 000000000000..6b70ccaba301
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_stg_map SEC(".maps");
+
+__u32 val_sum = 0;
+__u32 ipv6_sk_count = 0;
+
+SEC("iter/bpf_sk_storage_map")
+int dump_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
+{
+ struct sock *sk = ctx->sk;
+ __u32 *val = ctx->value;
+
+ if (sk == (void *)0 || val == (void *)0)
+ return 0;
+
+ if (sk->sk_family == AF_INET6)
+ ipv6_sk_count++;
+
+ val_sum += *val;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
new file mode 100644
index 000000000000..d58d9f1642b5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
+
+SEC("iter/ipv6_route")
+int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct fib6_info *rt = ctx->rt;
+ const struct net_device *dev;
+ struct fib6_nh *fib6_nh;
+ unsigned int flags;
+ struct nexthop *nh;
+
+ if (rt == (void *)0)
+ return 0;
+
+ fib6_nh = &rt->fib6_nh[0];
+ flags = rt->fib6_flags;
+
+ /* FIXME: nexthop_is_multipath is not handled here. */
+ nh = rt->nh;
+ if (rt->nh)
+ fib6_nh = &nh->nh_info->fib6_nh;
+
+ BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
+
+ if (CONFIG_IPV6_SUBTREES)
+ BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_src.addr,
+ rt->fib6_src.plen);
+ else
+ BPF_SEQ_PRINTF(seq, "00000000000000000000000000000000 00 ");
+
+ if (fib6_nh->fib_nh_gw_family) {
+ flags |= RTF_GATEWAY;
+ BPF_SEQ_PRINTF(seq, "%pi6 ", &fib6_nh->fib_nh_gw6);
+ } else {
+ BPF_SEQ_PRINTF(seq, "00000000000000000000000000000000 ");
+ }
+
+ dev = fib6_nh->fib_nh_dev;
+ if (dev)
+ BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x %8s\n", rt->fib6_metric,
+ rt->fib6_ref.refs.counter, 0, flags, dev->name);
+ else
+ BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x\n", rt->fib6_metric,
+ rt->fib6_ref.refs.counter, 0, flags);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
new file mode 100644
index 000000000000..95989f4c99b5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+static __attribute__((noinline)) struct inode *SOCK_INODE(struct socket *socket)
+{
+ return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
+}
+
+SEC("iter/netlink")
+int dump_netlink(struct bpf_iter__netlink *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct netlink_sock *nlk = ctx->sk;
+ unsigned long group, ino;
+ struct inode *inode;
+ struct socket *sk;
+ struct sock *s;
+
+ if (nlk == (void *)0)
+ return 0;
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "sk Eth Pid Groups "
+ "Rmem Wmem Dump Locks Drops "
+ "Inode\n");
+
+ s = &nlk->sk;
+ BPF_SEQ_PRINTF(seq, "%pK %-3d ", s, s->sk_protocol);
+
+ if (!nlk->groups) {
+ group = 0;
+ } else {
+		/* FIXME: temporarily use bpf_probe_read_kernel() here;
+		 * direct access needs verifier support.
+		 */
+ bpf_probe_read_kernel(&group, sizeof(group), &nlk->groups[0]);
+ }
+ BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ",
+ nlk->portid, (u32)group,
+ s->sk_rmem_alloc.counter,
+ s->sk_wmem_alloc.refs.counter - 1,
+ nlk->cb_running, s->sk_refcnt.refs.counter);
+
+ sk = s->sk_socket;
+ if (!sk) {
+ ino = 0;
+ } else {
+		/* FIXME: container_of() inside SOCK_INODE() involves a forced
+		 * type conversion, so direct access cannot be used
+		 * with the current verifier.
+		 */
+ inode = SOCK_INODE(sk);
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+ }
+ BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c
new file mode 100644
index 000000000000..f3af0e30cead
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Cloudflare */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 64);
+ __type(key, __u32);
+ __type(value, __u64);
+} sockmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 64);
+ __type(key, __u32);
+ __type(value, __u64);
+} sockhash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 64);
+ __type(key, __u32);
+ __type(value, __u64);
+} dst SEC(".maps");
+
+__u32 elems = 0;
+__u32 socks = 0;
+
+SEC("iter/sockmap")
+int copy(struct bpf_iter__sockmap *ctx)
+{
+ struct sock *sk = ctx->sk;
+ __u32 tmp, *key = ctx->key;
+ int ret;
+
+ if (!key)
+ return 0;
+
+ elems++;
+
+ /* We need a temporary buffer on the stack, since the verifier doesn't
+ * let us use the pointer from the context as an argument to the helper.
+ */
+ tmp = *key;
+
+ if (sk) {
+ socks++;
+ return bpf_map_update_elem(&dst, &tmp, sk, 0) != 0;
+ }
+
+ ret = bpf_map_delete_elem(&dst, &tmp);
+ return ret && ret != -ENOENT;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
new file mode 100644
index 000000000000..4983087852a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+
+ if (task == (void *)0) {
+ BPF_SEQ_PRINTF(seq, " === END ===\n");
+ return 0;
+ }
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " tgid gid\n");
+
+ BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
new file mode 100644
index 000000000000..a1ddc36f13ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+long tasks = 0;
+long seq_err = 0;
+bool skip = false;
+
+SEC("iter/task")
+int dump_task_struct(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ static struct btf_ptr ptr = { };
+ long ret;
+
+#if __has_builtin(__builtin_btf_type_id)
+ ptr.type_id = bpf_core_type_id_kernel(struct task_struct);
+ ptr.ptr = task;
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "Raw BTF task\n");
+
+ ret = bpf_seq_printf_btf(seq, &ptr, sizeof(ptr), 0);
+ switch (ret) {
+ case 0:
+ tasks++;
+ break;
+ case -ERANGE:
+ /* NULL task or task->fs, don't count it as an error. */
+ break;
+ case -E2BIG:
+ return 1;
+ default:
+ seq_err = ret;
+ break;
+ }
+#else
+ skip = true;
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
new file mode 100644
index 000000000000..b2f7c7c5f952
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int count = 0;
+int tgid = 0;
+
+SEC("iter/task_file")
+int dump_task_file(struct bpf_iter__task_file *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ __u32 fd = ctx->fd;
+ struct file *file = ctx->file;
+
+ if (task == (void *)0 || file == (void *)0)
+ return 0;
+
+ if (ctx->meta->seq_num == 0) {
+ count = 0;
+ BPF_SEQ_PRINTF(seq, " tgid gid fd file\n");
+ }
+
+ if (tgid == task->tgid && task->tgid != task->pid)
+ count++;
+
+ BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
+ (long)file->f_op);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
new file mode 100644
index 000000000000..50e59a2e142e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define MAX_STACK_TRACE_DEPTH 64
+unsigned long entries[MAX_STACK_TRACE_DEPTH] = {};
+#define SIZE_OF_ULONG (sizeof(unsigned long))
+
+SEC("iter/task")
+int dump_task_stack(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ long i, retlen;
+
+ if (task == (void *)0)
+ return 0;
+
+ retlen = bpf_get_task_stack(task, entries,
+ MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, 0);
+ if (retlen < 0)
+ return 0;
+
+ BPF_SEQ_PRINTF(seq, "pid: %8u num_entries: %8u\n", task->pid,
+ retlen / SIZE_OF_ULONG);
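+	/* Walk the whole fixed-size buffer so the loop bound stays constant
+	 * for the verifier; entries beyond retlen are simply skipped.
+	 */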
+ for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) {
+ if (retlen > i * SIZE_OF_ULONG)
+ BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]);
+ }
+ BPF_SEQ_PRINTF(seq, "\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
new file mode 100644
index 000000000000..54380c5e1069
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int hlist_unhashed_lockless(const struct hlist_node *h)
+{
+ return !(h->pprev);
+}
+
+static int timer_pending(const struct timer_list * timer)
+{
+ return !hlist_unhashed_lockless(&timer->entry);
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+#define USER_HZ 100
+#define NSEC_PER_SEC 1000000000ULL
+static clock_t jiffies_to_clock_t(unsigned long x)
+{
+	/* The implementation here is tailored to a particular
+	 * setting of USER_HZ.
+	 */
+ u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
+ u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
+
+ if ((tick_nsec % user_hz_nsec) == 0) {
+ if (CONFIG_HZ < USER_HZ)
+ return x * (USER_HZ / CONFIG_HZ);
+ else
+ return x / (CONFIG_HZ / USER_HZ);
+ }
+ return x * tick_nsec/user_hz_nsec;
+}
+
+static clock_t jiffies_delta_to_clock_t(long delta)
+{
+ if (delta <= 0)
+ return 0;
+
+ return jiffies_to_clock_t(delta);
+}
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+static bool
+inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
+{
+ return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
+
+static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
+{
+ return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
+}
+
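+/* Emit one line per TCP socket in (approximately) the /proc/net/tcp format,
+ * so the userspace side of the test (assumed) can compare the iterator
+ * output with the kernel's own dump.
+ */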
+static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
+ uid_t uid, __u32 seq_num)
+{
+ const struct inet_connection_sock *icsk;
+ const struct fastopen_queue *fastopenq;
+ const struct inet_sock *inet;
+ unsigned long timer_expires;
+ const struct sock *sp;
+ __u16 destp, srcp;
+ __be32 dest, src;
+ int timer_active;
+ int rx_queue;
+ int state;
+
+ icsk = &tp->inet_conn;
+ inet = &icsk->icsk_inet;
+ sp = &inet->sk;
+ fastopenq = &icsk->icsk_accept_queue.fastopenq;
+
+ dest = inet->inet_daddr;
+ src = inet->inet_rcv_saddr;
+ destp = bpf_ntohs(inet->inet_dport);
+ srcp = bpf_ntohs(inet->inet_sport);
+
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ timer_active = 1;
+ timer_expires = icsk->icsk_timeout;
+ } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ timer_active = 4;
+ timer_expires = icsk->icsk_timeout;
+ } else if (timer_pending(&sp->sk_timer)) {
+ timer_active = 2;
+ timer_expires = sp->sk_timer.expires;
+ } else {
+ timer_active = 0;
+ timer_expires = bpf_jiffies64();
+ }
+
+ state = sp->sk_state;
+ if (state == TCP_LISTEN) {
+ rx_queue = sp->sk_ack_backlog;
+ } else {
+ rx_queue = tp->rcv_nxt - tp->copied_seq;
+ if (rx_queue < 0)
+ rx_queue = 0;
+ }
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+		       seq_num, src, srcp, dest, destp);
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
+ state,
+ tp->write_seq - tp->snd_una, rx_queue,
+ timer_active,
+ jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
+ icsk->icsk_retransmits, uid,
+ icsk->icsk_probes_out,
+ sock_i_ino(sp),
+ sp->sk_refcnt.refs.counter);
+ BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
+ tp,
+ jiffies_to_clock_t(icsk->icsk_rto),
+ jiffies_to_clock_t(icsk->icsk_ack.ato),
+ (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
+ tp->snd_cwnd,
+ state == TCP_LISTEN ? fastopenq->max_qlen
+ : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
+ );
+
+ return 0;
+}
+
+static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_timewait_sock *tw = &ttw->tw_sk;
+ __u16 destp, srcp;
+ __be32 dest, src;
+ long delta;
+
+ delta = tw->tw_timer.expires - bpf_jiffies64();
+ dest = tw->tw_daddr;
+ src = tw->tw_rcv_saddr;
+ destp = bpf_ntohs(tw->tw_dport);
+ srcp = bpf_ntohs(tw->tw_sport);
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+ seq_num, src, srcp, dest, destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ tw->tw_substate, 0, 0,
+ 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
+ tw->tw_refcnt.refs.counter, tw);
+
+ return 0;
+}
+
+static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_request_sock *irsk = &treq->req;
+ struct request_sock *req = &irsk->req;
+ long ttd;
+
+ ttd = req->rsk_timer.expires - bpf_jiffies64();
+
+ if (ttd < 0)
+ ttd = 0;
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+ seq_num, irsk->ir_loc_addr,
+ irsk->ir_num, irsk->ir_rmt_addr,
+ bpf_ntohs(irsk->ir_rmt_port));
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
+ req->num_timeout, uid, 0, 0, 0, req);
+
+ return 0;
+}
+
+SEC("iter/tcp")
+int dump_tcp4(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ struct seq_file *seq = ctx->meta->seq;
+ struct tcp_timewait_sock *tw;
+ struct tcp_request_sock *req;
+ struct tcp_sock *tp;
+ uid_t uid = ctx->uid;
+ __u32 seq_num;
+
+ if (sk_common == (void *)0)
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " sl "
+ "local_address "
+ "rem_address "
+ "st tx_queue rx_queue tr tm->when retrnsmt"
+ " uid timeout inode\n");
+
+ if (sk_common->skc_family != AF_INET)
+ return 0;
+
+ tp = bpf_skc_to_tcp_sock(sk_common);
+ if (tp)
+ return dump_tcp_sock(seq, tp, uid, seq_num);
+
+ tw = bpf_skc_to_tcp_timewait_sock(sk_common);
+ if (tw)
+ return dump_tw_sock(seq, tw, uid, seq_num);
+
+ req = bpf_skc_to_tcp_request_sock(sk_common);
+ if (req)
+ return dump_req_sock(seq, req, uid, seq_num);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
new file mode 100644
index 000000000000..b4fbddfa4e10
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int hlist_unhashed_lockless(const struct hlist_node *h)
+{
+ return !(h->pprev);
+}
+
+static int timer_pending(const struct timer_list * timer)
+{
+ return !hlist_unhashed_lockless(&timer->entry);
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+#define USER_HZ 100
+#define NSEC_PER_SEC 1000000000ULL
+static clock_t jiffies_to_clock_t(unsigned long x)
+{
+	/* The implementation here is tailored to a particular
+	 * setting of USER_HZ.
+	 */
+ u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
+ u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
+
+ if ((tick_nsec % user_hz_nsec) == 0) {
+ if (CONFIG_HZ < USER_HZ)
+ return x * (USER_HZ / CONFIG_HZ);
+ else
+ return x / (CONFIG_HZ / USER_HZ);
+ }
+ return x * tick_nsec/user_hz_nsec;
+}
+
+static clock_t jiffies_delta_to_clock_t(long delta)
+{
+ if (delta <= 0)
+ return 0;
+
+ return jiffies_to_clock_t(delta);
+}
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+static bool
+inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
+{
+ return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
+
+static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
+{
+ return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
+}
+
+static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
+ uid_t uid, __u32 seq_num)
+{
+ const struct inet_connection_sock *icsk;
+ const struct fastopen_queue *fastopenq;
+ const struct in6_addr *dest, *src;
+ const struct inet_sock *inet;
+ unsigned long timer_expires;
+ const struct sock *sp;
+ __u16 destp, srcp;
+ int timer_active;
+ int rx_queue;
+ int state;
+
+ icsk = &tp->tcp.inet_conn;
+ inet = &icsk->icsk_inet;
+ sp = &inet->sk;
+ fastopenq = &icsk->icsk_accept_queue.fastopenq;
+
+ dest = &sp->sk_v6_daddr;
+ src = &sp->sk_v6_rcv_saddr;
+ destp = bpf_ntohs(inet->inet_dport);
+ srcp = bpf_ntohs(inet->inet_sport);
+
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ timer_active = 1;
+ timer_expires = icsk->icsk_timeout;
+ } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ timer_active = 4;
+ timer_expires = icsk->icsk_timeout;
+ } else if (timer_pending(&sp->sk_timer)) {
+ timer_active = 2;
+ timer_expires = sp->sk_timer.expires;
+ } else {
+ timer_active = 0;
+ timer_expires = bpf_jiffies64();
+ }
+
+ state = sp->sk_state;
+ if (state == TCP_LISTEN) {
+ rx_queue = sp->sk_ack_backlog;
+ } else {
+ rx_queue = tp->tcp.rcv_nxt - tp->tcp.copied_seq;
+ if (rx_queue < 0)
+ rx_queue = 0;
+ }
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+ seq_num,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp);
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
+ state,
+ tp->tcp.write_seq - tp->tcp.snd_una, rx_queue,
+ timer_active,
+ jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
+ icsk->icsk_retransmits, uid,
+ icsk->icsk_probes_out,
+ sock_i_ino(sp),
+ sp->sk_refcnt.refs.counter);
+ BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
+ tp,
+ jiffies_to_clock_t(icsk->icsk_rto),
+ jiffies_to_clock_t(icsk->icsk_ack.ato),
+ (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
+ tp->tcp.snd_cwnd,
+ state == TCP_LISTEN ? fastopenq->max_qlen
+ : (tcp_in_initial_slowstart(&tp->tcp) ? -1
+ : tp->tcp.snd_ssthresh)
+ );
+
+ return 0;
+}
+
+static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_timewait_sock *tw = &ttw->tw_sk;
+ const struct in6_addr *dest, *src;
+ __u16 destp, srcp;
+ long delta;
+
+ delta = tw->tw_timer.expires - bpf_jiffies64();
+ dest = &tw->tw_v6_daddr;
+ src = &tw->tw_v6_rcv_saddr;
+ destp = bpf_ntohs(tw->tw_dport);
+ srcp = bpf_ntohs(tw->tw_sport);
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+ seq_num,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ tw->tw_substate, 0, 0,
+ 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
+ tw->tw_refcnt.refs.counter, tw);
+
+ return 0;
+}
+
+static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_request_sock *irsk = &treq->req;
+ struct request_sock *req = &irsk->req;
+ struct in6_addr *src, *dest;
+ long ttd;
+
+ ttd = req->rsk_timer.expires - bpf_jiffies64();
+ src = &irsk->ir_v6_loc_addr;
+ dest = &irsk->ir_v6_rmt_addr;
+
+ if (ttd < 0)
+ ttd = 0;
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+ seq_num,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3],
+ irsk->ir_num,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3],
+ bpf_ntohs(irsk->ir_rmt_port));
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
+ req->num_timeout, uid, 0, 0, 0, req);
+
+ return 0;
+}
+
+SEC("iter/tcp")
+int dump_tcp6(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ struct seq_file *seq = ctx->meta->seq;
+ struct tcp_timewait_sock *tw;
+ struct tcp_request_sock *req;
+ struct tcp6_sock *tp;
+ uid_t uid = ctx->uid;
+ __u32 seq_num;
+
+ if (sk_common == (void *)0)
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " sl "
+ "local_address "
+ "remote_address "
+ "st tx_queue rx_queue tr tm->when retrnsmt"
+ " uid timeout inode\n");
+
+ if (sk_common->skc_family != AF_INET6)
+ return 0;
+
+ tp = bpf_skc_to_tcp6_sock(sk_common);
+ if (tp)
+ return dump_tcp6_sock(seq, tp, uid, seq_num);
+
+ tw = bpf_skc_to_tcp_timewait_sock(sk_common);
+ if (tw)
+ return dump_tw_sock(seq, tw, uid, seq_num);
+
+ req = bpf_skc_to_tcp_request_sock(sk_common);
+ if (req)
+ return dump_req_sock(seq, req, uid, seq_num);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c
new file mode 100644
index 000000000000..c71a7c283108
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define START_CHAR 'a'
+#include "bpf_iter_test_kern_common.h"
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c
new file mode 100644
index 000000000000..8bdc8dc07444
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define START_CHAR 'A'
+#include "bpf_iter_test_kern_common.h"
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
new file mode 100644
index 000000000000..2a4647f20c46
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ int tgid;
+
+ tgid = task->tgid;
+ bpf_seq_write(seq, &tgid, sizeof(tgid));
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
new file mode 100644
index 000000000000..ee49493dc125
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 map1_id = 0, map2_id = 0;
+__u32 map1_accessed = 0, map2_accessed = 0;
+__u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
+
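+/* Read-only inputs, assumed to be set by the userspace test through the
+ * skeleton's rodata before the program is loaded.
+ */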
+static volatile const __u32 print_len;
+static volatile const __u32 ret1;
+
+SEC("iter/bpf_map")
+int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct bpf_map *map = ctx->map;
+ __u64 seq_num;
+ int i, ret = 0;
+
+ if (map == (void *)0)
+ return 0;
+
+ /* only dump map1_id and map2_id */
+ if (map->id != map1_id && map->id != map2_id)
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (map->id == map1_id) {
+ map1_seqnum = seq_num;
+ map1_accessed++;
+ }
+
+ if (map->id == map2_id) {
+ if (map2_accessed == 0) {
+ map2_seqnum1 = seq_num;
+ if (ret1)
+ ret = 1;
+ } else {
+ map2_seqnum2 = seq_num;
+ }
+ map2_accessed++;
+ }
+
+ /* fill seq_file buffer */
+ for (i = 0; i < print_len; i++)
+ bpf_seq_write(seq, &seq_num, sizeof(seq_num));
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c
new file mode 100644
index 000000000000..e3a7575e81d2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+ int a;
+ int b;
+ int c;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, struct key_t);
+ __type(value, __u64);
+} hashmap1 SEC(".maps");
+
+__u32 key_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+ void *key = ctx->key;
+
+ if (key == (void *)0)
+ return 0;
+
+	/* out-of-bounds access w.r.t. hashmap1 */
+ key_sum += *(__u32 *)(key + sizeof(struct key_t));
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c
new file mode 100644
index 000000000000..1c7304f56b1e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 value_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+ void *value = ctx->value;
+
+ if (value == (void *)0)
+ return 0;
+
+	/* negative offset, which should trigger a verifier failure. */
+ value_sum += *(__u32 *)(value - 4);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
new file mode 100644
index 000000000000..d5e3df66ad9a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+int count = 0;
+
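+/* START_CHAR is defined by the including source file; each task visited
+ * emits one character, starting at START_CHAR, until four have been written.
+ */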
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ char c;
+
+ if (count < 4) {
+ c = START_CHAR + count;
+ bpf_seq_write(seq, &c, sizeof(c));
+ count++;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
new file mode 100644
index 000000000000..f258583afbbd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+SEC("iter/udp")
+int dump_udp4(struct bpf_iter__udp *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct udp_sock *udp_sk = ctx->udp_sk;
+ struct inet_sock *inet;
+ __u16 srcp, destp;
+ __be32 dest, src;
+ __u32 seq_num;
+ int rqueue;
+
+ if (udp_sk == (void *)0)
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq,
+ " sl local_address rem_address st tx_queue "
+ "rx_queue tr tm->when retrnsmt uid timeout "
+ "inode ref pointer drops\n");
+
+ /* filter out udp6 sockets */
+ inet = &udp_sk->inet;
+ if (inet->sk.sk_family == AF_INET6)
+ return 0;
+
+ inet = &udp_sk->inet;
+ dest = inet->inet_daddr;
+ src = inet->inet_rcv_saddr;
+ srcp = bpf_ntohs(inet->inet_sport);
+ destp = bpf_ntohs(inet->inet_dport);
+ rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit;
+
+ BPF_SEQ_PRINTF(seq, "%5d: %08X:%04X %08X:%04X ",
+ ctx->bucket, src, srcp, dest, destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+ inet->sk.sk_state,
+ inet->sk.sk_wmem_alloc.refs.counter - 1,
+ rqueue,
+ 0, 0L, 0, ctx->uid, 0,
+ sock_i_ino(&inet->sk),
+ inet->sk.sk_refcnt.refs.counter, udp_sk,
+ inet->sk.sk_drops.counter);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
new file mode 100644
index 000000000000..65f93bb03f0f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define IPV6_SEQ_DGRAM_HEADER \
+ " sl " \
+ "local_address " \
+ "remote_address " \
+ "st tx_queue rx_queue tr tm->when retrnsmt" \
+ " uid timeout inode ref pointer drops\n"
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+SEC("iter/udp")
+int dump_udp6(struct bpf_iter__udp *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct udp_sock *udp_sk = ctx->udp_sk;
+ const struct in6_addr *dest, *src;
+ struct udp6_sock *udp6_sk;
+ struct inet_sock *inet;
+ __u16 srcp, destp;
+ __u32 seq_num;
+ int rqueue;
+
+ if (udp_sk == (void *)0)
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, IPV6_SEQ_DGRAM_HEADER);
+
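+	/* bpf_skc_to_udp6_sock() is assumed to return NULL for sockets that
+	 * are not IPv6 UDP sockets, so this check also filters out udp4
+	 * sockets.
+	 */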
+ udp6_sk = bpf_skc_to_udp6_sock(udp_sk);
+ if (udp6_sk == (void *)0)
+ return 0;
+
+ inet = &udp_sk->inet;
+ srcp = bpf_ntohs(inet->inet_sport);
+ destp = bpf_ntohs(inet->inet_dport);
+ rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit;
+ dest = &inet->sk.sk_v6_daddr;
+ src = &inet->sk.sk_v6_rcv_saddr;
+
+ BPF_SEQ_PRINTF(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+ ctx->bucket,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+ inet->sk.sk_state,
+ inet->sk.sk_wmem_alloc.refs.counter - 1,
+ rqueue,
+ 0, 0L, 0, ctx->uid, 0,
+ sock_i_ino(&inet->sk),
+ inet->sk.sk_refcnt.refs.counter, udp_sk,
+ inet->sk.sk_drops.counter);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
new file mode 100644
index 000000000000..01378911252b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_TRACING_NET_H__
+#define __BPF_TRACING_NET_H__
+
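+/* vmlinux.h carries only BTF type information, so kernel-internal constants
+ * and field-accessor macros are not available to BPF programs.  The
+ * definitions below replicate the ones these selftests need; the values are
+ * assumed to match the kernel headers they were copied from.
+ */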
+#define AF_INET 2
+#define AF_INET6 10
+
+#define ICSK_TIME_RETRANS 1
+#define ICSK_TIME_PROBE0 3
+#define ICSK_TIME_LOSS_PROBE 5
+#define ICSK_TIME_REO_TIMEOUT 6
+
+#define IFNAMSIZ 16
+
+#define RTF_GATEWAY 0x0002
+
+#define TCP_INFINITE_SSTHRESH 0x7fffffff
+#define TCP_PINGPONG_THRESH 3
+
+#define fib_nh_dev nh_common.nhc_dev
+#define fib_nh_gw_family nh_common.nhc_gw_family
+#define fib_nh_gw6 nh_common.nhc_gw.ipv6
+
+#define inet_daddr sk.__sk_common.skc_daddr
+#define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr
+#define inet_dport sk.__sk_common.skc_dport
+
+#define ir_loc_addr req.__req_common.skc_rcv_saddr
+#define ir_num req.__req_common.skc_num
+#define ir_rmt_addr req.__req_common.skc_daddr
+#define ir_rmt_port req.__req_common.skc_dport
+#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr
+#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr
+
+#define sk_family __sk_common.skc_family
+#define sk_rmem_alloc sk_backlog.rmem_alloc
+#define sk_refcnt __sk_common.skc_refcnt
+#define sk_state __sk_common.skc_state
+#define sk_v6_daddr __sk_common.skc_v6_daddr
+#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
+
+#define s6_addr32 in6_u.u6_addr32
+
+#define tw_daddr __tw_common.skc_daddr
+#define tw_rcv_saddr __tw_common.skc_rcv_saddr
+#define tw_dport __tw_common.skc_dport
+#define tw_refcnt __tw_common.skc_refcnt
+#define tw_v6_daddr __tw_common.skc_v6_daddr
+#define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c
new file mode 100644
index 000000000000..48e62f3f074f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c
new file mode 100644
index 000000000000..53e5e5a76888
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval___diff x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c
new file mode 100644
index 000000000000..d024fb2ac06e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval___err_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c
new file mode 100644
index 000000000000..9de6595d250c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval___val3_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c
new file mode 100644
index 000000000000..f3e9904df9c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c
@@ -0,0 +1,4 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_size___err_ambiguous1 x,
+ struct core_reloc_size___err_ambiguous2 y) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c
new file mode 100644
index 000000000000..fc3f69e58c71
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c
new file mode 100644
index 000000000000..51511648b4ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___all_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c
new file mode 100644
index 000000000000..67db3dceb279
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___diff_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c
new file mode 100644
index 000000000000..b357fc65431d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___fn_wrong_args x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c
new file mode 100644
index 000000000000..8ddf20d33d9e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___incompat x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c
new file mode 100644
index 000000000000..abbe5bddcefd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_id x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c
new file mode 100644
index 000000000000..24e7caf4f013
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_id___missing_targets x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf_data.c b/tools/testing/selftests/bpf/progs/btf_data.c
new file mode 100644
index 000000000000..baa525275bde
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_data.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+struct S {
+ int a;
+ int b;
+ int c;
+};
+
+union U {
+ int a;
+ int b;
+ int c;
+};
+
+struct S1 {
+ int a;
+ int b;
+ int c;
+};
+
+union U1 {
+ int a;
+ int b;
+ int c;
+};
+
+typedef int T;
+typedef int S;
+typedef int U;
+typedef int T1;
+typedef int S1;
+typedef int U1;
+
+struct root_struct {
+ S m_1;
+ T m_2;
+ U m_3;
+ S1 m_4;
+ T1 m_5;
+ U1 m_6;
+ struct S m_7;
+ struct S1 m_8;
+ union U m_9;
+ union U1 m_10;
+};
+
+int func(struct root_struct *root)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_ptr.h b/tools/testing/selftests/bpf/progs/btf_ptr.h
new file mode 100644
index 000000000000..c3c9797c67db
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_ptr.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define btf_ptr btf_ptr___not_used
+#define BTF_F_COMPACT BTF_F_COMPACT___not_used
+#define BTF_F_NONAME BTF_F_NONAME___not_used
+#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used
+#define BTF_F_ZERO BTF_F_ZERO___not_used
+#include "vmlinux.h"
+#undef btf_ptr
+#undef BTF_F_COMPACT
+#undef BTF_F_NONAME
+#undef BTF_F_PTR_RAW
+#undef BTF_F_ZERO
+
+struct btf_ptr {
+ void *ptr;
+ __u32 type_id;
+ __u32 flags;
+};
+
+enum {
+ BTF_F_COMPACT = (1ULL << 0),
+ BTF_F_NONAME = (1ULL << 1),
+ BTF_F_PTR_RAW = (1ULL << 2),
+ BTF_F_ZERO = (1ULL << 3),
+};
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi.h b/tools/testing/selftests/bpf/progs/cg_storage_multi.h
new file mode 100644
index 000000000000..a0778fe7857a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __PROGS_CG_STORAGE_MULTI_H
+#define __PROGS_CG_STORAGE_MULTI_H
+
+#include <asm/types.h>
+
+struct cgroup_value {
+ __u32 egress_pkts;
+ __u32 ingress_pkts;
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c
new file mode 100644
index 000000000000..44ad46b33539
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0;
+
+SEC("cgroup_skb/egress")
+int egress(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
new file mode 100644
index 000000000000..a25373002055
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0;
+
+SEC("cgroup_skb/egress/1")
+int egress1(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
+
+SEC("cgroup_skb/egress/2")
+int egress2(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
new file mode 100644
index 000000000000..a149f33bc533
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, __u64);
+ __type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0;
+
+SEC("cgroup_skb/egress/1")
+int egress1(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
+
+SEC("cgroup_skb/egress/2")
+int egress2(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
new file mode 100644
index 000000000000..3f757e30d7a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+__u16 g_serv_port = 0;
+
+static inline void set_ip(__u32 *dst, const struct in6_addr *src)
+{
+ dst[0] = src->in6_u.u6_addr32[0];
+ dst[1] = src->in6_u.u6_addr32[1];
+ dst[2] = src->in6_u.u6_addr32[2];
+ dst[3] = src->in6_u.u6_addr32[3];
+}
+
+static inline void set_tuple(struct bpf_sock_tuple *tuple,
+ const struct ipv6hdr *ip6h,
+ const struct tcphdr *tcph)
+{
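+	/* Build the tuple from the peer's point of view (addresses and ports
+	 * swapped) so the lookup below returns the peer's socket; this
+	 * assumes both endpoints live on the same host, as in this selftest.
+	 */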
+ set_ip(tuple->ipv6.saddr, &ip6h->daddr);
+ set_ip(tuple->ipv6.daddr, &ip6h->saddr);
+ tuple->ipv6.sport = tcph->dest;
+ tuple->ipv6.dport = tcph->source;
+}
+
+static inline int is_allowed_peer_cg(struct __sk_buff *skb,
+ const struct ipv6hdr *ip6h,
+ const struct tcphdr *tcph)
+{
+ __u64 cgid, acgid, peer_cgid, peer_acgid;
+ struct bpf_sock_tuple tuple;
+ size_t tuple_len = sizeof(tuple.ipv6);
+ struct bpf_sock *peer_sk;
+
+ set_tuple(&tuple, ip6h, tcph);
+
+ peer_sk = bpf_sk_lookup_tcp(skb, &tuple, tuple_len,
+ BPF_F_CURRENT_NETNS, 0);
+ if (!peer_sk)
+ return 0;
+
+ cgid = bpf_skb_cgroup_id(skb);
+ peer_cgid = bpf_sk_cgroup_id(peer_sk);
+
+ acgid = bpf_skb_ancestor_cgroup_id(skb, 2);
+ peer_acgid = bpf_sk_ancestor_cgroup_id(peer_sk, 2);
+
+ bpf_sk_release(peer_sk);
+
+ return cgid && cgid == peer_cgid && acgid && acgid == peer_acgid;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress_lookup(struct __sk_buff *skb)
+{
+ __u32 serv_port_key = 0;
+ struct ipv6hdr ip6h;
+ struct tcphdr tcph;
+
+ if (skb->protocol != bpf_htons(ETH_P_IPV6))
+ return 1;
+
+	/* For SYN packets coming to a listening socket, skb->remote_port will
+	 * be zero, so the IPv6/TCP headers are loaded to identify the remote
+	 * peer instead.
+	 */
+ if (bpf_skb_load_bytes(skb, 0, &ip6h, sizeof(ip6h)))
+ return 1;
+
+ if (ip6h.nexthdr != IPPROTO_TCP)
+ return 1;
+
+ if (bpf_skb_load_bytes(skb, sizeof(ip6h), &tcph, sizeof(tcph)))
+ return 1;
+
+ if (!g_serv_port)
+ return 0;
+
+ if (tcph.dest != g_serv_port)
+ return 1;
+
+ return is_allowed_peer_cg(skb, &ip6h, &tcph);
+}
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index ad3c498a8150..a943d394fd3a 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -8,6 +8,9 @@
#include <linux/in.h>
#include <linux/in6.h>
#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <linux/if.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
@@ -16,6 +19,18 @@
#define DST_REWRITE_IP4 0x7f000001U
#define DST_REWRITE_PORT4 4444
+#ifndef TCP_CA_NAME_MAX
+#define TCP_CA_NAME_MAX 16
+#endif
+
+#ifndef TCP_NOTSENT_LOWAT
+#define TCP_NOTSENT_LOWAT 25
+#endif
+
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
int _version SEC("version") = 1;
__attribute__ ((noinline))
@@ -33,6 +48,102 @@ int do_bind(struct bpf_sock_addr *ctx)
return 1;
}
+static __inline int verify_cc(struct bpf_sock_addr *ctx,
+ char expected[TCP_CA_NAME_MAX])
+{
+ char buf[TCP_CA_NAME_MAX];
+ int i;
+
+ if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
+ return 1;
+
+ for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+ if (buf[i] != expected[i])
+ return 1;
+ if (buf[i] == 0)
+ break;
+ }
+
+ return 0;
+}
+
+static __inline int set_cc(struct bpf_sock_addr *ctx)
+{
+ char reno[TCP_CA_NAME_MAX] = "reno";
+ char cubic[TCP_CA_NAME_MAX] = "cubic";
+
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno)))
+ return 1;
+ if (verify_cc(ctx, reno))
+ return 1;
+
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
+ return 1;
+ if (verify_cc(ctx, cubic))
+ return 1;
+
+ return 0;
+}
+
+static __inline int bind_to_device(struct bpf_sock_addr *ctx)
+{
+ char veth1[IFNAMSIZ] = "test_sock_addr1";
+ char veth2[IFNAMSIZ] = "test_sock_addr2";
+ char missing[IFNAMSIZ] = "nonexistent_dev";
+ char del_bind[IFNAMSIZ] = "";
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &veth1, sizeof(veth1)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &veth2, sizeof(veth2)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &missing, sizeof(missing)) != -ENODEV)
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &del_bind, sizeof(del_bind)))
+ return 1;
+
+ return 0;
+}
+
+static __inline int set_keepalive(struct bpf_sock_addr *ctx)
+{
+ int zero = 0, one = 1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)))
+ return 1;
+ if (ctx->type == SOCK_STREAM) {
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one)))
+ return 1;
+ }
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero)))
+ return 1;
+
+ return 0;
+}
+
+static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx)
+{
+ int lowat = 65535;
+
+ if (ctx->type == SOCK_STREAM) {
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)))
+ return 1;
+ }
+
+ return 0;
+}
+
SEC("cgroup/connect4")
int connect_v4_prog(struct bpf_sock_addr *ctx)
{
@@ -46,6 +157,16 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
+ /* Bind to device and unbind it. */
+ if (bind_to_device(ctx))
+ return 0;
+
+ if (set_keepalive(ctx))
+ return 0;
+
+ if (set_notsent_lowat(ctx))
+ return 0;
+
if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
return 0;
else if (ctx->type == SOCK_STREAM)
@@ -66,6 +187,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
bpf_sk_release(sk);
+ /* Rewrite congestion control. */
+ if (ctx->type == SOCK_STREAM && set_cc(ctx))
+ return 0;
+
/* Rewrite destination. */
ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c
new file mode 100644
index 000000000000..7396308677a3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <stdbool.h>
+
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
+
+struct svc_addr {
+ __be32 addr;
+ __be16 port;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct svc_addr);
+} service_mapping SEC(".maps");
+
+SEC("cgroup/connect4")
+int connect4(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in sa = {};
+ struct svc_addr *orig;
+
+ /* Force local address to 127.0.0.1:22222. */
+ sa.sin_family = AF_INET;
+ sa.sin_port = bpf_htons(22222);
+ sa.sin_addr.s_addr = bpf_htonl(0x7f000001);
+
+ if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+ return 0;
+
+ /* Rewire service 1.2.3.4:60000 to backend 127.0.0.1:60123. */
+ if (ctx->user_port == bpf_htons(60000)) {
+ orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!orig)
+ return 0;
+
+ orig->addr = ctx->user_ip4;
+ orig->port = ctx->user_port;
+
+ ctx->user_ip4 = bpf_htonl(0x7f000001);
+ ctx->user_port = bpf_htons(60123);
+ }
+ return 1;
+}
+
+SEC("cgroup/getsockname4")
+int getsockname4(struct bpf_sock_addr *ctx)
+{
+ /* Expose local server as 1.2.3.4:60000 to client. */
+ if (ctx->user_port == bpf_htons(60123)) {
+ ctx->user_ip4 = bpf_htonl(0x01020304);
+ ctx->user_port = bpf_htons(60000);
+ }
+ return 1;
+}
+
+SEC("cgroup/getpeername4")
+int getpeername4(struct bpf_sock_addr *ctx)
+{
+ struct svc_addr *orig;
+
+ /* Expose service 1.2.3.4:60000 as peer instead of backend. */
+ if (ctx->user_port == bpf_htons(60123)) {
+ orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0);
+ if (orig) {
+ ctx->user_ip4 = orig->addr;
+ ctx->user_port = orig->port;
+ }
+ }
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c
new file mode 100644
index 000000000000..c1a2b555e9ad
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
+
+struct svc_addr {
+ __be32 addr[4];
+ __be16 port;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct svc_addr);
+} service_mapping SEC(".maps");
+
+SEC("cgroup/connect6")
+int connect6(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in6 sa = {};
+ struct svc_addr *orig;
+
+ /* Force local address to [::1]:22223. */
+ sa.sin6_family = AF_INET6;
+ sa.sin6_port = bpf_htons(22223);
+ sa.sin6_addr.s6_addr32[3] = bpf_htonl(1);
+
+ if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+ return 0;
+
+ /* Rewire service [fc00::1]:60000 to backend [::1]:60124. */
+ if (ctx->user_port == bpf_htons(60000)) {
+ orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!orig)
+ return 0;
+
+ orig->addr[0] = ctx->user_ip6[0];
+ orig->addr[1] = ctx->user_ip6[1];
+ orig->addr[2] = ctx->user_ip6[2];
+ orig->addr[3] = ctx->user_ip6[3];
+ orig->port = ctx->user_port;
+
+ ctx->user_ip6[0] = 0;
+ ctx->user_ip6[1] = 0;
+ ctx->user_ip6[2] = 0;
+ ctx->user_ip6[3] = bpf_htonl(1);
+ ctx->user_port = bpf_htons(60124);
+ }
+ return 1;
+}
+
+SEC("cgroup/getsockname6")
+int getsockname6(struct bpf_sock_addr *ctx)
+{
+ /* Expose local server as [fc00::1]:60000 to client. */
+ if (ctx->user_port == bpf_htons(60124)) {
+ ctx->user_ip6[0] = bpf_htonl(0xfc000000);
+ ctx->user_ip6[1] = 0;
+ ctx->user_ip6[2] = 0;
+ ctx->user_ip6[3] = bpf_htonl(1);
+ ctx->user_port = bpf_htons(60000);
+ }
+ return 1;
+}
+
+SEC("cgroup/getpeername6")
+int getpeername6(struct bpf_sock_addr *ctx)
+{
+ struct svc_addr *orig;
+
+ /* Expose service [fc00::1]:60000 as peer instead of backend. */
+ if (ctx->user_port == bpf_htons(60124)) {
+ orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0);
+ if (orig) {
+ ctx->user_ip6[0] = orig->addr[0];
+ ctx->user_ip6[1] = orig->addr[1];
+ ctx->user_ip6[2] = orig->addr[2];
+ ctx->user_ip6[3] = orig->addr[3];
+ ctx->user_port = orig->port;
+ }
+ }
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 6d598cfbdb3e..e6e616cb7bc9 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -1,5 +1,10 @@
#include <stdint.h>
#include <stdbool.h>
+
+void preserce_ptr_sz_fn(long x) {}
+
+#define __bpf_aligned __attribute__((aligned(8)))
+
/*
* KERNEL
*/
@@ -379,7 +384,7 @@ struct core_reloc_arrays___equiv_zero_sz_arr {
struct core_reloc_arrays_substruct c[3];
struct core_reloc_arrays_substruct d[1][2];
/* equivalent to flexible array */
- struct core_reloc_arrays_substruct f[0][2];
+ struct core_reloc_arrays_substruct f[][2];
};
struct core_reloc_arrays___fixed_arr {
@@ -444,51 +449,51 @@ struct core_reloc_primitives {
char a;
int b;
enum core_reloc_primitives_enum c;
- void *d;
- int (*f)(const char *);
+ void *d __bpf_aligned;
+ int (*f)(const char *) __bpf_aligned;
};
struct core_reloc_primitives___diff_enum_def {
char a;
int b;
- void *d;
- int (*f)(const char *);
+ void *d __bpf_aligned;
+ int (*f)(const char *) __bpf_aligned;
enum {
X = 100,
Y = 200,
- } c; /* inline enum def with differing set of values */
+ } c __bpf_aligned; /* inline enum def with differing set of values */
};
struct core_reloc_primitives___diff_func_proto {
- void (*f)(int); /* incompatible function prototype */
- void *d;
- enum core_reloc_primitives_enum c;
+ void (*f)(int) __bpf_aligned; /* incompatible function prototype */
+ void *d __bpf_aligned;
+ enum core_reloc_primitives_enum c __bpf_aligned;
int b;
char a;
};
struct core_reloc_primitives___diff_ptr_type {
- const char * const d; /* different pointee type + modifiers */
- char a;
+ const char * const d __bpf_aligned; /* different pointee type + modifiers */
+ char a __bpf_aligned;
int b;
enum core_reloc_primitives_enum c;
- int (*f)(const char *);
+ int (*f)(const char *) __bpf_aligned;
};
struct core_reloc_primitives___err_non_enum {
char a[1];
int b;
int c; /* int instead of enum */
- void *d;
- int (*f)(const char *);
+ void *d __bpf_aligned;
+ int (*f)(const char *) __bpf_aligned;
};
struct core_reloc_primitives___err_non_int {
char a[1];
- int *b; /* ptr instead of int */
- enum core_reloc_primitives_enum c;
- void *d;
- int (*f)(const char *);
+ int *b __bpf_aligned; /* ptr instead of int */
+ enum core_reloc_primitives_enum c __bpf_aligned;
+ void *d __bpf_aligned;
+ int (*f)(const char *) __bpf_aligned;
};
struct core_reloc_primitives___err_non_ptr {
@@ -496,7 +501,7 @@ struct core_reloc_primitives___err_non_ptr {
int b;
enum core_reloc_primitives_enum c;
int d; /* int instead of ptr */
- int (*f)(const char *);
+ int (*f)(const char *) __bpf_aligned;
};
/*
@@ -507,7 +512,7 @@ struct core_reloc_mods_output {
};
typedef const int int_t;
-typedef const char *char_ptr_t;
+typedef const char *char_ptr_t __bpf_aligned;
typedef const int arr_t[7];
struct core_reloc_mods_substruct {
@@ -523,9 +528,9 @@ typedef struct {
struct core_reloc_mods {
int a;
int_t b;
- char *c;
+ char *c __bpf_aligned;
char_ptr_t d;
- int e[3];
+ int e[3] __bpf_aligned;
arr_t f;
struct core_reloc_mods_substruct g;
core_reloc_mods_substruct_t h;
@@ -535,9 +540,9 @@ struct core_reloc_mods {
struct core_reloc_mods___mod_swap {
int b;
int_t a;
- char *d;
+ char *d __bpf_aligned;
char_ptr_t c;
- int f[3];
+ int f[3] __bpf_aligned;
arr_t e;
struct {
int y;
@@ -555,7 +560,7 @@ typedef arr1_t arr2_t;
typedef arr2_t arr3_t;
typedef arr3_t arr4_t;
-typedef const char * const volatile fancy_char_ptr_t;
+typedef const char * const volatile fancy_char_ptr_t __bpf_aligned;
typedef core_reloc_mods_substruct_t core_reloc_mods_substruct_tt;
@@ -567,7 +572,7 @@ struct core_reloc_mods___typedefs {
arr4_t e;
fancy_char_ptr_t d;
fancy_char_ptr_t c;
- int3_t b;
+ int3_t b __bpf_aligned;
int3_t a;
};
@@ -647,7 +652,7 @@ struct core_reloc_misc_extensible {
};
/*
- * EXISTENCE
+ * FIELD EXISTENCE
*/
struct core_reloc_existence_output {
int a_exists;
@@ -739,19 +744,19 @@ struct core_reloc_bitfields___bit_sz_change {
int8_t sb4: 1; /* 4 -> 1 */
int32_t sb20: 30; /* 20 -> 30 */
/* non-bitfields */
- uint16_t u32; /* 32 -> 16 */
- int64_t s32; /* 32 -> 64 */
+ uint16_t u32; /* 32 -> 16 */
+ int64_t s32 __bpf_aligned; /* 32 -> 64 */
};
/* turn bitfield into non-bitfield and vice versa */
struct core_reloc_bitfields___bitfield_vs_int {
uint64_t ub1; /* 3 -> 64 non-bitfield */
uint8_t ub2; /* 20 -> 8 non-bitfield */
- int64_t ub7; /* 7 -> 64 non-bitfield signed */
- int64_t sb4; /* 4 -> 64 non-bitfield signed */
- uint64_t sb20; /* 20 -> 16 non-bitfield unsigned */
- int32_t u32: 20; /* 32 non-bitfield -> 20 bitfield */
- uint64_t s32: 60; /* 32 non-bitfield -> 60 bitfield */
+ int64_t ub7 __bpf_aligned; /* 7 -> 64 non-bitfield signed */
+ int64_t sb4 __bpf_aligned; /* 4 -> 64 non-bitfield signed */
+ uint64_t sb20 __bpf_aligned; /* 20 -> 16 non-bitfield unsigned */
+ int32_t u32: 20; /* 32 non-bitfield -> 20 bitfield */
+ uint64_t s32: 60 __bpf_aligned; /* 32 non-bitfield -> 60 bitfield */
};
struct core_reloc_bitfields___just_big_enough {
@@ -804,3 +809,353 @@ struct core_reloc_size___diff_sz {
void *ptr_field;
enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field;
};
+
+/* Error case of two candidates with the same field (int_field) at the same
+ * offset, but with differing final relocation values: size 4 vs size 1
+ */
+struct core_reloc_size___err_ambiguous1 {
+ /* int at offset 0 */
+ int int_field;
+
+ struct { int x; } struct_field;
+ union { int x; } union_field;
+ int arr_field[4];
+ void *ptr_field;
+ enum { VALUE___1 = 123 } enum_field;
+};
+
+struct core_reloc_size___err_ambiguous2 {
+ /* char at offset 0 */
+ char int_field;
+
+ struct { int x; } struct_field;
+ union { int x; } union_field;
+ int arr_field[4];
+ void *ptr_field;
+ enum { VALUE___2 = 123 } enum_field;
+};
+
+/*
+ * TYPE EXISTENCE & SIZE
+ */
+struct core_reloc_type_based_output {
+ bool struct_exists;
+ bool union_exists;
+ bool enum_exists;
+ bool typedef_named_struct_exists;
+ bool typedef_anon_struct_exists;
+ bool typedef_struct_ptr_exists;
+ bool typedef_int_exists;
+ bool typedef_enum_exists;
+ bool typedef_void_ptr_exists;
+ bool typedef_func_proto_exists;
+ bool typedef_arr_exists;
+
+ int struct_sz;
+ int union_sz;
+ int enum_sz;
+ int typedef_named_struct_sz;
+ int typedef_anon_struct_sz;
+ int typedef_struct_ptr_sz;
+ int typedef_int_sz;
+ int typedef_enum_sz;
+ int typedef_void_ptr_sz;
+ int typedef_func_proto_sz;
+ int typedef_arr_sz;
+};
+
+struct a_struct {
+ int x;
+};
+
+union a_union {
+ int y;
+ int z;
+};
+
+typedef struct a_struct named_struct_typedef;
+
+typedef struct { int x, y, z; } anon_struct_typedef;
+
+typedef struct {
+ int a, b, c;
+} *struct_ptr_typedef;
+
+enum an_enum {
+ AN_ENUM_VAL1 = 1,
+ AN_ENUM_VAL2 = 2,
+ AN_ENUM_VAL3 = 3,
+};
+
+typedef int int_typedef;
+
+typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
+
+typedef void *void_ptr_typedef;
+
+typedef int (*func_proto_typedef)(long);
+
+typedef char arr_typedef[20];
+
+struct core_reloc_type_based {
+ struct a_struct f1;
+ union a_union f2;
+ enum an_enum f3;
+ named_struct_typedef f4;
+ anon_struct_typedef f5;
+ struct_ptr_typedef f6;
+ int_typedef f7;
+ enum_typedef f8;
+ void_ptr_typedef f9;
+ func_proto_typedef f10;
+ arr_typedef f11;
+};
+
+/* no types in target */
+struct core_reloc_type_based___all_missing {
+};
+
+/* different type sizes, extra modifiers, anon vs named enums, etc */
+struct a_struct___diff_sz {
+ long x;
+ int y;
+ char z;
+};
+
+union a_union___diff_sz {
+ char yy;
+ char zz;
+};
+
+typedef struct a_struct___diff_sz named_struct_typedef___diff_sz;
+
+typedef struct { long xx, yy, zzz; } anon_struct_typedef___diff_sz;
+
+typedef struct {
+ char aa[1], bb[2], cc[3];
+} *struct_ptr_typedef___diff_sz;
+
+enum an_enum___diff_sz {
+ AN_ENUM_VAL1___diff_sz = 0x123412341234,
+ AN_ENUM_VAL2___diff_sz = 2,
+};
+
+typedef unsigned long int_typedef___diff_sz;
+
+typedef enum an_enum___diff_sz enum_typedef___diff_sz;
+
+typedef const void * const void_ptr_typedef___diff_sz;
+
+typedef int_typedef___diff_sz (*func_proto_typedef___diff_sz)(char);
+
+typedef int arr_typedef___diff_sz[2];
+
+struct core_reloc_type_based___diff_sz {
+ struct a_struct___diff_sz f1;
+ union a_union___diff_sz f2;
+ enum an_enum___diff_sz f3;
+ named_struct_typedef___diff_sz f4;
+ anon_struct_typedef___diff_sz f5;
+ struct_ptr_typedef___diff_sz f6;
+ int_typedef___diff_sz f7;
+ enum_typedef___diff_sz f8;
+ void_ptr_typedef___diff_sz f9;
+ func_proto_typedef___diff_sz f10;
+ arr_typedef___diff_sz f11;
+};
+
+/* incompatibilities between target and local types */
+union a_struct___incompat { /* union instead of struct */
+ int x;
+};
+
+struct a_union___incompat { /* struct instead of union */
+ int y;
+ int z;
+};
+
+/* typedef to union, not to struct */
+typedef union a_struct___incompat named_struct_typedef___incompat;
+
+/* typedef to void pointer, instead of struct */
+typedef void *anon_struct_typedef___incompat;
+
+/* extra pointer indirection */
+typedef struct {
+ int a, b, c;
+} **struct_ptr_typedef___incompat;
+
+/* typedef of a struct with int, instead of int */
+typedef struct { int x; } int_typedef___incompat;
+
+/* typedef to func_proto, instead of enum */
+typedef int (*enum_typedef___incompat)(void);
+
+/* pointer to char instead of void */
+typedef char *void_ptr_typedef___incompat;
+
+/* void return type instead of int */
+typedef void (*func_proto_typedef___incompat)(long);
+
+/* multi-dimensional array instead of a single-dimensional */
+typedef int arr_typedef___incompat[20][2];
+
+struct core_reloc_type_based___incompat {
+ union a_struct___incompat f1;
+ struct a_union___incompat f2;
+ /* the only valid one is enum, to check that something still succeeds */
+ enum an_enum f3;
+ named_struct_typedef___incompat f4;
+ anon_struct_typedef___incompat f5;
+ struct_ptr_typedef___incompat f6;
+ int_typedef___incompat f7;
+ enum_typedef___incompat f8;
+ void_ptr_typedef___incompat f9;
+ func_proto_typedef___incompat f10;
+ arr_typedef___incompat f11;
+};
+
+/* func_proto with incompatible signature */
+typedef void (*func_proto_typedef___fn_wrong_ret1)(long);
+typedef int * (*func_proto_typedef___fn_wrong_ret2)(long);
+typedef struct { int x; } int_struct_typedef;
+typedef int_struct_typedef (*func_proto_typedef___fn_wrong_ret3)(long);
+typedef int (*func_proto_typedef___fn_wrong_arg)(void *);
+typedef int (*func_proto_typedef___fn_wrong_arg_cnt1)(long, long);
+typedef int (*func_proto_typedef___fn_wrong_arg_cnt2)(void);
+
+struct core_reloc_type_based___fn_wrong_args {
+ /* one valid type to make sure relos still work */
+ struct a_struct f1;
+ func_proto_typedef___fn_wrong_ret1 f2;
+ func_proto_typedef___fn_wrong_ret2 f3;
+ func_proto_typedef___fn_wrong_ret3 f4;
+ func_proto_typedef___fn_wrong_arg f5;
+ func_proto_typedef___fn_wrong_arg_cnt1 f6;
+ func_proto_typedef___fn_wrong_arg_cnt2 f7;
+};
+
+/*
+ * TYPE ID MAPPING (LOCAL AND TARGET)
+ */
+struct core_reloc_type_id_output {
+ int local_anon_struct;
+ int local_anon_union;
+ int local_anon_enum;
+ int local_anon_func_proto_ptr;
+ int local_anon_void_ptr;
+ int local_anon_arr;
+
+ int local_struct;
+ int local_union;
+ int local_enum;
+ int local_int;
+ int local_struct_typedef;
+ int local_func_proto_typedef;
+ int local_arr_typedef;
+
+ int targ_struct;
+ int targ_union;
+ int targ_enum;
+ int targ_int;
+ int targ_struct_typedef;
+ int targ_func_proto_typedef;
+ int targ_arr_typedef;
+};
+
+struct core_reloc_type_id {
+ struct a_struct f1;
+ union a_union f2;
+ enum an_enum f3;
+ named_struct_typedef f4;
+ func_proto_typedef f5;
+ arr_typedef f6;
+};
+
+struct core_reloc_type_id___missing_targets {
+ /* nothing */
+};
+
+/*
+ * ENUMERATOR VALUE EXISTENCE AND VALUE RELOCATION
+ */
+struct core_reloc_enumval_output {
+ bool named_val1_exists;
+ bool named_val2_exists;
+ bool named_val3_exists;
+ bool anon_val1_exists;
+ bool anon_val2_exists;
+ bool anon_val3_exists;
+
+ int named_val1;
+ int named_val2;
+ int anon_val1;
+ int anon_val2;
+};
+
+enum named_enum {
+ NAMED_ENUM_VAL1 = 1,
+ NAMED_ENUM_VAL2 = 2,
+ NAMED_ENUM_VAL3 = 3,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1 = 0x10,
+ ANON_ENUM_VAL2 = 0x20,
+ ANON_ENUM_VAL3 = 0x30,
+} anon_enum;
+
+struct core_reloc_enumval {
+ enum named_enum f1;
+ anon_enum f2;
+};
+
+/* differing enumerator values */
+enum named_enum___diff {
+ NAMED_ENUM_VAL1___diff = 101,
+ NAMED_ENUM_VAL2___diff = 202,
+ NAMED_ENUM_VAL3___diff = 303,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1___diff = 0x11,
+ ANON_ENUM_VAL2___diff = 0x22,
+ ANON_ENUM_VAL3___diff = 0x33,
+} anon_enum___diff;
+
+struct core_reloc_enumval___diff {
+ enum named_enum___diff f1;
+ anon_enum___diff f2;
+};
+
+/* missing (optional) third enum value */
+enum named_enum___val3_missing {
+ NAMED_ENUM_VAL1___val3_missing = 111,
+ NAMED_ENUM_VAL2___val3_missing = 222,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1___val3_missing = 0x111,
+ ANON_ENUM_VAL2___val3_missing = 0x222,
+} anon_enum___val3_missing;
+
+struct core_reloc_enumval___val3_missing {
+ enum named_enum___val3_missing f1;
+ anon_enum___val3_missing f2;
+};
+
+/* missing (mandatory) second enum value, should fail */
+enum named_enum___err_missing {
+ NAMED_ENUM_VAL1___err_missing = 1,
+ NAMED_ENUM_VAL3___err_missing = 3,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1___err_missing = 0x111,
+ ANON_ENUM_VAL3___err_missing = 0x222,
+} anon_enum___err_missing;
+
+struct core_reloc_enumval___err_missing {
+ enum named_enum___err_missing f1;
+ anon_enum___err_missing f2;
+};
diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c
index 9365b686f84b..5f645fdaba6f 100644
--- a/tools/testing/selftests/bpf/progs/fentry_test.c
+++ b/tools/testing/selftests/bpf/progs/fentry_test.c
@@ -55,3 +55,25 @@ int BPF_PROG(test6, __u64 a, void *b, short c, int d, void * e, __u64 f)
e == (void *)20 && f == 21;
return 0;
}
+
+struct bpf_fentry_test_t {
+ struct bpf_fentry_test_t *a;
+};
+
+__u64 test7_result = 0;
+SEC("fentry/bpf_fentry_test7")
+int BPF_PROG(test7, struct bpf_fentry_test_t *arg)
+{
+ if (arg == 0)
+ test7_result = 1;
+ return 0;
+}
+
+__u64 test8_result = 0;
+SEC("fentry/bpf_fentry_test8")
+int BPF_PROG(test8, struct bpf_fentry_test_t *arg)
+{
+ if (arg->a == 0)
+ test8_result = 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
index 98e1efe14549..49a84a3a2306 100644
--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/stddef.h>
+#include <linux/if_ether.h>
#include <linux/ipv6.h>
#include <linux/bpf.h>
+#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_tracing.h>
@@ -151,4 +153,29 @@ int new_get_constant(long val)
test_get_constant = 1;
return test_get_constant; /* original get_constant() returns val - 122 */
}
+
+__u64 test_pkt_write_access_subprog = 0;
+SEC("freplace/test_pkt_write_access_subprog")
+int new_test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off)
+{
+
+ void *data = (void *)(long)skb->data;
+ void *data_end = (void *)(long)skb->data_end;
+ struct tcphdr *tcp;
+
+ if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr))
+ return -1;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return -1;
+
+ /* make modifications to the packet data */
+ tcp->check++;
+ tcp->syn = 0;
+
+ test_pkt_write_access_subprog = 1;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c
index bd1e17d8024c..0952affb22a6 100644
--- a/tools/testing/selftests/bpf/progs/fexit_test.c
+++ b/tools/testing/selftests/bpf/progs/fexit_test.c
@@ -56,3 +56,25 @@ int BPF_PROG(test6, __u64 a, void *b, short c, int d, void *e, __u64 f, int ret)
e == (void *)20 && f == 21 && ret == 111;
return 0;
}
+
+struct bpf_fentry_test_t {
+ struct bpf_fentry_test *a;
+};
+
+__u64 test7_result = 0;
+SEC("fexit/bpf_fentry_test7")
+int BPF_PROG(test7, struct bpf_fentry_test_t *arg)
+{
+ if (arg == 0)
+ test7_result = 1;
+ return 0;
+}
+
+__u64 test8_result = 0;
+SEC("fexit/bpf_fentry_test8")
+int BPF_PROG(test8, struct bpf_fentry_test_t *arg)
+{
+ if (arg->a == 0)
+ test8_result = 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c b/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c
new file mode 100644
index 000000000000..c8943ccee6c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+volatile __u64 test_fmod_ret = 0;
+SEC("fmod_ret/security_new_get_constant")
+int BPF_PROG(fmod_ret_test, long val, int ret)
+{
+ test_fmod_ret = 1;
+ return 120;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
new file mode 100644
index 000000000000..bb2a77c5b62b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define VAR_NUM 2
+
+struct hmap_elem {
+ struct bpf_spin_lock lock;
+ int var[VAR_NUM];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct hmap_elem);
+} hash_map SEC(".maps");
+
+SEC("freplace/handle_kprobe")
+int new_handle_kprobe(struct pt_regs *ctx)
+{
+ struct hmap_elem zero = {}, *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&hash_map, &key);
+ if (!val)
+ return 1;
+ /* spin_lock in hash map */
+ bpf_spin_lock(&val->lock);
+ val->var[0] = 99;
+ bpf_spin_unlock(&val->lock);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
new file mode 100644
index 000000000000..68a5a9db928a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+struct bpf_map_def SEC("maps") sock_map = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 2,
+};
+
+SEC("freplace/cls_redirect")
+int freplace_cls_redirect_test(struct __sk_buff *skb)
+{
+ int ret = 0;
+ const int zero = 0;
+ struct bpf_sock *sk;
+
+ sk = bpf_map_lookup_elem(&sock_map, &zero);
+ if (!sk)
+ return TC_ACT_SHOT;
+
+ ret = bpf_map_update_elem(&sock_map, &zero, sk, 0);
+ bpf_sk_release(sk);
+
+ return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
new file mode 100644
index 000000000000..544e5ac90461
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/stddef.h>
+#include <linux/ipv6.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+SEC("freplace/connect_v4_prog")
+int new_connect_v4_prog(struct bpf_sock_addr *ctx)
+{
+ // return value that's in an invalid range
+ return 255;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_get_constant.c b/tools/testing/selftests/bpf/progs/freplace_get_constant.c
new file mode 100644
index 000000000000..705e4b64dfc2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_get_constant.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+volatile __u64 test_get_constant = 0;
+SEC("freplace/get_constant")
+int security_new_get_constant(long val)
+{
+ if (val != 123)
+ return 0;
+ test_get_constant = 1;
+ return test_get_constant; /* original get_constant() returns val - 122 */
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/load_bytes_relative.c b/tools/testing/selftests/bpf/progs/load_bytes_relative.c
new file mode 100644
index 000000000000..dc1d04a7a3d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/load_bytes_relative.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} test_result SEC(".maps");
+
+SEC("cgroup_skb/egress")
+int load_bytes_relative(struct __sk_buff *skb)
+{
+ struct ethhdr eth;
+ struct iphdr iph;
+
+ __u32 map_key = 0;
+ __u32 test_passed = 0;
+
+ /* MAC header is not set by the time cgroup_skb/egress triggers */
+ if (bpf_skb_load_bytes_relative(skb, 0, &eth, sizeof(eth),
+ BPF_HDR_START_MAC) != -EFAULT)
+ goto fail;
+
+ if (bpf_skb_load_bytes_relative(skb, 0, &iph, sizeof(iph),
+ BPF_HDR_START_NET))
+ goto fail;
+
+ if (bpf_skb_load_bytes_relative(skb, 0xffff, &iph, sizeof(iph),
+ BPF_HDR_START_NET) != -EFAULT)
+ goto fail;
+
+ test_passed = 1;
+
+fail:
+ bpf_map_update_elem(&test_result, &map_key, &test_passed, BPF_ANY);
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
new file mode 100644
index 000000000000..0758ba229ae0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define DUMMY_STORAGE_VALUE 0xdeadbeef
+
+int monitored_pid = 0;
+int inode_storage_result = -1;
+int sk_storage_result = -1;
+
+struct dummy_storage {
+ __u32 value;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_INODE_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct dummy_storage);
+} inode_storage_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
+ __type(key, int);
+ __type(value, struct dummy_storage);
+} sk_storage_map SEC(".maps");
+
+/* TODO Use vmlinux.h once BTF pruning for embedded types is fixed.
+ */
+struct sock {} __attribute__((preserve_access_index));
+struct sockaddr {} __attribute__((preserve_access_index));
+struct socket {
+ struct sock *sk;
+} __attribute__((preserve_access_index));
+
+struct inode {} __attribute__((preserve_access_index));
+struct dentry {
+ struct inode *d_inode;
+} __attribute__((preserve_access_index));
+struct file {
+ struct inode *f_inode;
+} __attribute__((preserve_access_index));
+
+
+SEC("lsm/inode_unlink")
+int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct dummy_storage *storage;
+
+ if (pid != monitored_pid)
+ return 0;
+
+ storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ if (storage->value == DUMMY_STORAGE_VALUE)
+ inode_storage_result = -1;
+
+ inode_storage_result =
+ bpf_inode_storage_delete(&inode_storage_map, victim->d_inode);
+
+ return 0;
+}
+
+SEC("lsm/socket_bind")
+int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
+ int addrlen)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct dummy_storage *storage;
+
+ if (pid != monitored_pid)
+ return 0;
+
+ storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ if (storage->value == DUMMY_STORAGE_VALUE)
+ sk_storage_result = -1;
+
+ sk_storage_result = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
+ return 0;
+}
+
+SEC("lsm/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
+ int protocol, int kern)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct dummy_storage *storage;
+
+ if (pid != monitored_pid)
+ return 0;
+
+ storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ storage->value = DUMMY_STORAGE_VALUE;
+
+ return 0;
+}
+
+SEC("lsm/file_open")
+int BPF_PROG(file_open, struct file *file)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct dummy_storage *storage;
+
+ if (pid != monitored_pid)
+ return 0;
+
+ if (!file->f_inode)
+ return 0;
+
+ storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ storage->value = DUMMY_STORAGE_VALUE;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
index b4598d4bc4f7..ff4d343b94b5 100644
--- a/tools/testing/selftests/bpf/progs/lsm.c
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -9,6 +9,27 @@
#include <bpf/bpf_tracing.h>
#include <errno.h>
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} lru_hash SEC(".maps");
+
char _license[] SEC("license") = "GPL";
int monitored_pid = 0;
@@ -36,13 +57,54 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
return ret;
}
-SEC("lsm/bprm_committed_creds")
+SEC("lsm.s/bprm_committed_creds")
int BPF_PROG(test_void_hook, struct linux_binprm *bprm)
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
+ char args[64];
+ __u32 key = 0;
+ __u64 *value;
if (monitored_pid == pid)
bprm_count++;
+ bpf_copy_from_user(args, sizeof(args), (void *)bprm->vma->vm_mm->arg_start);
+ bpf_copy_from_user(args, sizeof(args), (void *)bprm->mm->arg_start);
+
+ value = bpf_map_lookup_elem(&array, &key);
+ if (value)
+ *value = 0;
+ value = bpf_map_lookup_elem(&hash, &key);
+ if (value)
+ *value = 0;
+ value = bpf_map_lookup_elem(&lru_hash, &key);
+ if (value)
+ *value = 0;
+
+ return 0;
+}
+SEC("lsm/task_free") /* lsm/ is ok, lsm.s/ fails */
+int BPF_PROG(test_task_free, struct task_struct *task)
+{
+ return 0;
+}
+
+int copy_test = 0;
+
+SEC("fentry.s/__x64_sys_setdomainname")
+int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs)
+{
+ void *ptr = (void *)PT_REGS_PARM1(regs);
+ int len = PT_REGS_PARM2(regs);
+ int buf = 0;
+ long ret;
+
+ ret = bpf_copy_from_user(&buf, sizeof(buf), ptr);
+ if (len == -2 && ret == 0 && buf == 1234)
+ copy_test++;
+ if (len == -3 && ret == -EFAULT)
+ copy_test++;
+ if (len == -4 && ret == -EFAULT)
+ copy_test++;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
new file mode 100644
index 000000000000..c325405751e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -0,0 +1,694 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define LOOP_BOUND 0xf
+#define MAX_ENTRIES 8
+#define HALF_ENTRIES (MAX_ENTRIES >> 1)
+
+_Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND");
+
+enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC;
+__u32 g_line = 0;
+
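+/* VERIFY_TYPE() notes the map type under test in g_map_type before running
+ * its check function; VERIFY() notes the current source line in g_line and
+ * returns from the program when the checked expression does not hold.
+ */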
+#define VERIFY_TYPE(type, func) ({ \
+ g_map_type = type; \
+ if (!func()) \
+ return 0; \
+})
+
+
+#define VERIFY(expr) ({ \
+ g_line = __LINE__; \
+ if (!(expr)) \
+ return 0; \
+})
+
+struct bpf_map_memory {
+ __u32 pages;
+} __attribute__((preserve_access_index));
+
+struct bpf_map {
+ enum bpf_map_type map_type;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 id;
+ struct bpf_map_memory memory;
+} __attribute__((preserve_access_index));
+
+static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size,
+ __u32 value_size, __u32 max_entries)
+{
+ VERIFY(map->map_type == g_map_type);
+ VERIFY(map->key_size == key_size);
+ VERIFY(map->value_size == value_size);
+ VERIFY(map->max_entries == max_entries);
+ VERIFY(map->id > 0);
+ VERIFY(map->memory.pages > 0);
+
+ return 1;
+}
+
+static inline int check_bpf_map_ptr(struct bpf_map *indirect,
+ struct bpf_map *direct)
+{
+ VERIFY(indirect->map_type == direct->map_type);
+ VERIFY(indirect->key_size == direct->key_size);
+ VERIFY(indirect->value_size == direct->value_size);
+ VERIFY(indirect->max_entries == direct->max_entries);
+ VERIFY(indirect->id == direct->id);
+ VERIFY(indirect->memory.pages == direct->memory.pages);
+
+ return 1;
+}
+
+static inline int check(struct bpf_map *indirect, struct bpf_map *direct,
+ __u32 key_size, __u32 value_size, __u32 max_entries)
+{
+ VERIFY(check_bpf_map_ptr(indirect, direct));
+ VERIFY(check_bpf_map_fields(indirect, key_size, value_size,
+ max_entries));
+ return 1;
+}
+
+static inline int check_default(struct bpf_map *indirect,
+ struct bpf_map *direct)
+{
+ VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32),
+ MAX_ENTRIES));
+ return 1;
+}
+
+static __noinline int
+check_default_noinline(struct bpf_map *indirect, struct bpf_map *direct)
+{
+ VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32),
+ MAX_ENTRIES));
+ return 1;
+}
+
+typedef struct {
+ int counter;
+} atomic_t;
+
+struct bpf_htab {
+ struct bpf_map map;
+ atomic_t count;
+ __u32 n_buckets;
+ __u32 elem_size;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC); /* to test bpf_htab.count */
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_hash SEC(".maps");
+
+static inline int check_hash(void)
+{
+ struct bpf_htab *hash = (struct bpf_htab *)&m_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_hash;
+ int i;
+
+ VERIFY(check_default_noinline(&hash->map, map));
+
+ VERIFY(hash->n_buckets == MAX_ENTRIES);
+ VERIFY(hash->elem_size == 64);
+
+ VERIFY(hash->count.counter == 0);
+ for (i = 0; i < HALF_ENTRIES; ++i) {
+ const __u32 key = i;
+ const __u32 val = 1;
+
+ if (bpf_map_update_elem(hash, &key, &val, 0))
+ return 0;
+ }
+ VERIFY(hash->count.counter == HALF_ENTRIES);
+
+ return 1;
+}
+
+struct bpf_array {
+ struct bpf_map map;
+ __u32 elem_size;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_array SEC(".maps");
+
+static inline int check_array(void)
+{
+ struct bpf_array *array = (struct bpf_array *)&m_array;
+ struct bpf_map *map = (struct bpf_map *)&m_array;
+ int i, n_lookups = 0, n_keys = 0;
+
+ VERIFY(check_default(&array->map, map));
+
+ VERIFY(array->elem_size == 8);
+
+ for (i = 0; i < array->map.max_entries && i < LOOP_BOUND; ++i) {
+ const __u32 key = i;
+ __u32 *val = bpf_map_lookup_elem(array, &key);
+
+ ++n_lookups;
+ if (val)
+ ++n_keys;
+ }
+
+ VERIFY(n_lookups == MAX_ENTRIES);
+ VERIFY(n_keys == MAX_ENTRIES);
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_prog_array SEC(".maps");
+
+static inline int check_prog_array(void)
+{
+ struct bpf_array *prog_array = (struct bpf_array *)&m_prog_array;
+ struct bpf_map *map = (struct bpf_map *)&m_prog_array;
+
+ VERIFY(check_default(&prog_array->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_perf_event_array SEC(".maps");
+
+static inline int check_perf_event_array(void)
+{
+ struct bpf_array *perf_event_array = (struct bpf_array *)&m_perf_event_array;
+ struct bpf_map *map = (struct bpf_map *)&m_perf_event_array;
+
+ VERIFY(check_default(&perf_event_array->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_percpu_hash SEC(".maps");
+
+static inline int check_percpu_hash(void)
+{
+ struct bpf_htab *percpu_hash = (struct bpf_htab *)&m_percpu_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_percpu_hash;
+
+ VERIFY(check_default(&percpu_hash->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_percpu_array SEC(".maps");
+
+static inline int check_percpu_array(void)
+{
+ struct bpf_array *percpu_array = (struct bpf_array *)&m_percpu_array;
+ struct bpf_map *map = (struct bpf_map *)&m_percpu_array;
+
+ VERIFY(check_default(&percpu_array->map, map));
+
+ return 1;
+}
+
+struct bpf_stack_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u64);
+} m_stack_trace SEC(".maps");
+
+static inline int check_stack_trace(void)
+{
+ struct bpf_stack_map *stack_trace =
+ (struct bpf_stack_map *)&m_stack_trace;
+ struct bpf_map *map = (struct bpf_map *)&m_stack_trace;
+
+ VERIFY(check(&stack_trace->map, map, sizeof(__u32), sizeof(__u64),
+ MAX_ENTRIES));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_cgroup_array SEC(".maps");
+
+static inline int check_cgroup_array(void)
+{
+ struct bpf_array *cgroup_array = (struct bpf_array *)&m_cgroup_array;
+ struct bpf_map *map = (struct bpf_map *)&m_cgroup_array;
+
+ VERIFY(check_default(&cgroup_array->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_lru_hash SEC(".maps");
+
+static inline int check_lru_hash(void)
+{
+ struct bpf_htab *lru_hash = (struct bpf_htab *)&m_lru_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_lru_hash;
+
+ VERIFY(check_default(&lru_hash->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_lru_percpu_hash SEC(".maps");
+
+static inline int check_lru_percpu_hash(void)
+{
+ struct bpf_htab *lru_percpu_hash = (struct bpf_htab *)&m_lru_percpu_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_lru_percpu_hash;
+
+ VERIFY(check_default(&lru_percpu_hash->map, map));
+
+ return 1;
+}
+
+struct lpm_trie {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct lpm_key {
+ struct bpf_lpm_trie_key trie_key;
+ __u32 data;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, struct lpm_key);
+ __type(value, __u32);
+} m_lpm_trie SEC(".maps");
+
+static inline int check_lpm_trie(void)
+{
+ struct lpm_trie *lpm_trie = (struct lpm_trie *)&m_lpm_trie;
+ struct bpf_map *map = (struct bpf_map *)&m_lpm_trie;
+
+ VERIFY(check(&lpm_trie->map, map, sizeof(struct lpm_key), sizeof(__u32),
+ MAX_ENTRIES));
+
+ return 1;
+}
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+ });
+} m_array_of_maps SEC(".maps") = {
+ .values = { (void *)&inner_map, 0, 0, 0, 0, 0, 0, 0, 0 },
+};
+
+static inline int check_array_of_maps(void)
+{
+ struct bpf_array *array_of_maps = (struct bpf_array *)&m_array_of_maps;
+ struct bpf_map *map = (struct bpf_map *)&m_array_of_maps;
+
+ VERIFY(check_default(&array_of_maps->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+ __array(values, struct inner_map);
+} m_hash_of_maps SEC(".maps") = {
+ .values = {
+ [2] = &inner_map,
+ },
+};
+
+static inline int check_hash_of_maps(void)
+{
+ struct bpf_htab *hash_of_maps = (struct bpf_htab *)&m_hash_of_maps;
+ struct bpf_map *map = (struct bpf_map *)&m_hash_of_maps;
+
+ VERIFY(check_default(&hash_of_maps->map, map));
+
+ return 1;
+}
+
+struct bpf_dtab {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_devmap SEC(".maps");
+
+static inline int check_devmap(void)
+{
+ struct bpf_dtab *devmap = (struct bpf_dtab *)&m_devmap;
+ struct bpf_map *map = (struct bpf_map *)&m_devmap;
+
+ VERIFY(check_default(&devmap->map, map));
+
+ return 1;
+}
+
+struct bpf_stab {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_sockmap SEC(".maps");
+
+static inline int check_sockmap(void)
+{
+ struct bpf_stab *sockmap = (struct bpf_stab *)&m_sockmap;
+ struct bpf_map *map = (struct bpf_map *)&m_sockmap;
+
+ VERIFY(check_default(&sockmap->map, map));
+
+ return 1;
+}
+
+struct bpf_cpu_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_cpumap SEC(".maps");
+
+static inline int check_cpumap(void)
+{
+ struct bpf_cpu_map *cpumap = (struct bpf_cpu_map *)&m_cpumap;
+ struct bpf_map *map = (struct bpf_map *)&m_cpumap;
+
+ VERIFY(check_default(&cpumap->map, map));
+
+ return 1;
+}
+
+struct xsk_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_xskmap SEC(".maps");
+
+static inline int check_xskmap(void)
+{
+ struct xsk_map *xskmap = (struct xsk_map *)&m_xskmap;
+ struct bpf_map *map = (struct bpf_map *)&m_xskmap;
+
+ VERIFY(check_default(&xskmap->map, map));
+
+ return 1;
+}
+
+struct bpf_shtab {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_sockhash SEC(".maps");
+
+static inline int check_sockhash(void)
+{
+ struct bpf_shtab *sockhash = (struct bpf_shtab *)&m_sockhash;
+ struct bpf_map *map = (struct bpf_map *)&m_sockhash;
+
+ VERIFY(check_default(&sockhash->map, map));
+
+ return 1;
+}
+
+struct bpf_cgroup_storage_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, __u32);
+} m_cgroup_storage SEC(".maps");
+
+static inline int check_cgroup_storage(void)
+{
+ struct bpf_cgroup_storage_map *cgroup_storage =
+ (struct bpf_cgroup_storage_map *)&m_cgroup_storage;
+ struct bpf_map *map = (struct bpf_map *)&m_cgroup_storage;
+
+ VERIFY(check(&cgroup_storage->map, map,
+ sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0));
+
+ return 1;
+}
+
+struct reuseport_array {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_reuseport_sockarray SEC(".maps");
+
+static inline int check_reuseport_sockarray(void)
+{
+ struct reuseport_array *reuseport_sockarray =
+ (struct reuseport_array *)&m_reuseport_sockarray;
+ struct bpf_map *map = (struct bpf_map *)&m_reuseport_sockarray;
+
+ VERIFY(check_default(&reuseport_sockarray->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, __u32);
+} m_percpu_cgroup_storage SEC(".maps");
+
+static inline int check_percpu_cgroup_storage(void)
+{
+ struct bpf_cgroup_storage_map *percpu_cgroup_storage =
+ (struct bpf_cgroup_storage_map *)&m_percpu_cgroup_storage;
+ struct bpf_map *map = (struct bpf_map *)&m_percpu_cgroup_storage;
+
+ VERIFY(check(&percpu_cgroup_storage->map, map,
+ sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0));
+
+ return 1;
+}
+
+struct bpf_queue_stack {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_QUEUE);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(value, __u32);
+} m_queue SEC(".maps");
+
+static inline int check_queue(void)
+{
+ struct bpf_queue_stack *queue = (struct bpf_queue_stack *)&m_queue;
+ struct bpf_map *map = (struct bpf_map *)&m_queue;
+
+ VERIFY(check(&queue->map, map, 0, sizeof(__u32), MAX_ENTRIES));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(value, __u32);
+} m_stack SEC(".maps");
+
+static inline int check_stack(void)
+{
+ struct bpf_queue_stack *stack = (struct bpf_queue_stack *)&m_stack;
+ struct bpf_map *map = (struct bpf_map *)&m_stack;
+
+ VERIFY(check(&stack->map, map, 0, sizeof(__u32), MAX_ENTRIES));
+
+ return 1;
+}
+
+struct bpf_local_storage_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_sk_storage SEC(".maps");
+
+static inline int check_sk_storage(void)
+{
+ struct bpf_local_storage_map *sk_storage =
+ (struct bpf_local_storage_map *)&m_sk_storage;
+ struct bpf_map *map = (struct bpf_map *)&m_sk_storage;
+
+ VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_devmap_hash SEC(".maps");
+
+static inline int check_devmap_hash(void)
+{
+ struct bpf_dtab *devmap_hash = (struct bpf_dtab *)&m_devmap_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_devmap_hash;
+
+ VERIFY(check_default(&devmap_hash->map, map));
+
+ return 1;
+}
+
+struct bpf_ringbuf_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} m_ringbuf SEC(".maps");
+
+static inline int check_ringbuf(void)
+{
+ struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf;
+ struct bpf_map *map = (struct bpf_map *)&m_ringbuf;
+
+ VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12));
+
+ return 1;
+}
+
+SEC("cgroup_skb/egress")
+int cg_skb(void *ctx)
+{
+ VERIFY_TYPE(BPF_MAP_TYPE_HASH, check_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_ARRAY, check_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_PROG_ARRAY, check_prog_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, check_perf_event_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_HASH, check_percpu_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, check_percpu_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_STACK_TRACE, check_stack_trace);
+ VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, check_cgroup_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_LRU_HASH, check_lru_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, check_lru_percpu_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_LPM_TRIE, check_lpm_trie);
+ VERIFY_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, check_array_of_maps);
+ VERIFY_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, check_hash_of_maps);
+ VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP, check_devmap);
+ VERIFY_TYPE(BPF_MAP_TYPE_SOCKMAP, check_sockmap);
+ VERIFY_TYPE(BPF_MAP_TYPE_CPUMAP, check_cpumap);
+ VERIFY_TYPE(BPF_MAP_TYPE_XSKMAP, check_xskmap);
+ VERIFY_TYPE(BPF_MAP_TYPE_SOCKHASH, check_sockhash);
+ VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, check_cgroup_storage);
+ VERIFY_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ check_reuseport_sockarray);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ check_percpu_cgroup_storage);
+ VERIFY_TYPE(BPF_MAP_TYPE_QUEUE, check_queue);
+ VERIFY_TYPE(BPF_MAP_TYPE_STACK, check_stack);
+ VERIFY_TYPE(BPF_MAP_TYPE_SK_STORAGE, check_sk_storage);
+ VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, check_devmap_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_RINGBUF, check_ringbuf);
+
+ return 1;
+}
+
+__u32 _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/metadata_unused.c b/tools/testing/selftests/bpf/progs/metadata_unused.c
new file mode 100644
index 000000000000..672a0d19f8d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/metadata_unused.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+volatile const char bpf_metadata_a[] SEC(".rodata") = "foo";
+volatile const int bpf_metadata_b SEC(".rodata") = 1;
+
+SEC("cgroup_skb/egress")
+int prog(struct xdp_md *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/metadata_used.c b/tools/testing/selftests/bpf/progs/metadata_used.c
new file mode 100644
index 000000000000..b7198e65383d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/metadata_used.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+volatile const char bpf_metadata_a[] SEC(".rodata") = "bar";
+volatile const int bpf_metadata_b SEC(".rodata") = 2;
+
+SEC("cgroup_skb/egress")
+int prog(struct xdp_md *ctx)
+{
+ return bpf_metadata_b ? 1 : 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
new file mode 100644
index 000000000000..6b670039ea67
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+
+#include "btf_ptr.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#include <errno.h>
+
+long ret = 0;
+int num_subtests = 0;
+int ran_subtests = 0;
+bool skip = false;
+
+#define STRSIZE 2048
+#define EXPECTED_STRSIZE 256
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, char[STRSIZE]);
+} strdata SEC(".maps");
+
+static int __strncmp(const void *m1, const void *m2, size_t len)
+{
+ const unsigned char *s1 = m1;
+ const unsigned char *s2 = m2;
+ int i, delta = 0;
+
+ for (i = 0; i < len; i++) {
+ delta = s1[i] - s2[i];
+ if (delta || s1[i] == 0 || s2[i] == 0)
+ break;
+ }
+ return delta;
+}
+
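+/* TEST_BTF() declares a static _type instance initialized from __VA_ARGS__,
+ * renders it with bpf_snprintf_btf() (BTF_F_COMPACT is always set), and
+ * compares the output with the _expected string; a mismatch stores -EBADMSG
+ * in ret, and once ret is negative the remaining subtests are skipped.
+ */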
+#if __has_builtin(__builtin_btf_type_id)
+#define TEST_BTF(_str, _type, _flags, _expected, ...) \
+ do { \
+ static const char _expectedval[EXPECTED_STRSIZE] = \
+ _expected; \
+ static const char _ptrtype[64] = #_type; \
+ __u64 _hflags = _flags | BTF_F_COMPACT; \
+ static _type _ptrdata = __VA_ARGS__; \
+ static struct btf_ptr _ptr = { }; \
+ int _cmp; \
+ \
+ ++num_subtests; \
+ if (ret < 0) \
+ break; \
+ ++ran_subtests; \
+ _ptr.ptr = &_ptrdata; \
+ _ptr.type_id = bpf_core_type_id_kernel(_type); \
+ if (_ptr.type_id <= 0) { \
+ ret = -EINVAL; \
+ break; \
+ } \
+ ret = bpf_snprintf_btf(_str, STRSIZE, \
+ &_ptr, sizeof(_ptr), _hflags); \
+ if (ret) \
+ break; \
+ _cmp = __strncmp(_str, _expectedval, EXPECTED_STRSIZE); \
+ if (_cmp != 0) { \
+ bpf_printk("(%d) got %s", _cmp, _str); \
+ bpf_printk("(%d) expected %s", _cmp, \
+ _expectedval); \
+ ret = -EBADMSG; \
+ break; \
+ } \
+ } while (0)
+#endif
+
+/* Use where expected data string matches its stringified declaration */
+#define TEST_BTF_C(_str, _type, _flags, ...) \
+ TEST_BTF(_str, _type, _flags, "(" #_type ")" #__VA_ARGS__, \
+ __VA_ARGS__)
+
+/* TRACE_EVENT(netif_receive_skb,
+ * TP_PROTO(struct sk_buff *skb),
+ */
+SEC("tp_btf/netif_receive_skb")
+int BPF_PROG(trace_netif_receive_skb, struct sk_buff *skb)
+{
+ static __u64 flags[] = { 0, BTF_F_COMPACT, BTF_F_ZERO, BTF_F_PTR_RAW,
+ BTF_F_NONAME, BTF_F_COMPACT | BTF_F_ZERO |
+ BTF_F_PTR_RAW | BTF_F_NONAME };
+ static struct btf_ptr p = { };
+ __u32 key = 0;
+ int i, __ret;
+ char *str;
+
+#if __has_builtin(__builtin_btf_type_id)
+ str = bpf_map_lookup_elem(&strdata, &key);
+ if (!str)
+ return 0;
+
+ /* Ensure we can write skb string representation */
+ p.type_id = bpf_core_type_id_kernel(struct sk_buff);
+ p.ptr = skb;
+ for (i = 0; i < ARRAY_SIZE(flags); i++) {
+ ++num_subtests;
+ ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0);
+ if (ret < 0)
+ bpf_printk("returned %d when writing skb", ret);
+ ++ran_subtests;
+ }
+
+ /* Check invalid ptr value */
+ p.ptr = 0;
+ __ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0);
+ if (__ret >= 0) {
+ bpf_printk("printing NULL should generate error, got (%d)",
+ __ret);
+ ret = -ERANGE;
+ }
+
+ /* Verify type display for various types. */
+
+ /* simple int */
+ TEST_BTF_C(str, int, 0, 1234);
+ TEST_BTF(str, int, BTF_F_NONAME, "1234", 1234);
+ /* zero value should be printed at toplevel */
+ TEST_BTF(str, int, 0, "(int)0", 0);
+ TEST_BTF(str, int, BTF_F_NONAME, "0", 0);
+ TEST_BTF(str, int, BTF_F_ZERO, "(int)0", 0);
+ TEST_BTF(str, int, BTF_F_NONAME | BTF_F_ZERO, "0", 0);
+ TEST_BTF_C(str, int, 0, -4567);
+ TEST_BTF(str, int, BTF_F_NONAME, "-4567", -4567);
+
+ /* simple char */
+ TEST_BTF_C(str, char, 0, 100);
+ TEST_BTF(str, char, BTF_F_NONAME, "100", 100);
+ /* zero value should be printed at toplevel */
+ TEST_BTF(str, char, 0, "(char)0", 0);
+ TEST_BTF(str, char, BTF_F_NONAME, "0", 0);
+ TEST_BTF(str, char, BTF_F_ZERO, "(char)0", 0);
+ TEST_BTF(str, char, BTF_F_NONAME | BTF_F_ZERO, "0", 0);
+
+ /* simple typedef */
+ TEST_BTF_C(str, uint64_t, 0, 100);
+ TEST_BTF(str, u64, BTF_F_NONAME, "1", 1);
+ /* zero value should be printed at toplevel */
+ TEST_BTF(str, u64, 0, "(u64)0", 0);
+ TEST_BTF(str, u64, BTF_F_NONAME, "0", 0);
+ TEST_BTF(str, u64, BTF_F_ZERO, "(u64)0", 0);
+ TEST_BTF(str, u64, BTF_F_NONAME|BTF_F_ZERO, "0", 0);
+
+ /* typedef struct */
+ TEST_BTF_C(str, atomic_t, 0, {.counter = (int)1,});
+ TEST_BTF(str, atomic_t, BTF_F_NONAME, "{1,}", {.counter = 1,});
+ /* typedef with 0 value should be printed at toplevel */
+ TEST_BTF(str, atomic_t, 0, "(atomic_t){}", {.counter = 0,});
+ TEST_BTF(str, atomic_t, BTF_F_NONAME, "{}", {.counter = 0,});
+ TEST_BTF(str, atomic_t, BTF_F_ZERO, "(atomic_t){.counter = (int)0,}",
+ {.counter = 0,});
+ TEST_BTF(str, atomic_t, BTF_F_NONAME|BTF_F_ZERO,
+ "{0,}", {.counter = 0,});
+
+ /* enum where enum value does (and does not) exist */
+ TEST_BTF_C(str, enum bpf_cmd, 0, BPF_MAP_CREATE);
+ TEST_BTF(str, enum bpf_cmd, 0, "(enum bpf_cmd)BPF_MAP_CREATE", 0);
+ TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO,
+ "BPF_MAP_CREATE", 0);
+
+ TEST_BTF(str, enum bpf_cmd, BTF_F_ZERO, "(enum bpf_cmd)BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO,
+ "BPF_MAP_CREATE", BPF_MAP_CREATE);
+ TEST_BTF_C(str, enum bpf_cmd, 0, 2000);
+ TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "2000", 2000);
+
+ /* simple struct */
+ TEST_BTF_C(str, struct btf_enum, 0,
+ {.name_off = (__u32)3,.val = (__s32)-1,});
+ TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{3,-1,}",
+ { .name_off = 3, .val = -1,});
+ TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{-1,}",
+ { .name_off = 0, .val = -1,});
+ TEST_BTF(str, struct btf_enum, BTF_F_NONAME|BTF_F_ZERO, "{0,-1,}",
+ { .name_off = 0, .val = -1,});
+ /* empty struct should be printed */
+ TEST_BTF(str, struct btf_enum, 0, "(struct btf_enum){}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF(str, struct btf_enum, BTF_F_ZERO,
+ "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}",
+ { .name_off = 0, .val = 0,});
+
+ /* struct with pointers */
+ TEST_BTF(str, struct list_head, BTF_F_PTR_RAW,
+ "(struct list_head){.next = (struct list_head *)0x0000000000000001,}",
+ { .next = (struct list_head *)1 });
+ /* NULL pointer should not be displayed */
+ TEST_BTF(str, struct list_head, BTF_F_PTR_RAW,
+ "(struct list_head){}",
+ { .next = (struct list_head *)0 });
+
+ /* struct with char array */
+ TEST_BTF(str, struct bpf_prog_info, 0,
+ "(struct bpf_prog_info){.name = (char[])['f','o','o',],}",
+ { .name = "foo",});
+ TEST_BTF(str, struct bpf_prog_info, BTF_F_NONAME,
+ "{['f','o','o',],}",
+ {.name = "foo",});
+ /* leading null char means do not display string */
+ TEST_BTF(str, struct bpf_prog_info, 0,
+ "(struct bpf_prog_info){}",
+ {.name = {'\0', 'f', 'o', 'o'}});
+ /* handle non-printable characters */
+ TEST_BTF(str, struct bpf_prog_info, 0,
+ "(struct bpf_prog_info){.name = (char[])[1,2,3,],}",
+ { .name = {1, 2, 3, 0}});
+
+ /* struct with non-char array */
+ TEST_BTF(str, struct __sk_buff, 0,
+ "(struct __sk_buff){.cb = (__u32[])[1,2,3,4,5,],}",
+ { .cb = {1, 2, 3, 4, 5,},});
+ TEST_BTF(str, struct __sk_buff, BTF_F_NONAME,
+ "{[1,2,3,4,5,],}",
+ { .cb = { 1, 2, 3, 4, 5},});
+	/* For non-char arrays, show non-zero values only */
+ TEST_BTF(str, struct __sk_buff, 0,
+ "(struct __sk_buff){.cb = (__u32[])[1,],}",
+ { .cb = { 0, 0, 1, 0, 0},});
+
+ /* struct with bitfields */
+ TEST_BTF_C(str, struct bpf_insn, 0,
+ {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,});
+ TEST_BTF(str, struct bpf_insn, BTF_F_NONAME, "{1,0x2,0x3,4,5,}",
+ {.code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4,
+ .imm = 5,});
+#else
+ skip = true;
+#endif
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
new file mode 100644
index 000000000000..25467d13c356
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, 16384);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(stack_trace_t));
+} stackmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stackdata_map SEC(".maps");
+
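+/* Result flags: each starts at 1 and is bumped to 2 once the corresponding
+ * stack helper returns a positive value.
+ */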
+long stackid_kernel = 1;
+long stackid_user = 1;
+long stack_kernel = 1;
+long stack_user = 1;
+
+SEC("perf_event")
+int oncpu(void *ctx)
+{
+ stack_trace_t *trace;
+ __u32 key = 0;
+ long val;
+
+ val = bpf_get_stackid(ctx, &stackmap, 0);
+ if (val > 0)
+ stackid_kernel = 2;
+ val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
+ if (val > 0)
+ stackid_user = 2;
+
+ trace = bpf_map_lookup_elem(&stackdata_map, &key);
+ if (!trace)
+ return 0;
+
+ val = bpf_get_stack(ctx, trace, sizeof(stack_trace_t), 0);
+ if (val > 0)
+ stack_kernel = 2;
+
+ val = bpf_get_stack(ctx, trace, sizeof(stack_trace_t), BPF_F_USER_STACK);
+ if (val > 0)
+ stack_user = 2;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
new file mode 100644
index 000000000000..e5ab4836a641
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(value_size, sizeof(int));
+ __uint(key_size, sizeof(int));
+} perfbuf SEC(".maps");
+
+const volatile int batch_cnt = 0;
+
+long sample_val = 42;
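+/* Counts failed bpf_perf_event_output() calls; over-aligned so this hot
+ * counter sits on its own cache lines and avoids false sharing.
+ */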
+long dropped __attribute__((aligned(128))) = 0;
+
+SEC("fentry/__x64_sys_getpgid")
+int bench_perfbuf(void *ctx)
+{
+ __u64 *sample;
+ int i;
+
+ for (i = 0; i < batch_cnt; i++) {
+ if (bpf_perf_event_output(ctx, &perfbuf, BPF_F_CURRENT_CPU,
+ &sample_val, sizeof(sample_val)))
+ __sync_add_and_fetch(&dropped, 1);
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/profiler.h b/tools/testing/selftests/bpf/progs/profiler.h
new file mode 100644
index 000000000000..3bac4fdd4bdf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler.h
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#pragma once
+
+#define TASK_COMM_LEN 16
+#define MAX_ANCESTORS 4
+#define MAX_PATH 256
+#define KILL_TARGET_LEN 64
+#define CTL_MAXNAME 10
+#define MAX_ARGS_LEN 4096
+#define MAX_FILENAME_LEN 512
+#define MAX_ENVIRON_LEN 8192
+#define MAX_PATH_DEPTH 32
+#define MAX_FILEPATH_LENGTH (MAX_PATH_DEPTH * MAX_PATH)
+#define MAX_CGROUPS_PATH_DEPTH 8
+
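+/* Upper bounds on the variable-length payloads: per-component sizes
+ * (metadata, cgroup) and the per-event totals built from them.
+ */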
+#define MAX_METADATA_PAYLOAD_LEN TASK_COMM_LEN
+
+#define MAX_CGROUP_PAYLOAD_LEN \
+ (MAX_PATH * 2 + (MAX_PATH * MAX_CGROUPS_PATH_DEPTH))
+
+#define MAX_CAP_PAYLOAD_LEN (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
+
+#define MAX_SYSCTL_PAYLOAD_LEN \
+ (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + CTL_MAXNAME + MAX_PATH)
+
+#define MAX_KILL_PAYLOAD_LEN \
+ (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + TASK_COMM_LEN + \
+ KILL_TARGET_LEN)
+
+#define MAX_EXEC_PAYLOAD_LEN \
+ (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILENAME_LEN + \
+ MAX_ARGS_LEN + MAX_ENVIRON_LEN)
+
+#define MAX_FILEMOD_PAYLOAD_LEN \
+ (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILEPATH_LENGTH + \
+ MAX_FILEPATH_LENGTH)
+
+enum data_type {
+ INVALID_EVENT,
+ EXEC_EVENT,
+ FORK_EVENT,
+ KILL_EVENT,
+ SYSCTL_EVENT,
+ FILEMOD_EVENT,
+ MAX_DATA_TYPE_EVENT
+};
+
+enum filemod_type {
+ FMOD_OPEN,
+ FMOD_LINK,
+ FMOD_SYMLINK,
+};
+
+struct ancestors_data_t {
+ pid_t ancestor_pids[MAX_ANCESTORS];
+ uint32_t ancestor_exec_ids[MAX_ANCESTORS];
+ uint64_t ancestor_start_times[MAX_ANCESTORS];
+ uint32_t num_ancestors;
+};
+
+struct var_metadata_t {
+ enum data_type type;
+ pid_t pid;
+ uint32_t exec_id;
+ uid_t uid;
+ gid_t gid;
+ uint64_t start_time;
+ uint32_t cpu_id;
+ uint64_t bpf_stats_num_perf_events;
+ uint64_t bpf_stats_start_ktime_ns;
+ uint8_t comm_length;
+};
+
+struct cgroup_data_t {
+ ino_t cgroup_root_inode;
+ ino_t cgroup_proc_inode;
+ uint64_t cgroup_root_mtime;
+ uint64_t cgroup_proc_mtime;
+ uint16_t cgroup_root_length;
+ uint16_t cgroup_proc_length;
+ uint16_t cgroup_full_length;
+ int cgroup_full_path_root_pos;
+};
+
+struct var_sysctl_data_t {
+ struct var_metadata_t meta;
+ struct cgroup_data_t cgroup_data;
+ struct ancestors_data_t ancestors_info;
+ uint8_t sysctl_val_length;
+ uint16_t sysctl_path_length;
+ char payload[MAX_SYSCTL_PAYLOAD_LEN];
+};
+
+struct var_kill_data_t {
+ struct var_metadata_t meta;
+ struct cgroup_data_t cgroup_data;
+ struct ancestors_data_t ancestors_info;
+ pid_t kill_target_pid;
+ int kill_sig;
+ uint32_t kill_count;
+ uint64_t last_kill_time;
+ uint8_t kill_target_name_length;
+ uint8_t kill_target_cgroup_proc_length;
+ char payload[MAX_KILL_PAYLOAD_LEN];
+ size_t payload_length;
+};
+
+struct var_exec_data_t {
+ struct var_metadata_t meta;
+ struct cgroup_data_t cgroup_data;
+ pid_t parent_pid;
+ uint32_t parent_exec_id;
+ uid_t parent_uid;
+ uint64_t parent_start_time;
+ uint16_t bin_path_length;
+ uint16_t cmdline_length;
+ uint16_t environment_length;
+ char payload[MAX_EXEC_PAYLOAD_LEN];
+};
+
+struct var_fork_data_t {
+ struct var_metadata_t meta;
+ pid_t parent_pid;
+ uint32_t parent_exec_id;
+ uint64_t parent_start_time;
+ char payload[MAX_METADATA_PAYLOAD_LEN];
+};
+
+struct var_filemod_data_t {
+ struct var_metadata_t meta;
+ struct cgroup_data_t cgroup_data;
+ enum filemod_type fmod_type;
+ unsigned int dst_flags;
+ uint32_t src_device_id;
+ uint32_t dst_device_id;
+ ino_t src_inode;
+ ino_t dst_inode;
+ uint16_t src_filepath_length;
+ uint16_t dst_filepath_length;
+ char payload[MAX_FILEMOD_PAYLOAD_LEN];
+};
+
+struct profiler_config_struct {
+ bool fetch_cgroups_from_bpf;
+ ino_t cgroup_fs_inode;
+ ino_t cgroup_login_session_inode;
+ uint64_t kill_signals_mask;
+ ino_t inode_filter;
+ uint32_t stale_info_secs;
+ bool use_variable_buffers;
+ bool read_environ_from_exec;
+ bool enable_cgroup_v1_resolver;
+};
+
+struct bpf_func_stats_data {
+ uint64_t time_elapsed_ns;
+ uint64_t num_executions;
+ uint64_t num_perf_events;
+};
+
+struct bpf_func_stats_ctx {
+ uint64_t start_time_ns;
+ struct bpf_func_stats_data* bpf_func_stats_data_val;
+};
+
+enum bpf_function_id {
+ profiler_bpf_proc_sys_write,
+ profiler_bpf_sched_process_exec,
+ profiler_bpf_sched_process_exit,
+ profiler_bpf_sys_enter_kill,
+ profiler_bpf_do_filp_open_ret,
+ profiler_bpf_sched_process_fork,
+ profiler_bpf_vfs_link,
+ profiler_bpf_vfs_symlink,
+ profiler_bpf_max_function_id
+};
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
new file mode 100644
index 000000000000..00578311a423
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -0,0 +1,969 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "profiler.h"
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+#define O_WRONLY 00000001
+#define O_RDWR 00000002
+#define O_DIRECTORY 00200000
+#define __O_TMPFILE 020000000
+#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
+#define MAX_ERRNO 4095
+#define S_IFMT 00170000
+#define S_IFSOCK 0140000
+#define S_IFLNK 0120000
+#define S_IFREG 0100000
+#define S_IFBLK 0060000
+#define S_IFDIR 0040000
+#define S_IFCHR 0020000
+#define S_IFIFO 0010000
+#define S_ISUID 0004000
+#define S_ISGID 0002000
+#define S_ISVTX 0001000
+#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
+#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
+#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
+#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
+#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
+#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
+#define IS_ERR_VALUE(x) ((unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO)
+
+#define KILL_DATA_ARRAY_SIZE 8
+
+struct var_kill_data_arr_t {
+ struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
+};
+
+union any_profiler_data_t {
+ struct var_exec_data_t var_exec;
+ struct var_kill_data_t var_kill;
+ struct var_sysctl_data_t var_sysctl;
+ struct var_filemod_data_t var_filemod;
+ struct var_fork_data_t var_fork;
+ struct var_kill_data_arr_t var_kill_data_arr;
+};
+
+volatile struct profiler_config_struct bpf_config = {};
+
+#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
+#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
+#define CGROUP_LOGIN_SESSION_INODE \
+ (bpf_config.cgroup_login_session_inode)
+#define KILL_SIGNALS (bpf_config.kill_signals_mask)
+#define STALE_INFO (bpf_config.stale_info_secs)
+#define INODE_FILTER (bpf_config.inode_filter)
+#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
+#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
+
+struct kernfs_iattrs___52 {
+ struct iattr ia_iattr;
+};
+
+struct kernfs_node___52 {
+ union /* kernfs_node_id */ {
+ struct {
+ u32 ino;
+ u32 generation;
+ };
+ u64 id;
+ } id;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, u32);
+ __type(value, union any_profiler_data_t);
+} data_heap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} events SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, KILL_DATA_ARRAY_SIZE);
+ __type(key, u32);
+ __type(value, struct var_kill_data_arr_t);
+} var_tpid_to_data SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, profiler_bpf_max_function_id);
+ __type(key, u32);
+ __type(value, struct bpf_func_stats_data);
+} bpf_func_stats SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, bool);
+ __uint(max_entries, 16);
+} allowed_devices SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u64);
+ __type(value, bool);
+ __uint(max_entries, 1024);
+} allowed_file_inodes SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u64);
+ __type(value, bool);
+ __uint(max_entries, 1024);
+} allowed_directory_inodes SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, bool);
+ __uint(max_entries, 16);
+} disallowed_exec_inodes SEC(".maps");
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
+#endif
+
+static INLINE bool IS_ERR(const void* ptr)
+{
+ return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static INLINE u32 get_userspace_pid()
+{
+ return bpf_get_current_pid_tgid() >> 32;
+}
+
+static INLINE bool is_init_process(u32 tgid)
+{
+ return tgid == 1 || tgid == 0;
+}
+
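+/* Copy at most max bytes from src to dst; returns the clamped length,
+ * or 0 if the probe read fails.
+ */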
+static INLINE unsigned long
+probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
+{
+ len = len < max ? len : max;
+ if (len > 1) {
+ if (bpf_probe_read(dst, len, src))
+ return 0;
+ } else if (len == 1) {
+ if (bpf_probe_read(dst, 1, src))
+ return 0;
+ }
+ return len;
+}
+
+static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
+ int spid)
+{
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
+ if (arr_struct->array[i].meta.pid == spid)
+ return i;
+ return -1;
+}
+
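+/* Walk the real_parent chain (up to MAX_ANCESTORS levels, stopping at init)
+ * and record each ancestor's pid, exec id and start time.
+ */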
+static INLINE void populate_ancestors(struct task_struct* task,
+ struct ancestors_data_t* ancestors_data)
+{
+ struct task_struct* parent = task;
+ u32 num_ancestors, ppid;
+
+ ancestors_data->num_ancestors = 0;
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
+ parent = BPF_CORE_READ(parent, real_parent);
+ if (parent == NULL)
+ break;
+ ppid = BPF_CORE_READ(parent, tgid);
+ if (is_init_process(ppid))
+ break;
+ ancestors_data->ancestor_pids[num_ancestors] = ppid;
+ ancestors_data->ancestor_exec_ids[num_ancestors] =
+ BPF_CORE_READ(parent, self_exec_id);
+ ancestors_data->ancestor_start_times[num_ancestors] =
+ BPF_CORE_READ(parent, start_time);
+ ancestors_data->num_ancestors = num_ancestors;
+ }
+}
+
+static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
+ struct kernfs_node* cgroup_root_node,
+ void* payload,
+ int* root_pos)
+{
+ void* payload_start = payload;
+ size_t filepart_length;
+
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
+ filepart_length =
+ bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name));
+ if (!cgroup_node)
+ return payload;
+ if (cgroup_node == cgroup_root_node)
+ *root_pos = payload - payload_start;
+ if (filepart_length <= MAX_PATH) {
+ barrier_var(filepart_length);
+ payload += filepart_length;
+ }
+ cgroup_node = BPF_CORE_READ(cgroup_node, parent);
+ }
+ return payload;
+}
+
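+/* Return the inode number of a kernfs node, handling both the older layout
+ * (kernfs_node___52 with a nested ino/generation union) and the newer plain
+ * u64 id via CO-RE field-existence checks.
+ */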
+static ino_t get_inode_from_kernfs(struct kernfs_node* node)
+{
+ struct kernfs_node___52* node52 = (void*)node;
+
+ if (bpf_core_field_exists(node52->id.ino)) {
+ barrier_var(node52);
+ return BPF_CORE_READ(node52, id.ino);
+ } else {
+ barrier_var(node);
+ return (u64)BPF_CORE_READ(node, id);
+ }
+}
+
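+/* id of the "pids" cgroup subsystem; used below to locate the matching
+ * controller when resolving cgroup v1 hierarchies.
+ */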
+int pids_cgrp_id = 1;
+
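+/* Fill cgroup_data with the root and proc kernfs inodes, mtimes and name
+ * lengths for the task, appending the names (and, if configured, the full
+ * cgroup path) to payload; returns the advanced payload pointer.
+ */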
+static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
+ struct task_struct* task,
+ void* payload)
+{
+ struct kernfs_node* root_kernfs =
+ BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
+ struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
+
+ if (ENABLE_CGROUP_V1_RESOLVER) {
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+ struct cgroup_subsys_state* subsys =
+ BPF_CORE_READ(task, cgroups, subsys[i]);
+ if (subsys != NULL) {
+ int subsys_id = BPF_CORE_READ(subsys, ss, id);
+ if (subsys_id == pids_cgrp_id) {
+ proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
+ root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
+ break;
+ }
+ }
+ }
+ }
+
+ cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
+ cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);
+
+ if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
+ cgroup_data->cgroup_root_mtime =
+ BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
+ cgroup_data->cgroup_proc_mtime =
+ BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
+ } else {
+ struct kernfs_iattrs___52* root_iattr =
+ (struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
+ cgroup_data->cgroup_root_mtime =
+ BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);
+
+ struct kernfs_iattrs___52* proc_iattr =
+ (struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
+ cgroup_data->cgroup_proc_mtime =
+ BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
+ }
+
+ cgroup_data->cgroup_root_length = 0;
+ cgroup_data->cgroup_proc_length = 0;
+ cgroup_data->cgroup_full_length = 0;
+
+ size_t cgroup_root_length =
+ bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name));
+ barrier_var(cgroup_root_length);
+ if (cgroup_root_length <= MAX_PATH) {
+ barrier_var(cgroup_root_length);
+ cgroup_data->cgroup_root_length = cgroup_root_length;
+ payload += cgroup_root_length;
+ }
+
+ size_t cgroup_proc_length =
+ bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name));
+ barrier_var(cgroup_proc_length);
+ if (cgroup_proc_length <= MAX_PATH) {
+ barrier_var(cgroup_proc_length);
+ cgroup_data->cgroup_proc_length = cgroup_proc_length;
+ payload += cgroup_proc_length;
+ }
+
+ if (FETCH_CGROUPS_FROM_BPF) {
+ cgroup_data->cgroup_full_path_root_pos = -1;
+ void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
+ &cgroup_data->cgroup_full_path_root_pos);
+ cgroup_data->cgroup_full_length = payload_end_pos - payload;
+ payload = payload_end_pos;
+ }
+
+ return (void*)payload;
+}
+
+static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
+ struct task_struct* task,
+ u32 pid, void* payload)
+{
+ u64 uid_gid = bpf_get_current_uid_gid();
+
+ metadata->uid = (u32)uid_gid;
+ metadata->gid = uid_gid >> 32;
+ metadata->pid = pid;
+ metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
+ metadata->start_time = BPF_CORE_READ(task, start_time);
+ metadata->comm_length = 0;
+
+ size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
+ barrier_var(comm_length);
+ if (comm_length <= TASK_COMM_LEN) {
+ barrier_var(comm_length);
+ metadata->comm_length = comm_length;
+ payload += comm_length;
+ }
+
+ return (void*)payload;
+}
+
+static INLINE struct var_kill_data_t*
+get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
+{
+ int zero = 0;
+ struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
+
+ if (kill_data == NULL)
+ return NULL;
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+ void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
+ payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
+ size_t payload_length = payload - (void*)kill_data->payload;
+ kill_data->payload_length = payload_length;
+ populate_ancestors(task, &kill_data->ancestors_info);
+ kill_data->meta.type = KILL_EVENT;
+ kill_data->kill_target_pid = tpid;
+ kill_data->kill_sig = sig;
+ kill_data->kill_count = 1;
+ kill_data->last_kill_time = bpf_ktime_get_ns();
+ return kill_data;
+}
+
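+/* Record a kill() aimed at tpid: ignore signals outside the configured mask,
+ * then either add a new per-sender entry to the target's array or, if the
+ * sender already has a non-stale entry, bump its kill_count.
+ */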
+static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
+{
+ if ((KILL_SIGNALS & (1ULL << sig)) == 0)
+ return 0;
+
+ u32 spid = get_userspace_pid();
+ struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
+
+ if (arr_struct == NULL) {
+ struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
+ int zero = 0;
+
+ if (kill_data == NULL)
+ return 0;
+ arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
+ if (arr_struct == NULL)
+ return 0;
+ bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data);
+ } else {
+ int index = get_var_spid_index(arr_struct, spid);
+
+ if (index == -1) {
+ struct var_kill_data_t* kill_data =
+ get_var_kill_data(ctx, spid, tpid, sig);
+ if (kill_data == NULL)
+ return 0;
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
+ if (arr_struct->array[i].meta.pid == 0) {
+ bpf_probe_read(&arr_struct->array[i],
+ sizeof(arr_struct->array[i]), kill_data);
+ bpf_map_update_elem(&var_tpid_to_data, &tpid,
+ arr_struct, 0);
+
+ return 0;
+ }
+ return 0;
+ }
+
+ struct var_kill_data_t* kill_data = &arr_struct->array[index];
+
+ u64 delta_sec =
+ (bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;
+
+ if (delta_sec < STALE_INFO) {
+ kill_data->kill_count++;
+ kill_data->last_kill_time = bpf_ktime_get_ns();
+ bpf_probe_read(&arr_struct->array[index],
+ sizeof(arr_struct->array[index]),
+ kill_data);
+ } else {
+ struct var_kill_data_t* kill_data =
+ get_var_kill_data(ctx, spid, tpid, sig);
+ if (kill_data == NULL)
+ return 0;
+ bpf_probe_read(&arr_struct->array[index],
+ sizeof(arr_struct->array[index]),
+ kill_data);
+ }
+ }
+ bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
+ return 0;
+}
+
+static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
+ enum bpf_function_id func_id)
+{
+ int func_id_key = func_id;
+
+ bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
+ bpf_stat_ctx->bpf_func_stats_data_val =
+ bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
+ if (bpf_stat_ctx->bpf_func_stats_data_val)
+ bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
+}
+
+static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
+{
+ if (bpf_stat_ctx->bpf_func_stats_data_val)
+ bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
+ bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
+}
+
+static INLINE void
+bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
+ struct var_metadata_t* meta)
+{
+ if (bpf_stat_ctx->bpf_func_stats_data_val) {
+ bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
+ meta->bpf_stats_num_perf_events =
+ bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
+ }
+ meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
+ meta->cpu_id = bpf_get_smp_processor_id();
+}
+
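+/* Walk dentry parents up to MAX_PATH_DEPTH, appending each name component to
+ * payload back-to-back; returns the total number of bytes written.
+ */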
+static INLINE size_t
+read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
+{
+ size_t length = 0;
+ size_t filepart_length;
+ struct dentry* parent_dentry;
+
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (int i = 0; i < MAX_PATH_DEPTH; i++) {
+ filepart_length = bpf_probe_read_str(payload, MAX_PATH,
+ BPF_CORE_READ(filp_dentry, d_name.name));
+ barrier_var(filepart_length);
+ if (filepart_length > MAX_PATH)
+ break;
+ barrier_var(filepart_length);
+ payload += filepart_length;
+ length += filepart_length;
+
+ parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
+ if (filp_dentry == parent_dentry)
+ break;
+ filp_dentry = parent_dentry;
+ }
+
+ return length;
+}
+
+static INLINE bool
+is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
+{
+ struct dentry* parent_dentry;
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (int i = 0; i < MAX_PATH_DEPTH; i++) {
+ u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
+ bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);
+
+ if (allowed_dir != NULL)
+ return true;
+ parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
+ if (filp_dentry == parent_dentry)
+ break;
+ filp_dentry = parent_dentry;
+ }
+ return false;
+}
+
+static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
+ u32* device_id,
+ u64* file_ino)
+{
+ u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
+ *device_id = dev_id;
+ bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);
+
+ if (allowed_device == NULL)
+ return false;
+
+ u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
+ *file_ino = ino;
+ bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);
+
+ if (allowed_file == NULL)
+ if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
+ return false;
+ return true;
+}
+
+SEC("kprobe/proc_sys_write")
+ssize_t BPF_KPROBE(kprobe__proc_sys_write,
+ struct file* filp, const char* buf,
+ size_t count, loff_t* ppos)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);
+
+ u32 pid = get_userspace_pid();
+ int zero = 0;
+ struct var_sysctl_data_t* sysctl_data =
+ bpf_map_lookup_elem(&data_heap, &zero);
+ if (!sysctl_data)
+ goto out;
+
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+ sysctl_data->meta.type = SYSCTL_EVENT;
+ void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
+ payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);
+
+ populate_ancestors(task, &sysctl_data->ancestors_info);
+
+ sysctl_data->sysctl_val_length = 0;
+ sysctl_data->sysctl_path_length = 0;
+
+ size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf);
+ barrier_var(sysctl_val_length);
+ if (sysctl_val_length <= CTL_MAXNAME) {
+ barrier_var(sysctl_val_length);
+ sysctl_data->sysctl_val_length = sysctl_val_length;
+ payload += sysctl_val_length;
+ }
+
+ size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH,
+ BPF_CORE_READ(filp, f_path.dentry, d_name.name));
+ barrier_var(sysctl_path_length);
+ if (sysctl_path_length <= MAX_PATH) {
+ barrier_var(sysctl_path_length);
+ sysctl_data->sysctl_path_length = sysctl_path_length;
+ payload += sysctl_path_length;
+ }
+
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
+ unsigned long data_len = payload - (void*)sysctl_data;
+ data_len = data_len > sizeof(struct var_sysctl_data_t)
+ ? sizeof(struct var_sysctl_data_t)
+ : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("tracepoint/syscalls/sys_enter_kill")
+int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+
+ bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
+ int pid = ctx->args[0];
+ int sig = ctx->args[1];
+ int ret = trace_var_sys_kill(ctx, pid, sig);
+ bpf_stats_exit(&stats_ctx);
+ return ret;
+}
+
+SEC("raw_tracepoint/sched_process_exit")
+int raw_tracepoint__sched_process_exit(void* ctx)
+{
+ int zero = 0;
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
+
+ u32 tpid = get_userspace_pid();
+
+ struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
+ struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
+
+ if (arr_struct == NULL || kill_data == NULL)
+ goto out;
+
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+ struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
+
+#ifdef UNROLL
+#pragma unroll
+#endif
+ for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
+ struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
+
+ if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
+ bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data);
+ void* payload = kill_data->payload;
+ size_t offset = kill_data->payload_length;
+ if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
+ return 0;
+ payload += offset;
+
+ kill_data->kill_target_name_length = 0;
+ kill_data->kill_target_cgroup_proc_length = 0;
+
+ size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
+ barrier_var(comm_length);
+ if (comm_length <= TASK_COMM_LEN) {
+ barrier_var(comm_length);
+ kill_data->kill_target_name_length = comm_length;
+ payload += comm_length;
+ }
+
+ size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN,
+ BPF_CORE_READ(proc_kernfs, name));
+ barrier_var(cgroup_proc_length);
+ if (cgroup_proc_length <= KILL_TARGET_LEN) {
+ barrier_var(cgroup_proc_length);
+ kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
+ payload += cgroup_proc_length;
+ }
+
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
+ unsigned long data_len = (void*)payload - (void*)kill_data;
+ data_len = data_len > sizeof(struct var_kill_data_t)
+ ? sizeof(struct var_kill_data_t)
+ : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
+ }
+ }
+ bpf_map_delete_elem(&var_tpid_to_data, &tpid);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("raw_tracepoint/sched_process_exec")
+int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
+
+ struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
+ u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
+
+ bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
+ if (should_filter_binprm != NULL)
+ goto out;
+
+ int zero = 0;
+ struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
+ if (!proc_exec_data)
+ goto out;
+
+ if (INODE_FILTER && inode != INODE_FILTER)
+ return 0;
+
+ u32 pid = get_userspace_pid();
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+ proc_exec_data->meta.type = EXEC_EVENT;
+ proc_exec_data->bin_path_length = 0;
+ proc_exec_data->cmdline_length = 0;
+ proc_exec_data->environment_length = 0;
+ void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
+ proc_exec_data->payload);
+ payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
+
+ struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
+ proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
+ proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
+ proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
+ proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
+
+ const char* filename = BPF_CORE_READ(bprm, filename);
+ size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename);
+ barrier_var(bin_path_length);
+ if (bin_path_length <= MAX_FILENAME_LEN) {
+ barrier_var(bin_path_length);
+ proc_exec_data->bin_path_length = bin_path_length;
+ payload += bin_path_length;
+ }
+
+ void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
+ void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
+ unsigned int cmdline_length = probe_read_lim(payload, arg_start,
+ arg_end - arg_start, MAX_ARGS_LEN);
+
+ if (cmdline_length <= MAX_ARGS_LEN) {
+ barrier_var(cmdline_length);
+ proc_exec_data->cmdline_length = cmdline_length;
+ payload += cmdline_length;
+ }
+
+ if (READ_ENVIRON_FROM_EXEC) {
+ void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
+ void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
+ unsigned long env_len = probe_read_lim(payload, env_start,
+ env_end - env_start, MAX_ENVIRON_LEN);
+		if (env_len <= MAX_ENVIRON_LEN) {
+ proc_exec_data->environment_length = env_len;
+ payload += env_len;
+ }
+ }
+
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
+ unsigned long data_len = payload - (void*)proc_exec_data;
+ data_len = data_len > sizeof(struct var_exec_data_t)
+ ? sizeof(struct var_exec_data_t)
+ : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
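+/* On return from do_filp_open(), report writable (O_WRONLY/O_RDWR),
+ * non-O_TMPFILE opens, skipping directory/char/block/fifo/socket inodes and
+ * dentries that do not pass the allow-list checks.
+ */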
+SEC("kretprobe/do_filp_open")
+int kprobe_ret__do_filp_open(struct pt_regs* ctx)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
+
+ struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
+
+ if (filp == NULL || IS_ERR(filp))
+ goto out;
+ unsigned int flags = BPF_CORE_READ(filp, f_flags);
+ if ((flags & (O_RDWR | O_WRONLY)) == 0)
+ goto out;
+ if ((flags & O_TMPFILE) > 0)
+ goto out;
+ struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
+ umode_t mode = BPF_CORE_READ(file_inode, i_mode);
+ if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
+ S_ISSOCK(mode))
+ goto out;
+
+ struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
+ u32 device_id = 0;
+ u64 file_ino = 0;
+ if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
+ goto out;
+
+ int zero = 0;
+ struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
+ if (!filemod_data)
+ goto out;
+
+ u32 pid = get_userspace_pid();
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+ filemod_data->meta.type = FILEMOD_EVENT;
+ filemod_data->fmod_type = FMOD_OPEN;
+ filemod_data->dst_flags = flags;
+ filemod_data->src_inode = 0;
+ filemod_data->dst_inode = file_ino;
+ filemod_data->src_device_id = 0;
+ filemod_data->dst_device_id = device_id;
+ filemod_data->src_filepath_length = 0;
+ filemod_data->dst_filepath_length = 0;
+
+ void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
+ filemod_data->payload);
+ payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
+
+ size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
+ barrier_var(len);
+ if (len <= MAX_FILEPATH_LENGTH) {
+ barrier_var(len);
+ payload += len;
+ filemod_data->dst_filepath_length = len;
+ }
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
+ unsigned long data_len = payload - (void*)filemod_data;
+ data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("kprobe/vfs_link")
+int BPF_KPROBE(kprobe__vfs_link,
+ struct dentry* old_dentry, struct inode* dir,
+ struct dentry* new_dentry, struct inode** delegated_inode)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
+
+ u32 src_device_id = 0;
+ u64 src_file_ino = 0;
+ u32 dst_device_id = 0;
+ u64 dst_file_ino = 0;
+ if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
+ !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
+ goto out;
+
+ int zero = 0;
+ struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
+ if (!filemod_data)
+ goto out;
+
+ u32 pid = get_userspace_pid();
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+ filemod_data->meta.type = FILEMOD_EVENT;
+ filemod_data->fmod_type = FMOD_LINK;
+ filemod_data->dst_flags = 0;
+ filemod_data->src_inode = src_file_ino;
+ filemod_data->dst_inode = dst_file_ino;
+ filemod_data->src_device_id = src_device_id;
+ filemod_data->dst_device_id = dst_device_id;
+ filemod_data->src_filepath_length = 0;
+ filemod_data->dst_filepath_length = 0;
+
+ void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
+ filemod_data->payload);
+ payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
+
+ size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
+ barrier_var(len);
+ if (len <= MAX_FILEPATH_LENGTH) {
+ barrier_var(len);
+ payload += len;
+ filemod_data->src_filepath_length = len;
+ }
+
+ len = read_absolute_file_path_from_dentry(new_dentry, payload);
+ barrier_var(len);
+ if (len <= MAX_FILEPATH_LENGTH) {
+ barrier_var(len);
+ payload += len;
+ filemod_data->dst_filepath_length = len;
+ }
+
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
+ unsigned long data_len = payload - (void*)filemod_data;
+ data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("kprobe/vfs_symlink")
+int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
+ const char* oldname)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);
+
+ u32 dst_device_id = 0;
+ u64 dst_file_ino = 0;
+ if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
+ goto out;
+
+ int zero = 0;
+ struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
+ if (!filemod_data)
+ goto out;
+
+ u32 pid = get_userspace_pid();
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+ filemod_data->meta.type = FILEMOD_EVENT;
+ filemod_data->fmod_type = FMOD_SYMLINK;
+ filemod_data->dst_flags = 0;
+ filemod_data->src_inode = 0;
+ filemod_data->dst_inode = dst_file_ino;
+ filemod_data->src_device_id = 0;
+ filemod_data->dst_device_id = dst_device_id;
+ filemod_data->src_filepath_length = 0;
+ filemod_data->dst_filepath_length = 0;
+
+ void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
+ filemod_data->payload);
+ payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
+
+ size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname);
+ barrier_var(len);
+ if (len <= MAX_FILEPATH_LENGTH) {
+ barrier_var(len);
+ payload += len;
+ filemod_data->src_filepath_length = len;
+ }
+ len = read_absolute_file_path_from_dentry(dentry, payload);
+ barrier_var(len);
+ if (len <= MAX_FILEPATH_LENGTH) {
+ barrier_var(len);
+ payload += len;
+ filemod_data->dst_filepath_length = len;
+ }
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
+ unsigned long data_len = payload - (void*)filemod_data;
+ data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("raw_tracepoint/sched_process_fork")
+int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
+
+ int zero = 0;
+ struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
+ if (!fork_data)
+ goto out;
+
+ struct task_struct* parent = (struct task_struct*)ctx->args[0];
+ struct task_struct* child = (struct task_struct*)ctx->args[1];
+ fork_data->meta.type = FORK_EVENT;
+
+ void* payload = populate_var_metadata(&fork_data->meta, child,
+ BPF_CORE_READ(child, pid), fork_data->payload);
+ fork_data->parent_pid = BPF_CORE_READ(parent, pid);
+ fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
+ fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
+
+ unsigned long data_len = payload - (void*)fork_data;
+ data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/profiler1.c b/tools/testing/selftests/bpf/progs/profiler1.c
new file mode 100644
index 000000000000..4df9088bfc00
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler1.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
+#define UNROLL
+#define INLINE __always_inline
+#include "profiler.inc.h"
diff --git a/tools/testing/selftests/bpf/progs/profiler2.c b/tools/testing/selftests/bpf/progs/profiler2.c
new file mode 100644
index 000000000000..0f32a3cbf556
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler2.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define barrier_var(var) /**/
+/* undef #define UNROLL */
+#define INLINE /**/
+#include "profiler.inc.h"
diff --git a/tools/testing/selftests/bpf/progs/profiler3.c b/tools/testing/selftests/bpf/progs/profiler3.c
new file mode 100644
index 000000000000..6249fc31ccb0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler3.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define barrier_var(var) /**/
+#define UNROLL
+#define INLINE __noinline
+#include "profiler.inc.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index cc615b82b56e..2fb7adafb6b6 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -67,7 +67,12 @@ typedef struct {
void* co_name; // PyCodeObject.co_name
} FrameData;
-static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void *get_thread_state(void *tls_base, PidData *pidData)
{
void* thread_state;
int key;
@@ -155,7 +160,9 @@ struct {
} stackmap SEC(".maps");
#ifdef GLOBAL_FUNC
-__attribute__((noinline))
+__noinline
+#elif defined(SUBPROGS)
+static __noinline
#else
static __always_inline
#endif
diff --git a/tools/testing/selftests/bpf/progs/pyperf_subprogs.c b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c
new file mode 100644
index 000000000000..60e27a7f0cca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define STACK_MAX_LEN 50
+#define SUBPROGS
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
new file mode 100644
index 000000000000..123607d314d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+const volatile int batch_cnt = 0;
+const volatile long use_output = 0;
+
+long sample_val = 42;
+long dropped __attribute__((aligned(128))) = 0;
+
+const volatile long wakeup_data_size = 0;
+
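+/* Pick commit flags: when wakeup_data_size is set, force a consumer wakeup
+ * only once at least that many bytes are pending in the ring buffer and
+ * suppress it otherwise; when unset, fall back to the default notification.
+ */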
+static __always_inline long get_flags()
+{
+ long sz;
+
+ if (!wakeup_data_size)
+ return 0;
+
+ sz = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+ return sz >= wakeup_data_size ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int bench_ringbuf(void *ctx)
+{
+ long *sample, flags;
+ int i;
+
+ if (!use_output) {
+ for (i = 0; i < batch_cnt; i++) {
+ sample = bpf_ringbuf_reserve(&ringbuf,
+ sizeof(sample_val), 0);
+ if (!sample) {
+ __sync_add_and_fetch(&dropped, 1);
+ } else {
+ *sample = sample_val;
+ flags = get_flags();
+ bpf_ringbuf_submit(sample, flags);
+ }
+ }
+ } else {
+ for (i = 0; i < batch_cnt; i++) {
+ flags = get_flags();
+ if (bpf_ringbuf_output(&ringbuf, &sample_val,
+ sizeof(sample_val), flags))
+ __sync_add_and_fetch(&dropped, 1);
+ }
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index d5a5eeb5fb52..712df7b49cb1 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -8,6 +8,10 @@
char _license[] SEC("license") = "GPL";
__u32 _version SEC("version") = 1;
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
#define SOL_CUSTOM 0xdeadbeef
struct sockopt_sk {
@@ -28,12 +32,14 @@ int _getsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
- if (ctx->level == SOL_IP && ctx->optname == IP_TOS)
+ if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
+ ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
return 1;
+ }
if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
/* Not interested in SOL_SOCKET:SO_SNDBUF;
@@ -51,6 +57,26 @@ int _getsockopt(struct bpf_sockopt *ctx)
return 1;
}
+ if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ ctx->retval = 0; /* Reset system call return value to zero */
+
+ /* Always export 0x55 */
+ optval[0] = 0x55;
+ ctx->optlen = 1;
+
+ /* Userspace buffer is PAGE_SIZE * 2, but BPF
+ * program can only see the first PAGE_SIZE
+ * bytes of data.
+ */
+ if (optval_end - optval != PAGE_SIZE)
+ return 0; /* EPERM, unexpected data size */
+
+ return 1;
+ }
+
if (ctx->level != SOL_CUSTOM)
return 0; /* EPERM, deny everything except custom level */
@@ -81,12 +107,14 @@ int _setsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
- if (ctx->level == SOL_IP && ctx->optname == IP_TOS)
+ if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
+ ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
return 1;
+ }
if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
/* Overwrite SO_SNDBUF value */
@@ -112,6 +140,28 @@ int _setsockopt(struct bpf_sockopt *ctx)
return 1;
}
+ if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
+ /* Original optlen is larger than PAGE_SIZE. */
+ if (ctx->optlen != PAGE_SIZE * 2)
+ return 0; /* EPERM, unexpected data size */
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ /* Make sure we can trim the buffer. */
+ optval[0] = 0;
+ ctx->optlen = 1;
+
+	/* Userspace buffer is PAGE_SIZE * 2, but BPF
+ * program can only see the first PAGE_SIZE
+ * bytes of data.
+ */
+ if (optval_end - optval != PAGE_SIZE)
+ return 0; /* EPERM, unexpected data size */
+
+ return 1;
+ }
+
if (ctx->level != SOL_CUSTOM)
return 0; /* EPERM, deny everything except custom level */
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index ad61b722a9de..7de534f38c3f 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -266,8 +266,12 @@ struct tls_index {
uint64_t offset;
};
-static __always_inline void *calc_location(struct strobe_value_loc *loc,
- void *tls_base)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void *calc_location(struct strobe_value_loc *loc, void *tls_base)
{
/*
* tls_mode value is:
@@ -327,10 +331,15 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc,
: NULL;
}
-static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
- size_t idx, void *tls_base,
- struct strobe_value_generic *value,
- struct strobemeta_payload *data)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void read_int_var(struct strobemeta_cfg *cfg,
+ size_t idx, void *tls_base,
+ struct strobe_value_generic *value,
+ struct strobemeta_payload *data)
{
void *location = calc_location(&cfg->int_locs[idx], tls_base);
if (!location)
@@ -440,8 +449,13 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
* read_strobe_meta returns NULL, if no metadata was read; otherwise returns
* pointer to *right after* payload ends
*/
-static __always_inline void *read_strobe_meta(struct task_struct *task,
- struct strobemeta_payload *data)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void *read_strobe_meta(struct task_struct *task,
+ struct strobemeta_payload *data)
{
pid_t pid = bpf_get_current_pid_tgid() >> 32;
struct strobe_value_generic value = {0};
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c
new file mode 100644
index 000000000000..b6c01f8fc559
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2019 Facebook
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 13
+#define STROBE_MAX_MAP_ENTRIES 20
+#define NO_UNROLL
+#define SUBPROGS
+#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c
index 1f407e65ae52..7115bcefbe8a 100644
--- a/tools/testing/selftests/bpf/progs/tailcall1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall1.c
@@ -26,20 +26,20 @@ int entry(struct __sk_buff *skb)
/* Multiple locations to make sure we patch
* all of them.
*/
- bpf_tail_call(skb, &jmp_table, 0);
- bpf_tail_call(skb, &jmp_table, 0);
- bpf_tail_call(skb, &jmp_table, 0);
- bpf_tail_call(skb, &jmp_table, 0);
-
- bpf_tail_call(skb, &jmp_table, 1);
- bpf_tail_call(skb, &jmp_table, 1);
- bpf_tail_call(skb, &jmp_table, 1);
- bpf_tail_call(skb, &jmp_table, 1);
-
- bpf_tail_call(skb, &jmp_table, 2);
- bpf_tail_call(skb, &jmp_table, 2);
- bpf_tail_call(skb, &jmp_table, 2);
- bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
+
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
+
+ bpf_tail_call_static(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
return 3;
}
diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c
index a093e739cf0e..0431e4fe7efd 100644
--- a/tools/testing/selftests/bpf/progs/tailcall2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall2.c
@@ -13,14 +13,14 @@ struct {
SEC("classifier/0")
int bpf_func_0(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
return 0;
}
SEC("classifier/1")
int bpf_func_1(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
return 1;
}
@@ -33,25 +33,25 @@ int bpf_func_2(struct __sk_buff *skb)
SEC("classifier/3")
int bpf_func_3(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 4);
+ bpf_tail_call_static(skb, &jmp_table, 4);
return 3;
}
SEC("classifier/4")
int bpf_func_4(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 3);
+ bpf_tail_call_static(skb, &jmp_table, 3);
return 4;
}
SEC("classifier")
int entry(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
/* Check multi-prog update. */
- bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
/* Check tail call limit. */
- bpf_tail_call(skb, &jmp_table, 3);
+ bpf_tail_call_static(skb, &jmp_table, 3);
return 3;
}
diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c
index cabda877cf0a..739dc2a51e74 100644
--- a/tools/testing/selftests/bpf/progs/tailcall3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall3.c
@@ -16,14 +16,14 @@ SEC("classifier/0")
int bpf_func_0(struct __sk_buff *skb)
{
count++;
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
return 1;
}
SEC("classifier")
int entry(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
new file mode 100644
index 000000000000..0103f3dd9f02
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+#define TAIL_FUNC(x) \
+ SEC("classifier/" #x) \
+ int bpf_func_##x(struct __sk_buff *skb) \
+ { \
+ return x; \
+ }
+TAIL_FUNC(0)
+TAIL_FUNC(1)
+
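+/* Tail call issued from inside a BPF-to-BPF subprogram; falls through to
+ * skb->len * 2 when slot 0 of jmp_table is empty.
+ */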
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 0);
+
+ return skb->len * 2;
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 1);
+
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
new file mode 100644
index 000000000000..7b1c04183824
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ if (load_byte(skb, 0))
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ else
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ return 1;
+}
+
+static volatile int count;
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+ count++;
+ return subprog_tail(skb);
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 0);
+
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
new file mode 100644
index 000000000000..0d5482bea6c9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+__noinline
+int subprog_tail2(struct __sk_buff *skb)
+{
+ volatile char arr[64] = {};
+
+ if (load_word(skb, 0) || load_half(skb, 0))
+ bpf_tail_call_static(skb, &jmp_table, 10);
+ else
+ bpf_tail_call_static(skb, &jmp_table, 1);
+
+ return skb->len;
+}
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ volatile char arr[64] = {};
+
+ bpf_tail_call_static(skb, &jmp_table, 0);
+
+ return skb->len * 2;
+}
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+ volatile char arr[128] = {};
+
+ return subprog_tail2(skb);
+}
+
+SEC("classifier/1")
+int bpf_func_1(struct __sk_buff *skb)
+{
+ volatile char arr[128] = {};
+
+ return skb->len * 3;
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ volatile char arr[128] = {};
+
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
new file mode 100644
index 000000000000..9a1b166b7fbe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 3);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static volatile int count;
+
+__noinline
+int subprog_tail_2(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 2);
+ return skb->len * 3;
+}
+
+__noinline
+int subprog_tail_1(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ return skb->len * 2;
+}
+
+__noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ return skb->len;
+}
+
+SEC("classifier/1")
+int bpf_func_1(struct __sk_buff *skb)
+{
+ return subprog_tail_2(skb);
+}
+
+SEC("classifier/2")
+int bpf_func_2(struct __sk_buff *skb)
+{
+ count++;
+ return subprog_tail_2(skb);
+}
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+ return subprog_tail_1(skb);
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_autoload.c b/tools/testing/selftests/bpf/progs/test_autoload.c
new file mode 100644
index 000000000000..62c8cdec6d5d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_autoload.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+bool prog1_called = false;
+bool prog2_called = false;
+bool prog3_called = false;
+
+SEC("raw_tp/sys_enter")
+int prog1(const void *ctx)
+{
+ prog1_called = true;
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int prog2(const void *ctx)
+{
+ prog2_called = true;
+ return 0;
+}
+
+struct fake_kernel_struct {
+ int whatever;
+} __attribute__((preserve_access_index));
+
+SEC("fentry/unexisting-kprobe-will-fail-if-loaded")
+int prog3(const void *ctx)
+{
+ struct fake_kernel_struct *fake = (void *)ctx;
+ fake->whatever = 123;
+ prog3_called = true;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
new file mode 100644
index 000000000000..c1e0c8c7c55f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} inner_map1 SEC(".maps"),
+ inner_map2 SEC(".maps");
+
+struct inner_map_sz2 {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, int);
+} inner_map_sz2 SEC(".maps");
+
+struct outer_arr {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 3);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+	/* it's possible to use an anonymous struct as the inner map definition here */
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ /* changing max_entries to 2 will fail during load
+ * due to incompatibility with inner_map definition */
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ });
+} outer_arr SEC(".maps") = {
+ /* (void *) cast is necessary because we didn't use `struct inner_map`
+	 * in __array(values, ...).
+ * Actually, a conscious effort is required to screw up initialization
+ * of inner map slots, which is a great thing!
+ */
+ .values = { (void *)&inner_map1, 0, (void *)&inner_map2 },
+};
+
+struct inner_map_sz3 {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(map_flags, BPF_F_INNER_MAP);
+ __uint(max_entries, 3);
+ __type(key, int);
+ __type(value, int);
+} inner_map3 SEC(".maps"),
+ inner_map4 SEC(".maps");
+
+struct inner_map_sz4 {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(map_flags, BPF_F_INNER_MAP);
+ __uint(max_entries, 5);
+ __type(key, int);
+ __type(value, int);
+} inner_map5 SEC(".maps");
+
+struct outer_arr_dyn {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 3);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(map_flags, BPF_F_INNER_MAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ });
+} outer_arr_dyn SEC(".maps") = {
+ .values = {
+ [0] = (void *)&inner_map3,
+ [1] = (void *)&inner_map4,
+ [2] = (void *)&inner_map5,
+ },
+};
+
+struct outer_hash {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 5);
+ __uint(key_size, sizeof(int));
+	/* Everything here works thanks to the reuse of struct inner_map, and
+	 * the compiler will complain at any attempt to use non-inner_map
+	 * references below, which is exactly what we want.
+	 */
+ __array(values, struct inner_map);
+} outer_hash SEC(".maps") = {
+ .values = {
+ [0] = &inner_map2,
+ [4] = &inner_map1,
+ },
+};
+
+struct sockarr_sz1 {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} sockarr_sz1 SEC(".maps");
+
+struct sockarr_sz2 {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, int);
+} sockarr_sz2 SEC(".maps");
+
+struct outer_sockarr_sz1 {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct sockarr_sz1);
+} outer_sockarr SEC(".maps") = {
+ .values = { (void *)&sockarr_sz1 },
+};
+
+int input = 0;
+
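+/* A lookup in an outer ARRAY_OF_MAPS/HASH_OF_MAPS map yields a pointer to
+ * the inner map, which can then be passed to the usual map helpers, as
+ * done with bpf_map_update_elem() below.
+ */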
+SEC("raw_tp/sys_enter")
+int handle__sys_enter(void *ctx)
+{
+ struct inner_map *inner_map;
+ int key = 0, val;
+
+ inner_map = bpf_map_lookup_elem(&outer_arr, &key);
+ if (!inner_map)
+ return 1;
+ val = input;
+ bpf_map_update_elem(inner_map, &key, &val, 0);
+
+ inner_map = bpf_map_lookup_elem(&outer_hash, &key);
+ if (!inner_map)
+ return 1;
+ val = input + 1;
+ bpf_map_update_elem(inner_map, &key, &val, 0);
+
+ inner_map = bpf_map_lookup_elem(&outer_arr_dyn, &key);
+ if (!inner_map)
+ return 1;
+ val = input + 2;
+ bpf_map_update_elem(inner_map, &key, &val, 0);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
new file mode 100644
index 000000000000..9a6b85dd52d2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <string.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tcp_helpers.h"
+
+struct sockaddr_in6 srv_sa6 = {};
+__u16 listen_tp_sport = 0;
+__u16 req_sk_sport = 0;
+__u32 recv_cookie = 0;
+__u32 gen_cookie = 0;
+__u32 linum = 0;
+
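+/* Record the line of the first unexpected condition; userspace is
+ * expected to read linum back to pinpoint the failure.
+ */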
+#define LOG() ({ if (!linum) linum = __LINE__; })
+
+static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th,
+ struct tcp_sock *tp,
+ struct __sk_buff *skb)
+{
+ if (th->syn) {
+ __s64 mss_cookie;
+ void *data_end;
+
+ data_end = (void *)(long)(skb->data_end);
+
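+		/* doff is in 32-bit words, so 40 bytes means the 20-byte TCP
+		 * header plus 20 bytes of options -- the layout this test
+		 * expects for its SYN packets.
+		 */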
+ if (th->doff * 4 != 40) {
+ LOG();
+ return;
+ }
+
+ if ((void *)th + 40 > data_end) {
+ LOG();
+ return;
+ }
+
+ mss_cookie = bpf_tcp_gen_syncookie(tp, ip6h, sizeof(*ip6h),
+ th, 40);
+ if (mss_cookie < 0) {
+ if (mss_cookie != -ENOENT)
+ LOG();
+ } else {
+ gen_cookie = (__u32)mss_cookie;
+ }
+ } else if (gen_cookie) {
+ /* It was in cookie mode */
+ int ret = bpf_tcp_check_syncookie(tp, ip6h, sizeof(*ip6h),
+ th, sizeof(*th));
+
+ if (ret < 0) {
+ if (ret != -ENOENT)
+ LOG();
+ } else {
+ recv_cookie = bpf_ntohl(th->ack_seq) - 1;
+ }
+ }
+}
+
+static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple *tuple;
+ struct bpf_sock *bpf_skc;
+ unsigned int tuple_len;
+ struct tcphdr *th;
+ void *data_end;
+
+ data_end = (void *)(long)(skb->data_end);
+
+ th = (struct tcphdr *)(ip6h + 1);
+ if (th + 1 > data_end)
+ return TC_ACT_OK;
+
+ /* Is it the testing traffic? */
+ if (th->dest != srv_sa6.sin6_port)
+ return TC_ACT_OK;
+
+ tuple_len = sizeof(tuple->ipv6);
+ tuple = (struct bpf_sock_tuple *)&ip6h->saddr;
+ if ((void *)tuple + tuple_len > data_end) {
+ LOG();
+ return TC_ACT_OK;
+ }
+
+ bpf_skc = bpf_skc_lookup_tcp(skb, tuple, tuple_len,
+ BPF_F_CURRENT_NETNS, 0);
+ if (!bpf_skc) {
+ LOG();
+ return TC_ACT_OK;
+ }
+
+ if (bpf_skc->state == BPF_TCP_NEW_SYN_RECV) {
+ struct request_sock *req_sk;
+
+ req_sk = (struct request_sock *)bpf_skc_to_tcp_request_sock(bpf_skc);
+ if (!req_sk) {
+ LOG();
+ goto release;
+ }
+
+ if (bpf_sk_assign(skb, req_sk, 0)) {
+ LOG();
+ goto release;
+ }
+
+ req_sk_sport = req_sk->__req_common.skc_num;
+
+ bpf_sk_release(req_sk);
+ return TC_ACT_OK;
+ } else if (bpf_skc->state == BPF_TCP_LISTEN) {
+ struct tcp_sock *tp;
+
+ tp = bpf_skc_to_tcp_sock(bpf_skc);
+ if (!tp) {
+ LOG();
+ goto release;
+ }
+
+ if (bpf_sk_assign(skb, tp, 0)) {
+ LOG();
+ goto release;
+ }
+
+ listen_tp_sport = tp->inet_conn.icsk_inet.sk.__sk_common.skc_num;
+
+ test_syncookie_helper(ip6h, th, tp, skb);
+ bpf_sk_release(tp);
+ return TC_ACT_OK;
+ }
+
+ if (bpf_sk_assign(skb, bpf_skc, 0))
+ LOG();
+
+release:
+ bpf_sk_release(bpf_skc);
+ return TC_ACT_OK;
+}
+
+SEC("classifier/ingress")
+int cls_ingress(struct __sk_buff *skb)
+{
+ struct ipv6hdr *ip6h;
+ struct ethhdr *eth;
+ void *data_end;
+
+ data_end = (void *)(long)(skb->data_end);
+
+ eth = (struct ethhdr *)(long)(skb->data);
+ if (eth + 1 > data_end)
+ return TC_ACT_OK;
+
+ if (eth->h_proto != bpf_htons(ETH_P_IPV6))
+ return TC_ACT_OK;
+
+ ip6h = (struct ipv6hdr *)(eth + 1);
+ if (ip6h + 1 > data_end)
+ return TC_ACT_OK;
+
+ if (ip6h->nexthdr == IPPROTO_TCP)
+ return handle_ip6_tcp(ip6h, skb);
+
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
new file mode 100644
index 000000000000..c9f8464996ea
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -0,0 +1,1068 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2019, 2020 Cloudflare
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include "test_cls_redirect.h"
+
+#ifdef SUBPROGS
+#define INLINING __noinline
+#else
+#define INLINING __always_inline
+#endif
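+
+/* When SUBPROGS is defined (test_cls_redirect_subprogs.c defines it and
+ * then includes this file), every INLINING helper becomes a real
+ * BPF-to-BPF subprogram instead of being forced inline.
+ */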
+
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
+
+#define IP_OFFSET_MASK (0x1FFF)
+#define IP_MF (0x2000)
+
+char _license[] SEC("license") = "Dual BSD/GPL";
+
+/**
+ * Destination port and IP used for UDP encapsulation.
+ */
+static volatile const __be16 ENCAPSULATION_PORT;
+static volatile const __be32 ENCAPSULATION_IP;
+
+typedef struct {
+ uint64_t processed_packets_total;
+ uint64_t l3_protocol_packets_total_ipv4;
+ uint64_t l3_protocol_packets_total_ipv6;
+ uint64_t l4_protocol_packets_total_tcp;
+ uint64_t l4_protocol_packets_total_udp;
+ uint64_t accepted_packets_total_syn;
+ uint64_t accepted_packets_total_syn_cookies;
+ uint64_t accepted_packets_total_last_hop;
+ uint64_t accepted_packets_total_icmp_echo_request;
+ uint64_t accepted_packets_total_established;
+ uint64_t forwarded_packets_total_gue;
+ uint64_t forwarded_packets_total_gre;
+
+ uint64_t errors_total_unknown_l3_proto;
+ uint64_t errors_total_unknown_l4_proto;
+ uint64_t errors_total_malformed_ip;
+ uint64_t errors_total_fragmented_ip;
+ uint64_t errors_total_malformed_icmp;
+ uint64_t errors_total_unwanted_icmp;
+ uint64_t errors_total_malformed_icmp_pkt_too_big;
+ uint64_t errors_total_malformed_tcp;
+ uint64_t errors_total_malformed_udp;
+ uint64_t errors_total_icmp_echo_replies;
+ uint64_t errors_total_malformed_encapsulation;
+ uint64_t errors_total_encap_adjust_failed;
+ uint64_t errors_total_encap_buffer_too_small;
+ uint64_t errors_total_redirect_loop;
+} metrics_t;
+
+typedef enum {
+ INVALID = 0,
+ UNKNOWN,
+ ECHO_REQUEST,
+ SYN,
+ SYN_COOKIE,
+ ESTABLISHED,
+} verdict_t;
+
+typedef struct {
+ uint16_t src, dst;
+} flow_ports_t;
+
+_Static_assert(
+ sizeof(flow_ports_t) !=
+ offsetofend(struct bpf_sock_tuple, ipv4.dport) -
+ offsetof(struct bpf_sock_tuple, ipv4.sport) - 1,
+ "flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+_Static_assert(
+ sizeof(flow_ports_t) !=
+ offsetofend(struct bpf_sock_tuple, ipv6.dport) -
+ offsetof(struct bpf_sock_tuple, ipv6.sport) - 1,
+ "flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+
+typedef int ret_t;
+
+/* This is a bit of a hack. We need a return value which allows us to
+ * indicate that the regular flow of the program should continue,
+ * while allowing functions to use XDP_PASS and XDP_DROP, etc.
+ */
+static const ret_t CONTINUE_PROCESSING = -1;
+
+/* Convenience macro to call functions which return ret_t.
+ */
+#define MAYBE_RETURN(x) \
+ do { \
+ ret_t __ret = x; \
+ if (__ret != CONTINUE_PROCESSING) \
+ return __ret; \
+ } while (0)
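+
+/* Typical use: MAYBE_RETURN(get_next_hop(&pkt, encap, &next_hop));
+ * a TC verdict is propagated immediately, while CONTINUE_PROCESSING
+ * lets the caller carry on.
+ */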
+
+/* Linux packet pointers are either aligned to NET_IP_ALIGN (aka 2 bytes),
+ * or not aligned if the arch supports efficient unaligned access.
+ *
+ * Since the verifier ensures that eBPF packet accesses follow these rules,
+ * we can tell LLVM to emit code as if we always had a larger alignment.
+ * It will yell at us if we end up on a platform where this is not valid.
+ */
+typedef uint8_t *net_ptr __attribute__((align_value(8)));
+
+typedef struct buf {
+ struct __sk_buff *skb;
+ net_ptr head;
+	/* NB: tail mustn't have alignment other than 1, otherwise
+ * LLVM will go and eliminate code, e.g. when checking packet lengths.
+ */
+ uint8_t *const tail;
+} buf_t;
+
+static __always_inline size_t buf_off(const buf_t *buf)
+{
+ /* Clang seems to optimize constructs like
+ * a - b + c
+ * if c is known:
+ * r? = c
+ * r? -= b
+ * r? += a
+ *
+ * This is a problem if a and b are packet pointers,
+ * since the verifier allows subtracting two pointers to
+ * get a scalar, but not a scalar and a pointer.
+ *
+ * Use inline asm to break this optimization.
+ */
+ size_t off = (size_t)buf->head;
+ asm("%0 -= %1" : "+r"(off) : "r"(buf->skb->data));
+ return off;
+}
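+
+/* Without the asm barrier, clang could reassociate a later
+ * `head - data + len` into `len - data + head`, briefly forming a
+ * scalar-minus-pointer value the verifier rejects; pinning the
+ * subtraction in a register keeps it pointer-minus-pointer.
+ */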
+
+static __always_inline bool buf_copy(buf_t *buf, void *dst, size_t len)
+{
+ if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) {
+ return false;
+ }
+
+ buf->head += len;
+ return true;
+}
+
+static __always_inline bool buf_skip(buf_t *buf, const size_t len)
+{
+ /* Check whether off + len is valid in the non-linear part. */
+ if (buf_off(buf) + len > buf->skb->len) {
+ return false;
+ }
+
+ buf->head += len;
+ return true;
+}
+
+/* Returns a pointer to the current position in buf, or NULL if len is
+ * larger than the remaining data. Consumes len bytes on a successful
+ * call.
+ *
+ * If scratch is not NULL, the function will attempt to load non-linear
+ * data via bpf_skb_load_bytes. On success, scratch is returned.
+ */
+static __always_inline void *buf_assign(buf_t *buf, const size_t len, void *scratch)
+{
+ if (buf->head + len > buf->tail) {
+ if (scratch == NULL) {
+ return NULL;
+ }
+
+ return buf_copy(buf, scratch, len) ? scratch : NULL;
+ }
+
+ void *ptr = buf->head;
+ buf->head += len;
+ return ptr;
+}
+
+static INLINING bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
+{
+ if (ipv4->ihl <= 5) {
+ return true;
+ }
+
+ return buf_skip(buf, (ipv4->ihl - 5) * 4);
+}
+
+static INLINING bool ipv4_is_fragment(const struct iphdr *ip)
+{
+ uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
+ return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
+}
+
+static __always_inline struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
+{
+ struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch);
+ if (ipv4 == NULL) {
+ return NULL;
+ }
+
+ if (ipv4->ihl < 5) {
+ return NULL;
+ }
+
+ if (!pkt_skip_ipv4_options(pkt, ipv4)) {
+ return NULL;
+ }
+
+ return ipv4;
+}
+
+/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */
+static INLINING bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
+{
+ if (!buf_copy(pkt, ports, sizeof(*ports))) {
+ return false;
+ }
+
+ /* Ports in the L4 headers are reversed, since we are parsing an ICMP
+ * payload which is going towards the eyeball.
+ */
+ uint16_t dst = ports->src;
+ ports->src = ports->dst;
+ ports->dst = dst;
+ return true;
+}
+
+static INLINING uint16_t pkt_checksum_fold(uint32_t csum)
+{
+ /* The highest reasonable value for an IPv4 header
+ * checksum requires two folds, so we just do that always.
+ */
+ csum = (csum & 0xffff) + (csum >> 16);
+ csum = (csum & 0xffff) + (csum >> 16);
+ return (uint16_t)~csum;
+}
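+
+/* Worked example: the largest sum produced by pkt_ipv4_checksum() below
+ * is 9 * 0xffff = 0x8fff7; one fold gives 0xfff7 + 0x8 = 0xffff and the
+ * second fold is then a no-op, so two folds always suffice.
+ */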
+
+static INLINING void pkt_ipv4_checksum(struct iphdr *iph)
+{
+ iph->check = 0;
+
+ /* An IP header without options is 20 bytes. Two of those
+ * are the checksum, which we always set to zero. Hence,
+ * the maximum accumulated value is 18 / 2 * 0xffff = 0x8fff7,
+ * which fits in 32 bit.
+ */
+ _Static_assert(sizeof(struct iphdr) == 20, "iphdr must be 20 bytes");
+ uint32_t acc = 0;
+ uint16_t *ipw = (uint16_t *)iph;
+
+#pragma clang loop unroll(full)
+ for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) {
+ acc += ipw[i];
+ }
+
+ iph->check = pkt_checksum_fold(acc);
+}
+
+static INLINING
+bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
+ const struct ipv6hdr *ipv6,
+ uint8_t *upper_proto,
+ bool *is_fragment)
+{
+ /* We understand five extension headers.
+ * https://tools.ietf.org/html/rfc8200#section-4.1 states that all
+ * headers should occur once, except Destination Options, which may
+ * occur twice. Hence we give up after 6 headers.
+ */
+ struct {
+ uint8_t next;
+ uint8_t len;
+ } exthdr = {
+ .next = ipv6->nexthdr,
+ };
+ *is_fragment = false;
+
+#pragma clang loop unroll(full)
+ for (int i = 0; i < 6; i++) {
+ switch (exthdr.next) {
+ case IPPROTO_FRAGMENT:
+ *is_fragment = true;
+ /* NB: We don't check that hdrlen == 0 as per spec. */
+ /* fallthrough; */
+
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_MH:
+ if (!buf_copy(pkt, &exthdr, sizeof(exthdr))) {
+ return false;
+ }
+
+ /* hdrlen is in 8-octet units, and excludes the first 8 octets. */
+ if (!buf_skip(pkt,
+ (exthdr.len + 1) * 8 - sizeof(exthdr))) {
+ return false;
+ }
+
+ /* Decode next header */
+ break;
+
+ default:
+ /* The next header is not one of the known extension
+ * headers, treat it as the upper layer header.
+ *
+ * This handles IPPROTO_NONE.
+ *
+ * Encapsulating Security Payload (50) and Authentication
+ * Header (51) also end up here (and will trigger an
+ * unknown proto error later). They have a custom header
+ * format and seem too esoteric to care about.
+ */
+ *upper_proto = exthdr.next;
+ return true;
+ }
+ }
+
+ /* We never found an upper layer header. */
+ return false;
+}
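+
+/* Example: a Destination Options header with len == 1 spans
+ * (1 + 1) * 8 = 16 octets; buf_copy() above consumed the first two
+ * (next/len), so buf_skip() advances over the remaining 14.
+ */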
+
+/* This function has to be inlined, because the verifier otherwise rejects it
+ * due to returning a pointer to the stack. This is technically correct, since
+ * scratch is allocated on the stack. However, this usage should be safe since
+ * it's the caller's stack after all.
+ */
+static __always_inline struct ipv6hdr *
+pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
+ bool *is_fragment)
+{
+ struct ipv6hdr *ipv6 = buf_assign(pkt, sizeof(*ipv6), scratch);
+ if (ipv6 == NULL) {
+ return NULL;
+ }
+
+ if (!pkt_skip_ipv6_extension_headers(pkt, ipv6, proto, is_fragment)) {
+ return NULL;
+ }
+
+ return ipv6;
+}
+
+/* Global metrics, per CPU
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, unsigned int);
+ __type(value, metrics_t);
+} metrics_map SEC(".maps");
+
+static INLINING metrics_t *get_global_metrics(void)
+{
+ uint64_t key = 0;
+ return bpf_map_lookup_elem(&metrics_map, &key);
+}
+
+static INLINING ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
+{
+ const int payload_off =
+ sizeof(*encap) +
+ sizeof(struct in_addr) * encap->unigue.hop_count;
+ int32_t encap_overhead = payload_off - sizeof(struct ethhdr);
+
+ // Changing the ethertype if the encapsulated packet is ipv6
+ if (encap->gue.proto_ctype == IPPROTO_IPV6) {
+ encap->eth.h_proto = bpf_htons(ETH_P_IPV6);
+ }
+
+ if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC,
+ BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+ bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC))
+ return TC_ACT_SHOT;
+
+ return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
+ struct in_addr *next_hop, metrics_t *metrics)
+{
+ metrics->forwarded_packets_total_gre++;
+
+ const int payload_off =
+ sizeof(*encap) +
+ sizeof(struct in_addr) * encap->unigue.hop_count;
+ int32_t encap_overhead =
+ payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr);
+ int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead;
+ uint16_t proto = ETH_P_IP;
+
+ /* Loop protection: the inner packet's TTL is decremented as a safeguard
+	 * against any forwarding loop. Since the only field we touch is the
+	 * TTL (hop limit for IPv6), it is easier to use
+	 * bpf_skb_load_bytes/bpf_skb_store_bytes, which handle split packets
+	 * if needed (the data does not have to be in the linear section).
+ */
+ if (encap->gue.proto_ctype == IPPROTO_IPV6) {
+ proto = ETH_P_IPV6;
+ uint8_t ttl;
+ int rc;
+
+ rc = bpf_skb_load_bytes(
+ skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+ &ttl, 1);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (ttl == 0) {
+ metrics->errors_total_redirect_loop++;
+ return TC_ACT_SHOT;
+ }
+
+ ttl--;
+ rc = bpf_skb_store_bytes(
+ skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+ &ttl, 1, 0);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+ } else {
+ uint8_t ttl;
+ int rc;
+
+ rc = bpf_skb_load_bytes(
+ skb, payload_off + offsetof(struct iphdr, ttl), &ttl,
+ 1);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (ttl == 0) {
+ metrics->errors_total_redirect_loop++;
+ return TC_ACT_SHOT;
+ }
+
+ /* IPv4 also has a checksum to patch. While the TTL is only one byte,
+		 * bpf_l3_csum_replace() only accepts 2- and 4-byte arguments
+		 * (the result is the same either way).
+ */
+ rc = bpf_l3_csum_replace(
+ skb, payload_off + offsetof(struct iphdr, check), ttl,
+ ttl - 1, 2);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ ttl--;
+ rc = bpf_skb_store_bytes(
+ skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 1,
+ 0);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+ }
+
+ if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET,
+ BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+ bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) {
+ metrics->errors_total_encap_adjust_failed++;
+ return TC_ACT_SHOT;
+ }
+
+ if (bpf_skb_pull_data(skb, sizeof(encap_gre_t))) {
+ metrics->errors_total_encap_buffer_too_small++;
+ return TC_ACT_SHOT;
+ }
+
+ buf_t pkt = {
+ .skb = skb,
+ .head = (uint8_t *)(long)skb->data,
+ .tail = (uint8_t *)(long)skb->data_end,
+ };
+
+ encap_gre_t *encap_gre = buf_assign(&pkt, sizeof(encap_gre_t), NULL);
+ if (encap_gre == NULL) {
+ metrics->errors_total_encap_buffer_too_small++;
+ return TC_ACT_SHOT;
+ }
+
+ encap_gre->ip.protocol = IPPROTO_GRE;
+ encap_gre->ip.daddr = next_hop->s_addr;
+ encap_gre->ip.saddr = ENCAPSULATION_IP;
+ encap_gre->ip.tot_len =
+ bpf_htons(bpf_ntohs(encap_gre->ip.tot_len) + delta);
+ encap_gre->gre.flags = 0;
+ encap_gre->gre.protocol = bpf_htons(proto);
+ pkt_ipv4_checksum((void *)&encap_gre->ip);
+
+ return bpf_redirect(skb->ifindex, 0);
+}
+
+static INLINING ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
+ struct in_addr *next_hop, metrics_t *metrics)
+{
+ /* swap L2 addresses */
+ /* This assumes that packets are received from a router.
+ * So just swapping the MAC addresses here will make the packet go back to
+ * the router, which will send it to the appropriate machine.
+ */
+ unsigned char temp[ETH_ALEN];
+ memcpy(temp, encap->eth.h_dest, sizeof(temp));
+ memcpy(encap->eth.h_dest, encap->eth.h_source,
+ sizeof(encap->eth.h_dest));
+ memcpy(encap->eth.h_source, temp, sizeof(encap->eth.h_source));
+
+ if (encap->unigue.next_hop == encap->unigue.hop_count - 1 &&
+ encap->unigue.last_hop_gre) {
+ return forward_with_gre(skb, encap, next_hop, metrics);
+ }
+
+ metrics->forwarded_packets_total_gue++;
+ uint32_t old_saddr = encap->ip.saddr;
+ encap->ip.saddr = encap->ip.daddr;
+ encap->ip.daddr = next_hop->s_addr;
+ if (encap->unigue.next_hop < encap->unigue.hop_count) {
+ encap->unigue.next_hop++;
+ }
+
+ /* Remove ip->saddr, add next_hop->s_addr */
+ const uint64_t off = offsetof(typeof(*encap), ip.check);
+ int ret = bpf_l3_csum_replace(skb, off, old_saddr, next_hop->s_addr, 4);
+ if (ret < 0) {
+ return TC_ACT_SHOT;
+ }
+
+ return bpf_redirect(skb->ifindex, 0);
+}
+
+static INLINING ret_t skip_next_hops(buf_t *pkt, int n)
+{
+ switch (n) {
+ case 1:
+ if (!buf_skip(pkt, sizeof(struct in_addr)))
+ return TC_ACT_SHOT;
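+		/* fall through: after skipping one hop, same as n == 0 */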
+ case 0:
+ return CONTINUE_PROCESSING;
+
+ default:
+ return TC_ACT_SHOT;
+ }
+}
+
+/* Get the next hop from the GLB header.
+ *
+ * Sets next_hop->s_addr to 0 if there are no more hops left.
+ * pkt is positioned just after the variable length GLB header
+ * iff the call is successful.
+ */
+static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
+ struct in_addr *next_hop)
+{
+ if (encap->unigue.next_hop > encap->unigue.hop_count) {
+ return TC_ACT_SHOT;
+ }
+
+ /* Skip "used" next hops. */
+ MAYBE_RETURN(skip_next_hops(pkt, encap->unigue.next_hop));
+
+ if (encap->unigue.next_hop == encap->unigue.hop_count) {
+ /* No more next hops, we are at the end of the GLB header. */
+ next_hop->s_addr = 0;
+ return CONTINUE_PROCESSING;
+ }
+
+ if (!buf_copy(pkt, next_hop, sizeof(*next_hop))) {
+ return TC_ACT_SHOT;
+ }
+
+	/* Skip the remaining next hops (may be zero). */
+ return skip_next_hops(pkt, encap->unigue.hop_count -
+ encap->unigue.next_hop - 1);
+}
+
+/* Fill a bpf_sock_tuple to be used with the socket lookup functions.
+ * This is a kludge that lets us work around verifier limitations:
+ *
+ * fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321)
+ *
+ * clang will substitute a constant for sizeof, which allows the verifier
+ * to track its value. Based on this, it can figure out the constant
+ * return value, and calling code works while still being "generic" to
+ * IPv4 and IPv6.
+ */
+static INLINING uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
+ uint64_t iphlen, uint16_t sport, uint16_t dport)
+{
+ switch (iphlen) {
+ case sizeof(struct iphdr): {
+ struct iphdr *ipv4 = (struct iphdr *)iph;
+ tuple->ipv4.daddr = ipv4->daddr;
+ tuple->ipv4.saddr = ipv4->saddr;
+ tuple->ipv4.sport = sport;
+ tuple->ipv4.dport = dport;
+ return sizeof(tuple->ipv4);
+ }
+
+ case sizeof(struct ipv6hdr): {
+ struct ipv6hdr *ipv6 = (struct ipv6hdr *)iph;
+ memcpy(&tuple->ipv6.daddr, &ipv6->daddr,
+ sizeof(tuple->ipv6.daddr));
+ memcpy(&tuple->ipv6.saddr, &ipv6->saddr,
+ sizeof(tuple->ipv6.saddr));
+ tuple->ipv6.sport = sport;
+ tuple->ipv6.dport = dport;
+ return sizeof(tuple->ipv6);
+ }
+
+ default:
+ return 0;
+ }
+}
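+
+/* E.g. fill_tuple(&tuple, iph, sizeof(struct iphdr), tcp->source,
+ * tcp->dest) keeps only the ipv4 branch after constant folding, so the
+ * verifier sees the constant sizeof(tuple->ipv4) flow into tuplen.
+ */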
+
+static INLINING verdict_t classify_tcp(struct __sk_buff *skb,
+ struct bpf_sock_tuple *tuple, uint64_t tuplen,
+ void *iph, struct tcphdr *tcp)
+{
+ struct bpf_sock *sk =
+ bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+ if (sk == NULL) {
+ return UNKNOWN;
+ }
+
+ if (sk->state != BPF_TCP_LISTEN) {
+ bpf_sk_release(sk);
+ return ESTABLISHED;
+ }
+
+ if (iph != NULL && tcp != NULL) {
+ /* Kludge: we've run out of arguments, but need the length of the ip header. */
+ uint64_t iphlen = sizeof(struct iphdr);
+ if (tuplen == sizeof(tuple->ipv6)) {
+ iphlen = sizeof(struct ipv6hdr);
+ }
+
+ if (bpf_tcp_check_syncookie(sk, iph, iphlen, tcp,
+ sizeof(*tcp)) == 0) {
+ bpf_sk_release(sk);
+ return SYN_COOKIE;
+ }
+ }
+
+ bpf_sk_release(sk);
+ return UNKNOWN;
+}
+
+static INLINING verdict_t classify_udp(struct __sk_buff *skb,
+ struct bpf_sock_tuple *tuple, uint64_t tuplen)
+{
+ struct bpf_sock *sk =
+ bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+ if (sk == NULL) {
+ return UNKNOWN;
+ }
+
+ if (sk->state == BPF_TCP_ESTABLISHED) {
+ bpf_sk_release(sk);
+ return ESTABLISHED;
+ }
+
+ bpf_sk_release(sk);
+ return UNKNOWN;
+}
+
+static INLINING verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
+ struct bpf_sock_tuple *tuple, uint64_t tuplen,
+ metrics_t *metrics)
+{
+ switch (proto) {
+ case IPPROTO_TCP:
+ return classify_tcp(skb, tuple, tuplen, NULL, NULL);
+
+ case IPPROTO_UDP:
+ return classify_udp(skb, tuple, tuplen);
+
+ default:
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+}
+
+static INLINING verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
+{
+ struct icmphdr icmp;
+ if (!buf_copy(pkt, &icmp, sizeof(icmp))) {
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+
+ /* We should never receive encapsulated echo replies. */
+ if (icmp.type == ICMP_ECHOREPLY) {
+ metrics->errors_total_icmp_echo_replies++;
+ return INVALID;
+ }
+
+ if (icmp.type == ICMP_ECHO) {
+ return ECHO_REQUEST;
+ }
+
+ if (icmp.type != ICMP_DEST_UNREACH || icmp.code != ICMP_FRAG_NEEDED) {
+ metrics->errors_total_unwanted_icmp++;
+ return INVALID;
+ }
+
+ struct iphdr _ip4;
+ const struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4);
+ if (ipv4 == NULL) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ /* The source address in the outer IP header is from the entity that
+ * originated the ICMP message. Use the original IP header to restore
+ * the correct flow tuple.
+ */
+ struct bpf_sock_tuple tuple;
+ tuple.ipv4.saddr = ipv4->daddr;
+ tuple.ipv4.daddr = ipv4->saddr;
+
+ if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv4.sport)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ return classify_icmp(pkt->skb, ipv4->protocol, &tuple,
+ sizeof(tuple.ipv4), metrics);
+}
+
+static INLINING verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
+{
+ struct icmp6hdr icmp6;
+ if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) {
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+
+ /* We should never receive encapsulated echo replies. */
+ if (icmp6.icmp6_type == ICMPV6_ECHO_REPLY) {
+ metrics->errors_total_icmp_echo_replies++;
+ return INVALID;
+ }
+
+ if (icmp6.icmp6_type == ICMPV6_ECHO_REQUEST) {
+ return ECHO_REQUEST;
+ }
+
+ if (icmp6.icmp6_type != ICMPV6_PKT_TOOBIG) {
+ metrics->errors_total_unwanted_icmp++;
+ return INVALID;
+ }
+
+ bool is_fragment;
+ uint8_t l4_proto;
+ struct ipv6hdr _ipv6;
+ const struct ipv6hdr *ipv6 =
+ pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment);
+ if (ipv6 == NULL) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ if (is_fragment) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ /* Swap source and dest addresses. */
+ struct bpf_sock_tuple tuple;
+ memcpy(&tuple.ipv6.saddr, &ipv6->daddr, sizeof(tuple.ipv6.saddr));
+ memcpy(&tuple.ipv6.daddr, &ipv6->saddr, sizeof(tuple.ipv6.daddr));
+
+ if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv6.sport)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ return classify_icmp(pkt->skb, l4_proto, &tuple, sizeof(tuple.ipv6),
+ metrics);
+}
+
+static INLINING verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
+ metrics_t *metrics)
+{
+ metrics->l4_protocol_packets_total_tcp++;
+
+ struct tcphdr _tcp;
+ struct tcphdr *tcp = buf_assign(pkt, sizeof(_tcp), &_tcp);
+ if (tcp == NULL) {
+ metrics->errors_total_malformed_tcp++;
+ return INVALID;
+ }
+
+ if (tcp->syn) {
+ return SYN;
+ }
+
+ struct bpf_sock_tuple tuple;
+ uint64_t tuplen =
+ fill_tuple(&tuple, iph, iphlen, tcp->source, tcp->dest);
+ return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp);
+}
+
+static INLINING verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
+ metrics_t *metrics)
+{
+ metrics->l4_protocol_packets_total_udp++;
+
+ struct udphdr _udp;
+ struct udphdr *udph = buf_assign(pkt, sizeof(_udp), &_udp);
+ if (udph == NULL) {
+ metrics->errors_total_malformed_udp++;
+ return INVALID;
+ }
+
+ struct bpf_sock_tuple tuple;
+ uint64_t tuplen =
+ fill_tuple(&tuple, iph, iphlen, udph->source, udph->dest);
+ return classify_udp(pkt->skb, &tuple, tuplen);
+}
+
+static INLINING verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
+{
+ metrics->l3_protocol_packets_total_ipv4++;
+
+ struct iphdr _ip4;
+ struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4);
+ if (ipv4 == NULL) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv4->version != 4) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv4_is_fragment(ipv4)) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ switch (ipv4->protocol) {
+ case IPPROTO_ICMP:
+ return process_icmpv4(pkt, metrics);
+
+ case IPPROTO_TCP:
+ return process_tcp(pkt, ipv4, sizeof(*ipv4), metrics);
+
+ case IPPROTO_UDP:
+ return process_udp(pkt, ipv4, sizeof(*ipv4), metrics);
+
+ default:
+ metrics->errors_total_unknown_l4_proto++;
+ return INVALID;
+ }
+}
+
+static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
+{
+ metrics->l3_protocol_packets_total_ipv6++;
+
+ uint8_t l4_proto;
+ bool is_fragment;
+ struct ipv6hdr _ipv6;
+ struct ipv6hdr *ipv6 =
+ pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment);
+ if (ipv6 == NULL) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv6->version != 6) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (is_fragment) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ switch (l4_proto) {
+ case IPPROTO_ICMPV6:
+ return process_icmpv6(pkt, metrics);
+
+ case IPPROTO_TCP:
+ return process_tcp(pkt, ipv6, sizeof(*ipv6), metrics);
+
+ case IPPROTO_UDP:
+ return process_udp(pkt, ipv6, sizeof(*ipv6), metrics);
+
+ default:
+ metrics->errors_total_unknown_l4_proto++;
+ return INVALID;
+ }
+}
+
+SEC("classifier/cls_redirect")
+int cls_redirect(struct __sk_buff *skb)
+{
+ metrics_t *metrics = get_global_metrics();
+ if (metrics == NULL) {
+ return TC_ACT_SHOT;
+ }
+
+ metrics->processed_packets_total++;
+
+ /* Pass bogus packets as long as we're not sure they're
+ * destined for us.
+ */
+ if (skb->protocol != bpf_htons(ETH_P_IP)) {
+ return TC_ACT_OK;
+ }
+
+ encap_headers_t *encap;
+
+ /* Make sure that all encapsulation headers are available in
+ * the linear portion of the skb. This makes it easy to manipulate them.
+ */
+ if (bpf_skb_pull_data(skb, sizeof(*encap))) {
+ return TC_ACT_OK;
+ }
+
+ buf_t pkt = {
+ .skb = skb,
+ .head = (uint8_t *)(long)skb->data,
+ .tail = (uint8_t *)(long)skb->data_end,
+ };
+
+ encap = buf_assign(&pkt, sizeof(*encap), NULL);
+ if (encap == NULL) {
+ return TC_ACT_OK;
+ }
+
+ if (encap->ip.ihl != 5) {
+ /* We never have any options. */
+ return TC_ACT_OK;
+ }
+
+ if (encap->ip.daddr != ENCAPSULATION_IP ||
+ encap->ip.protocol != IPPROTO_UDP) {
+ return TC_ACT_OK;
+ }
+
+ /* TODO Check UDP length? */
+ if (encap->udp.dest != ENCAPSULATION_PORT) {
+ return TC_ACT_OK;
+ }
+
+	/* We now know that the packet is destined for us, so we can
+ * drop bogus ones.
+ */
+ if (ipv4_is_fragment((void *)&encap->ip)) {
+ metrics->errors_total_fragmented_ip++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.variant != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.control != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.flags != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
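+	/* gue.hlen counts 32-bit words after the base GUE header: one for
+	 * struct unigue plus one per hop entry (each in_addr is one word).
+	 */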
+ if (encap->gue.hlen !=
+ sizeof(encap->unigue) / 4 + encap->unigue.hop_count) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->unigue.version != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->unigue.reserved != 0) {
+ return TC_ACT_SHOT;
+ }
+
+ struct in_addr next_hop;
+ MAYBE_RETURN(get_next_hop(&pkt, encap, &next_hop));
+
+ if (next_hop.s_addr == 0) {
+ metrics->accepted_packets_total_last_hop++;
+ return accept_locally(skb, encap);
+ }
+
+ verdict_t verdict;
+ switch (encap->gue.proto_ctype) {
+ case IPPROTO_IPIP:
+ verdict = process_ipv4(&pkt, metrics);
+ break;
+
+ case IPPROTO_IPV6:
+ verdict = process_ipv6(&pkt, metrics);
+ break;
+
+ default:
+ metrics->errors_total_unknown_l3_proto++;
+ return TC_ACT_SHOT;
+ }
+
+ switch (verdict) {
+ case INVALID:
+ /* metrics have already been bumped */
+ return TC_ACT_SHOT;
+
+ case UNKNOWN:
+ return forward_to_next_hop(skb, encap, &next_hop, metrics);
+
+ case ECHO_REQUEST:
+ metrics->accepted_packets_total_icmp_echo_request++;
+ break;
+
+ case SYN:
+ if (encap->unigue.forward_syn) {
+ return forward_to_next_hop(skb, encap, &next_hop,
+ metrics);
+ }
+
+ metrics->accepted_packets_total_syn++;
+ break;
+
+ case SYN_COOKIE:
+ metrics->accepted_packets_total_syn_cookies++;
+ break;
+
+ case ESTABLISHED:
+ metrics->accepted_packets_total_established++;
+ break;
+ }
+
+ return accept_locally(skb, encap);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.h b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
new file mode 100644
index 000000000000..76eab0aacba0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright 2019, 2020 Cloudflare */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+
+struct gre_base_hdr {
+ uint16_t flags;
+ uint16_t protocol;
+} __attribute__((packed));
+
+struct guehdr {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ uint8_t hlen : 5, control : 1, variant : 2;
+#else
+ uint8_t variant : 2, control : 1, hlen : 5;
+#endif
+ uint8_t proto_ctype;
+ uint16_t flags;
+};
+
+struct unigue {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ uint8_t _r : 2, last_hop_gre : 1, forward_syn : 1, version : 4;
+#else
+ uint8_t version : 4, forward_syn : 1, last_hop_gre : 1, _r : 2;
+#endif
+ uint8_t reserved;
+ uint8_t next_hop;
+ uint8_t hop_count;
+ // Next hops go here
+} __attribute__((packed));
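+
+/* next_hop indexes into the in_addr hop list that follows this header on
+ * the wire; hop_count is the total number of entries. get_next_hop() in
+ * test_cls_redirect.c consumes the header accordingly.
+ */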
+
+typedef struct {
+ struct ethhdr eth;
+ struct iphdr ip;
+ struct gre_base_hdr gre;
+} __attribute__((packed)) encap_gre_t;
+
+typedef struct {
+ struct ethhdr eth;
+ struct iphdr ip;
+ struct udphdr udp;
+ struct guehdr gue;
+ struct unigue unigue;
+} __attribute__((packed)) encap_headers_t;
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c
new file mode 100644
index 000000000000..eed26b70e3a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c
@@ -0,0 +1,2 @@
+#define SUBPROGS
+#include "test_cls_redirect.c"
diff --git a/tools/testing/selftests/bpf/progs/test_core_autosize.c b/tools/testing/selftests/bpf/progs/test_core_autosize.c
new file mode 100644
index 000000000000..44f5aa2e8956
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_autosize.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* fields of exactly the same size */
+struct test_struct___samesize {
+ void *ptr;
+ unsigned long long val1;
+ unsigned int val2;
+ unsigned short val3;
+ unsigned char val4;
+} __attribute__((preserve_access_index));
+
+/* unsigned fields that have to be downsized by libbpf */
+struct test_struct___downsize {
+ void *ptr;
+ unsigned long val1;
+ unsigned long val2;
+ unsigned long val3;
+ unsigned long val4;
+ /* total sz: 40 */
+} __attribute__((preserve_access_index));
+
+/* fields with signed integers of wrong size, should be rejected */
+struct test_struct___signed {
+ void *ptr;
+ long val1;
+ long val2;
+ long val3;
+ long val4;
+} __attribute__((preserve_access_index));
+
+/* real layout and sizes according to test's (32-bit) BTF */
+struct test_struct___real {
+	unsigned int ptr; /* can't use `void *`, it is always 8 bytes on the BPF target */
+ unsigned int val2;
+ unsigned long long val1;
+ unsigned short val3;
+ unsigned char val4;
+ unsigned char _pad;
+ /* total sz: 20 */
+};
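+
+/* Assuming natural alignment for the 8-byte val1, the offsets come out
+ * as ptr@0, val2@4, val1@8, val3@16, val4@18, _pad@19 -- 20 bytes total.
+ */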
+
+struct test_struct___real input = {
+ .ptr = 0x01020304,
+ .val1 = 0x1020304050607080,
+ .val2 = 0x0a0b0c0d,
+ .val3 = 0xfeed,
+ .val4 = 0xb9,
+ ._pad = 0xff, /* make sure no accidental zeros are present */
+};
+
+unsigned long long ptr_samesized = 0;
+unsigned long long val1_samesized = 0;
+unsigned long long val2_samesized = 0;
+unsigned long long val3_samesized = 0;
+unsigned long long val4_samesized = 0;
+struct test_struct___real output_samesized = {};
+
+unsigned long long ptr_downsized = 0;
+unsigned long long val1_downsized = 0;
+unsigned long long val2_downsized = 0;
+unsigned long long val3_downsized = 0;
+unsigned long long val4_downsized = 0;
+struct test_struct___real output_downsized = {};
+
+unsigned long long ptr_probed = 0;
+unsigned long long val1_probed = 0;
+unsigned long long val2_probed = 0;
+unsigned long long val3_probed = 0;
+unsigned long long val4_probed = 0;
+
+unsigned long long ptr_signed = 0;
+unsigned long long val1_signed = 0;
+unsigned long long val2_signed = 0;
+unsigned long long val3_signed = 0;
+unsigned long long val4_signed = 0;
+struct test_struct___real output_signed = {};
+
+SEC("raw_tp/sys_exit")
+int handle_samesize(void *ctx)
+{
+ struct test_struct___samesize *in = (void *)&input;
+ struct test_struct___samesize *out = (void *)&output_samesized;
+
+ ptr_samesized = (unsigned long long)in->ptr;
+ val1_samesized = in->val1;
+ val2_samesized = in->val2;
+ val3_samesized = in->val3;
+ val4_samesized = in->val4;
+
+ out->ptr = in->ptr;
+ out->val1 = in->val1;
+ out->val2 = in->val2;
+ out->val3 = in->val3;
+ out->val4 = in->val4;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int handle_downsize(void *ctx)
+{
+ struct test_struct___downsize *in = (void *)&input;
+ struct test_struct___downsize *out = (void *)&output_downsized;
+
+ ptr_downsized = (unsigned long long)in->ptr;
+ val1_downsized = in->val1;
+ val2_downsized = in->val2;
+ val3_downsized = in->val3;
+ val4_downsized = in->val4;
+
+ out->ptr = in->ptr;
+ out->val1 = in->val1;
+ out->val2 = in->val2;
+ out->val3 = in->val3;
+ out->val4 = in->val4;
+
+ return 0;
+}
+
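+/* Instead of direct field loads, probe each field with bpf_core_read()
+ * sized by bpf_core_field_size(), so a field that shrank in the target
+ * BTF is read with its actual (relocated) size.
+ */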
+SEC("raw_tp/sys_enter")
+int handle_probed(void *ctx)
+{
+ struct test_struct___downsize *in = (void *)&input;
+ __u64 tmp;
+
+ tmp = 0;
+ bpf_core_read(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
+ ptr_probed = tmp;
+
+ tmp = 0;
+ bpf_core_read(&tmp, bpf_core_field_size(in->val1), &in->val1);
+ val1_probed = tmp;
+
+ tmp = 0;
+ bpf_core_read(&tmp, bpf_core_field_size(in->val2), &in->val2);
+ val2_probed = tmp;
+
+ tmp = 0;
+ bpf_core_read(&tmp, bpf_core_field_size(in->val3), &in->val3);
+ val3_probed = tmp;
+
+ tmp = 0;
+ bpf_core_read(&tmp, bpf_core_field_size(in->val4), &in->val4);
+ val4_probed = tmp;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_signed(void *ctx)
+{
+ struct test_struct___signed *in = (void *)&input;
+ struct test_struct___signed *out = (void *)&output_signed;
+
+ val2_signed = in->val2;
+ val3_signed = in->val3;
+ val4_signed = in->val4;
+
+	out->val2 = in->val2;
+	out->val3 = in->val3;
+	out->val4 = in->val4;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c
new file mode 100644
index 000000000000..e7ef3dada2bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+enum named_enum {
+ NAMED_ENUM_VAL1 = 1,
+ NAMED_ENUM_VAL2 = 2,
+ NAMED_ENUM_VAL3 = 3,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1 = 0x10,
+ ANON_ENUM_VAL2 = 0x20,
+ ANON_ENUM_VAL3 = 0x30,
+} anon_enum;
+
+struct core_reloc_enumval_output {
+ bool named_val1_exists;
+ bool named_val2_exists;
+ bool named_val3_exists;
+ bool anon_val1_exists;
+ bool anon_val2_exists;
+ bool anon_val3_exists;
+
+ int named_val1;
+ int named_val2;
+ int anon_val1;
+ int anon_val2;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_enumval(void *ctx)
+{
+#if __has_builtin(__builtin_preserve_enum_value)
+ struct core_reloc_enumval_output *out = (void *)&data.out;
+ enum named_enum named = 0;
+ anon_enum anon = 0;
+
+ out->named_val1_exists = bpf_core_enum_value_exists(named, NAMED_ENUM_VAL1);
+ out->named_val2_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL2);
+ out->named_val3_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL3);
+
+ out->anon_val1_exists = bpf_core_enum_value_exists(anon, ANON_ENUM_VAL1);
+ out->anon_val2_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL2);
+ out->anon_val3_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL3);
+
+ out->named_val1 = bpf_core_enum_value(named, NAMED_ENUM_VAL1);
+ out->named_val2 = bpf_core_enum_value(named, NAMED_ENUM_VAL2);
+ /* NAMED_ENUM_VAL3 value is optional */
+
+ out->anon_val1 = bpf_core_enum_value(anon, ANON_ENUM_VAL1);
+ out->anon_val2 = bpf_core_enum_value(anon, ANON_ENUM_VAL2);
+ /* ANON_ENUM_VAL3 value is optional */
+#else
+ data.skip = true;
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
index aba928fd60d3..145028b52ad8 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <stdint.h>
+#include <stdbool.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
@@ -11,6 +12,7 @@ char _license[] SEC("license") = "GPL";
struct {
char in[256];
char out[256];
+ bool skip;
uint64_t my_pid_tgid;
} data = {};
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
new file mode 100644
index 000000000000..fb60f8195c53
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+struct a_struct {
+ int x;
+};
+
+union a_union {
+ int y;
+ int z;
+};
+
+typedef struct a_struct named_struct_typedef;
+
+typedef struct { int x, y, z; } anon_struct_typedef;
+
+typedef struct {
+ int a, b, c;
+} *struct_ptr_typedef;
+
+enum an_enum {
+ AN_ENUM_VAL1 = 1,
+ AN_ENUM_VAL2 = 2,
+ AN_ENUM_VAL3 = 3,
+};
+
+typedef int int_typedef;
+
+typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
+
+typedef void *void_ptr_typedef;
+
+typedef int (*func_proto_typedef)(long);
+
+typedef char arr_typedef[20];
+
+struct core_reloc_type_based_output {
+ bool struct_exists;
+ bool union_exists;
+ bool enum_exists;
+ bool typedef_named_struct_exists;
+ bool typedef_anon_struct_exists;
+ bool typedef_struct_ptr_exists;
+ bool typedef_int_exists;
+ bool typedef_enum_exists;
+ bool typedef_void_ptr_exists;
+ bool typedef_func_proto_exists;
+ bool typedef_arr_exists;
+
+ int struct_sz;
+ int union_sz;
+ int enum_sz;
+ int typedef_named_struct_sz;
+ int typedef_anon_struct_sz;
+ int typedef_struct_ptr_sz;
+ int typedef_int_sz;
+ int typedef_enum_sz;
+ int typedef_void_ptr_sz;
+ int typedef_func_proto_sz;
+ int typedef_arr_sz;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_type_based(void *ctx)
+{
+#if __has_builtin(__builtin_preserve_type_info)
+ struct core_reloc_type_based_output *out = (void *)&data.out;
+
+ out->struct_exists = bpf_core_type_exists(struct a_struct);
+ out->union_exists = bpf_core_type_exists(union a_union);
+ out->enum_exists = bpf_core_type_exists(enum an_enum);
+ out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef);
+ out->typedef_anon_struct_exists = bpf_core_type_exists(anon_struct_typedef);
+ out->typedef_struct_ptr_exists = bpf_core_type_exists(struct_ptr_typedef);
+ out->typedef_int_exists = bpf_core_type_exists(int_typedef);
+ out->typedef_enum_exists = bpf_core_type_exists(enum_typedef);
+ out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef);
+ out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef);
+ out->typedef_arr_exists = bpf_core_type_exists(arr_typedef);
+
+ out->struct_sz = bpf_core_type_size(struct a_struct);
+ out->union_sz = bpf_core_type_size(union a_union);
+ out->enum_sz = bpf_core_type_size(enum an_enum);
+ out->typedef_named_struct_sz = bpf_core_type_size(named_struct_typedef);
+ out->typedef_anon_struct_sz = bpf_core_type_size(anon_struct_typedef);
+ out->typedef_struct_ptr_sz = bpf_core_type_size(struct_ptr_typedef);
+ out->typedef_int_sz = bpf_core_type_size(int_typedef);
+ out->typedef_enum_sz = bpf_core_type_size(enum_typedef);
+ out->typedef_void_ptr_sz = bpf_core_type_size(void_ptr_typedef);
+ out->typedef_func_proto_sz = bpf_core_type_size(func_proto_typedef);
+ out->typedef_arr_sz = bpf_core_type_size(arr_typedef);
+#else
+ data.skip = true;
+#endif
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
new file mode 100644
index 000000000000..22aba3f6e344
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+/* some types are shared with test_core_reloc_type_based.c */
+struct a_struct {
+ int x;
+};
+
+union a_union {
+ int y;
+ int z;
+};
+
+enum an_enum {
+ AN_ENUM_VAL1 = 1,
+ AN_ENUM_VAL2 = 2,
+ AN_ENUM_VAL3 = 3,
+};
+
+typedef struct a_struct named_struct_typedef;
+
+typedef int (*func_proto_typedef)(long);
+
+typedef char arr_typedef[20];
+
+struct core_reloc_type_id_output {
+ int local_anon_struct;
+ int local_anon_union;
+ int local_anon_enum;
+ int local_anon_func_proto_ptr;
+ int local_anon_void_ptr;
+ int local_anon_arr;
+
+ int local_struct;
+ int local_union;
+ int local_enum;
+ int local_int;
+ int local_struct_typedef;
+ int local_func_proto_typedef;
+ int local_arr_typedef;
+
+ int targ_struct;
+ int targ_union;
+ int targ_enum;
+ int targ_int;
+ int targ_struct_typedef;
+ int targ_func_proto_typedef;
+ int targ_arr_typedef;
+};
+
+/* preserve types even if Clang doesn't support the built-in */
+struct a_struct t1 = {};
+union a_union t2 = {};
+enum an_enum t3 = 0;
+named_struct_typedef t4 = {};
+func_proto_typedef t5 = 0;
+arr_typedef t6 = {};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_type_id(void *ctx)
+{
+	/* We use __builtin_btf_type_id() in this test, but up until the time
+ * __builtin_preserve_type_info() was added it contained a bug that
+ * would make this test fail. The bug was fixed ([0]) with addition of
+ * __builtin_preserve_type_info(), though, so that's what we are using
+ * to detect whether this test has to be executed, however strange
+	 * that might look.
+ *
+ * [0] https://reviews.llvm.org/D85174
+ */
+#if __has_builtin(__builtin_preserve_type_info)
+ struct core_reloc_type_id_output *out = (void *)&data.out;
+
+ out->local_anon_struct = bpf_core_type_id_local(struct { int marker_field; });
+ out->local_anon_union = bpf_core_type_id_local(union { int marker_field; });
+ out->local_anon_enum = bpf_core_type_id_local(enum { MARKER_ENUM_VAL = 123 });
+ out->local_anon_func_proto_ptr = bpf_core_type_id_local(_Bool(*)(int));
+ out->local_anon_void_ptr = bpf_core_type_id_local(void *);
+ out->local_anon_arr = bpf_core_type_id_local(_Bool[47]);
+
+ out->local_struct = bpf_core_type_id_local(struct a_struct);
+ out->local_union = bpf_core_type_id_local(union a_union);
+ out->local_enum = bpf_core_type_id_local(enum an_enum);
+ out->local_int = bpf_core_type_id_local(int);
+ out->local_struct_typedef = bpf_core_type_id_local(named_struct_typedef);
+ out->local_func_proto_typedef = bpf_core_type_id_local(func_proto_typedef);
+ out->local_arr_typedef = bpf_core_type_id_local(arr_typedef);
+
+ out->targ_struct = bpf_core_type_id_kernel(struct a_struct);
+ out->targ_union = bpf_core_type_id_kernel(union a_union);
+ out->targ_enum = bpf_core_type_id_kernel(enum an_enum);
+ out->targ_int = bpf_core_type_id_kernel(int);
+ out->targ_struct_typedef = bpf_core_type_id_kernel(named_struct_typedef);
+ out->targ_func_proto_typedef = bpf_core_type_id_kernel(func_proto_typedef);
+ out->targ_arr_typedef = bpf_core_type_id_kernel(arr_typedef);
+#else
+ data.skip = true;
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_retro.c b/tools/testing/selftests/bpf/progs/test_core_retro.c
new file mode 100644
index 000000000000..20861ec2f674
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_retro.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+struct task_struct {
+ int tgid;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} exp_tgid_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} results SEC(".maps");
+
+SEC("tp/raw_syscalls/sys_enter")
+int handle_sys_enter(void *ctx)
+{
+ struct task_struct *task = (void *)bpf_get_current_task();
+ int tgid = BPF_CORE_READ(task, tgid);
+ int zero = 0;
+ int real_tgid = bpf_get_current_pid_tgid() >> 32;
+ int *exp_tgid = bpf_map_lookup_elem(&exp_tgid_map, &zero);
+
+ /* only pass through sys_enters from test process */
+ if (!exp_tgid || *exp_tgid != real_tgid)
+ return 0;
+
+ bpf_map_update_elem(&results, &zero, &tgid, 0);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c
new file mode 100644
index 000000000000..84e1f883f97b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_d_path.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define MAX_PATH_LEN 128
+#define MAX_FILES 7
+
+pid_t my_pid = 0;
+__u32 cnt_stat = 0;
+__u32 cnt_close = 0;
+char paths_stat[MAX_FILES][MAX_PATH_LEN] = {};
+char paths_close[MAX_FILES][MAX_PATH_LEN] = {};
+int rets_stat[MAX_FILES] = {};
+int rets_close[MAX_FILES] = {};
+
+int called_stat = 0;
+int called_close = 0;
+
+SEC("fentry/security_inode_getattr")
+int BPF_PROG(prog_stat, struct path *path, struct kstat *stat,
+ __u32 request_mask, unsigned int query_flags)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ __u32 cnt = cnt_stat;
+ int ret;
+
+ called_stat = 1;
+
+ if (pid != my_pid)
+ return 0;
+
+ if (cnt >= MAX_FILES)
+ return 0;
+ ret = bpf_d_path(path, paths_stat[cnt], MAX_PATH_LEN);
+
+ rets_stat[cnt] = ret;
+ cnt_stat++;
+ return 0;
+}
+
+SEC("fentry/filp_close")
+int BPF_PROG(prog_close, struct file *file, void *id)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ __u32 cnt = cnt_close;
+ int ret;
+
+ called_close = 1;
+
+ if (pid != my_pid)
+ return 0;
+
+ if (cnt >= MAX_FILES)
+ return 0;
+ ret = bpf_d_path(&file->f_path,
+ paths_close[cnt], MAX_PATH_LEN);
+
+ rets_close[cnt] = ret;
+ cnt_close++;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_enable_stats.c b/tools/testing/selftests/bpf/progs/test_enable_stats.c
new file mode 100644
index 000000000000..01a002ade529
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_enable_stats.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 count = 0;
+
+SEC("raw_tracepoint/sys_enter")
+int test_enable_stats(void *ctx)
+{
+ count += 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_endian.c b/tools/testing/selftests/bpf/progs/test_endian.c
new file mode 100644
index 000000000000..ddb687c5d125
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_endian.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define IN16 0x1234
+#define IN32 0x12345678U
+#define IN64 0x123456789abcdef0ULL
+
+__u16 in16 = 0;
+__u32 in32 = 0;
+__u64 in64 = 0;
+
+__u16 out16 = 0;
+__u32 out32 = 0;
+__u64 out64 = 0;
+
+__u16 const16 = 0;
+__u32 const32 = 0;
+__u64 const64 = 0;
+
+SEC("raw_tp/sys_enter")
+int sys_enter(const void *ctx)
+{
+ out16 = __builtin_bswap16(in16);
+ out32 = __builtin_bswap32(in32);
+ out64 = __builtin_bswap64(in64);
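+ /* the ___bpf_swab*() macros handle compile-time constants */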
+ const16 = ___bpf_swab16(IN16);
+ const32 = ___bpf_swab32(IN32);
+ const64 = ___bpf_swab64(IN64);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
index 29817a703984..b6a6eb279e54 100644
--- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
+++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
@@ -57,8 +57,9 @@ struct {
SEC("raw_tracepoint/sys_enter")
int bpf_prog1(void *ctx)
{
- int max_len, max_buildid_len, usize, ksize, total_size;
+ int max_len, max_buildid_len, total_size;
struct stack_trace_t *data;
+ long usize, ksize;
void *raw_data;
__u32 key = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms.c b/tools/testing/selftests/bpf/progs/test_ksyms.c
new file mode 100644
index 000000000000..6c9cbb5a3bdf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u64 out__bpf_link_fops = -1;
+__u64 out__bpf_link_fops1 = -1;
+__u64 out__btf_size = -1;
+__u64 out__per_cpu_start = -1;
+
+extern const void bpf_link_fops __ksym;
+extern const void __start_BTF __ksym;
+extern const void __stop_BTF __ksym;
+extern const void __per_cpu_start __ksym;
+/* non-existent symbol; weak, so it defaults to zero */
+extern const void bpf_link_fops1 __ksym __weak;
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ out__bpf_link_fops = (__u64)&bpf_link_fops;
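+ /* __start_BTF/__stop_BTF bracket the kernel's .BTF section, so their difference is its size */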
+ out__btf_size = (__u64)(&__stop_BTF - &__start_BTF);
+ out__per_cpu_start = (__u64)&__per_cpu_start;
+
+ out__bpf_link_fops1 = (__u64)&bpf_link_fops1;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c
new file mode 100644
index 000000000000..bb8ea9270f29
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Google */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+__u64 out__runqueues_addr = -1;
+__u64 out__bpf_prog_active_addr = -1;
+
+__u32 out__rq_cpu = -1; /* percpu struct fields */
+int out__bpf_prog_active = -1; /* percpu int */
+
+__u32 out__this_rq_cpu = -1;
+int out__this_bpf_prog_active = -1;
+
+__u32 out__cpu_0_rq_cpu = -1; /* cpu_rq(0)->cpu */
+
+extern const struct rq runqueues __ksym; /* struct type global var. */
+extern const int bpf_prog_active __ksym; /* int type global var. */
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ struct rq *rq;
+ int *active;
+ __u32 cpu;
+
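+ /* typed ksyms are resolved from kallsyms and kernel BTF at load time */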
+ out__runqueues_addr = (__u64)&runqueues;
+ out__bpf_prog_active_addr = (__u64)&bpf_prog_active;
+
+ cpu = bpf_get_smp_processor_id();
+
+ /* test bpf_per_cpu_ptr() */
+ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu);
+ if (rq)
+ out__rq_cpu = rq->cpu;
+ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active)
+ out__bpf_prog_active = *active;
+
+ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
+ if (rq) /* should always be valid, but the verifier still requires the NULL check */
+ out__cpu_0_rq_cpu = rq->cpu;
+
+ /* test bpf_this_cpu_ptr */
+ rq = (struct rq *)bpf_this_cpu_ptr(&runqueues);
+ out__this_rq_cpu = rq->cpu;
+ active = (int *)bpf_this_cpu_ptr(&bpf_prog_active);
+ out__this_bpf_prog_active = *active;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c
new file mode 100644
index 000000000000..8bc8f7c637bc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+extern const struct rq runqueues __ksym; /* struct type global var. */
+extern const int bpf_prog_active __ksym; /* int type global var. */
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ struct rq *rq;
+ int *active;
+ __u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu);
+ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* READ_ONCE */
+ *(volatile int *)active;
+ /* rq has not been NULL-checked, so the verifier should reject this access. */
+ *(volatile int *)(&rq->cpu);
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index 28351936a438..b9e2753f4f91 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -17,9 +17,7 @@
#include "test_iptunnel_common.h"
#include <bpf/bpf_endian.h>
-int _version SEC("version") = 1;
-
-static __u32 rol32(__u32 word, unsigned int shift)
+static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
return (word << shift) | (word >> ((-shift) & 31));
}
@@ -52,7 +50,7 @@ static __u32 rol32(__u32 word, unsigned int shift)
typedef unsigned int u32;
-static u32 jhash(const void *key, u32 length, u32 initval)
+static __noinline u32 jhash(const void *key, u32 length, u32 initval)
{
u32 a, b, c;
const unsigned char *k = key;
@@ -88,7 +86,7 @@ static u32 jhash(const void *key, u32 length, u32 initval)
return c;
}
-static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
{
a += initval;
b += initval;
@@ -97,7 +95,7 @@ static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
return c;
}
-static u32 jhash_2words(u32 a, u32 b, u32 initval)
+static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval)
{
return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
}
@@ -200,8 +198,7 @@ struct {
__type(value, struct ctl_value);
} ctl_array SEC(".maps");
-static __u32 get_packet_hash(struct packet_description *pckt,
- bool ipv6)
+static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6)
{
if (ipv6)
return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
@@ -210,10 +207,10 @@ static __u32 get_packet_hash(struct packet_description *pckt,
return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
}
-static bool get_packet_dst(struct real_definition **real,
- struct packet_description *pckt,
- struct vip_meta *vip_info,
- bool is_ipv6)
+static __noinline bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6)
{
__u32 hash = get_packet_hash(pckt, is_ipv6);
__u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
@@ -233,8 +230,8 @@ static bool get_packet_dst(struct real_definition **real,
return true;
}
-static int parse_icmpv6(void *data, void *data_end, __u64 off,
- struct packet_description *pckt)
+static __noinline int parse_icmpv6(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
{
struct icmp6hdr *icmp_hdr;
struct ipv6hdr *ip6h;
@@ -255,8 +252,8 @@ static int parse_icmpv6(void *data, void *data_end, __u64 off,
return TC_ACT_UNSPEC;
}
-static int parse_icmp(void *data, void *data_end, __u64 off,
- struct packet_description *pckt)
+static __noinline int parse_icmp(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
{
struct icmphdr *icmp_hdr;
struct iphdr *iph;
@@ -280,8 +277,8 @@ static int parse_icmp(void *data, void *data_end, __u64 off,
return TC_ACT_UNSPEC;
}
-static bool parse_udp(void *data, __u64 off, void *data_end,
- struct packet_description *pckt)
+static __noinline bool parse_udp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
{
struct udphdr *udp;
udp = data + off;
@@ -299,8 +296,8 @@ static bool parse_udp(void *data, __u64 off, void *data_end,
return true;
}
-static bool parse_tcp(void *data, __u64 off, void *data_end,
- struct packet_description *pckt)
+static __noinline bool parse_tcp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
{
struct tcphdr *tcp;
@@ -321,8 +318,8 @@ static bool parse_tcp(void *data, __u64 off, void *data_end,
return true;
}
-static int process_packet(void *data, __u64 off, void *data_end,
- bool is_ipv6, struct __sk_buff *skb)
+static __noinline int process_packet(void *data, __u64 off, void *data_end,
+ bool is_ipv6, struct __sk_buff *skb)
{
void *pkt_start = (void *)(long)skb->data;
struct packet_description pckt = {};
diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
new file mode 100644
index 000000000000..6077a025092c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <stddef.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#define BPF_PROG_TEST_TCP_HDR_OPTIONS
+#include "test_tcp_hdr_options.h"
+
+__u16 last_addr16_n = __bpf_htons(1);
+__u16 active_lport_n = 0;
+__u16 active_lport_h = 0;
+__u16 passive_lport_n = 0;
+__u16 passive_lport_h = 0;
+
+/* options received at passive side */
+unsigned int nr_pure_ack = 0;
+unsigned int nr_data = 0;
+unsigned int nr_syn = 0;
+unsigned int nr_fin = 0;
+
+/* Check the header received from the active side */
+static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn)
+{
+ union {
+ struct tcphdr th;
+ struct ipv6hdr ip6;
+ struct tcp_exprm_opt exprm_opt;
+ struct tcp_opt reg_opt;
+ __u8 data[100]; /* IPv6 (40) + Max TCP hdr (60) */
+ } hdr = {};
+ __u64 load_flags = check_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
+ struct tcphdr *pth;
+ int ret;
+
+ hdr.reg_opt.kind = 0xB9;
+
+ /* The option is 4 bytes long instead of 2 bytes */
+ ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, 2, load_flags);
+ if (ret != -ENOSPC)
+ RET_CG_ERR(ret);
+
+ /* Test searching magic with regular kind */
+ hdr.reg_opt.len = 4;
+ ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
+ load_flags);
+ if (ret != -EINVAL)
+ RET_CG_ERR(ret);
+
+ hdr.reg_opt.len = 0;
+ ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
+ load_flags);
+ if (ret != 4 || hdr.reg_opt.len != 4 || hdr.reg_opt.kind != 0xB9 ||
+ hdr.reg_opt.data[0] != 0xfa || hdr.reg_opt.data[1] != 0xce)
+ RET_CG_ERR(ret);
+
+ /* Test searching experimental option with invalid kind length */
+ hdr.exprm_opt.kind = TCPOPT_EXP;
+ hdr.exprm_opt.len = 5;
+ hdr.exprm_opt.magic = 0;
+ ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
+ load_flags);
+ if (ret != -EINVAL)
+ RET_CG_ERR(ret);
+
+ /* Test searching experimental option with 0 magic value */
+ hdr.exprm_opt.len = 4;
+ ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
+ load_flags);
+ if (ret != -ENOMSG)
+ RET_CG_ERR(ret);
+
+ hdr.exprm_opt.magic = __bpf_htons(0xeB9F);
+ ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
+ load_flags);
+ if (ret != 4 || hdr.exprm_opt.len != 4 ||
+ hdr.exprm_opt.kind != TCPOPT_EXP ||
+ hdr.exprm_opt.magic != __bpf_htons(0xeB9F))
+ RET_CG_ERR(ret);
+
+ if (!check_syn)
+ return CG_OK;
+
+ /* Test loading from skops->syn_skb when sk_state == TCP_NEW_SYN_RECV,
+ * and from tp->saved_syn for any other sk_state.
+ */
+ ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr.ip6,
+ sizeof(hdr.ip6));
+ if (ret != -ENOSPC)
+ RET_CG_ERR(ret);
+
+ if (hdr.ip6.saddr.s6_addr16[7] != last_addr16_n ||
+ hdr.ip6.daddr.s6_addr16[7] != last_addr16_n)
+ RET_CG_ERR(0);
+
+ ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr, sizeof(hdr));
+ if (ret < 0)
+ RET_CG_ERR(ret);
+
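+ /* TCP_BPF_SYN_IP returns the saved SYN starting at the IP header, so the TCP header follows the IPv6 header */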
+ pth = (struct tcphdr *)(&hdr.ip6 + 1);
+ if (pth->dest != passive_lport_n || pth->source != active_lport_n)
+ RET_CG_ERR(0);
+
+ ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN, &hdr, sizeof(hdr));
+ if (ret < 0)
+ RET_CG_ERR(ret);
+
+ if (hdr.th.dest != passive_lport_n || hdr.th.source != active_lport_n)
+ RET_CG_ERR(0);
+
+ return CG_OK;
+}
+
+static int check_active_syn_in(struct bpf_sock_ops *skops)
+{
+ return __check_active_hdr_in(skops, true);
+}
+
+static int check_active_hdr_in(struct bpf_sock_ops *skops)
+{
+ struct tcphdr *th;
+
+ if (__check_active_hdr_in(skops, false) == CG_ERR)
+ return CG_ERR;
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (tcp_hdrlen(th) < skops->skb_len)
+ nr_data++;
+
+ if (th->fin)
+ nr_fin++;
+
+ if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len)
+ nr_pure_ack++;
+
+ return CG_OK;
+}
+
+static int active_opt_len(struct bpf_sock_ops *skops)
+{
+ int err;
+
+ /* Reserve more than enough space to allow the -EEXIST test in
+ * write_active_opt().
+ */
+ err = bpf_reserve_hdr_opt(skops, 12, 0);
+ if (err)
+ RET_CG_ERR(err);
+
+ return CG_OK;
+}
+
+static int write_active_opt(struct bpf_sock_ops *skops)
+{
+ struct tcp_exprm_opt exprm_opt = {};
+ struct tcp_opt win_scale_opt = {};
+ struct tcp_opt reg_opt = {};
+ struct tcphdr *th;
+ int err, ret;
+
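+ /* experimental option: kind(1) + len(1) + 2-byte magic */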
+ exprm_opt.kind = TCPOPT_EXP;
+ exprm_opt.len = 4;
+ exprm_opt.magic = __bpf_htons(0xeB9F);
+
+ reg_opt.kind = 0xB9;
+ reg_opt.len = 4;
+ reg_opt.data[0] = 0xfa;
+ reg_opt.data[1] = 0xce;
+
+ win_scale_opt.kind = TCPOPT_WINDOW;
+
+ err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
+ if (err)
+ RET_CG_ERR(err);
+
+ /* Store the same exprm option */
+ err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
+ if (err != -EEXIST)
+ RET_CG_ERR(err);
+
+ err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
+ if (err)
+ RET_CG_ERR(err);
+ err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
+ if (err != -EEXIST)
+ RET_CG_ERR(err);
+
+ /* Check the option has been written and can be searched */
+ ret = bpf_load_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
+ if (ret != 4 || exprm_opt.len != 4 || exprm_opt.kind != TCPOPT_EXP ||
+ exprm_opt.magic != __bpf_htons(0xeB9F))
+ RET_CG_ERR(ret);
+
+ reg_opt.len = 0;
+ ret = bpf_load_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
+ if (ret != 4 || reg_opt.len != 4 || reg_opt.kind != 0xB9 ||
+ reg_opt.data[0] != 0xfa || reg_opt.data[1] != 0xce)
+ RET_CG_ERR(ret);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (th->syn) {
+ active_lport_h = skops->local_port;
+ active_lport_n = th->source;
+
+ /* Search for the win scale option that the kernel
+ * wrote into the SYN packet.
+ */
+ ret = bpf_load_hdr_opt(skops, &win_scale_opt,
+ sizeof(win_scale_opt), 0);
+ if (ret != 3 || win_scale_opt.len != 3 ||
+ win_scale_opt.kind != TCPOPT_WINDOW)
+ RET_CG_ERR(ret);
+
+ /* Write the win scale option that the kernel
+ * has already written.
+ */
+ err = bpf_store_hdr_opt(skops, &win_scale_opt,
+ sizeof(win_scale_opt), 0);
+ if (err != -EEXIST)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
+{
+ __u8 tcp_flags = skops_tcp_flags(skops);
+
+ if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
+ /* Check the SYN from bpf_sock_ops_kern->syn_skb */
+ return check_active_syn_in(skops);
+
+ /* Passive side should have cleared the write hdr cb by now */
+ if (skops->local_port == passive_lport_h)
+ RET_CG_ERR(0);
+
+ return active_opt_len(skops);
+}
+
+static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
+{
+ if (skops->local_port == passive_lport_h)
+ RET_CG_ERR(0);
+
+ return write_active_opt(skops);
+}
+
+static int handle_parse_hdr(struct bpf_sock_ops *skops)
+{
+ /* The passive side does not write any non-standard/unknown
+ * option, so this parse callback should never run on the
+ * active side.
+ */
+ if (skops->local_port == active_lport_h)
+ RET_CG_ERR(0);
+
+ return check_active_hdr_in(skops);
+}
+
+static int handle_passive_estab(struct bpf_sock_ops *skops)
+{
+ int err;
+
+ /* No more write hdr cb */
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags &
+ ~BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
+
+ /* Recheck the SYN but check the tp->saved_syn this time */
+ err = check_active_syn_in(skops);
+ if (err == CG_ERR)
+ return err;
+
+ nr_syn++;
+
+ /* The ack has header option written by the active side also */
+ return check_active_hdr_in(skops);
+}
+
+SEC("sockops/misc_estab")
+int misc_estab(struct bpf_sock_ops *skops)
+{
+ int true_val = 1;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_TCP_LISTEN_CB:
+ passive_lport_h = skops->local_port;
+ passive_lport_n = __bpf_htons(passive_lport_h);
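+ /* keep the incoming SYN so it can be re-read later via TCP_BPF_SYN* */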
+ bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
+ &true_val, sizeof(true_val));
+ set_hdr_cb_flags(skops, 0);
+ break;
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ set_hdr_cb_flags(skops, 0);
+ break;
+ case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
+ return handle_parse_hdr(skops);
+ case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
+ return handle_hdr_opt_len(skops);
+ case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
+ return handle_write_hdr_opt(skops);
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ return handle_passive_estab(skops);
+ }
+
+ return CG_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c
index 98b9de2fafd0..ded71b3ff6b4 100644
--- a/tools/testing/selftests/bpf/progs/test_obj_id.c
+++ b/tools/testing/selftests/bpf/progs/test_obj_id.c
@@ -3,16 +3,8 @@
*/
#include <stddef.h>
#include <linux/bpf.h>
-#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
-/* It is a dumb bpf program such that it must have no
- * issue to be loaded since testing the verifier is
- * not the focus here.
- */
-
-int _version SEC("version") = 1;
-
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
@@ -20,13 +12,13 @@ struct {
__type(value, __u64);
} test_map_id SEC(".maps");
-SEC("test_obj_id_dummy")
-int test_obj_id(struct __sk_buff *skb)
+SEC("raw_tp/sys_enter")
+int test_obj_id(void *ctx)
{
__u32 key = 0;
__u64 *value;
value = bpf_map_lookup_elem(&test_map_id, &key);
- return TC_ACT_OK;
+ return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
new file mode 100644
index 000000000000..fb22de7c365d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} array_1 SEC(".maps");
+
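+/* same map, but BPF_F_PRESERVE_ELEMS keeps events after the map fd used to add them is closed */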
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(map_flags, BPF_F_PRESERVE_ELEMS);
+} array_2 SEC(".maps");
+
+SEC("raw_tp/sched_switch")
+int BPF_PROG(read_array_1)
+{
+ struct bpf_perf_event_value val;
+
+ return bpf_perf_event_read_value(&array_1, 0, &val, sizeof(val));
+}
+
+SEC("raw_tp/task_rename")
+int BPF_PROG(read_array_2)
+{
+ struct bpf_perf_event_value val;
+
+ return bpf_perf_event_read_value(&array_2, 0, &val, sizeof(val));
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
index ad59c4c9aba8..8207a2dc2f9d 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
@@ -12,8 +12,8 @@ struct {
__uint(value_size, sizeof(int));
} perf_buf_map SEC(".maps");
-SEC("kprobe/sys_nanosleep")
-int BPF_KPROBE(handle_sys_nanosleep_entry)
+SEC("tp/raw_syscalls/sys_enter")
+int handle_sys_enter(void *ctx)
{
int cpu = bpf_get_smp_processor_id();
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c
index e72eba4a93d2..852051064507 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c
@@ -79,6 +79,24 @@ int get_skb_ifindex(int val, struct __sk_buff *skb, int var)
return skb->ifindex * val * var;
}
+__attribute__ ((noinline))
+int test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off)
+{
+ void *data = (void *)(long)skb->data;
+ void *data_end = (void *)(long)skb->data_end;
+ struct tcphdr *tcp = NULL;
+
+ if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr))
+ return -1;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return -1;
+ /* make modification to the packet data */
+ tcp->check++;
+ return 0;
+}
+
SEC("classifier/test_pkt_access")
int test_pkt_access(struct __sk_buff *skb)
{
@@ -117,6 +135,8 @@ int test_pkt_access(struct __sk_buff *skb)
if (test_pkt_access_subprog3(3, skb) != skb->len * 3 * skb->ifindex)
return TC_ACT_SHOT;
if (tcp) {
+ if (test_pkt_write_access_subprog(skb, (void *)tcp - data))
+ return TC_ACT_SHOT;
if (((void *)(tcp) + 20) > data_end || proto != 6)
return TC_ACT_SHOT;
barrier(); /* to force ordering of checks */
diff --git a/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c
new file mode 100644
index 000000000000..4c63cc87b9d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+__u32 count = 0;
+__u32 on_cpu = 0xffffffff;
+
+SEC("raw_tp/task_rename")
+int BPF_PROG(rename, struct task_struct *task, char *comm)
+{
+
+ count++;
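+ /* these magic values are only expected when run via BPF_PROG_TEST_RUN with a crafted context */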
+ if ((__u64) task == 0x1234ULL && (__u64) comm == 0x5678ULL) {
+ on_cpu = bpf_get_smp_processor_id();
+ return (long)task + (long)comm;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
new file mode 100644
index 000000000000..8ba9959b036b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+/* inputs */
+int pid = 0;
+long value = 0;
+long flags = 0;
+
+/* outputs */
+long total = 0;
+long discarded = 0;
+long dropped = 0;
+
+long avail_data = 0;
+long ring_size = 0;
+long cons_pos = 0;
+long prod_pos = 0;
+
+/* inner state */
+long seq = 0;
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int test_ringbuf(void *ctx)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ struct sample *sample;
+ int zero = 0;
+
+ if (cur_pid != pid)
+ return 0;
+
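+ /* reserve space up front; NULL means the ring buffer is full */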
+ sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
+ if (!sample) {
+ __sync_fetch_and_add(&dropped, 1);
+ return 1;
+ }
+
+ sample->pid = pid;
+ bpf_get_current_comm(sample->comm, sizeof(sample->comm));
+ sample->value = value;
+
+ sample->seq = seq++;
+ __sync_fetch_and_add(&total, 1);
+
+ if (sample->seq & 1) {
+ /* copy from reserved sample to a new one... */
+ bpf_ringbuf_output(&ringbuf, sample, sizeof(*sample), flags);
+ /* ...and then discard reserved sample */
+ bpf_ringbuf_discard(sample, flags);
+ __sync_fetch_and_add(&discarded, 1);
+ } else {
+ bpf_ringbuf_submit(sample, flags);
+ }
+
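+ /* snapshot ring buffer counters for userspace to verify */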
+ avail_data = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+ ring_size = bpf_ringbuf_query(&ringbuf, BPF_RB_RING_SIZE);
+ cons_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_CONS_POS);
+ prod_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_PROD_POS);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
new file mode 100644
index 000000000000..edf3b6953533
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+struct ringbuf_map {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} ringbuf1 SEC(".maps"),
+ ringbuf2 SEC(".maps");
+
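+/* array-of-maps of ring buffers; slots 1 and 3 are intentionally left unset */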
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 4);
+ __type(key, int);
+ __array(values, struct ringbuf_map);
+} ringbuf_arr SEC(".maps") = {
+ .values = {
+ [0] = &ringbuf1,
+ [2] = &ringbuf2,
+ },
+};
+
+/* inputs */
+int pid = 0;
+int target_ring = 0;
+long value = 0;
+
+/* outputs */
+long total = 0;
+long dropped = 0;
+long skipped = 0;
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int test_ringbuf(void *ctx)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ struct sample *sample;
+ void *rb;
+ int zero = 0;
+
+ if (cur_pid != pid)
+ return 0;
+
+ rb = bpf_map_lookup_elem(&ringbuf_arr, &target_ring);
+ if (!rb) {
+ skipped += 1;
+ return 1;
+ }
+
+ sample = bpf_ringbuf_reserve(rb, sizeof(*sample), 0);
+ if (!sample) {
+ dropped += 1;
+ return 1;
+ }
+
+ sample->pid = pid;
+ bpf_get_current_comm(sample->comm, sizeof(sample->comm));
+ sample->value = value;
+
+ sample->seq = total;
+ total += 1;
+
+ bpf_ringbuf_submit(sample, 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
index 8f530843b4da..1ecd987005d2 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_assign.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -16,6 +16,26 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+/* Pin map under /sys/fs/bpf/tc/globals/<map name> */
+#define PIN_GLOBAL_NS 2
+
+/* Must match struct bpf_elf_map layout from iproute2 */
+struct {
+ __u32 type;
+ __u32 size_key;
+ __u32 size_value;
+ __u32 max_elem;
+ __u32 flags;
+ __u32 id;
+ __u32 pinning;
+} server_map SEC("maps") = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .size_key = sizeof(int),
+ .size_value = sizeof(__u64),
+ .max_elem = 1,
+ .pinning = PIN_GLOBAL_NS,
+};
+
int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
@@ -72,7 +92,9 @@ handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
struct bpf_sock_tuple ln = {0};
struct bpf_sock *sk;
+ const int zero = 0;
size_t tuple_len;
+ __be16 dport;
int ret;
tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
@@ -83,32 +105,11 @@ handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
if (sk)
goto assign;
- if (ipv4) {
- if (tuple->ipv4.dport != bpf_htons(4321))
- return TC_ACT_OK;
-
- ln.ipv4.daddr = bpf_htonl(0x7f000001);
- ln.ipv4.dport = bpf_htons(1234);
-
- sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv4),
- BPF_F_CURRENT_NETNS, 0);
- } else {
- if (tuple->ipv6.dport != bpf_htons(4321))
- return TC_ACT_OK;
-
- /* Upper parts of daddr are already zero. */
- ln.ipv6.daddr[3] = bpf_htonl(0x1);
- ln.ipv6.dport = bpf_htons(1234);
-
- sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv6),
- BPF_F_CURRENT_NETNS, 0);
- }
+ dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
+ if (dport != bpf_htons(4321))
+ return TC_ACT_OK;
- /* workaround: We can't do a single socket lookup here, because then
- * the compiler will likely spill tuple_len to the stack. This makes it
- * lose all bounds information in the verifier, which then rejects the
- * call as unsafe.
- */
+ sk = bpf_map_lookup_elem(&server_map, &zero);
if (!sk)
return TC_ACT_SHOT;
@@ -123,7 +124,9 @@ handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
struct bpf_sock_tuple ln = {0};
struct bpf_sock *sk;
+ const int zero = 0;
size_t tuple_len;
+ __be16 dport;
int ret;
tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
@@ -137,32 +140,11 @@ handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
bpf_sk_release(sk);
}
- if (ipv4) {
- if (tuple->ipv4.dport != bpf_htons(4321))
- return TC_ACT_OK;
+ dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
+ if (dport != bpf_htons(4321))
+ return TC_ACT_OK;
- ln.ipv4.daddr = bpf_htonl(0x7f000001);
- ln.ipv4.dport = bpf_htons(1234);
-
- sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv4),
- BPF_F_CURRENT_NETNS, 0);
- } else {
- if (tuple->ipv6.dport != bpf_htons(4321))
- return TC_ACT_OK;
-
- /* Upper parts of daddr are already zero. */
- ln.ipv6.daddr[3] = bpf_htonl(0x1);
- ln.ipv6.dport = bpf_htons(1234);
-
- sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv6),
- BPF_F_CURRENT_NETNS, 0);
- }
-
- /* workaround: We can't do a single socket lookup here, because then
- * the compiler will likely spill tuple_len to the stack. This makes it
- * lose all bounds information in the verifier, which then rejects the
- * call as unsafe.
- */
+ sk = bpf_map_lookup_elem(&server_map, &zero);
if (!sk)
return TC_ACT_SHOT;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
new file mode 100644
index 000000000000..1032b292af5b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -0,0 +1,627 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#define IP4(a, b, c, d) \
+ bpf_htonl((((__u32)(a) & 0xffU) << 24) | \
+ (((__u32)(b) & 0xffU) << 16) | \
+ (((__u32)(c) & 0xffU) << 8) | \
+ (((__u32)(d) & 0xffU) << 0))
+#define IP6(aaaa, bbbb, cccc, dddd) \
+ { bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) }
+
+/* Macros for least-significant byte and word accesses. */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define LSE_INDEX(index, size) (index)
+#else
+#define LSE_INDEX(index, size) ((size) - (index) - 1)
+#endif
+#define LSB(value, index) \
+ (((__u8 *)&(value))[LSE_INDEX((index), sizeof(value))])
+#define LSW(value, index) \
+ (((__u16 *)&(value))[LSE_INDEX((index), sizeof(value) / 2)])
+
+#define MAX_SOCKS 32
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, MAX_SOCKS);
+ __type(key, __u32);
+ __type(value, __u64);
+} redir_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, int);
+} run_map SEC(".maps");
+
+enum {
+ PROG1 = 0,
+ PROG2,
+};
+
+enum {
+ SERVER_A = 0,
+ SERVER_B,
+};
+
+/* Addressable key/value constants for convenience */
+static const int KEY_PROG1 = PROG1;
+static const int KEY_PROG2 = PROG2;
+static const int PROG_DONE = 1;
+
+static const __u32 KEY_SERVER_A = SERVER_A;
+static const __u32 KEY_SERVER_B = SERVER_B;
+
+static const __u16 DST_PORT = 7007; /* Host byte order */
+static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
+static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
+
+SEC("sk_lookup/lookup_pass")
+int lookup_pass(struct bpf_sk_lookup *ctx)
+{
+ return SK_PASS;
+}
+
+SEC("sk_lookup/lookup_drop")
+int lookup_drop(struct bpf_sk_lookup *ctx)
+{
+ return SK_DROP;
+}
+
+SEC("sk_reuseport/reuse_pass")
+int reuseport_pass(struct sk_reuseport_md *ctx)
+{
+ return SK_PASS;
+}
+
+SEC("sk_reuseport/reuse_drop")
+int reuseport_drop(struct sk_reuseport_md *ctx)
+{
+ return SK_DROP;
+}
+
+/* Redirect packets destined for port DST_PORT to socket at redir_map[0]. */
+SEC("sk_lookup/redir_port")
+int redir_port(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ if (ctx->local_port != DST_PORT)
+ return SK_PASS;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+/* Redirect packets destined for DST_IP4 address to socket at redir_map[0]. */
+SEC("sk_lookup/redir_ip4")
+int redir_ip4(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ if (ctx->family != AF_INET)
+ return SK_PASS;
+ if (ctx->local_port != DST_PORT)
+ return SK_PASS;
+ if (ctx->local_ip4 != DST_IP4)
+ return SK_PASS;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+/* Redirect packets destined for DST_IP6 address to socket at redir_map[0]. */
+SEC("sk_lookup/redir_ip6")
+int redir_ip6(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ if (ctx->family != AF_INET6)
+ return SK_PASS;
+ if (ctx->local_port != DST_PORT)
+ return SK_PASS;
+ if (ctx->local_ip6[0] != DST_IP6[0] ||
+ ctx->local_ip6[1] != DST_IP6[1] ||
+ ctx->local_ip6[2] != DST_IP6[2] ||
+ ctx->local_ip6[3] != DST_IP6[3])
+ return SK_PASS;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_lookup/select_sock_a")
+int select_sock_a(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_lookup/select_sock_a_no_reuseport")
+int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_DROP;
+
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_NO_REUSEPORT);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_reuseport/select_sock_b")
+int select_sock_b(struct sk_reuseport_md *ctx)
+{
+ __u32 key = KEY_SERVER_B;
+ int err;
+
+ err = bpf_sk_select_reuseport(ctx, &redir_map, &key, 0);
+ return err ? SK_DROP : SK_PASS;
+}
+
+/* Check that bpf_sk_assign() returns -EEXIST if socket already selected. */
+SEC("sk_lookup/sk_assign_eexist")
+int sk_assign_eexist(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err, ret;
+
+ ret = SK_DROP;
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err)
+ goto out;
+ bpf_sk_release(sk);
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err != -EEXIST) {
+ bpf_printk("sk_assign returned %d, expected %d\n",
+ err, -EEXIST);
+ goto out;
+ }
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+/* Check that bpf_sk_assign(BPF_SK_LOOKUP_F_REPLACE) can override selection. */
+SEC("sk_lookup/sk_assign_replace_flag")
+int sk_assign_replace_flag(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err, ret;
+
+ ret = SK_DROP;
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err)
+ goto out;
+ bpf_sk_release(sk);
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+ if (err) {
+ bpf_printk("sk_assign returned %d, expected 0\n", err);
+ goto out;
+ }
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+/* Check that bpf_sk_assign(sk=NULL) is accepted. */
+SEC("sk_lookup/sk_assign_null")
+int sk_assign_null(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk = NULL;
+ int err, ret;
+
+ ret = SK_DROP;
+
+ err = bpf_sk_assign(ctx, NULL, 0);
+ if (err) {
+ bpf_printk("sk_assign returned %d, expected 0\n", err);
+ goto out;
+ }
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+ if (err) {
+ bpf_printk("sk_assign returned %d, expected 0\n", err);
+ goto out;
+ }
+
+ if (ctx->sk != sk)
+ goto out;
+ err = bpf_sk_assign(ctx, NULL, 0);
+ if (err != -EEXIST)
+ goto out;
+ err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+/* Check that selected sk is accessible through context. */
+SEC("sk_lookup/access_ctx_sk")
+int access_ctx_sk(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk1 = NULL, *sk2 = NULL;
+ int err, ret;
+
+ ret = SK_DROP;
+
+ /* Try accessing unassigned (NULL) ctx->sk field */
+ if (ctx->sk && ctx->sk->family != AF_INET)
+ goto out;
+
+ /* Assign a value to ctx->sk */
+ sk1 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk1)
+ goto out;
+ err = bpf_sk_assign(ctx, sk1, 0);
+ if (err)
+ goto out;
+ if (ctx->sk != sk1)
+ goto out;
+
+ /* Access ctx->sk fields */
+ if (ctx->sk->family != AF_INET ||
+ ctx->sk->type != SOCK_STREAM ||
+ ctx->sk->state != BPF_TCP_LISTEN)
+ goto out;
+
+ /* Reset selection */
+ err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+ if (ctx->sk)
+ goto out;
+
+ /* Assign another socket */
+ sk2 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk2)
+ goto out;
+ err = bpf_sk_assign(ctx, sk2, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+ if (ctx->sk != sk2)
+ goto out;
+
+ /* Access reassigned ctx->sk fields */
+ if (ctx->sk->family != AF_INET ||
+ ctx->sk->type != SOCK_STREAM ||
+ ctx->sk->state != BPF_TCP_LISTEN)
+ goto out;
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk1)
+ bpf_sk_release(sk1);
+ if (sk2)
+ bpf_sk_release(sk2);
+ return ret;
+}
+
+/* Check narrow loads from ctx fields that support them.
+ *
+ * Narrow loads of size >= target field size from a non-zero offset
+ * are not covered because they give bogus results, that is, the
+ * verifier ignores the offset.
+ */
+SEC("sk_lookup/ctx_narrow_access")
+int ctx_narrow_access(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err, family;
+ bool v4;
+
+ v4 = (ctx->family == AF_INET);
+
+ /* Narrow loads from family field */
+ if (LSB(ctx->family, 0) != (v4 ? AF_INET : AF_INET6) ||
+ LSB(ctx->family, 1) != 0 || LSB(ctx->family, 2) != 0 || LSB(ctx->family, 3) != 0)
+ return SK_DROP;
+ if (LSW(ctx->family, 0) != (v4 ? AF_INET : AF_INET6))
+ return SK_DROP;
+
+ /* Narrow loads from protocol field */
+ if (LSB(ctx->protocol, 0) != IPPROTO_TCP ||
+ LSB(ctx->protocol, 1) != 0 || LSB(ctx->protocol, 2) != 0 || LSB(ctx->protocol, 3) != 0)
+ return SK_DROP;
+ if (LSW(ctx->protocol, 0) != IPPROTO_TCP)
+ return SK_DROP;
+
+ /* Narrow loads from remote_port field. Expect non-0 value. */
+ if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 &&
+ LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0)
+ return SK_DROP;
+ if (LSW(ctx->remote_port, 0) == 0)
+ return SK_DROP;
+
+ /* Narrow loads from local_port field. Expect DST_PORT. */
+ if (LSB(ctx->local_port, 0) != ((DST_PORT >> 0) & 0xff) ||
+ LSB(ctx->local_port, 1) != ((DST_PORT >> 8) & 0xff) ||
+ LSB(ctx->local_port, 2) != 0 || LSB(ctx->local_port, 3) != 0)
+ return SK_DROP;
+ if (LSW(ctx->local_port, 0) != DST_PORT)
+ return SK_DROP;
+
+ /* Narrow loads from IPv4 fields */
+ if (v4) {
+ /* Expect non-0.0.0.0 in remote_ip4 */
+ if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 &&
+ LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0)
+ return SK_DROP;
+ if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0)
+ return SK_DROP;
+
+ /* Expect DST_IP4 in local_ip4 */
+ if (LSB(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xff) ||
+ LSB(ctx->local_ip4, 1) != ((DST_IP4 >> 8) & 0xff) ||
+ LSB(ctx->local_ip4, 2) != ((DST_IP4 >> 16) & 0xff) ||
+ LSB(ctx->local_ip4, 3) != ((DST_IP4 >> 24) & 0xff))
+ return SK_DROP;
+ if (LSW(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xffff) ||
+ LSW(ctx->local_ip4, 1) != ((DST_IP4 >> 16) & 0xffff))
+ return SK_DROP;
+ } else {
+ /* Expect 0.0.0.0 IPs when family != AF_INET */
+ if (LSB(ctx->remote_ip4, 0) != 0 || LSB(ctx->remote_ip4, 1) != 0 ||
+ LSB(ctx->remote_ip4, 2) != 0 || LSB(ctx->remote_ip4, 3) != 0)
+ return SK_DROP;
+ if (LSW(ctx->remote_ip4, 0) != 0 || LSW(ctx->remote_ip4, 1) != 0)
+ return SK_DROP;
+
+ if (LSB(ctx->local_ip4, 0) != 0 || LSB(ctx->local_ip4, 1) != 0 ||
+ LSB(ctx->local_ip4, 2) != 0 || LSB(ctx->local_ip4, 3) != 0)
+ return SK_DROP;
+ if (LSW(ctx->local_ip4, 0) != 0 || LSW(ctx->local_ip4, 1) != 0)
+ return SK_DROP;
+ }
+
+ /* Narrow loads from IPv6 fields */
+ if (!v4) {
+ /* Expect non-:: IP in remote_ip6 */
+ if (LSB(ctx->remote_ip6[0], 0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 &&
+ LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 &&
+ LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 &&
+ LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 &&
+ LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 &&
+ LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 &&
+ LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 &&
+ LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0)
+ return SK_DROP;
+ if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 &&
+ LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 &&
+ LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 &&
+ LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0)
+ return SK_DROP;
+ /* Expect DST_IP6 in local_ip6 */
+ if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) ||
+ LSB(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 8) & 0xff) ||
+ LSB(ctx->local_ip6[0], 2) != ((DST_IP6[0] >> 16) & 0xff) ||
+ LSB(ctx->local_ip6[0], 3) != ((DST_IP6[0] >> 24) & 0xff) ||
+ LSB(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xff) ||
+ LSB(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 8) & 0xff) ||
+ LSB(ctx->local_ip6[1], 2) != ((DST_IP6[1] >> 16) & 0xff) ||
+ LSB(ctx->local_ip6[1], 3) != ((DST_IP6[1] >> 24) & 0xff) ||
+ LSB(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xff) ||
+ LSB(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 8) & 0xff) ||
+ LSB(ctx->local_ip6[2], 2) != ((DST_IP6[2] >> 16) & 0xff) ||
+ LSB(ctx->local_ip6[2], 3) != ((DST_IP6[2] >> 24) & 0xff) ||
+ LSB(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xff) ||
+ LSB(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 8) & 0xff) ||
+ LSB(ctx->local_ip6[3], 2) != ((DST_IP6[3] >> 16) & 0xff) ||
+ LSB(ctx->local_ip6[3], 3) != ((DST_IP6[3] >> 24) & 0xff))
+ return SK_DROP;
+ if (LSW(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xffff) ||
+ LSW(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 16) & 0xffff) ||
+ LSW(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xffff) ||
+ LSW(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 16) & 0xffff) ||
+ LSW(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xffff) ||
+ LSW(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 16) & 0xffff) ||
+ LSW(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xffff) ||
+ LSW(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 16) & 0xffff))
+ return SK_DROP;
+ } else {
+ /* Expect :: IPs when family != AF_INET6 */
+ if (LSB(ctx->remote_ip6[0], 0) != 0 || LSB(ctx->remote_ip6[0], 1) != 0 ||
+ LSB(ctx->remote_ip6[0], 2) != 0 || LSB(ctx->remote_ip6[0], 3) != 0 ||
+ LSB(ctx->remote_ip6[1], 0) != 0 || LSB(ctx->remote_ip6[1], 1) != 0 ||
+ LSB(ctx->remote_ip6[1], 2) != 0 || LSB(ctx->remote_ip6[1], 3) != 0 ||
+ LSB(ctx->remote_ip6[2], 0) != 0 || LSB(ctx->remote_ip6[2], 1) != 0 ||
+ LSB(ctx->remote_ip6[2], 2) != 0 || LSB(ctx->remote_ip6[2], 3) != 0 ||
+ LSB(ctx->remote_ip6[3], 0) != 0 || LSB(ctx->remote_ip6[3], 1) != 0 ||
+ LSB(ctx->remote_ip6[3], 2) != 0 || LSB(ctx->remote_ip6[3], 3) != 0)
+ return SK_DROP;
+ if (LSW(ctx->remote_ip6[0], 0) != 0 || LSW(ctx->remote_ip6[0], 1) != 0 ||
+ LSW(ctx->remote_ip6[1], 0) != 0 || LSW(ctx->remote_ip6[1], 1) != 0 ||
+ LSW(ctx->remote_ip6[2], 0) != 0 || LSW(ctx->remote_ip6[2], 1) != 0 ||
+ LSW(ctx->remote_ip6[3], 0) != 0 || LSW(ctx->remote_ip6[3], 1) != 0)
+ return SK_DROP;
+
+ if (LSB(ctx->local_ip6[0], 0) != 0 || LSB(ctx->local_ip6[0], 1) != 0 ||
+ LSB(ctx->local_ip6[0], 2) != 0 || LSB(ctx->local_ip6[0], 3) != 0 ||
+ LSB(ctx->local_ip6[1], 0) != 0 || LSB(ctx->local_ip6[1], 1) != 0 ||
+ LSB(ctx->local_ip6[1], 2) != 0 || LSB(ctx->local_ip6[1], 3) != 0 ||
+ LSB(ctx->local_ip6[2], 0) != 0 || LSB(ctx->local_ip6[2], 1) != 0 ||
+ LSB(ctx->local_ip6[2], 2) != 0 || LSB(ctx->local_ip6[2], 3) != 0 ||
+ LSB(ctx->local_ip6[3], 0) != 0 || LSB(ctx->local_ip6[3], 1) != 0 ||
+ LSB(ctx->local_ip6[3], 2) != 0 || LSB(ctx->local_ip6[3], 3) != 0)
+ return SK_DROP;
+ if (LSW(ctx->remote_ip6[0], 0) != 0 || LSW(ctx->remote_ip6[0], 1) != 0 ||
+ LSW(ctx->remote_ip6[1], 0) != 0 || LSW(ctx->remote_ip6[1], 1) != 0 ||
+ LSW(ctx->remote_ip6[2], 0) != 0 || LSW(ctx->remote_ip6[2], 1) != 0 ||
+ LSW(ctx->remote_ip6[3], 0) != 0 || LSW(ctx->remote_ip6[3], 1) != 0)
+ return SK_DROP;
+ }
+
+ /* Success, redirect to KEY_SERVER_B */
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (sk) {
+ bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ }
+ return SK_PASS;
+}
+
+/* Check that sk_assign rejects SERVER_A socket with -ESOCKTNOSUPPORT */
+SEC("sk_lookup/sk_assign_esocknosupport")
+int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err, ret;
+
+ ret = SK_DROP;
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ goto out;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err != -ESOCKTNOSUPPORT) {
+ bpf_printk("sk_assign returned %d, expected %d\n",
+ err, -ESOCKTNOSUPPORT);
+ goto out;
+ }
+
+ ret = SK_PASS; /* Success, pass to regular lookup */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+SEC("sk_lookup/multi_prog_pass1")
+int multi_prog_pass1(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_pass2")
+int multi_prog_pass2(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_drop1")
+int multi_prog_drop1(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+ return SK_DROP;
+}
+
+SEC("sk_lookup/multi_prog_drop2")
+int multi_prog_drop2(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+ return SK_DROP;
+}
+
+static __always_inline int select_server_a(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_DROP;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ if (err)
+ return SK_DROP;
+
+ return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_redir1")
+int multi_prog_redir1(struct bpf_sk_lookup *ctx)
+{
+ int ret;
+
+ ret = select_server_a(ctx);
+ bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_redir2")
+int multi_prog_redir2(struct bpf_sk_lookup *ctx)
+{
+ int ret;
+
+ ret = select_server_a(ctx);
+ bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "Dual BSD/GPL";
+__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index d2b38fa6a5b0..e83d0b48d80c 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -73,6 +73,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb)
tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
+ bpf_printk("sk=%d\n", sk ? 1 : 0);
if (sk)
bpf_sk_release(sk);
return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
diff --git a/tools/testing/selftests/bpf/progs/test_skb_helpers.c b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
new file mode 100644
index 000000000000..bb3fbf1a29e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define TEST_COMM_LEN 16
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, u32);
+ __type(value, u32);
+} cgroup_map SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+SEC("classifier/test_skb_helpers")
+int test_skb_helpers(struct __sk_buff *skb)
+{
+ struct task_struct *task;
+ char comm[TEST_COMM_LEN];
+ __u32 tpid;
+
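+ /* exercise bpf_probe_read_kernel{,_str}() from a classifier context */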
+ task = (struct task_struct *)bpf_get_current_task();
+ bpf_probe_read_kernel(&tpid, sizeof(tpid), &task->tgid);
+ bpf_probe_read_kernel_str(&comm, sizeof(comm), &task->comm);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c
index de03a90f78ca..374ccef704e1 100644
--- a/tools/testing/selftests/bpf/progs/test_skeleton.c
+++ b/tools/testing/selftests/bpf/progs/test_skeleton.c
@@ -10,16 +10,28 @@ struct s {
long long b;
} __attribute__((packed));
-int in1 = 0;
-long long in2 = 0;
+/* .data section */
+int in1 = -1;
+long long in2 = -1;
+
+/* .bss section */
char in3 = '\0';
long long in4 __attribute__((aligned(64))) = 0;
struct s in5 = {};
-long long out2 = 0;
+/* .rodata section */
+const volatile struct {
+ const int in6;
+} in = {};
+
+/* .data section */
+int out1 = -1;
+long long out2 = -1;
+
+/* .bss section */
char out3 = 0;
long long out4 = 0;
-int out1 = 0;
+int out6 = 0;
extern bool CONFIG_BPF_SYSCALL __kconfig;
extern int LINUX_KERNEL_VERSION __kconfig;
@@ -36,6 +48,7 @@ int handler(const void *ctx)
out3 = in3;
out4 = in4;
out5 = in5;
+ out6 = in.in6;
bpf_syscall = CONFIG_BPF_SYSCALL;
kern_ver = LINUX_KERNEL_VERSION;
diff --git a/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
new file mode 100644
index 000000000000..45e8fc75a739
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Isovalent, Inc.
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, __u32);
+ __type(value, __u64);
+} socket_storage SEC(".maps");
+
+SEC("sk_msg")
+int prog_msg_verdict(struct sk_msg_md *msg)
+{
+ struct task_struct *task = (struct task_struct *)bpf_get_current_task();
+ int verdict = SK_PASS;
+ __u32 pid, tpid;
+ __u64 *sk_stg;
+
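+ /* stash the caller pid in per-socket storage created on demand */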
+ pid = bpf_get_current_pid_tgid() >> 32;
+ sk_stg = bpf_sk_storage_get(&socket_storage, msg->sk, 0, BPF_SK_STORAGE_GET_F_CREATE);
+ if (!sk_stg)
+ return SK_DROP;
+ *sk_stg = pid;
+ bpf_probe_read_kernel(&tpid, sizeof(tpid), &task->tgid);
+ if (pid != tpid)
+ verdict = SK_DROP;
+ bpf_sk_storage_delete(&socket_storage, (void *)msg->sk);
+ return verdict;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index 9bcaa37f476a..81b57b9aaaea 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -7,19 +7,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-
-enum bpf_addr_array_idx {
- ADDR_SRV_IDX,
- ADDR_CLI_IDX,
- __NR_BPF_ADDR_ARRAY_IDX,
-};
-
-enum bpf_result_array_idx {
- EGRESS_SRV_IDX,
- EGRESS_CLI_IDX,
- INGRESS_LISTEN_IDX,
- __NR_BPF_RESULT_ARRAY_IDX,
-};
+#include "bpf_tcp_helpers.h"
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
@@ -29,27 +17,6 @@ enum bpf_linum_array_idx {
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, __NR_BPF_ADDR_ARRAY_IDX);
- __type(key, __u32);
- __type(value, struct sockaddr_in6);
-} addr_map SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
- __type(key, __u32);
- __type(value, struct bpf_sock);
-} sock_result_map SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
- __type(key, __u32);
- __type(value, struct bpf_tcp_sock);
-} tcp_sock_result_map SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
__type(key, __u32);
__type(value, __u32);
@@ -74,6 +41,17 @@ struct {
__type(value, struct bpf_spinlock_cnt);
} sk_pkt_out_cnt10 SEC(".maps");
+struct bpf_tcp_sock listen_tp = {};
+struct sockaddr_in6 srv_sa6 = {};
+struct bpf_tcp_sock cli_tp = {};
+struct bpf_tcp_sock srv_tp = {};
+struct bpf_sock listen_sk = {};
+struct bpf_sock srv_sk = {};
+struct bpf_sock cli_sk = {};
+__u64 parent_cg_id = 0;
+__u64 child_cg_id = 0;
+__u64 lsndtime = 0;
+
static bool is_loopback6(__u32 *a6)
{
return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
@@ -130,62 +108,86 @@ static void tpcpy(struct bpf_tcp_sock *dst,
dst->bytes_acked = src->bytes_acked;
}
-#define RETURN { \
+/* Always return CG_OK so that no pkt will be filtered out */
+#define CG_OK 1
+
+#define RET_LOG() ({ \
linum = __LINE__; \
- bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \
- return 1; \
-}
+ bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST); \
+ return CG_OK; \
+})
SEC("cgroup_skb/egress")
int egress_read_sock_fields(struct __sk_buff *skb)
{
struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F };
- __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx;
struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
- struct sockaddr_in6 *srv_sa6, *cli_sa6;
struct bpf_tcp_sock *tp, *tp_ret;
struct bpf_sock *sk, *sk_ret;
__u32 linum, linum_idx;
+ struct tcp_sock *ktp;
linum_idx = EGRESS_LINUM_IDX;
sk = skb->sk;
- if (!sk || sk->state == 10)
- RETURN;
+ if (!sk)
+ RET_LOG();
+ /* Not the testing egress traffic, or a TCP_LISTEN (10)
+ * socket, whose fields are copied at the ingress side instead.
+ */
+ if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
+ sk->state == 10)
+ return CG_OK;
+
+ if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
+ /* Server socket */
+ sk_ret = &srv_sk;
+ tp_ret = &srv_tp;
+ } else if (sk->dst_port == srv_sa6.sin6_port) {
+ /* Client socket */
+ sk_ret = &cli_sk;
+ tp_ret = &cli_tp;
+ } else {
+ /* Not the testing egress traffic */
+ return CG_OK;
+ }
+
+ /* It must be a fullsock for cgroup_skb/egress prog */
sk = bpf_sk_fullsock(sk);
- if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP ||
- !is_loopback6(sk->src_ip6))
- RETURN;
+ if (!sk)
+ RET_LOG();
+
+ /* Not the testing egress traffic */
+ if (sk->protocol != IPPROTO_TCP)
+ return CG_OK;
tp = bpf_tcp_sock(sk);
if (!tp)
- RETURN;
+ RET_LOG();
- srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
- cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx);
- if (!srv_sa6 || !cli_sa6)
- RETURN;
+ skcpy(sk_ret, sk);
+ tpcpy(tp_ret, tp);
- if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port))
- result_idx = EGRESS_SRV_IDX;
- else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
- result_idx = EGRESS_CLI_IDX;
- else
- RETURN;
+ if (sk_ret == &srv_sk) {
+ ktp = bpf_skc_to_tcp_sock(sk);
- sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
- tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
- if (!sk_ret || !tp_ret)
- RETURN;
+ if (!ktp)
+ RET_LOG();
- skcpy(sk_ret, sk);
- tpcpy(tp_ret, tp);
+ lsndtime = ktp->lsndtime;
+
+ child_cg_id = bpf_sk_cgroup_id(ktp);
+ if (!child_cg_id)
+ RET_LOG();
+
+ parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2);
+ if (!parent_cg_id)
+ RET_LOG();
- if (result_idx == EGRESS_SRV_IDX) {
/* The userspace has created it for srv sk */
- pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0);
- pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk,
+ pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0);
+ pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp,
0, 0);
} else {
pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
@@ -197,7 +199,7 @@ int egress_read_sock_fields(struct __sk_buff *skb)
}
if (!pkt_out_cnt || !pkt_out_cnt10)
- RETURN;
+ RET_LOG();
	/* Even though both cnt and cnt10 have the lock defined in their BTF,
	 * intentionally one cnt takes the lock while the other does not
@@ -208,48 +210,44 @@ int egress_read_sock_fields(struct __sk_buff *skb)
pkt_out_cnt10->cnt += 10;
bpf_spin_unlock(&pkt_out_cnt10->lock);
- RETURN;
+ return CG_OK;
}
SEC("cgroup_skb/ingress")
int ingress_read_sock_fields(struct __sk_buff *skb)
{
- __u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX;
- struct bpf_tcp_sock *tp, *tp_ret;
- struct bpf_sock *sk, *sk_ret;
- struct sockaddr_in6 *srv_sa6;
+ struct bpf_tcp_sock *tp;
__u32 linum, linum_idx;
+ struct bpf_sock *sk;
linum_idx = INGRESS_LINUM_IDX;
sk = skb->sk;
- if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6))
- RETURN;
+ if (!sk)
+ RET_LOG();
- srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
- if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port))
- RETURN;
+ /* Not the testing ingress traffic to the server */
+ if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
+ sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
+ return CG_OK;
- if (sk->state != 10 && sk->state != 12)
- RETURN;
+ /* Only interested in TCP_LISTEN */
+ if (sk->state != 10)
+ return CG_OK;
- sk = bpf_get_listener_sock(sk);
+ /* It must be a fullsock for cgroup_skb/ingress prog */
+ sk = bpf_sk_fullsock(sk);
if (!sk)
- RETURN;
+ RET_LOG();
tp = bpf_tcp_sock(sk);
if (!tp)
- RETURN;
-
- sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
- tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
- if (!sk_ret || !tp_ret)
- RETURN;
+ RET_LOG();
- skcpy(sk_ret, sk);
- tpcpy(tp_ret, tp);
+ skcpy(&listen_sk, sk);
+ tpcpy(&listen_tp, tp);
- RETURN;
+ return CG_OK;
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c
new file mode 100644
index 000000000000..02a59e220cbc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} map SEC(".maps");
+
+SEC("sockops")
+int bpf_sockmap(struct bpf_sock_ops *skops)
+{
+ __u32 key = 0;
+
+ if (skops->sk)
+ bpf_map_update_elem(&map, &key, skops->sk, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index 9b4d3a68a91a..1858435de7aa 100644
--- a/tools/testing/selftests/bpf/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -79,14 +79,27 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
+ __uint(max_entries, 3);
__type(key, int);
__type(value, int);
} sock_skb_opts SEC(".maps");
+struct {
+ __uint(type, TEST_MAP_TYPE);
+ __uint(max_entries, 20);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} tls_sock_map SEC(".maps");
+
SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb)
{
+ int *f, two = 2;
+
+ f = bpf_map_lookup_elem(&sock_skb_opts, &two);
+ if (f && *f) {
+ return *f;
+ }
return skb->len;
}
@@ -110,8 +123,6 @@ int bpf_prog2(struct __sk_buff *skb)
flags = *f;
}
- bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
- len, flags);
#ifdef SOCKMAP
return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
#else
@@ -120,6 +131,59 @@ int bpf_prog2(struct __sk_buff *skb)
}
+static inline void bpf_write_pass(struct __sk_buff *skb, int offset)
+{
+ int err = bpf_skb_pull_data(skb, 6 + offset);
+ void *data_end;
+ char *c;
+
+ if (err)
+ return;
+
+ c = (char *)(long)skb->data;
+ data_end = (void *)(long)skb->data_end;
+
+ if (c + 5 + offset < data_end)
+ memcpy(c + offset, "PASS", 4);
+}
+
+SEC("sk_skb3")
+int bpf_prog3(struct __sk_buff *skb)
+{
+ int err, *f, ret = SK_PASS;
+ const int one = 1;
+
+ f = bpf_map_lookup_elem(&sock_skb_opts, &one);
+ if (f && *f) {
+ __u64 flags = 0;
+
+ ret = 0;
+ flags = *f;
+
+ err = bpf_skb_adjust_room(skb, -13, 0, 0);
+ if (err)
+ return SK_DROP;
+ err = bpf_skb_adjust_room(skb, 4, 0, 0);
+ if (err)
+ return SK_DROP;
+ bpf_write_pass(skb, 0);
+#ifdef SOCKMAP
+ return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags);
+#else
+ return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags);
+#endif
+ }
+ f = bpf_map_lookup_elem(&sock_skb_opts, &one);
+ if (f && *f)
+ ret = SK_DROP;
+ err = bpf_skb_adjust_room(skb, 4, 0, 0);
+ if (err)
+ return SK_DROP;
+ bpf_write_pass(skb, 13);
+tls_out:
+ return ret;
+}
+
SEC("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
@@ -143,8 +207,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
err = bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
- bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
- lport, bpf_ntohl(rport), err);
}
break;
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
@@ -160,8 +222,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
err = bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
- bpf_printk("active(%i -> %i) map ctx update err: %d\n",
- lport, bpf_ntohl(rport), err);
}
break;
default:
@@ -199,72 +259,6 @@ int bpf_prog4(struct sk_msg_md *msg)
}
SEC("sk_msg2")
-int bpf_prog5(struct sk_msg_md *msg)
-{
- int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
- int *start, *end, *start_push, *end_push, *start_pop, *pop;
- int *bytes, len1, len2 = 0, len3, len4;
- int err1 = -1, err2 = -1;
-
- bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
- if (bytes)
- err1 = bpf_msg_apply_bytes(msg, *bytes);
- bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
- if (bytes)
- err2 = bpf_msg_cork_bytes(msg, *bytes);
- len1 = (__u64)msg->data_end - (__u64)msg->data;
- start = bpf_map_lookup_elem(&sock_bytes, &zero);
- end = bpf_map_lookup_elem(&sock_bytes, &one);
- if (start && end) {
- int err;
-
- bpf_printk("sk_msg2: pull(%i:%i)\n",
- start ? *start : 0, end ? *end : 0);
- err = bpf_msg_pull_data(msg, *start, *end, 0);
- if (err)
- bpf_printk("sk_msg2: pull_data err %i\n",
- err);
- len2 = (__u64)msg->data_end - (__u64)msg->data;
- bpf_printk("sk_msg2: length update %i->%i\n",
- len1, len2);
- }
-
- start_push = bpf_map_lookup_elem(&sock_bytes, &two);
- end_push = bpf_map_lookup_elem(&sock_bytes, &three);
- if (start_push && end_push) {
- int err;
-
- bpf_printk("sk_msg2: push(%i:%i)\n",
- start_push ? *start_push : 0,
- end_push ? *end_push : 0);
- err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
- if (err)
- bpf_printk("sk_msg2: push_data err %i\n", err);
- len3 = (__u64)msg->data_end - (__u64)msg->data;
- bpf_printk("sk_msg2: length push_update %i->%i\n",
- len2 ? len2 : len1, len3);
- }
- start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
- pop = bpf_map_lookup_elem(&sock_bytes, &five);
- if (start_pop && pop) {
- int err;
-
- bpf_printk("sk_msg2: pop(%i@%i)\n",
- start_pop, pop);
- err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
- if (err)
- bpf_printk("sk_msg2: pop_data err %i\n", err);
- len4 = (__u64)msg->data_end - (__u64)msg->data;
- bpf_printk("sk_msg2: length pop_data %i->%i\n",
- len1 ? len1 : 0, len4);
- }
-
- bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
- len1, err1, err2);
- return SK_PASS;
-}
-
-SEC("sk_msg3")
int bpf_prog6(struct sk_msg_md *msg)
{
int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0;
@@ -305,86 +299,7 @@ int bpf_prog6(struct sk_msg_md *msg)
#endif
}
-SEC("sk_msg4")
-int bpf_prog7(struct sk_msg_md *msg)
-{
- int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
- int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
- int len1, len2 = 0, len3, len4;
- int err1 = 0, err2 = 0, key = 0;
- __u64 flags = 0;
-
- int err;
- bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
- if (bytes)
- err1 = bpf_msg_apply_bytes(msg, *bytes);
- bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
- if (bytes)
- err2 = bpf_msg_cork_bytes(msg, *bytes);
- len1 = (__u64)msg->data_end - (__u64)msg->data;
-
- start = bpf_map_lookup_elem(&sock_bytes, &zero);
- end = bpf_map_lookup_elem(&sock_bytes, &one);
- if (start && end) {
- bpf_printk("sk_msg2: pull(%i:%i)\n",
- start ? *start : 0, end ? *end : 0);
- err = bpf_msg_pull_data(msg, *start, *end, 0);
- if (err)
- bpf_printk("sk_msg2: pull_data err %i\n",
- err);
- len2 = (__u64)msg->data_end - (__u64)msg->data;
- bpf_printk("sk_msg2: length update %i->%i\n",
- len1, len2);
- }
-
- start_push = bpf_map_lookup_elem(&sock_bytes, &two);
- end_push = bpf_map_lookup_elem(&sock_bytes, &three);
- if (start_push && end_push) {
- bpf_printk("sk_msg4: push(%i:%i)\n",
- start_push ? *start_push : 0,
- end_push ? *end_push : 0);
- err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
- if (err)
- bpf_printk("sk_msg4: push_data err %i\n",
- err);
- len3 = (__u64)msg->data_end - (__u64)msg->data;
- bpf_printk("sk_msg4: length push_update %i->%i\n",
- len2 ? len2 : len1, len3);
- }
-
- start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
- pop = bpf_map_lookup_elem(&sock_bytes, &five);
- if (start_pop && pop) {
- int err;
-
- bpf_printk("sk_msg4: pop(%i@%i)\n",
- start_pop, pop);
- err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
- if (err)
- bpf_printk("sk_msg4: pop_data err %i\n", err);
- len4 = (__u64)msg->data_end - (__u64)msg->data;
- bpf_printk("sk_msg4: length pop_data %i->%i\n",
- len1 ? len1 : 0, len4);
- }
-
-
- f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
- if (f && *f) {
- key = 2;
- flags = *f;
- }
- bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
- len1, flags, err1 ? err1 : err2);
-#ifdef SOCKMAP
- err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
-#else
- err = bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
-#endif
- bpf_printk("sk_msg3: err %i\n", err);
- return err;
-}
-
-SEC("sk_msg5")
+SEC("sk_msg3")
int bpf_prog8(struct sk_msg_md *msg)
{
void *data_end = (void *)(long) msg->data_end;
@@ -401,7 +316,7 @@ int bpf_prog8(struct sk_msg_md *msg)
}
return SK_PASS;
}
-SEC("sk_msg6")
+SEC("sk_msg4")
int bpf_prog9(struct sk_msg_md *msg)
{
void *data_end = (void *)(long) msg->data_end;
@@ -419,7 +334,7 @@ int bpf_prog9(struct sk_msg_md *msg)
return SK_PASS;
}
-SEC("sk_msg7")
+SEC("sk_msg5")
int bpf_prog10(struct sk_msg_md *msg)
{
int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop;
@@ -443,7 +358,6 @@ int bpf_prog10(struct sk_msg_md *msg)
pop = bpf_map_lookup_elem(&sock_bytes, &five);
if (start_pop && pop)
bpf_msg_pop_data(msg, *start_pop, *pop, 0);
- bpf_printk("return sk drop\n");
return SK_DROP;
}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
new file mode 100644
index 000000000000..9d0c9f28cab2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} src SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} dst_sock_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} dst_sock_hash SEC(".maps");
+
+SEC("classifier/copy_sock_map")
+int copy_sock_map(void *ctx)
+{
+ struct bpf_sock *sk;
+ bool failed = false;
+ __u32 key = 0;
+
+ sk = bpf_map_lookup_elem(&src, &key);
+ if (!sk)
+ return SK_DROP;
+
+ if (bpf_map_update_elem(&dst_sock_map, &key, sk, 0))
+ failed = true;
+
+ if (bpf_map_update_elem(&dst_sock_hash, &key, sk, 0))
+ failed = true;
+
+ bpf_sk_release(sk);
+ return failed ? SK_DROP : SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c
new file mode 100644
index 000000000000..d3c5673c0218
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subprogs.c
@@ -0,0 +1,103 @@
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+const char LICENSE[] SEC("license") = "GPL";
+
+__noinline int sub1(int x)
+{
+ return x + 1;
+}
+
+static __noinline int sub5(int v);
+
+__noinline int sub2(int y)
+{
+ return sub5(y + 2);
+}
+
+static __noinline int sub3(int z)
+{
+ return z + 3 + sub1(4);
+}
+
+static __noinline int sub4(int w)
+{
+ return w + sub3(5) + sub1(6);
+}
+
+/* sub5() is an identity function, just to test weirder function layouts and
+ * call patterns
+ */
+static __noinline int sub5(int v)
+{
+ return sub1(v) - 1; /* compensates sub1()'s + 1 */
+}
+
+/* unfortunately the verifier rejects `struct task_struct *t` as an unknown
+ * pointer type, so we need to accept the pointer as an integer and then cast
+ * it inside the function
+ */
+__noinline int get_task_tgid(uintptr_t t)
+{
+ /* this ensures that CO-RE relocs work in multi-subprogs .text */
+ return BPF_CORE_READ((struct task_struct *)(void *)t, tgid);
+}
+
+int res1 = 0;
+int res2 = 0;
+int res3 = 0;
+int res4 = 0;
+
+SEC("raw_tp/sys_enter")
+int prog1(void *ctx)
+{
+ /* perform some CO-RE relocations to ensure they work with multi-prog
+ * sections correctly
+ */
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ res1 = sub1(1) + sub3(2); /* (1 + 1) + (2 + 3 + (4 + 1)) = 12 */
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int prog2(void *ctx)
+{
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ res2 = sub2(3) + sub3(4); /* (3 + 2) + (4 + 3 + (4 + 1)) = 17 */
+ return 0;
+}
+
+/* prog3 has the same section name as prog1 */
+SEC("raw_tp/sys_enter")
+int prog3(void *ctx)
+{
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ res3 = sub3(5) + 6; /* (5 + 3 + (4 + 1)) + 6 = 19 */
+ return 0;
+}
+
+/* prog4 has the same section name as prog2 */
+SEC("raw_tp/sys_exit")
+int prog4(void *ctx)
+{
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ res4 = sub4(7) + sub1(8); /* (7 + (5 + 3 + (4 + 1)) + (6 + 1)) + (8 + 1) = 36 */
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
index 458b0d69133e..553a282d816a 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -18,11 +18,11 @@
#define MAX_ULONG_STR_LEN 7
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
- volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
index b2e6f9b0894d..2b64bc563a12 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -18,11 +18,11 @@
#define MAX_ULONG_STR_LEN 7
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
{
- volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index 2d0b0b82a78a..5489823c83fc 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -19,11 +19,11 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
+const char tcp_mem_name[] = "net/ipv4/tcp_mem";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
- char tcp_mem_name[] = "net/ipv4/tcp_mem";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
@@ -45,7 +45,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
unsigned long tcp_mem[3] = {0, 0, 0};
char value[MAX_VALUE_STR_LEN];
unsigned char i, off = 0;
- int ret;
+ volatile int ret;
if (ctx->write)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
new file mode 100644
index 000000000000..b985ac4e7a81
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#ifndef ctx_ptr
+# define ctx_ptr(field) (void *)(long)(field)
+#endif
+
+#define ip4_src 0xac100164 /* 172.16.1.100 */
+#define ip4_dst 0xac100264 /* 172.16.2.100 */
+
+#define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+#define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+
+#ifndef v6_equal
+# define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \
+ a.s6_addr32[1] == b.s6_addr32[1] && \
+ a.s6_addr32[2] == b.s6_addr32[2] && \
+ a.s6_addr32[3] == b.s6_addr32[3])
+#endif
+
+enum {
+ dev_src,
+ dev_dst,
+};
+
+struct bpf_map_def SEC("maps") ifindex_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 2,
+};
+
+static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
+ __be32 addr)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ struct iphdr *ip4h;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return false;
+
+ ip4h = (struct iphdr *)(data + sizeof(struct ethhdr));
+ if ((void *)(ip4h + 1) > data_end)
+ return false;
+
+ return ip4h->daddr == addr;
+}
+
+static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb,
+ struct in6_addr addr)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ struct ipv6hdr *ip6h;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return false;
+
+ ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr));
+ if ((void *)(ip6h + 1) > data_end)
+ return false;
+
+ return v6_equal(ip6h->daddr, addr);
+}
+
+static __always_inline int get_dev_ifindex(int which)
+{
+ int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
+
+ return ifindex ? *ifindex : 0;
+}
+
+SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ __u32 *raw = data;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return TC_ACT_SHOT;
+
+ return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+{
+ __u8 zero[ETH_ALEN * 2];
+ bool redirect = false;
+
+ switch (skb->protocol) {
+ case __bpf_constant_htons(ETH_P_IP):
+ redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_src));
+ break;
+ case __bpf_constant_htons(ETH_P_IPV6):
+ redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_src);
+ break;
+ }
+
+ if (!redirect)
+ return TC_ACT_OK;
+
+ __builtin_memset(&zero, 0, sizeof(zero));
+ if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0);
+}
+
+SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+{
+ __u8 zero[ETH_ALEN * 2];
+ bool redirect = false;
+
+ switch (skb->protocol) {
+ case __bpf_constant_htons(ETH_P_IP):
+ redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_dst));
+ break;
+ case __bpf_constant_htons(ETH_P_IPV6):
+ redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_dst);
+ break;
+ }
+
+ if (!redirect)
+ return TC_ACT_OK;
+
+ __builtin_memset(&zero, 0, sizeof(zero));
+ if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
new file mode 100644
index 000000000000..d82ed3457030
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#ifndef ctx_ptr
+# define ctx_ptr(field) (void *)(long)(field)
+#endif
+
+#define AF_INET 2
+#define AF_INET6 10
+
+static __always_inline int fill_fib_params_v4(struct __sk_buff *skb,
+ struct bpf_fib_lookup *fib_params)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ struct iphdr *ip4h;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return -1;
+
+ ip4h = (struct iphdr *)(data + sizeof(struct ethhdr));
+ if ((void *)(ip4h + 1) > data_end)
+ return -1;
+
+ fib_params->family = AF_INET;
+ fib_params->tos = ip4h->tos;
+ fib_params->l4_protocol = ip4h->protocol;
+ fib_params->sport = 0;
+ fib_params->dport = 0;
+ fib_params->tot_len = bpf_ntohs(ip4h->tot_len);
+ fib_params->ipv4_src = ip4h->saddr;
+ fib_params->ipv4_dst = ip4h->daddr;
+
+ return 0;
+}
+
+static __always_inline int fill_fib_params_v6(struct __sk_buff *skb,
+ struct bpf_fib_lookup *fib_params)
+{
+ struct in6_addr *src = (struct in6_addr *)fib_params->ipv6_src;
+ struct in6_addr *dst = (struct in6_addr *)fib_params->ipv6_dst;
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ struct ipv6hdr *ip6h;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return -1;
+
+ ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr));
+ if ((void *)(ip6h + 1) > data_end)
+ return -1;
+
+ fib_params->family = AF_INET6;
+ fib_params->flowinfo = 0;
+ fib_params->l4_protocol = ip6h->nexthdr;
+ fib_params->sport = 0;
+ fib_params->dport = 0;
+ fib_params->tot_len = bpf_ntohs(ip6h->payload_len);
+ *src = ip6h->saddr;
+ *dst = ip6h->daddr;
+
+ return 0;
+}
+
+SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ __u32 *raw = data;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return TC_ACT_SHOT;
+
+ return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+static __always_inline int tc_redir(struct __sk_buff *skb)
+{
+ struct bpf_fib_lookup fib_params = { .ifindex = skb->ingress_ifindex };
+ __u8 zero[ETH_ALEN * 2];
+ int ret = -1;
+
+ switch (skb->protocol) {
+ case __bpf_constant_htons(ETH_P_IP):
+ ret = fill_fib_params_v4(skb, &fib_params);
+ break;
+ case __bpf_constant_htons(ETH_P_IPV6):
+ ret = fill_fib_params_v6(skb, &fib_params);
+ break;
+ }
+
+ if (ret)
+ return TC_ACT_OK;
+
+ ret = bpf_fib_lookup(skb, &fib_params, sizeof(fib_params), 0);
+ if (ret == BPF_FIB_LKUP_RET_NOT_FWDED || ret < 0)
+ return TC_ACT_OK;
+
+ __builtin_memset(&zero, 0, sizeof(zero));
+ if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+ return TC_ACT_SHOT;
+
+ if (ret == BPF_FIB_LKUP_RET_NO_NEIGH) {
+ struct bpf_redir_neigh nh_params = {};
+
+ nh_params.nh_family = fib_params.family;
+ __builtin_memcpy(&nh_params.ipv6_nh, &fib_params.ipv6_dst,
+ sizeof(nh_params.ipv6_nh));
+
+ return bpf_redirect_neigh(fib_params.ifindex, &nh_params,
+ sizeof(nh_params), 0);
+
+ } else if (ret == BPF_FIB_LKUP_RET_SUCCESS) {
+ void *data_end = ctx_ptr(skb->data_end);
+ struct ethhdr *eth = ctx_ptr(skb->data);
+
+ if (eth + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ __builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
+ __builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
+
+ return bpf_redirect(fib_params.ifindex, 0);
+ }
+
+ return TC_ACT_SHOT;
+}
+
+/* these are identical, but keep them separate for compatibility with the
+ * section names expected by test_tc_redirect.sh
+ */
+SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+{
+ return tc_redir(skb);
+}
+
+SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+{
+ return tc_redir(skb);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
new file mode 100644
index 000000000000..fc84a7685aa2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+
+#include <bpf/bpf_helpers.h>
+
+enum {
+ dev_src,
+ dev_dst,
+};
+
+struct bpf_map_def SEC("maps") ifindex_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 2,
+};
+
+static __always_inline int get_dev_ifindex(int which)
+{
+ int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
+
+ return ifindex ? *ifindex : 0;
+}
+
+SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+{
+ return TC_ACT_SHOT;
+}
+
+SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+{
+ return bpf_redirect_peer(get_dev_ifindex(dev_src), 0);
+}
+
+SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+{
+ return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
new file mode 100644
index 000000000000..678bd0fad29e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <stddef.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#define BPF_PROG_TEST_TCP_HDR_OPTIONS
+#include "test_tcp_hdr_options.h"
+
+#ifndef sizeof_field
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#endif
+
+__u8 test_kind = TCPOPT_EXP;
+__u16 test_magic = 0xeB9F;
+__u32 inherit_cb_flags = 0;
+
+struct bpf_test_option passive_synack_out = {};
+struct bpf_test_option passive_fin_out = {};
+
+struct bpf_test_option passive_estab_in = {};
+struct bpf_test_option passive_fin_in = {};
+
+struct bpf_test_option active_syn_out = {};
+struct bpf_test_option active_fin_out = {};
+
+struct bpf_test_option active_estab_in = {};
+struct bpf_test_option active_fin_in = {};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct hdr_stg);
+} hdr_stg_map SEC(".maps");
+
+static bool skops_want_cookie(const struct bpf_sock_ops *skops)
+{
+ return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
+}
+
+static bool skops_current_mss(const struct bpf_sock_ops *skops)
+{
+ return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS;
+}
+
+static __u8 option_total_len(__u8 flags)
+{
+ __u8 i, len = 1; /* +1 for flags */
+
+ if (!flags)
+ return 0;
+
+ /* RESEND bit does not use a byte */
+ for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++)
+ len += !!TEST_OPTION_FLAGS(flags, i);
+
+ if (test_kind == TCPOPT_EXP)
+ return len + TCP_BPF_EXPOPT_BASE_LEN;
+ else
+ return len + 2; /* +1 kind, +1 kind-len */
+}
+
+static void write_test_option(const struct bpf_test_option *test_opt,
+ __u8 *data)
+{
+ __u8 offset = 0;
+
+ data[offset++] = test_opt->flags;
+ if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS))
+ data[offset++] = test_opt->max_delack_ms;
+
+ if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND))
+ data[offset++] = test_opt->rand;
+}
+
+static int store_option(struct bpf_sock_ops *skops,
+ const struct bpf_test_option *test_opt)
+{
+ union {
+ struct tcp_exprm_opt exprm;
+ struct tcp_opt regular;
+ } write_opt;
+ int err;
+
+ if (test_kind == TCPOPT_EXP) {
+ write_opt.exprm.kind = TCPOPT_EXP;
+ write_opt.exprm.len = option_total_len(test_opt->flags);
+ write_opt.exprm.magic = __bpf_htons(test_magic);
+ write_opt.exprm.data32 = 0;
+ write_test_option(test_opt, write_opt.exprm.data);
+ err = bpf_store_hdr_opt(skops, &write_opt.exprm,
+ sizeof(write_opt.exprm), 0);
+ } else {
+ write_opt.regular.kind = test_kind;
+ write_opt.regular.len = option_total_len(test_opt->flags);
+ write_opt.regular.data32 = 0;
+ write_test_option(test_opt, write_opt.regular.data);
+ err = bpf_store_hdr_opt(skops, &write_opt.regular,
+ sizeof(write_opt.regular), 0);
+ }
+
+ if (err)
+ RET_CG_ERR(err);
+
+ return CG_OK;
+}
+
+static int parse_test_option(struct bpf_test_option *opt, const __u8 *start)
+{
+ opt->flags = *start++;
+
+ if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS))
+ opt->max_delack_ms = *start++;
+
+ if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND))
+ opt->rand = *start++;
+
+ return 0;
+}
+
+static int load_option(struct bpf_sock_ops *skops,
+ struct bpf_test_option *test_opt, bool from_syn)
+{
+ union {
+ struct tcp_exprm_opt exprm;
+ struct tcp_opt regular;
+ } search_opt;
+ int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
+
+ if (test_kind == TCPOPT_EXP) {
+ search_opt.exprm.kind = TCPOPT_EXP;
+ search_opt.exprm.len = 4;
+ search_opt.exprm.magic = __bpf_htons(test_magic);
+ search_opt.exprm.data32 = 0;
+ ret = bpf_load_hdr_opt(skops, &search_opt.exprm,
+ sizeof(search_opt.exprm), load_flags);
+ if (ret < 0)
+ return ret;
+ return parse_test_option(test_opt, search_opt.exprm.data);
+ } else {
+ search_opt.regular.kind = test_kind;
+ search_opt.regular.len = 0;
+ search_opt.regular.data32 = 0;
+ ret = bpf_load_hdr_opt(skops, &search_opt.regular,
+ sizeof(search_opt.regular), load_flags);
+ if (ret < 0)
+ return ret;
+ return parse_test_option(test_opt, search_opt.regular.data);
+ }
+}
+
+static int synack_opt_len(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option test_opt = {};
+ __u8 optlen;
+ int err;
+
+ if (!passive_synack_out.flags)
+ return CG_OK;
+
+ err = load_option(skops, &test_opt, true);
+
+ /* bpf_test_option is not found */
+ if (err == -ENOMSG)
+ return CG_OK;
+
+ if (err)
+ RET_CG_ERR(err);
+
+ optlen = option_total_len(passive_synack_out.flags);
+ if (optlen) {
+ err = bpf_reserve_hdr_opt(skops, optlen, 0);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int write_synack_opt(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option opt;
+
+ if (!passive_synack_out.flags)
+ /* We should not even be called since no header
+ * space has been reserved.
+ */
+ RET_CG_ERR(0);
+
+ opt = passive_synack_out;
+ if (skops_want_cookie(skops))
+ SET_OPTION_FLAGS(opt.flags, OPTION_RESEND);
+
+ return store_option(skops, &opt);
+}
+
+static int syn_opt_len(struct bpf_sock_ops *skops)
+{
+ __u8 optlen;
+ int err;
+
+ if (!active_syn_out.flags)
+ return CG_OK;
+
+ optlen = option_total_len(active_syn_out.flags);
+ if (optlen) {
+ err = bpf_reserve_hdr_opt(skops, optlen, 0);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int write_syn_opt(struct bpf_sock_ops *skops)
+{
+ if (!active_syn_out.flags)
+ RET_CG_ERR(0);
+
+ return store_option(skops, &active_syn_out);
+}
+
+static int fin_opt_len(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option *opt;
+ struct hdr_stg *hdr_stg;
+ __u8 optlen;
+ int err;
+
+ if (!skops->sk)
+ RET_CG_ERR(0);
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ RET_CG_ERR(0);
+
+ if (hdr_stg->active)
+ opt = &active_fin_out;
+ else
+ opt = &passive_fin_out;
+
+ optlen = option_total_len(opt->flags);
+ if (optlen) {
+ err = bpf_reserve_hdr_opt(skops, optlen, 0);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int write_fin_opt(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option *opt;
+ struct hdr_stg *hdr_stg;
+
+ if (!skops->sk)
+ RET_CG_ERR(0);
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ RET_CG_ERR(0);
+
+ if (hdr_stg->active)
+ opt = &active_fin_out;
+ else
+ opt = &passive_fin_out;
+
+ if (!opt->flags)
+ RET_CG_ERR(0);
+
+ return store_option(skops, opt);
+}
+
+static int resend_in_ack(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg *hdr_stg;
+
+ if (!skops->sk)
+ return -1;
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ return -1;
+
+ return !!hdr_stg->resend_syn;
+}
+
+static int nodata_opt_len(struct bpf_sock_ops *skops)
+{
+ int resend;
+
+ resend = resend_in_ack(skops);
+ if (resend < 0)
+ RET_CG_ERR(0);
+
+ if (resend)
+ return syn_opt_len(skops);
+
+ return CG_OK;
+}
+
+static int write_nodata_opt(struct bpf_sock_ops *skops)
+{
+ int resend;
+
+ resend = resend_in_ack(skops);
+ if (resend < 0)
+ RET_CG_ERR(0);
+
+ if (resend)
+ return write_syn_opt(skops);
+
+ return CG_OK;
+}
+
+static int data_opt_len(struct bpf_sock_ops *skops)
+{
+ /* Same as the nodata version. Mostly to show
+	 * an example usage of skops->skb_len.
+ */
+ return nodata_opt_len(skops);
+}
+
+static int write_data_opt(struct bpf_sock_ops *skops)
+{
+ return write_nodata_opt(skops);
+}
+
+static int current_mss_opt_len(struct bpf_sock_ops *skops)
+{
+ /* Reserve maximum that may be needed */
+ int err;
+
+ err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0);
+ if (err)
+ RET_CG_ERR(err);
+
+ return CG_OK;
+}
+
+static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
+{
+ __u8 tcp_flags = skops_tcp_flags(skops);
+
+ if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
+ return synack_opt_len(skops);
+
+ if (tcp_flags & TCPHDR_SYN)
+ return syn_opt_len(skops);
+
+ if (tcp_flags & TCPHDR_FIN)
+ return fin_opt_len(skops);
+
+ if (skops_current_mss(skops))
+ /* The kernel is calculating the MSS */
+ return current_mss_opt_len(skops);
+
+ if (skops->skb_len)
+ return data_opt_len(skops);
+
+ return nodata_opt_len(skops);
+}
+
+static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
+{
+ __u8 tcp_flags = skops_tcp_flags(skops);
+ struct tcphdr *th;
+
+ if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
+ return write_synack_opt(skops);
+
+ if (tcp_flags & TCPHDR_SYN)
+ return write_syn_opt(skops);
+
+ if (tcp_flags & TCPHDR_FIN)
+ return write_fin_opt(skops);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (skops->skb_len > tcp_hdrlen(th))
+ return write_data_opt(skops);
+
+ return write_nodata_opt(skops);
+}
+
+static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms)
+{
+ __u32 max_delack_us = max_delack_ms * 1000;
+
+ return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX,
+ &max_delack_us, sizeof(max_delack_us));
+}
+
+static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms)
+{
+ __u32 min_rto_us = peer_max_delack_ms * 1000;
+
+ return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us,
+ sizeof(min_rto_us));
+}
+
+static int handle_active_estab(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg init_stg = {
+ .active = true,
+ };
+ int err;
+
+ err = load_option(skops, &active_estab_in, false);
+ if (err && err != -ENOMSG)
+ RET_CG_ERR(err);
+
+ init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags,
+ OPTION_RESEND);
+ if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk,
+ &init_stg,
+ BPF_SK_STORAGE_GET_F_CREATE))
+ RET_CG_ERR(0);
+
+ if (init_stg.resend_syn)
+ /* Don't clear the write_hdr cb now because
+ * the ACK may get lost and retransmit may
+ * be needed.
+ *
+ * PARSE_ALL_HDR cb flag is set to learn if this
+		 * resend_syn option has been received by the peer.
+ *
+ * The header option will be resent until a valid
+ * packet is received at handle_parse_hdr()
+ * and all hdr cb flags will be cleared in
+ * handle_parse_hdr().
+ */
+ set_parse_all_hdr_cb_flags(skops);
+ else if (!active_fin_out.flags)
+ /* No options will be written from now */
+ clear_hdr_cb_flags(skops);
+
+ if (active_syn_out.max_delack_ms) {
+ err = set_delack_max(skops, active_syn_out.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ if (active_estab_in.max_delack_ms) {
+ err = set_rto_min(skops, active_estab_in.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int handle_passive_estab(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg init_stg = {};
+ struct tcphdr *th;
+ int err;
+
+ inherit_cb_flags = skops->bpf_sock_ops_cb_flags;
+
+ err = load_option(skops, &passive_estab_in, true);
+ if (err == -ENOENT) {
+ /* saved_syn is not found. It was in syncookie mode.
+ * We have asked the active side to resend the options
+ * in ACK, so try to find the bpf_test_option from ACK now.
+ */
+ err = load_option(skops, &passive_estab_in, false);
+ init_stg.syncookie = true;
+ }
+
+ /* ENOMSG: The bpf_test_option is not found which is fine.
+ * Bail out now for all other errors.
+ */
+ if (err && err != -ENOMSG)
+ RET_CG_ERR(err);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (th->syn) {
+ /* Fastopen */
+
+ /* Cannot clear cb_flags to stop write_hdr cb.
+ * synack is not sent yet for fast open.
+		 * Even if it was, the synack may need to be retransmitted.
+ *
+ * PARSE_ALL_HDR cb flag is set to learn
+ * if synack has reached the peer.
+ * All cb_flags will be cleared in handle_parse_hdr().
+ */
+ set_parse_all_hdr_cb_flags(skops);
+ init_stg.fastopen = true;
+ } else if (!passive_fin_out.flags) {
+ /* No options will be written from now */
+ clear_hdr_cb_flags(skops);
+ }
+
+ if (!skops->sk ||
+ !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg,
+ BPF_SK_STORAGE_GET_F_CREATE))
+ RET_CG_ERR(0);
+
+ if (passive_synack_out.max_delack_ms) {
+ err = set_delack_max(skops, passive_synack_out.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ if (passive_estab_in.max_delack_ms) {
+ err = set_rto_min(skops, passive_estab_in.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int handle_parse_hdr(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg *hdr_stg;
+ struct tcphdr *th;
+
+ if (!skops->sk)
+ RET_CG_ERR(0);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ RET_CG_ERR(0);
+
+ if (hdr_stg->resend_syn || hdr_stg->fastopen)
+ /* The PARSE_ALL_HDR cb flag was turned on
+ * to ensure that the previously written
+ * options have reached the peer.
+		 * Those previously written options include:
+ * - Active side: resend_syn in ACK during syncookie
+ * or
+ * - Passive side: SYNACK during fastopen
+ *
+ * A valid packet has been received here after
+ * the 3WHS, so the PARSE_ALL_HDR cb flag
+ * can be cleared now.
+ */
+ clear_parse_all_hdr_cb_flags(skops);
+
+ if (hdr_stg->resend_syn && !active_fin_out.flags)
+ /* Active side resent the syn option in ACK
+ * because the server was in syncookie mode.
+ * A valid packet has been received, so
+ * clear header cb flags if there is no
+ * more option to send.
+ */
+ clear_hdr_cb_flags(skops);
+
+ if (hdr_stg->fastopen && !passive_fin_out.flags)
+ /* Passive side was in fastopen.
+ * A valid packet has been received, so
+ * the SYNACK has reached the peer.
+ * Clear header cb flags if there is no more
+ * option to send.
+ */
+ clear_hdr_cb_flags(skops);
+
+ if (th->fin) {
+ struct bpf_test_option *fin_opt;
+ int err;
+
+ if (hdr_stg->active)
+ fin_opt = &active_fin_in;
+ else
+ fin_opt = &passive_fin_in;
+
+ err = load_option(skops, fin_opt, false);
+ if (err && err != -ENOMSG)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+SEC("sockops/estab")
+int estab(struct bpf_sock_ops *skops)
+{
+ int true_val = 1;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_TCP_LISTEN_CB:
+ bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
+ &true_val, sizeof(true_val));
+ set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+ break;
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ set_hdr_cb_flags(skops, 0);
+ break;
+ case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
+ return handle_parse_hdr(skops);
+ case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
+ return handle_hdr_opt_len(skops);
+ case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
+ return handle_write_hdr_opt(skops);
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ return handle_passive_estab(skops);
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ return handle_active_estab(skops);
+ }
+
+ return CG_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index 1f1966e86e9f..3e6912e4df3d 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -54,6 +54,7 @@ SEC("sockops")
int bpf_testcb(struct bpf_sock_ops *skops)
{
char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)];
+ struct bpf_sock_ops *reuse = skops;
struct tcphdr *thdr;
int good_call_rv = 0;
int bad_call_rv = 0;
@@ -62,6 +63,46 @@ int bpf_testcb(struct bpf_sock_ops *skops)
int v = 0;
int op;
+	/* Test reading fields in bpf_sock_ops using a single register */
+ asm volatile (
+ "%[reuse] = *(u32 *)(%[reuse] +96)"
+ : [reuse] "+r"(reuse)
+ :);
+
+ asm volatile (
+ "%[op] = *(u32 *)(%[skops] +96)"
+ : [op] "+r"(op)
+ : [skops] "r"(skops)
+ :);
+
+ asm volatile (
+ "r9 = %[skops];\n"
+ "r8 = *(u32 *)(r9 +164);\n"
+ "*(u32 *)(r9 +164) = r8;\n"
+ :: [skops] "r"(skops)
+ : "r9", "r8");
+
+ asm volatile (
+ "r1 = %[skops];\n"
+ "r1 = *(u64 *)(r1 +184);\n"
+ "if r1 == 0 goto +1;\n"
+ "r1 = *(u32 *)(r1 +4);\n"
+ :: [skops] "r"(skops):"r1");
+
+ asm volatile (
+ "r9 = %[skops];\n"
+ "r9 = *(u64 *)(r9 +184);\n"
+ "if r9 == 0 goto +1;\n"
+ "r9 = *(u32 *)(r9 +4);\n"
+ :: [skops] "r"(skops):"r9");
+
+ asm volatile (
+ "r1 = %[skops];\n"
+ "r2 = *(u64 *)(r1 +184);\n"
+ "if r2 == 0 goto +1;\n"
+ "r2 = *(u32 *)(r2 +4);\n"
+ :: [skops] "r"(skops):"r1", "r2");
+
op = (int) skops->op;
update_event_map(op);
diff --git a/tools/testing/selftests/bpf/progs/test_trace_ext.c b/tools/testing/selftests/bpf/progs/test_trace_ext.c
new file mode 100644
index 000000000000..d19a634d0e78
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_trace_ext.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+
+__u64 ext_called = 0;
+
+SEC("freplace/test_pkt_md_access")
+int test_pkt_md_access_new(struct __sk_buff *skb)
+{
+ ext_called = skb->len;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c b/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c
new file mode 100644
index 000000000000..52f3baf98f20
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+__u64 fentry_called = 0;
+
+SEC("fentry/test_pkt_md_access_new")
+int BPF_PROG(fentry, struct sk_buff *skb)
+{
+ fentry_called = skb->len;
+ return 0;
+}
+
+__u64 fexit_called = 0;
+
+SEC("fexit/test_pkt_md_access_new")
+int BPF_PROG(fexit, struct sk_buff *skb)
+{
+ fexit_called = skb->len;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c
new file mode 100644
index 000000000000..913acdffd90f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_varlen.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define MAX_LEN 256
+
+char buf_in1[MAX_LEN] = {};
+char buf_in2[MAX_LEN] = {};
+
+int test_pid = 0;
+bool capture = false;
+
+/* .bss */
+__u64 payload1_len1 = 0;
+__u64 payload1_len2 = 0;
+__u64 total1 = 0;
+char payload1[MAX_LEN + MAX_LEN] = {};
+
+/* .data */
+int payload2_len1 = -1;
+int payload2_len2 = -1;
+int total2 = -1;
+char payload2[MAX_LEN + MAX_LEN] = { 1 };
+
+int payload3_len1 = -1;
+int payload3_len2 = -1;
+int total3 = -1;
+char payload3[MAX_LEN + MAX_LEN] = { 1 };
+
+int payload4_len1 = -1;
+int payload4_len2 = -1;
+int total4 = -1;
+char payload4[MAX_LEN + MAX_LEN] = { 1 };
+
+SEC("raw_tp/sys_enter")
+int handler64_unsigned(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload1;
+ u64 len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len <= MAX_LEN) {
+ payload += len;
+ payload1_len1 = len;
+ }
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len <= MAX_LEN) {
+ payload += len;
+ payload1_len2 = len;
+ }
+
+ total1 = payload - (void *)payload1;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int handler64_signed(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload3;
+ long len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len >= 0) {
+ payload += len;
+ payload3_len1 = len;
+ }
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len >= 0) {
+ payload += len;
+ payload3_len2 = len;
+ }
+ total3 = payload - (void *)payload3;
+
+ return 0;
+}
+
+SEC("tp/raw_syscalls/sys_enter")
+int handler32_unsigned(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload2;
+ u32 len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len <= MAX_LEN) {
+ payload += len;
+ payload2_len1 = len;
+ }
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len <= MAX_LEN) {
+ payload += len;
+ payload2_len2 = len;
+ }
+
+ total2 = payload - (void *)payload2;
+
+ return 0;
+}
+
+SEC("tp/raw_syscalls/sys_exit")
+int handler32_signed(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload4;
+ int len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len >= 0) {
+ payload += len;
+ payload4_len1 = len;
+ }
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len >= 0) {
+ payload += len;
+ payload4_len2 = len;
+ }
+ total4 = payload - (void *)payload4;
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_exit_getpid")
+int handler_exit(void *regs)
+{
+ long bla;
+
+ if (bpf_probe_read_kernel(&bla, sizeof(bla), 0))
+ return 1;
+ else
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
index 5611b564d3b1..e9dfa0313d1b 100644
--- a/tools/testing/selftests/bpf/progs/test_vmlinux.c
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -19,12 +19,14 @@ SEC("tp/syscalls/sys_enter_nanosleep")
int handle__tp(struct trace_event_raw_sys_enter *args)
{
struct __kernel_timespec *ts;
+ long tv_nsec;
if (args->id != __NR_nanosleep)
return 0;
ts = (void *)args->args[0];
- if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+ tv_nsec != MY_TV_NSEC)
return 0;
tp_called = true;
@@ -35,12 +37,14 @@ SEC("raw_tp/sys_enter")
int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id)
{
struct __kernel_timespec *ts;
+ long tv_nsec;
if (id != __NR_nanosleep)
return 0;
ts = (void *)PT_REGS_PARM1_CORE(regs);
- if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+ tv_nsec != MY_TV_NSEC)
return 0;
raw_tp_called = true;
@@ -51,32 +55,34 @@ SEC("tp_btf/sys_enter")
int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
{
struct __kernel_timespec *ts;
+ long tv_nsec;
if (id != __NR_nanosleep)
return 0;
ts = (void *)PT_REGS_PARM1_CORE(regs);
- if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+ tv_nsec != MY_TV_NSEC)
return 0;
tp_btf_called = true;
return 0;
}
-SEC("kprobe/hrtimer_nanosleep")
-int BPF_KPROBE(handle__kprobe,
- ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+SEC("kprobe/hrtimer_start_range_ns")
+int BPF_KPROBE(handle__kprobe, struct hrtimer *timer, ktime_t tim, u64 delta_ns,
+ const enum hrtimer_mode mode)
{
- if (rqtp == MY_TV_NSEC)
+ if (tim == MY_TV_NSEC)
kprobe_called = true;
return 0;
}
-SEC("fentry/hrtimer_nanosleep")
-int BPF_PROG(handle__fentry,
- ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+SEC("fentry/hrtimer_start_range_ns")
+int BPF_PROG(handle__fentry, struct hrtimer *timer, ktime_t tim, u64 delta_ns,
+ const enum hrtimer_mode mode)
{
- if (rqtp == MY_TV_NSEC)
+ if (tim == MY_TV_NSEC)
fentry_called = true;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
new file mode 100644
index 000000000000..3d66599eee2e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp_adjust_tail_grow")
+int _xdp_adjust_tail_grow(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ unsigned int data_len;
+ int offset = 0;
+
+	/* Data length determines the test case */
+ data_len = data_end - data;
+
+ if (data_len == 54) { /* sizeof(pkt_v4) */
+ offset = 4096; /* test too large offset */
+ } else if (data_len == 74) { /* sizeof(pkt_v6) */
+ offset = 40;
+ } else if (data_len == 64) {
+ offset = 128;
+ } else if (data_len == 128) {
+ offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */
+ } else {
+ return XDP_ABORTED; /* No matching test */
+ }
+
+ if (bpf_xdp_adjust_tail(xdp, offset))
+ return XDP_DROP;
+ return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_adjust_tail.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
index b7fc85769bdc..22065a9cfb25 100644
--- a/tools/testing/selftests/bpf/progs/test_adjust_tail.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2018 Facebook
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -11,15 +11,15 @@
int _version SEC("version") = 1;
-SEC("xdp_adjust_tail")
-int _xdp_adjust_tail(struct xdp_md *xdp)
+SEC("xdp_adjust_tail_shrink")
+int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
void *data = (void *)(long)xdp->data;
int offset = 0;
- if (data_end - data == 54)
- offset = 256;
+ if (data_end - data == 54) /* sizeof(pkt_v4) */
+ offset = 256; /* shrink too much */
else
offset = 20;
if (bpf_xdp_adjust_tail(xdp, 0 - offset))
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
new file mode 100644
index 000000000000..b360ba2bd441
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/* fails to load without expected_attach_type = BPF_XDP_DEVMAP
+ * because of access to egress_ifindex
+ */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp_dm_log")
+int xdpdm_devlog(struct xdp_md *ctx)
+{
+ char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ unsigned int len = data_end - data;
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ ctx->ingress_ifindex, ctx->egress_ifindex, len);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c
new file mode 100644
index 000000000000..eb93ea95d1d8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+SEC("xdp/handler")
+int xdp_handler(struct xdp_md *xdp)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 8beecec166d9..3a67921f62b5 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -16,7 +16,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-static __u32 rol32(__u32 word, unsigned int shift)
+static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
return (word << shift) | (word >> ((-shift) & 31));
}
@@ -49,7 +49,7 @@ static __u32 rol32(__u32 word, unsigned int shift)
typedef unsigned int u32;
-static __attribute__ ((noinline))
+static __noinline
u32 jhash(const void *key, u32 length, u32 initval)
{
u32 a, b, c;
@@ -86,7 +86,7 @@ u32 jhash(const void *key, u32 length, u32 initval)
return c;
}
-__attribute__ ((noinline))
+__noinline
u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
{
a += initval;
@@ -96,7 +96,7 @@ u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
return c;
}
-__attribute__ ((noinline))
+__noinline
u32 jhash_2words(u32 a, u32 b, u32 initval)
{
return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
@@ -213,7 +213,7 @@ struct eth_hdr {
unsigned short eth_proto;
};
-static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
+static __noinline __u64 calc_offset(bool is_ipv6, bool is_icmp)
{
__u64 off = sizeof(struct eth_hdr);
if (is_ipv6) {
@@ -797,8 +797,8 @@ out:
return XDP_DROP;
}
-__attribute__ ((section("xdp-test"), used))
-int balancer_ingress(struct xdp_md *ctx)
+SEC("xdp-test-v4")
+int balancer_ingress_v4(struct xdp_md *ctx)
{
void *data = (void *)(long)ctx->data;
void *data_end = (void *)(long)ctx->data_end;
@@ -812,11 +812,27 @@ int balancer_ingress(struct xdp_md *ctx)
eth_proto = bpf_ntohs(eth->eth_proto);
if (eth_proto == ETH_P_IP)
return process_packet(data, nh_off, data_end, 0, ctx);
- else if (eth_proto == ETH_P_IPV6)
+ else
+ return XDP_DROP;
+}
+
+SEC("xdp-test-v6")
+int balancer_ingress_v6(struct xdp_md *ctx)
+{
+ void *data = (void *)(long)ctx->data;
+ void *data_end = (void *)(long)ctx->data_end;
+ struct eth_hdr *eth = data;
+ __u32 eth_proto;
+ __u32 nh_off;
+
+ nh_off = sizeof(struct eth_hdr);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+ eth_proto = bpf_ntohs(eth->eth_proto);
+ if (eth_proto == ETH_P_IPV6)
return process_packet(data, nh_off, data_end, 1, ctx);
else
return XDP_DROP;
}
-char _license[] __attribute__ ((section("license"), used)) = "GPL";
-int _version __attribute__ ((section("version"), used)) = 1;
+char _license[] SEC("license") = "GPL";
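The inlining annotations above rely on the convenience macros shipped with libbpf's bpf_helpers.h, which are presumably thin wrappers over the compiler attributes they replace, roughly:

/* as provided by <bpf/bpf_helpers.h> (paraphrased) */
#define __noinline	__attribute__((noinline))
#define __always_inline	inline __attribute__((always_inline))

Splitting the single "xdp-test" section into xdp-test-v4 and xdp-test-v6 entry points lets each address family be loaded and exercised independently.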
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
new file mode 100644
index 000000000000..59ee4f182ff8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define IFINDEX_LO 1
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
+ __uint(max_entries, 4);
+} cpu_map SEC(".maps");
+
+SEC("xdp_redir")
+int xdp_redir_prog(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&cpu_map, 1, 0);
+}
+
+SEC("xdp_dummy")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+SEC("xdp_cpumap/dummy_cm")
+int xdp_dummy_cm(struct xdp_md *ctx)
+{
+ if (ctx->ingress_ifindex == IFINDEX_LO)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
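The CPUMAP value type here is struct bpf_cpumap_val, so user space can attach xdp_dummy_cm to a map entry by storing its fd next to the queue size. A rough sketch, with the queue size and key as assumptions:

#include <linux/bpf.h>
#include <bpf/bpf.h>

/* illustrative helper, not part of the patch */
static int add_cpumap_prog(int map_fd, int prog_fd)
{
	struct bpf_cpumap_val val = {
		.qsize = 192,		/* arbitrary per-CPU queue size */
		.bpf_prog.fd = prog_fd,	/* fd of xdp_dummy_cm */
	};
	__u32 key = 0;

	return bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
}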
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
new file mode 100644
index 000000000000..0ac086497722
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 4);
+} dm_ports SEC(".maps");
+
+SEC("xdp_redir")
+int xdp_redir_prog(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&dm_ports, 1, 0);
+}
+
+/* invalid program on DEVMAP entry;
+ * SEC name means expected attach type not set
+ */
+SEC("xdp_dummy")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+/* valid program on DEVMAP entry via SEC name;
+ * has access to egress and ingress ifindex
+ */
+SEC("xdp_devmap/map_prog")
+int xdp_dummy_dm(struct xdp_md *ctx)
+{
+ char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ unsigned int len = data_end - data;
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ ctx->ingress_ifindex, ctx->egress_ifindex, len);
+
+ return XDP_PASS;
+}
+char _license[] SEC("license") = "GPL";
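Analogously, the DEVMAP value is struct bpf_devmap_val; per the comments above, the kernel should accept xdp_dummy_dm (devmap attach type set via its section name) and refuse xdp_dummy_prog. A hedged sketch of that check, where the fds and the EINVAL failure mode are assumptions:

#include <errno.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

/* illustrative check, not part of the patch */
static int check_devmap_progs(int map_fd, int good_fd, int bad_fd)
{
	struct bpf_devmap_val val = {
		.ifindex = 1,		/* redirect target, loopback here */
		.bpf_prog.fd = good_fd,	/* fd of xdp_dummy_dm */
	};
	__u32 key = 0;

	if (bpf_map_update_elem(map_fd, &key, &val, BPF_ANY))
		return -1;

	val.bpf_prog.fd = bad_fd;	/* fd of xdp_dummy_prog */
	if (!bpf_map_update_elem(map_fd, &key, &val, BPF_ANY))
		return -1;		/* unexpectedly accepted */
	return errno == EINVAL ? 0 : -1;
}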
diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c
new file mode 100644
index 000000000000..8ca7f399b670
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trace_printk.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020, Oracle and/or its affiliates.
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int trace_printk_ret = 0;
+int trace_printk_ran = 0;
+
+SEC("tp/raw_syscalls/sys_enter")
+int sys_enter(void *ctx)
+{
+ static const char fmt[] = "testing,testing %d\n";
+
+ trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt),
+ ++trace_printk_ran);
+ return 0;
+}
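A user-space driver for this program would presumably go through the generated skeleton (trace_printk.skel.h), trigger any syscall, and read the globals back from .bss; a minimal sketch under that assumption:

#include <unistd.h>
#include "trace_printk.skel.h"	/* assumed bpftool-generated skeleton */

static int check_trace_printk(void)
{
	struct trace_printk *skel;
	int ret = -1;

	skel = trace_printk__open_and_load();
	if (!skel)
		return -1;
	if (trace_printk__attach(skel))
		goto out;

	usleep(1);	/* any syscall fires the sys_enter tracepoint */

	if (skel->bss->trace_printk_ran > 0 && skel->bss->trace_printk_ret > 0)
		ret = 0;
out:
	trace_printk__destroy(skel);
	return ret;
}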
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
new file mode 100644
index 000000000000..9a4d09590b3d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <asm/unistd.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+long hits = 0;
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int bench_trigger_tp(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id)
+{
+ if (id == __NR_getpgid)
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("kprobe/__x64_sys_getpgid")
+int bench_trigger_kprobe(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int bench_trigger_fentry(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("fentry.s/__x64_sys_getpgid")
+int bench_trigger_fentry_sleep(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("fmod_ret/__x64_sys_getpgid")
+int bench_trigger_fmodret(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return -22;
+}
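All six programs hook the same cheap syscall and count invocations with an atomic add, so the benchmark's producer side is presumably just a tight getpgid() loop; a sketch of such a trigger:

#include <unistd.h>
#include <sys/syscall.h>

/* drive the attached programs; each syscall bumps 'hits' once */
static void trigger_getpgid(long iters)
{
	long i;

	for (i = 0; i < iters; i++)
		syscall(__NR_getpgid);
}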
diff --git a/tools/testing/selftests/bpf/progs/udp_limit.c b/tools/testing/selftests/bpf/progs/udp_limit.c
new file mode 100644
index 000000000000..165e3c2dd9a3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/udp_limit.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <sys/socket.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int invocations = 0, in_use = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_map SEC(".maps");
+
+SEC("cgroup/sock_create")
+int sock(struct bpf_sock *ctx)
+{
+ int *sk_storage;
+ __u32 key;
+
+ if (ctx->type != SOCK_DGRAM)
+ return 1;
+
+ sk_storage = bpf_sk_storage_get(&sk_map, ctx, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!sk_storage)
+ return 0;
+ *sk_storage = 0xdeadbeef;
+
+ __sync_fetch_and_add(&invocations, 1);
+
+ if (in_use > 0) {
+ /* BPF_CGROUP_INET_SOCK_RELEASE is _not_ called
+ * when we return an error from the BPF
+ * program!
+ */
+ return 0;
+ }
+
+ __sync_fetch_and_add(&in_use, 1);
+ return 1;
+}
+
+SEC("cgroup/sock_release")
+int sock_release(struct bpf_sock *ctx)
+{
+ int *sk_storage;
+ __u32 key;
+
+ if (ctx->type != SOCK_DGRAM)
+ return 1;
+
+ sk_storage = bpf_sk_storage_get(&sk_map, ctx, 0, 0);
+ if (!sk_storage || *sk_storage != 0xdeadbeef)
+ return 0;
+
+ __sync_fetch_and_add(&invocations, 1);
+ __sync_fetch_and_add(&in_use, -1);
+ return 1;
+}
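With both programs attached to a cgroup, a process in that cgroup should be limited to one live UDP socket at a time, since sock_create returns 0 (reject) while in_use is non-zero. A hedged usage sketch, assuming the rejection surfaces as EPERM:

#include <errno.h>
#include <unistd.h>
#include <sys/socket.h>

static int check_udp_limit(void)
{
	int fd1, fd2, ret = -1;

	fd1 = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd1 < 0)
		return -1;

	/* a second UDP socket should be refused while the first is open */
	fd2 = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd2 >= 0 || errno != EPERM) {
		if (fd2 >= 0)
			close(fd2);
		goto out;
	}

	/* after releasing the first one, creation works again */
	close(fd1);
	fd1 = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd1 < 0)
		return -1;
	ret = 0;
out:
	close(fd1);
	return ret;
}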
diff --git a/tools/testing/selftests/bpf/settings b/tools/testing/selftests/bpf/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
index a53ed58528d6..bfff82be3fc1 100755
--- a/tools/testing/selftests/bpf/tcp_client.py
+++ b/tools/testing/selftests/bpf/tcp_client.py
@@ -34,7 +34,7 @@ serverPort = int(sys.argv[1])
# create active socket
sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
try:
- sock.connect(('localhost', serverPort))
+ sock.connect(('::1', serverPort))
except socket.error as e:
sys.exit(1)
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
index 0ca60d193bed..42ab8882f00f 100755
--- a/tools/testing/selftests/bpf/tcp_server.py
+++ b/tools/testing/selftests/bpf/tcp_server.py
@@ -38,7 +38,7 @@ serverSocket = None
# create passive socket
serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
-try: serverSocket.bind(('localhost', 0))
+try: serverSocket.bind(('::1', 0))
except socket.error as msg:
print('bind fails: ' + str(msg))
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index ac349a5cea7e..2db3c60e1e61 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -85,6 +85,23 @@ make_with_tmpdir() {
echo
}
+make_doc_and_clean() {
+ echo -e "\$PWD: $PWD"
+ echo -e "command: make -s $* doc >/dev/null"
+ RST2MAN_OPTS="--exit-status=1" make $J -s $* doc
+ if [ $? -ne 0 ] ; then
+ ERROR=1
+ printf "FAILURE: Errors or warnings when building documentation\n"
+ fi
+ (
+ if [ $# -ge 1 ] ; then
+ cd ${@: -1}
+ fi
+ make -s doc-clean
+ )
+ echo
+}
+
echo "Trying to build bpftool"
echo -e "... through kbuild\n"
@@ -145,3 +162,7 @@ make_and_clean
make_with_tmpdir OUTPUT
make_with_tmpdir O
+
+echo -e "Checking documentation build\n"
+# From tools/bpf/bpftool
+make_doc_and_clean
diff --git a/tools/testing/selftests/bpf/test_bpftool_metadata.sh b/tools/testing/selftests/bpf/test_bpftool_metadata.sh
new file mode 100755
index 000000000000..1bf81b49457a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_metadata.sh
@@ -0,0 +1,82 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TESTNAME=bpftool_metadata
+BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
+BPF_DIR=$BPF_FS/test_$TESTNAME
+
+_cleanup()
+{
+ set +e
+ rm -rf $BPF_DIR 2> /dev/null
+}
+
+cleanup_skip()
+{
+ echo "selftests: $TESTNAME [SKIP]"
+ _cleanup
+
+ exit $ksft_skip
+}
+
+cleanup()
+{
+ if [ "$?" = 0 ]; then
+ echo "selftests: $TESTNAME [PASS]"
+ else
+ echo "selftests: $TESTNAME [FAILED]"
+ fi
+ _cleanup
+}
+
+if [ $(id -u) -ne 0 ]; then
+ echo "selftests: $TESTNAME [SKIP] Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ -z "$BPF_FS" ]; then
+ echo "selftests: $TESTNAME [SKIP] Could not run test without bpffs mounted"
+ exit $ksft_skip
+fi
+
+if ! bpftool version > /dev/null 2>&1; then
+ echo "selftests: $TESTNAME [SKIP] Could not run test without bpftool"
+ exit $ksft_skip
+fi
+
+set -e
+
+trap cleanup_skip EXIT
+
+mkdir $BPF_DIR
+
+trap cleanup EXIT
+
+bpftool prog load metadata_unused.o $BPF_DIR/unused
+
+METADATA_PLAIN="$(bpftool prog)"
+echo "$METADATA_PLAIN" | grep 'a = "foo"' > /dev/null
+echo "$METADATA_PLAIN" | grep 'b = 1' > /dev/null
+
+bpftool prog --json | grep '"metadata":{"a":"foo","b":1}' > /dev/null
+
+bpftool map | grep 'metadata.rodata' > /dev/null
+
+rm $BPF_DIR/unused
+
+bpftool prog load metadata_used.o $BPF_DIR/used
+
+METADATA_PLAIN="$(bpftool prog)"
+echo "$METADATA_PLAIN" | grep 'a = "bar"' > /dev/null
+echo "$METADATA_PLAIN" | grep 'b = 2' > /dev/null
+
+bpftool prog --json | grep '"metadata":{"a":"bar","b":2}' > /dev/null
+
+bpftool map | grep 'metadata.rodata' > /dev/null
+
+rm $BPF_DIR/used
+
+exit 0
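The metadata the script greps for presumably comes from bpf_metadata_-prefixed read-only globals in metadata_unused.o / metadata_used.o, roughly of this shape (names and values inferred from the expected output above):

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* bpftool strips the bpf_metadata_ prefix when printing 'a = "foo"' */
volatile const char bpf_metadata_a[] SEC(".rodata") = "foo";
volatile const int bpf_metadata_b SEC(".rodata") = 1;

SEC("cgroup_skb/egress")
int prog(struct __sk_buff *skb)
{
	return 1;
}

char _license[] SEC("license") = "GPL";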
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
index 655729004391..d946252a25bb 100644
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -74,22 +74,7 @@ int main(int argc, char **argv)
goto out;
}
- if (setup_cgroup_environment()) {
- printf("Failed to setup cgroup environment\n");
- goto err;
- }
-
- /* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- if (cgroup_fd < 0) {
- printf("Failed to create test cgroup\n");
- goto err;
- }
-
- if (join_cgroup(TEST_CGROUP)) {
- printf("Failed to join cgroup\n");
- goto err;
- }
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
/* Attach the bpf program */
if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
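Several tests in this series switch to the new cgroup_setup_and_join() helper; judging from the code it replaces, the helper presumably folds the three previous steps and their error reporting into one call, along these lines:

#include <errno.h>
#include <stdio.h>
#include "cgroup_helpers.h"

/* sketch of the consolidated helper in cgroup_helpers.c (paraphrased) */
int cgroup_setup_and_join(const char *path)
{
	int cg_fd;

	if (setup_cgroup_environment()) {
		fprintf(stderr, "Failed to setup cgroup environment\n");
		return -EINVAL;
	}

	cg_fd = create_and_get_cgroup(path);
	if (cg_fd < 0) {
		fprintf(stderr, "Failed to create test cgroup\n");
		cleanup_cgroup_environment();
		return cg_fd;
	}

	if (join_cgroup(path)) {
		fprintf(stderr, "Failed to join cgroup\n");
		cleanup_cgroup_environment();
		return -EINVAL;
	}
	return cg_fd;
}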
diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
index ed253f252cd0..ec53b1ef90d2 100644
--- a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
+++ b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
@@ -156,4 +156,5 @@ cleanup:
bpf_object__close(obj);
}
}
+ return 0;
}
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index d850fb9076b5..804dddd97d4c 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -33,21 +33,10 @@ int main(int argc, char **argv)
goto out;
}
- if (setup_cgroup_environment()) {
- printf("Failed to load DEV_CGROUP program\n");
- goto err;
- }
-
- /* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
if (cgroup_fd < 0) {
printf("Failed to create test cgroup\n");
- goto err;
- }
-
- if (join_cgroup(TEST_CGROUP)) {
- printf("Failed to join cgroup\n");
- goto err;
+ goto out;
}
/* Attach bpf program */
diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh
index 9df0d2ac45f8..4f6444bcd53f 100755
--- a/tools/testing/selftests/bpf/test_kmod.sh
+++ b/tools/testing/selftests/bpf/test_kmod.sh
@@ -10,7 +10,13 @@ if [ "$(id -u)" != "0" ]; then
exit $ksft_skip
fi
-SRC_TREE=../../../../
+if [ "$building_out_of_srctree" ]; then
+ # We are in linux-build/kselftest/bpf
+ OUTPUT=../../
+else
+ # We are in linux/tools/testing/selftests/bpf
+ OUTPUT=../../../../
+fi
test_run()
{
@@ -19,8 +25,8 @@ test_run()
echo "[ JIT enabled:$1 hardened:$2 ]"
dmesg -C
- if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then
- insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null
+ if [ -f ${OUTPUT}/lib/test_bpf.ko ]; then
+ insmod ${OUTPUT}/lib/test_bpf.ko 2> /dev/null
if [ $? -ne 0 ]; then
rc=1
fi
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
index 785eabf2a593..5620919fde9e 100755
--- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -140,7 +140,7 @@ ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
-kill -INT $!
+kill -TERM $!
if [[ $(< $TMP_FILE) != "foobar" ]]; then
exit 1
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index c6766b2cff85..0d92ebcb335d 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -789,19 +789,19 @@ static void test_sockmap(unsigned int tasks, void *data)
}
err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_PARSER);
- if (err) {
+ if (!err) {
printf("Failed empty parser prog detach\n");
goto out_sockmap;
}
err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_VERDICT);
- if (err) {
+ if (!err) {
printf("Failed empty verdict prog detach\n");
goto out_sockmap;
}
err = bpf_prog_detach(fd, BPF_SK_MSG_VERDICT);
- if (err) {
+ if (!err) {
printf("Failed empty msg verdict prog detach\n");
goto out_sockmap;
}
@@ -1090,19 +1090,19 @@ static void test_sockmap(unsigned int tasks, void *data)
assert(status == 0);
}
- err = bpf_prog_detach(map_fd_rx, __MAX_BPF_ATTACH_TYPE);
+ err = bpf_prog_detach2(parse_prog, map_fd_rx, __MAX_BPF_ATTACH_TYPE);
if (!err) {
printf("Detached an invalid prog type.\n");
goto out_sockmap;
}
- err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_PARSER);
+ err = bpf_prog_detach2(parse_prog, map_fd_rx, BPF_SK_SKB_STREAM_PARSER);
if (err) {
printf("Failed parser prog detach\n");
goto out_sockmap;
}
- err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_VERDICT);
+ err = bpf_prog_detach2(verdict_prog, map_fd_rx, BPF_SK_SKB_STREAM_VERDICT);
if (err) {
printf("Failed parser prog detach\n");
goto out_sockmap;
@@ -1274,6 +1274,8 @@ static void __run_parallel(unsigned int tasks,
pid_t pid[tasks];
int i;
+ fflush(stdout);
+
for (i = 0; i < tasks; i++) {
pid[i] = fork();
if (pid[i] == 0) {
@@ -1394,23 +1396,25 @@ static void test_map_rdonly(void)
key = 1;
value = 1234;
- /* Insert key=1 element. */
+ /* Try to insert key=1 element. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
errno == EPERM);
- /* Check that key=2 is not found. */
+ /* Check that key=1 is not found. */
assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+
+ close(fd);
}
-static void test_map_wronly(void)
+static void test_map_wronly_hash(void)
{
int fd, key = 0, value = 0;
fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
MAP_SIZE, map_flags | BPF_F_WRONLY);
if (fd < 0) {
- printf("Failed to create map for read only test '%s'!\n",
+ printf("Failed to create map for write only test '%s'!\n",
strerror(errno));
exit(1);
}
@@ -1420,9 +1424,49 @@ static void test_map_wronly(void)
/* Insert key=1 element. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
- /* Check that key=2 is not found. */
+ /* Check that reading elements and keys from the map is not allowed. */
assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+
+ close(fd);
+}
+
+static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
+{
+ int fd, value = 0;
+
+ assert(map_type == BPF_MAP_TYPE_QUEUE ||
+ map_type == BPF_MAP_TYPE_STACK);
+ fd = bpf_create_map(map_type, 0, sizeof(value), MAP_SIZE,
+ map_flags | BPF_F_WRONLY);
+ /* Stack/Queue maps do not support BPF_F_NO_PREALLOC */
+ if (map_flags & BPF_F_NO_PREALLOC) {
+ assert(fd < 0 && errno == EINVAL);
+ return;
+ }
+ if (fd < 0) {
+ printf("Failed to create map '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ value = 1234;
+ assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0);
+
+ /* Peek element should fail */
+ assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM);
+
+ /* Pop element should fail */
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 &&
+ errno == EPERM);
+
+ close(fd);
+}
+
+static void test_map_wronly(void)
+{
+ test_map_wronly_hash();
+ test_map_wronly_stack_or_queue(BPF_MAP_TYPE_STACK);
+ test_map_wronly_stack_or_queue(BPF_MAP_TYPE_QUEUE);
}
static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
index c1da5404454a..a7b9a69f4fd5 100644
--- a/tools/testing/selftests/bpf/test_netcnt.c
+++ b/tools/testing/selftests/bpf/test_netcnt.c
@@ -58,22 +58,9 @@ int main(int argc, char **argv)
goto out;
}
- if (setup_cgroup_environment()) {
- printf("Failed to load bpf program\n");
- goto err;
- }
-
- /* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- if (cgroup_fd < 0) {
- printf("Failed to create test cgroup\n");
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+ if (cgroup_fd < 0)
goto err;
- }
-
- if (join_cgroup(TEST_CGROUP)) {
- printf("Failed to join cgroup\n");
- goto err;
- }
/* Attach bpf program */
if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
@@ -82,9 +69,9 @@ int main(int argc, char **argv)
}
if (system("which ping6 &>/dev/null") == 0)
- assert(!system("ping6 localhost -c 10000 -f -q > /dev/null"));
+ assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null"));
else
- assert(!system("ping -6 localhost -c 10000 -f -q > /dev/null"));
+ assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null"));
if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
&prog_cnt)) {
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 8294ae3ffb3c..43c9cda199b8 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -318,6 +318,9 @@ class DebugfsDir:
continue
if os.path.isfile(p):
+ # We need to init trap_flow_action_cookie before reading it
+ if f == "trap_flow_action_cookie":
+ cmd('echo deadbeef > %s/%s' % (path, f))
_, out = cmd('cat %s/%s' % (path, f))
dfs[f] = out.strip()
elif os.path.isdir(p):
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index b521e0a512b6..22943b58d752 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -12,6 +12,9 @@
#include <string.h>
#include <execinfo.h> /* backtrace */
+#define EXIT_NO_TEST 2
+#define EXIT_ERR_SETUP_INFRA 3
+
/* defined in test_progs.h */
struct test_env env = {};
@@ -111,13 +114,31 @@ static void reset_affinity() {
if (err < 0) {
stdio_restore();
fprintf(stderr, "Failed to reset process affinity: %d!\n", err);
- exit(-1);
+ exit(EXIT_ERR_SETUP_INFRA);
}
err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
if (err < 0) {
stdio_restore();
fprintf(stderr, "Failed to reset thread affinity: %d!\n", err);
- exit(-1);
+ exit(EXIT_ERR_SETUP_INFRA);
+ }
+}
+
+static void save_netns(void)
+{
+ env.saved_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (env.saved_netns_fd == -1) {
+ perror("open(/proc/self/ns/net)");
+ exit(EXIT_ERR_SETUP_INFRA);
+ }
+}
+
+static void restore_netns(void)
+{
+ if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) {
+ stdio_restore();
+ perror("setns(CLONE_NEWNS)");
+ exit(EXIT_ERR_SETUP_INFRA);
}
}
@@ -138,8 +159,6 @@ void test__end_subtest()
test->test_num, test->subtest_num,
test->subtest_name, sub_error_cnt ? "FAIL" : "OK");
- reset_affinity();
-
free(test->subtest_name);
test->subtest_name = NULL;
}
@@ -222,23 +241,6 @@ int test__join_cgroup(const char *path)
return fd;
}
-struct ipv4_packet pkt_v4 = {
- .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
- .iph.ihl = 5,
- .iph.protocol = IPPROTO_TCP,
- .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
- .tcp.urg_ptr = 123,
- .tcp.doff = 5,
-};
-
-struct ipv6_packet pkt_v6 = {
- .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
- .iph.nexthdr = IPPROTO_TCP,
- .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
- .tcp.urg_ptr = 123,
- .tcp.doff = 5,
-};
-
int bpf_find_map(const char *test, struct bpf_object *obj, const char *name)
{
struct bpf_map *map;
@@ -351,25 +353,13 @@ int extract_build_id(char *build_id, size_t size)
len = size;
memcpy(build_id, line, len);
build_id[len] = '\0';
+ free(line);
return 0;
err:
fclose(fp);
return -1;
}
-void *spin_lock_thread(void *arg)
-{
- __u32 duration, retval;
- int err, prog_fd = *(u32 *) arg;
-
- err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
- pthread_exit(arg);
-}
-
/* extern declarations for test funcs */
#define DEFINE_TEST(name) extern void test_##name(void);
#include <prog_tests/tests.h>
@@ -395,6 +385,8 @@ enum ARG_KEYS {
ARG_TEST_NAME_BLACKLIST = 'b',
ARG_VERIFIER_STATS = 's',
ARG_VERBOSE = 'v',
+ ARG_GET_TEST_CNT = 'c',
+ ARG_LIST_TEST_NAMES = 'l',
};
static const struct argp_option opts[] = {
@@ -408,6 +400,10 @@ static const struct argp_option opts[] = {
"Output verifier statistics", },
{ "verbose", ARG_VERBOSE, "LEVEL", OPTION_ARG_OPTIONAL,
"Verbose output (use -vv or -vvv for progressively verbose output)" },
+ { "count", ARG_GET_TEST_CNT, NULL, 0,
+ "Get number of selected top-level tests " },
+ { "list", ARG_LIST_TEST_NAMES, NULL, 0,
+ "List test names that would run (without running them) " },
{},
};
@@ -420,6 +416,18 @@ static int libbpf_print_fn(enum libbpf_print_level level,
return 0;
}
+static void free_str_set(const struct str_set *set)
+{
+ int i;
+
+ if (!set)
+ return;
+
+ for (i = 0; i < set->cnt; i++)
+ free((void *)set->strs[i]);
+ free(set->strs);
+}
+
static int parse_str_list(const char *s, struct str_set *set)
{
char *input, *state = NULL, *next, **tmp, **strs = NULL;
@@ -455,67 +463,6 @@ err:
return -ENOMEM;
}
-int parse_num_list(const char *s, struct test_selector *sel)
-{
- int i, set_len = 0, new_len, num, start = 0, end = -1;
- bool *set = NULL, *tmp, parsing_end = false;
- char *next;
-
- while (s[0]) {
- errno = 0;
- num = strtol(s, &next, 10);
- if (errno)
- return -errno;
-
- if (parsing_end)
- end = num;
- else
- start = num;
-
- if (!parsing_end && *next == '-') {
- s = next + 1;
- parsing_end = true;
- continue;
- } else if (*next == ',') {
- parsing_end = false;
- s = next + 1;
- end = num;
- } else if (*next == '\0') {
- parsing_end = false;
- s = next;
- end = num;
- } else {
- return -EINVAL;
- }
-
- if (start > end)
- return -EINVAL;
-
- if (end + 1 > set_len) {
- new_len = end + 1;
- tmp = realloc(set, new_len);
- if (!tmp) {
- free(set);
- return -ENOMEM;
- }
- for (i = set_len; i < start; i++)
- tmp[i] = false;
- set = tmp;
- set_len = new_len;
- }
- for (i = start; i <= end; i++)
- set[i] = true;
- }
-
- if (!set)
- return -EINVAL;
-
- sel->num_set = set;
- sel->num_set_len = set_len;
-
- return 0;
-}
-
extern int extra_prog_load_log_flags;
static error_t parse_arg(int key, char *arg, struct argp_state *state)
@@ -529,13 +476,15 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
if (subtest_str) {
*subtest_str = '\0';
if (parse_num_list(subtest_str + 1,
- &env->subtest_selector)) {
+ &env->subtest_selector.num_set,
+ &env->subtest_selector.num_set_len)) {
fprintf(stderr,
"Failed to parse subtest numbers.\n");
return -EINVAL;
}
}
- if (parse_num_list(arg, &env->test_selector)) {
+ if (parse_num_list(arg, &env->test_selector.num_set,
+ &env->test_selector.num_set_len)) {
fprintf(stderr, "Failed to parse test numbers.\n");
return -EINVAL;
}
@@ -587,6 +536,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
}
}
break;
+ case ARG_GET_TEST_CNT:
+ env->get_test_cnt = true;
+ break;
+ case ARG_LIST_TEST_NAMES:
+ env->list_test_names = true;
+ break;
case ARGP_KEY_ARG:
argp_usage(state);
break;
@@ -663,7 +618,9 @@ int cd_flavor_subdir(const char *exec_name)
if (!flavor)
return 0;
flavor++;
- fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor);
+ if (env.verbosity > VERBOSE_NONE)
+ fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor);
+
return chdir(flavor);
}
@@ -719,6 +676,7 @@ int main(int argc, char **argv)
return -1;
}
+ save_netns();
stdio_hijack();
for (i = 0; i < prog_test_cnt; i++) {
struct prog_test_def *test = &prog_test_defs[i];
@@ -730,6 +688,17 @@ int main(int argc, char **argv)
test->test_num, test->test_name))
continue;
+ if (env.get_test_cnt) {
+ env.succ_cnt++;
+ continue;
+ }
+
+ if (env.list_test_names) {
+ fprintf(env.stdout, "%s\n", test->test_name);
+ env.succ_cnt++;
+ continue;
+ }
+
test->run_test();
/* ensure last sub-test is finalized properly */
if (test->subtest_name)
@@ -749,19 +718,34 @@ int main(int argc, char **argv)
test->error_cnt ? "FAIL" : "OK");
reset_affinity();
+ restore_netns();
if (test->need_cgroup_cleanup)
cleanup_cgroup_environment();
}
stdio_restore();
+
+ if (env.get_test_cnt) {
+ printf("%d\n", env.succ_cnt);
+ goto out;
+ }
+
+ if (env.list_test_names)
+ goto out;
+
fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
- free(env.test_selector.blacklist.strs);
- free(env.test_selector.whitelist.strs);
+out:
+ free_str_set(&env.test_selector.blacklist);
+ free_str_set(&env.test_selector.whitelist);
free(env.test_selector.num_set);
- free(env.subtest_selector.blacklist.strs);
- free(env.subtest_selector.whitelist.strs);
+ free_str_set(&env.subtest_selector.blacklist);
+ free_str_set(&env.subtest_selector.whitelist);
free(env.subtest_selector.num_set);
+ close(env.saved_netns_fd);
+
+ if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
+ return EXIT_NO_TEST;
return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
}
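parse_num_list() moves out of test_progs.c; from the new call sites its signature is presumably int parse_num_list(const char *s, bool **num_set, int *num_set_len), exported via testing_helpers.h. A small usage sketch under that assumption:

#include <stdbool.h>
#include <stdlib.h>
#include "testing_helpers.h"

/* hypothetical helper: was test 'num' selected by a spec like "1-3,7"? */
static bool test_selected(const char *spec, int num)
{
	bool *set = NULL;
	int set_len = 0;
	bool ret;

	if (parse_num_list(spec, &set, &set_len))
		return false;
	ret = num < set_len && set[num];
	free(set);
	return ret;
}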
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index f4aff6b8284b..238f5f61189e 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -37,6 +37,7 @@ typedef __u16 __sum16;
#include "bpf_util.h"
#include <bpf/bpf_endian.h>
#include "trace_helpers.h"
+#include "testing_helpers.h"
#include "flow_dissector_load.h"
enum verbosity {
@@ -65,6 +66,8 @@ struct test_env {
enum verbosity verbosity;
bool jit_enabled;
+ bool get_test_cnt;
+ bool list_test_names;
struct prog_test_def *test;
FILE *stdout;
@@ -77,6 +80,8 @@ struct test_env {
int sub_succ_cnt; /* successful sub-tests */
int fail_cnt; /* total failed tests + sub-tests */
int skip_cnt; /* skipped tests */
+
+ int saved_netns_fd;
};
extern struct test_env env;
@@ -87,23 +92,12 @@ extern void test__skip(void);
extern void test__fail(void);
extern int test__join_cgroup(const char *path);
-#define MAGIC_BYTES 123
-
-/* ipv4 test vector */
-struct ipv4_packet {
- struct ethhdr eth;
- struct iphdr iph;
- struct tcphdr tcp;
-} __packed;
-extern struct ipv4_packet pkt_v4;
-
-/* ipv6 test vector */
-struct ipv6_packet {
- struct ethhdr eth;
- struct ipv6hdr iph;
- struct tcphdr tcp;
-} __packed;
-extern struct ipv6_packet pkt_v6;
+#define PRINT_FAIL(format...) \
+ ({ \
+ test__fail(); \
+ fprintf(stdout, "%s:FAIL:%d ", __func__, __LINE__); \
+ fprintf(stdout, ##format); \
+ })
#define _CHECK(condition, tag, duration, format...) ({ \
int __ret = !!(condition); \
@@ -136,20 +130,83 @@ extern struct ipv6_packet pkt_v6;
#define CHECK_ATTR(condition, tag, format...) \
_CHECK(condition, tag, tattr.duration, format)
-#define MAGIC_VAL 0x1234
-#define NUM_ITER 100000
-#define VIP_NUM 5
+#define ASSERT_EQ(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act == ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld != expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
+#define ASSERT_STREQ(actual, expected, name) ({ \
+ static int duration = 0; \
+ const char *___act = actual; \
+ const char *___exp = expected; \
+ bool ___ok = strcmp(___act, ___exp) == 0; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual '%s' != expected '%s'\n", \
+ (name), ___act, ___exp); \
+ ___ok; \
+})
+
+#define ASSERT_OK(res, name) ({ \
+ static int duration = 0; \
+ long long ___res = (res); \
+ bool ___ok = ___res == 0; \
+ CHECK(!___ok, (name), "unexpected error: %lld\n", ___res); \
+ ___ok; \
+})
+
+#define ASSERT_ERR(res, name) ({ \
+ static int duration = 0; \
+ long long ___res = (res); \
+ bool ___ok = ___res < 0; \
+ CHECK(!___ok, (name), "unexpected success: %lld\n", ___res); \
+ ___ok; \
+})
+
+#define ASSERT_NULL(ptr, name) ({ \
+ static int duration = 0; \
+ const void *___res = (ptr); \
+ bool ___ok = !___res; \
+ CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \
+ ___ok; \
+})
+
+#define ASSERT_OK_PTR(ptr, name) ({ \
+ static int duration = 0; \
+ const void *___res = (ptr); \
+ bool ___ok = !IS_ERR_OR_NULL(___res); \
+ CHECK(!___ok, (name), \
+ "unexpected error: %ld\n", PTR_ERR(___res)); \
+ ___ok; \
+})
+
+#define ASSERT_ERR_PTR(ptr, name) ({ \
+ static int duration = 0; \
+ const void *___res = (ptr); \
+ bool ___ok = IS_ERR(___res) \
+ CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \
+ ___ok; \
+})
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
}
+static inline void *u64_to_ptr(__u64 ptr)
+{
+ return (void *) (unsigned long) ptr;
+}
+
int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
int compare_map_keys(int map1_fd, int map2_fd);
int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
int extract_build_id(char *build_id, size_t size);
-void *spin_lock_thread(void *arg);
#ifdef __x86_64__
#define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
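The new ASSERT_* helpers wrap CHECK() so individual tests no longer need their own duration and format boilerplate; a brief usage sketch inside a hypothetical prog_tests function (the skeleton name is an assumption):

#include "test_progs.h"
#include "example.skel.h"	/* hypothetical skeleton */

void test_example(void)
{
	struct example *skel;

	skel = example__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_load"))
		return;

	ASSERT_OK(example__attach(skel), "skel_attach");
	ASSERT_EQ(skel->bss->counter, 0, "counter_starts_at_zero");

	example__destroy(skel);
}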
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
index 356351c0ac28..4a64306728ab 100644
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
@@ -160,16 +160,10 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CGROUP_PATH);
+ cgfd = cgroup_setup_and_join(CGROUP_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CGROUP_PATH))
- goto err;
-
if (send_packet(argv[1]))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index 52bf14955797..9613f7538840 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -464,16 +464,10 @@ int main(int argc, char **argv)
int cgfd = -1;
int err = 0;
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
+ cgfd = cgroup_setup_and_join(CG_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CG_PATH))
- goto err;
-
if (run_tests(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 61fd95b89af8..b8c72c1d9cf7 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -677,7 +677,7 @@ static int bind4_prog_load(const struct sock_addr_test *test)
uint8_t u4_addr8[4];
uint16_t u4_addr16[2];
uint32_t u4_addr32;
- } ip4;
+ } ip4, port;
struct sockaddr_in addr4_rw;
if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) {
@@ -685,6 +685,8 @@ static int bind4_prog_load(const struct sock_addr_test *test)
return -1;
}
+ port.u4_addr32 = htons(SERV4_PORT);
+
if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
(struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1)
return -1;
@@ -696,49 +698,65 @@ static int bind4_prog_load(const struct sock_addr_test *test)
/* if (sk.family == AF_INET && */
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, family)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 24),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 32),
/* (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, type)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
BPF_JMP_A(1),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 20),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 28),
/* 1st_byte_of_user_ip4 == expected && */
BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, user_ip4)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 18),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 26),
/* 2nd_byte_of_user_ip4 == expected && */
BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, user_ip4) + 1),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 16),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 24),
/* 3rd_byte_of_user_ip4 == expected && */
BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, user_ip4) + 2),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 14),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 22),
/* 4th_byte_of_user_ip4 == expected && */
BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, user_ip4) + 3),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 12),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 20),
/* 1st_half_of_user_ip4 == expected && */
BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, user_ip4)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 10),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 18),
/* 2nd_half_of_user_ip4 == expected && */
BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, user_ip4) + 2),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 8),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 16),
- /* whole_user_ip4 == expected) { */
+ /* whole_user_ip4 == expected && */
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, user_ip4)),
BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */
+ BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 12),
+
+ /* 1st_byte_of_user_port == expected && */
+ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr8[0], 10),
+
+ /* 1st_half_of_user_port == expected && */
+ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr16[0], 8),
+
+ /* user_port == expected) { */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_port)),
+ BPF_LD_IMM64(BPF_REG_8, port.u4_addr32), /* See [2]. */
BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4),
/* user_ip4 = addr4_rw.sin_addr */
@@ -1620,16 +1638,10 @@ int main(int argc, char **argv)
exit(err);
}
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
+ cgfd = cgroup_setup_and_join(CG_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CG_PATH))
- goto err;
-
if (run_tests(cgfd))
goto err;
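The retargeted jump offsets in the bind4_prog_load() hunk above follow directly from the insertion: the new user_port checks add eight instructions before the old landing point (BPF_LDX_MEM + BPF_JMP_IMM for the byte load, the same pair for the half-word load, and BPF_LDX_MEM + the two-slot BPF_LD_IMM64 + BPF_JMP_REG for the full word), so every earlier forward jump grows by 8, e.g. 24 becomes 32 and the whole-ip4 compare's 4 becomes 12, while the unchanged final jump of 4 now belongs to the user_port word compare.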
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c
deleted file mode 100644
index f0fc103261a4..000000000000
--- a/tools/testing/selftests/bpf/test_sock_fields.c
+++ /dev/null
@@ -1,490 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2019 Facebook */
-
-#include <sys/socket.h>
-#include <sys/epoll.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-
-enum bpf_addr_array_idx {
- ADDR_SRV_IDX,
- ADDR_CLI_IDX,
- __NR_BPF_ADDR_ARRAY_IDX,
-};
-
-enum bpf_result_array_idx {
- EGRESS_SRV_IDX,
- EGRESS_CLI_IDX,
- INGRESS_LISTEN_IDX,
- __NR_BPF_RESULT_ARRAY_IDX,
-};
-
-enum bpf_linum_array_idx {
- EGRESS_LINUM_IDX,
- INGRESS_LINUM_IDX,
- __NR_BPF_LINUM_ARRAY_IDX,
-};
-
-struct bpf_spinlock_cnt {
- struct bpf_spin_lock lock;
- __u32 cnt;
-};
-
-#define CHECK(condition, tag, format...) ({ \
- int __ret = !!(condition); \
- if (__ret) { \
- printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
- printf(format); \
- printf("\n"); \
- exit(-1); \
- } \
-})
-
-#define TEST_CGROUP "/test-bpf-sock-fields"
-#define DATA "Hello BPF!"
-#define DATA_LEN sizeof(DATA)
-
-static struct sockaddr_in6 srv_sa6, cli_sa6;
-static int sk_pkt_out_cnt10_fd;
-static int sk_pkt_out_cnt_fd;
-static int linum_map_fd;
-static int addr_map_fd;
-static int tp_map_fd;
-static int sk_map_fd;
-
-static __u32 addr_srv_idx = ADDR_SRV_IDX;
-static __u32 addr_cli_idx = ADDR_CLI_IDX;
-
-static __u32 egress_srv_idx = EGRESS_SRV_IDX;
-static __u32 egress_cli_idx = EGRESS_CLI_IDX;
-static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX;
-
-static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
-static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
-
-static void init_loopback6(struct sockaddr_in6 *sa6)
-{
- memset(sa6, 0, sizeof(*sa6));
- sa6->sin6_family = AF_INET6;
- sa6->sin6_addr = in6addr_loopback;
-}
-
-static void print_sk(const struct bpf_sock *sk)
-{
- char src_ip4[24], dst_ip4[24];
- char src_ip6[64], dst_ip6[64];
-
- inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
- inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
- inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
- inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
-
- printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
- "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
- "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
- sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
- sk->mark, sk->priority,
- sk->src_ip4, src_ip4,
- sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
- src_ip6, sk->src_port,
- sk->dst_ip4, dst_ip4,
- sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
- dst_ip6, ntohs(sk->dst_port));
-}
-
-static void print_tp(const struct bpf_tcp_sock *tp)
-{
- printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
- "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
- "rate_delivered:%u rate_interval_us:%u packets_out:%u "
- "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
- "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
- "bytes_received:%llu bytes_acked:%llu\n",
- tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
- tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
- tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
- tp->packets_out, tp->retrans_out, tp->total_retrans,
- tp->segs_in, tp->data_segs_in, tp->segs_out,
- tp->data_segs_out, tp->lost_out, tp->sacked_out,
- tp->bytes_received, tp->bytes_acked);
-}
-
-static void check_result(void)
-{
- struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
- struct bpf_sock srv_sk, cli_sk, listen_sk;
- __u32 ingress_linum, egress_linum;
- int err;
-
- err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
- &egress_linum);
- CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
- "err:%d errno:%d", err, errno);
-
- err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
- &ingress_linum);
- CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
- "err:%d errno:%d", err, errno);
-
- err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk);
- CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)",
- "err:%d errno:%d", err, errno);
- err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp);
- CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)",
- "err:%d errno:%d", err, errno);
-
- err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk);
- CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)",
- "err:%d errno:%d", err, errno);
- err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp);
- CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)",
- "err:%d errno:%d", err, errno);
-
- err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk);
- CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)",
- "err:%d errno:%d", err, errno);
- err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp);
- CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)",
- "err:%d errno:%d", err, errno);
-
- printf("listen_sk: ");
- print_sk(&listen_sk);
- printf("\n");
-
- printf("srv_sk: ");
- print_sk(&srv_sk);
- printf("\n");
-
- printf("cli_sk: ");
- print_sk(&cli_sk);
- printf("\n");
-
- printf("listen_tp: ");
- print_tp(&listen_tp);
- printf("\n");
-
- printf("srv_tp: ");
- print_tp(&srv_tp);
- printf("\n");
-
- printf("cli_tp: ");
- print_tp(&cli_tp);
- printf("\n");
-
- CHECK(listen_sk.state != 10 ||
- listen_sk.family != AF_INET6 ||
- listen_sk.protocol != IPPROTO_TCP ||
- memcmp(listen_sk.src_ip6, &in6addr_loopback,
- sizeof(listen_sk.src_ip6)) ||
- listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
- listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
- listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
- listen_sk.dst_port,
- "Unexpected listen_sk",
- "Check listen_sk output. ingress_linum:%u",
- ingress_linum);
-
- CHECK(srv_sk.state == 10 ||
- !srv_sk.state ||
- srv_sk.family != AF_INET6 ||
- srv_sk.protocol != IPPROTO_TCP ||
- memcmp(srv_sk.src_ip6, &in6addr_loopback,
- sizeof(srv_sk.src_ip6)) ||
- memcmp(srv_sk.dst_ip6, &in6addr_loopback,
- sizeof(srv_sk.dst_ip6)) ||
- srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
- srv_sk.dst_port != cli_sa6.sin6_port,
- "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u",
- egress_linum);
-
- CHECK(cli_sk.state == 10 ||
- !cli_sk.state ||
- cli_sk.family != AF_INET6 ||
- cli_sk.protocol != IPPROTO_TCP ||
- memcmp(cli_sk.src_ip6, &in6addr_loopback,
- sizeof(cli_sk.src_ip6)) ||
- memcmp(cli_sk.dst_ip6, &in6addr_loopback,
- sizeof(cli_sk.dst_ip6)) ||
- cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
- cli_sk.dst_port != srv_sa6.sin6_port,
- "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u",
- egress_linum);
-
- CHECK(listen_tp.data_segs_out ||
- listen_tp.data_segs_in ||
- listen_tp.total_retrans ||
- listen_tp.bytes_acked,
- "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u",
- ingress_linum);
-
- CHECK(srv_tp.data_segs_out != 2 ||
- srv_tp.data_segs_in ||
- srv_tp.snd_cwnd != 10 ||
- srv_tp.total_retrans ||
- srv_tp.bytes_acked != 2 * DATA_LEN,
- "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u",
- egress_linum);
-
- CHECK(cli_tp.data_segs_out ||
- cli_tp.data_segs_in != 2 ||
- cli_tp.snd_cwnd != 10 ||
- cli_tp.total_retrans ||
- cli_tp.bytes_received != 2 * DATA_LEN,
- "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u",
- egress_linum);
-}
-
-static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
-{
- struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {};
- int err;
-
- pkt_out_cnt.cnt = ~0;
- pkt_out_cnt10.cnt = ~0;
- err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt);
- if (!err)
- err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd,
- &pkt_out_cnt10);
-
- /* The bpf prog only counts for fullsock and
- * passive conneciton did not become fullsock until 3WHS
- * had been finished.
- * The bpf prog only counted two data packet out but we
- * specially init accept_fd's pkt_out_cnt by 2 in
- * init_sk_storage(). Hence, 4 here.
- */
- CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40,
- "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
- "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u",
- err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
-
- pkt_out_cnt.cnt = ~0;
- pkt_out_cnt10.cnt = ~0;
- err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt);
- if (!err)
- err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd,
- &pkt_out_cnt10);
- /* Active connection is fullsock from the beginning.
- * 1 SYN and 1 ACK during 3WHS
- * 2 Acks on data packet.
- *
- * The bpf_prog initialized it to 0xeB9F.
- */
- CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 ||
- pkt_out_cnt10.cnt != 0xeB9F + 40,
- "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
- "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u",
- err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
-}
-
-static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
-{
- struct bpf_spinlock_cnt scnt = {};
- int err;
-
- scnt.cnt = pkt_out_cnt;
- err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt,
- BPF_NOEXIST);
- CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
- "err:%d errno:%d", err, errno);
-
- scnt.cnt *= 10;
- err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt,
- BPF_NOEXIST);
- CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
- "err:%d errno:%d", err, errno);
-}
-
-static void test(void)
-{
- int listen_fd, cli_fd, accept_fd, epfd, err;
- struct epoll_event ev;
- socklen_t addrlen;
- int i;
-
- addrlen = sizeof(struct sockaddr_in6);
- ev.events = EPOLLIN;
-
- epfd = epoll_create(1);
- CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno);
-
- /* Prepare listen_fd */
- listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
- CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d",
- listen_fd, errno);
-
- init_loopback6(&srv_sa6);
- err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6));
- CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno);
-
- err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
- CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno);
-
- err = listen(listen_fd, 1);
- CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno);
-
- /* Prepare cli_fd */
- cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
- CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno);
-
- init_loopback6(&cli_sa6);
- err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6));
- CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno);
-
- err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
- CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d",
- err, errno);
-
- /* Update addr_map with srv_sa6 and cli_sa6 */
- err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0);
- CHECK(err, "map_update", "err:%d errno:%d", err, errno);
-
- err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0);
- CHECK(err, "map_update", "err:%d errno:%d", err, errno);
-
- /* Connect from cli_sa6 to srv_sa6 */
- err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen);
- printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n",
- ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port));
- CHECK(err && errno != EINPROGRESS,
- "connect(cli_fd)", "err:%d errno:%d", err, errno);
-
- ev.data.fd = listen_fd;
- err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev);
- CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d",
- err, errno);
-
- /* Accept the connection */
- /* Have some timeout in accept(listen_fd). Just in case. */
- err = epoll_wait(epfd, &ev, 1, 1000);
- CHECK(err != 1 || ev.data.fd != listen_fd,
- "epoll_wait(listen_fd)",
- "err:%d errno:%d ev.data.fd:%d listen_fd:%d",
- err, errno, ev.data.fd, listen_fd);
-
- accept_fd = accept(listen_fd, NULL, NULL);
- CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d",
- accept_fd, errno);
- close(listen_fd);
-
- ev.data.fd = cli_fd;
- err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev);
- CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d",
- err, errno);
-
- init_sk_storage(accept_fd, 2);
-
- for (i = 0; i < 2; i++) {
- /* Send some data from accept_fd to cli_fd */
- err = send(accept_fd, DATA, DATA_LEN, 0);
- CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d",
- err, errno);
-
- /* Have some timeout in recv(cli_fd). Just in case. */
- err = epoll_wait(epfd, &ev, 1, 1000);
- CHECK(err != 1 || ev.data.fd != cli_fd,
- "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d",
- err, errno, ev.data.fd, cli_fd);
-
- err = recv(cli_fd, NULL, 0, MSG_TRUNC);
- CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno);
- }
-
- check_sk_pkt_out_cnt(accept_fd, cli_fd);
-
- close(epfd);
- close(accept_fd);
- close(cli_fd);
-
- check_result();
-}
-
-int main(int argc, char **argv)
-{
- struct bpf_prog_load_attr attr = {
- .file = "test_sock_fields_kern.o",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .prog_flags = BPF_F_TEST_RND_HI32,
- };
- int cgroup_fd, egress_fd, ingress_fd, err;
- struct bpf_program *ingress_prog;
- struct bpf_object *obj;
- struct bpf_map *map;
-
- err = setup_cgroup_environment();
- CHECK(err, "setup_cgroup_environment()", "err:%d errno:%d",
- err, errno);
-
- atexit(cleanup_cgroup_environment);
-
- /* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- CHECK(cgroup_fd == -1, "create_and_get_cgroup()",
- "cgroup_fd:%d errno:%d", cgroup_fd, errno);
-
- err = join_cgroup(TEST_CGROUP);
- CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno);
-
- err = bpf_prog_load_xattr(&attr, &obj, &egress_fd);
- CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
-
- ingress_prog = bpf_object__find_program_by_title(obj,
- "cgroup_skb/ingress");
- CHECK(!ingress_prog,
- "bpf_object__find_program_by_title(cgroup_skb/ingress)",
- "not found");
- ingress_fd = bpf_program__fd(ingress_prog);
-
- err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
- CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)",
- "err:%d errno%d", err, errno);
-
- err = bpf_prog_attach(ingress_fd, cgroup_fd,
- BPF_CGROUP_INET_INGRESS, 0);
- CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)",
- "err:%d errno%d", err, errno);
- close(cgroup_fd);
-
- map = bpf_object__find_map_by_name(obj, "addr_map");
- CHECK(!map, "cannot find addr_map", "(null)");
- addr_map_fd = bpf_map__fd(map);
-
- map = bpf_object__find_map_by_name(obj, "sock_result_map");
- CHECK(!map, "cannot find sock_result_map", "(null)");
- sk_map_fd = bpf_map__fd(map);
-
- map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map");
- CHECK(!map, "cannot find tcp_sock_result_map", "(null)");
- tp_map_fd = bpf_map__fd(map);
-
- map = bpf_object__find_map_by_name(obj, "linum_map");
- CHECK(!map, "cannot find linum_map", "(null)");
- linum_map_fd = bpf_map__fd(map);
-
- map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt");
- CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)");
- sk_pkt_out_cnt_fd = bpf_map__fd(map);
-
- map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10");
- CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)");
- sk_pkt_out_cnt10_fd = bpf_map__fd(map);
-
- test();
-
- bpf_object__close(obj);
- cleanup_cgroup_environment();
-
- printf("PASS\n");
-
- return 0;
-}
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
index 15653b0e26eb..ca7ca87e91aa 100644
--- a/tools/testing/selftests/bpf/test_socket_cookie.c
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -151,7 +151,7 @@ static int run_test(int cgfd)
}
bpf_object__for_each_program(prog, pobj) {
- prog_name = bpf_program__title(prog, /*needs_copy*/ false);
+ prog_name = bpf_program__section_name(prog);
if (libbpf_attach_type_by_name(prog_name, &attach_type))
goto err;
@@ -191,16 +191,10 @@ int main(int argc, char **argv)
int cgfd = -1;
int err = 0;
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
+ cgfd = cgroup_setup_and_join(CG_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CG_PATH))
- goto err;
-
if (run_test(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 779e11da979c..0fa1e421c3d7 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -54,7 +54,7 @@ static void running_handler(int a);
#define S1_PORT 10000
#define S2_PORT 10001
-#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o"
+#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o"
#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
#define CG_PATH "/sockmap"
@@ -63,14 +63,12 @@ int s1, s2, c1, c2, p1, p2;
int test_cnt;
int passed;
int failed;
-int map_fd[8];
-struct bpf_map *maps[8];
+int map_fd[9];
+struct bpf_map *maps[9];
int prog_fd[11];
int txmsg_pass;
-int txmsg_noisy;
int txmsg_redir;
-int txmsg_redir_noisy;
int txmsg_drop;
int txmsg_apply;
int txmsg_cork;
@@ -81,23 +79,26 @@ int txmsg_end_push;
int txmsg_start_pop;
int txmsg_pop;
int txmsg_ingress;
-int txmsg_skb;
+int txmsg_redir_skb;
+int txmsg_ktls_skb;
+int txmsg_ktls_skb_drop;
+int txmsg_ktls_skb_redir;
int ktls;
int peek_flag;
+int skb_use_parser;
+int txmsg_omit_skb_parser;
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
{"cgroup", required_argument, NULL, 'c' },
{"rate", required_argument, NULL, 'r' },
- {"verbose", no_argument, NULL, 'v' },
+ {"verbose", optional_argument, NULL, 'v' },
{"iov_count", required_argument, NULL, 'i' },
{"length", required_argument, NULL, 'l' },
{"test", required_argument, NULL, 't' },
{"data_test", no_argument, NULL, 'd' },
{"txmsg", no_argument, &txmsg_pass, 1 },
- {"txmsg_noisy", no_argument, &txmsg_noisy, 1 },
{"txmsg_redir", no_argument, &txmsg_redir, 1 },
- {"txmsg_redir_noisy", no_argument, &txmsg_redir_noisy, 1},
{"txmsg_drop", no_argument, &txmsg_drop, 1 },
{"txmsg_apply", required_argument, NULL, 'a'},
{"txmsg_cork", required_argument, NULL, 'k'},
@@ -108,12 +109,111 @@ static const struct option long_options[] = {
{"txmsg_start_pop", required_argument, NULL, 'w'},
{"txmsg_pop", required_argument, NULL, 'x'},
{"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
- {"txmsg_skb", no_argument, &txmsg_skb, 1 },
+ {"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 },
{"ktls", no_argument, &ktls, 1 },
{"peek", no_argument, &peek_flag, 1 },
+ {"txmsg_omit_skb_parser", no_argument, &txmsg_omit_skb_parser, 1},
+ {"whitelist", required_argument, NULL, 'n' },
+ {"blacklist", required_argument, NULL, 'b' },
{0, 0, NULL, 0 }
};
+struct test_env {
+ const char *type;
+ const char *subtest;
+ const char *prepend;
+
+ int test_num;
+ int subtest_num;
+
+ int succ_cnt;
+ int fail_cnt;
+ int fail_last;
+};
+
+struct test_env env;
+
+struct sockmap_options {
+ int verbose;
+ bool base;
+ bool sendpage;
+ bool data_test;
+ bool drop_expected;
+ int iov_count;
+ int iov_length;
+ int rate;
+ char *map;
+ char *whitelist;
+ char *blacklist;
+ char *prepend;
+};
+
+struct _test {
+ char *title;
+ void (*tester)(int cg_fd, struct sockmap_options *opt);
+};
+
+static void test_start(void)
+{
+ env.subtest_num++;
+}
+
+static void test_fail(void)
+{
+ env.fail_cnt++;
+}
+
+static void test_pass(void)
+{
+ env.succ_cnt++;
+}
+
+static void test_reset(void)
+{
+ txmsg_start = txmsg_end = 0;
+ txmsg_start_pop = txmsg_pop = 0;
+ txmsg_start_push = txmsg_end_push = 0;
+ txmsg_pass = txmsg_drop = txmsg_redir = 0;
+ txmsg_apply = txmsg_cork = 0;
+ txmsg_ingress = txmsg_redir_skb = 0;
+ txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
+ txmsg_omit_skb_parser = 0;
+ skb_use_parser = 0;
+}
+
+static int test_start_subtest(const struct _test *t, struct sockmap_options *o)
+{
+ env.type = o->map;
+ env.subtest = t->title;
+ env.prepend = o->prepend;
+ env.test_num++;
+ env.subtest_num = 0;
+ env.fail_last = env.fail_cnt;
+ test_reset();
+ return 0;
+}
+
+static void test_end_subtest(void)
+{
+ int error = env.fail_cnt - env.fail_last;
+ int type = strcmp(env.type, BPF_SOCKMAP_FILENAME);
+
+ if (!error)
+ test_pass();
+
+ fprintf(stdout, "#%2d/%2d %8s:%s:%s:%s\n",
+ env.test_num, env.subtest_num,
+ !type ? "sockmap" : "sockhash",
+ env.prepend ? : "",
+ env.subtest, error ? "FAIL" : "OK");
+}
+
+static void test_print_results(void)
+{
+ fprintf(stdout, "Pass: %d Fail: %d\n",
+ env.succ_cnt, env.fail_cnt);
+}
+
static void usage(char *argv[])
{
int i;
@@ -296,7 +396,7 @@ static int sockmap_init_sockets(int verbose)
return errno;
}
- if (verbose) {
+ if (verbose > 1) {
printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
c1, s1, c2, s2);
@@ -311,17 +411,6 @@ struct msg_stats {
struct timespec end;
};
-struct sockmap_options {
- int verbose;
- bool base;
- bool sendpage;
- bool data_test;
- bool drop_expected;
- int iov_count;
- int iov_length;
- int rate;
-};
-
static int msg_loop_sendpage(int fd, int iov_length, int cnt,
struct msg_stats *s,
struct sockmap_options *opt)
@@ -345,14 +434,18 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
clock_gettime(CLOCK_MONOTONIC, &s->start);
for (i = 0; i < cnt; i++) {
- int sent = sendfile(fd, fp, NULL, iov_length);
+ int sent;
+
+ errno = 0;
+ sent = sendfile(fd, fp, NULL, iov_length);
if (!drop && sent < 0) {
- perror("send loop error");
+ perror("sendpage loop error");
fclose(file);
return sent;
} else if (drop && sent >= 0) {
- printf("sendpage loop error expected: %i\n", sent);
+ printf("sendpage loop error expected: %i errno %i\n",
+ sent, errno);
fclose(file);
return -EIO;
}
@@ -418,14 +511,26 @@ unwind_iov:
static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
{
- int i, j, bytes_cnt = 0;
+ int i, j = 0, bytes_cnt = 0;
unsigned char k = 0;
for (i = 0; i < msg->msg_iovlen; i++) {
unsigned char *d = msg->msg_iov[i].iov_base;
- for (j = 0;
- j < msg->msg_iov[i].iov_len && size; j++) {
+ /* Special case test for skb ingress + ktls */
+ if (i == 0 && txmsg_ktls_skb) {
+ if (msg->msg_iov[i].iov_len < 4)
+ return -EIO;
+ if (memcmp(d, "PASS", 4) != 0) {
+ fprintf(stderr,
+ "detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n",
+ i, 0, d[0], d[1], d[2], d[3]);
+ return -EIO;
+ }
+ j = 4; /* advance index past PASS header */
+ }
+
+ for (; j < msg->msg_iov[i].iov_len && size; j++) {
if (d[j] != k++) {
fprintf(stderr,
"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
@@ -464,13 +569,18 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
if (tx) {
clock_gettime(CLOCK_MONOTONIC, &s->start);
for (i = 0; i < cnt; i++) {
- int sent = sendmsg(fd, &msg, flags);
+ int sent;
+
+ errno = 0;
+ sent = sendmsg(fd, &msg, flags);
if (!drop && sent < 0) {
- perror("send loop error");
+ perror("sendmsg loop error");
goto out_errno;
} else if (drop && sent >= 0) {
- printf("send loop error expected: %i\n", sent);
+ fprintf(stderr,
+ "sendmsg loop error expected: %i errno %i\n",
+ sent, errno);
errno = -EIO;
goto out_errno;
}
@@ -497,9 +607,10 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
* paths.
*/
total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
- txmsg_pop_total = txmsg_pop;
if (txmsg_apply)
- txmsg_pop_total *= (total_bytes / txmsg_apply);
+ txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply);
+ else
+ txmsg_pop_total = txmsg_pop * cnt;
total_bytes -= txmsg_pop_total;
err = clock_gettime(CLOCK_MONOTONIC, &s->start);
if (err < 0)
@@ -633,14 +744,18 @@ static int sendmsg_test(struct sockmap_options *opt)
rxpid = fork();
if (rxpid == 0) {
- if (opt->drop_expected)
- exit(0);
+ iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
+ if (opt->drop_expected || txmsg_ktls_skb_drop)
+ _exit(0);
+
+ if (!iov_buf) /* zero bytes sent case */
+ _exit(0);
if (opt->sendpage)
iov_count = 1;
err = msg_loop(rx_fd, iov_count, iov_buf,
cnt, &s, false, opt);
- if (opt->verbose)
+ if (opt->verbose > 1)
fprintf(stderr,
"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
iov_count, iov_buf, cnt, err);
@@ -648,7 +763,7 @@ static int sendmsg_test(struct sockmap_options *opt)
sent_Bps = sentBps(s);
recvd_Bps = recvdBps(s);
}
- if (opt->verbose)
+ if (opt->verbose > 1)
fprintf(stdout,
"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n",
s.bytes_sent, sent_Bps, sent_Bps/giga,
@@ -678,7 +793,7 @@ static int sendmsg_test(struct sockmap_options *opt)
sent_Bps = sentBps(s);
recvd_Bps = recvdBps(s);
}
- if (opt->verbose)
+ if (opt->verbose > 1)
fprintf(stdout,
"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
s.bytes_sent, sent_Bps, sent_Bps/giga,
@@ -694,14 +809,14 @@ static int sendmsg_test(struct sockmap_options *opt)
if (WIFEXITED(rx_status)) {
err = WEXITSTATUS(rx_status);
if (err) {
- fprintf(stderr, "rx thread exited with err %d. ", err);
+ fprintf(stderr, "rx thread exited with err %d.\n", err);
goto out;
}
}
if (WIFEXITED(tx_status)) {
err = WEXITSTATUS(tx_status);
if (err)
- fprintf(stderr, "tx thread exited with err %d. ", err);
+ fprintf(stderr, "tx thread exited with err %d.\n", err);
}
out:
return err;
@@ -783,6 +898,7 @@ static int forever_ping_pong(int rate, struct sockmap_options *opt)
}
enum {
+ SELFTESTS,
PING_PONG,
SENDMSG,
BASE,
@@ -799,13 +915,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
goto run;
/* Attach programs to sockmap */
- err = bpf_prog_attach(prog_fd[0], map_fd[0],
- BPF_SK_SKB_STREAM_PARSER, 0);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
- prog_fd[0], map_fd[0], err, strerror(errno));
- return err;
+ if (!txmsg_omit_skb_parser) {
+ err = bpf_prog_attach(prog_fd[0], map_fd[0],
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
+ prog_fd[0], map_fd[0], err, strerror(errno));
+ return err;
+ }
}
err = bpf_prog_attach(prog_fd[1], map_fd[0],
@@ -816,8 +934,30 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
return err;
}
+ /* Attach programs to TLS sockmap */
+ if (txmsg_ktls_skb) {
+ if (!txmsg_omit_skb_parser) {
+ err = bpf_prog_attach(prog_fd[0], map_fd[8],
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
+ prog_fd[0], map_fd[8], err, strerror(errno));
+ return err;
+ }
+ }
+
+ err = bpf_prog_attach(prog_fd[2], map_fd[8],
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err) {
+ fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
/* Attach to cgroups */
- err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+ err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
if (err) {
fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
err, strerror(errno));
@@ -833,19 +973,14 @@ run:
/* Attach txmsg program to sockmap */
if (txmsg_pass)
- tx_prog_fd = prog_fd[3];
- else if (txmsg_noisy)
tx_prog_fd = prog_fd[4];
else if (txmsg_redir)
tx_prog_fd = prog_fd[5];
- else if (txmsg_redir_noisy)
- tx_prog_fd = prog_fd[6];
- else if (txmsg_drop)
- tx_prog_fd = prog_fd[9];
- /* apply and cork must be last */
else if (txmsg_apply)
- tx_prog_fd = prog_fd[7];
+ tx_prog_fd = prog_fd[6];
else if (txmsg_cork)
+ tx_prog_fd = prog_fd[7];
+ else if (txmsg_drop)
tx_prog_fd = prog_fd[8];
else
tx_prog_fd = 0;
@@ -870,7 +1005,7 @@ run:
goto out;
}
- if (txmsg_redir || txmsg_redir_noisy)
+ if (txmsg_redir)
redir_fd = c2;
else
redir_fd = c1;
@@ -1018,7 +1153,35 @@ run:
}
}
- if (txmsg_skb) {
+ if (txmsg_ktls_skb) {
+ int ingress = BPF_F_INGRESS;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd[8], &i, &p2, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+ err, strerror(errno));
+ }
+
+ if (txmsg_ktls_skb_redir) {
+ i = 1;
+ err = bpf_map_update_elem(map_fd[7],
+ &i, &ingress, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+ err, strerror(errno));
+ }
+ }
+
+ if (txmsg_ktls_skb_drop) {
+ i = 1;
+ err = bpf_map_update_elem(map_fd[7], &i, &i, BPF_ANY);
+ }
+ }
+
+ if (txmsg_redir_skb) {
int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
p2 : p1;
int ingress = BPF_F_INGRESS;
@@ -1033,8 +1196,7 @@ run:
}
i = 3;
- err = bpf_map_update_elem(map_fd[0],
- &i, &skb_fd, BPF_ANY);
+ err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
@@ -1043,6 +1205,11 @@ run:
}
}
+ if (skb_use_parser) {
+ i = 2;
+ err = bpf_map_update_elem(map_fd[7], &i, &skb_use_parser, BPF_ANY);
+ }
+
if (txmsg_drop)
options->drop_expected = true;
@@ -1068,9 +1235,12 @@ run:
fprintf(stderr, "unknown test\n");
out:
/* Detach and zero all the maps */
- bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+ bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS);
bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+ bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER);
+ bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT);
+
if (tx_prog_fd >= 0)
bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
@@ -1112,12 +1282,8 @@ static void test_options(char *options)
if (txmsg_pass)
strncat(options, "pass,", OPTSTRING);
- if (txmsg_noisy)
- strncat(options, "pass_noisy,", OPTSTRING);
if (txmsg_redir)
strncat(options, "redir,", OPTSTRING);
- if (txmsg_redir_noisy)
- strncat(options, "redir_noisy,", OPTSTRING);
if (txmsg_drop)
strncat(options, "drop,", OPTSTRING);
if (txmsg_apply) {
@@ -1143,8 +1309,10 @@ static void test_options(char *options)
}
if (txmsg_ingress)
strncat(options, "ingress,", OPTSTRING);
- if (txmsg_skb)
- strncat(options, "skb,", OPTSTRING);
+ if (txmsg_redir_skb)
+ strncat(options, "redir_skb,", OPTSTRING);
+ if (txmsg_ktls_skb)
+ strncat(options, "ktls_skb,", OPTSTRING);
if (ktls)
strncat(options, "ktls,", OPTSTRING);
if (peek_flag)
@@ -1168,416 +1336,344 @@ static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
test_options(options);
- fprintf(stdout,
- "[TEST %i]: (%i, %i, %i, %s, %s): ",
- test_cnt, opt->rate, opt->iov_count, opt->iov_length,
- test_to_str(test), options);
- fflush(stdout);
+ if (opt->verbose) {
+ fprintf(stdout,
+ " [TEST %i]: (%i, %i, %i, %s, %s): ",
+ test_cnt, opt->rate, opt->iov_count, opt->iov_length,
+ test_to_str(test), options);
+ fflush(stdout);
+ }
err = run_options(opt, cgrp, test);
- fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
+ if (opt->verbose)
+ fprintf(stdout, " %s\n", !err ? "PASS" : "FAILED");
test_cnt++;
!err ? passed++ : failed++;
free(options);
return err;
}
-static int test_exec(int cgrp, struct sockmap_options *opt)
-{
- int err = __test_exec(cgrp, SENDMSG, opt);
-
- if (err)
- goto out;
-
- err = __test_exec(cgrp, SENDPAGE, opt);
-out:
- return err;
-}
-
-static int test_loop(int cgrp)
-{
- struct sockmap_options opt;
-
- int err, i, l, r;
-
- opt.verbose = 0;
- opt.base = false;
- opt.sendpage = false;
- opt.data_test = false;
- opt.drop_expected = false;
- opt.iov_count = 0;
- opt.iov_length = 0;
- opt.rate = 0;
-
- r = 1;
- for (i = 1; i < 100; i += 33) {
- for (l = 1; l < 100; l += 33) {
- opt.rate = r;
- opt.iov_count = i;
- opt.iov_length = l;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
- }
- }
- sched_yield();
-out:
- return err;
-}
-
-static int test_txmsg(int cgrp)
+static void test_exec(int cgrp, struct sockmap_options *opt)
{
+ int type = strcmp(opt->map, BPF_SOCKMAP_FILENAME);
int err;
- txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
- txmsg_apply = txmsg_cork = 0;
- txmsg_ingress = txmsg_skb = 0;
-
- txmsg_pass = 1;
- err = test_loop(cgrp);
- txmsg_pass = 0;
- if (err)
- goto out;
-
- txmsg_redir = 1;
- err = test_loop(cgrp);
- txmsg_redir = 0;
- if (err)
- goto out;
-
- txmsg_drop = 1;
- err = test_loop(cgrp);
- txmsg_drop = 0;
- if (err)
- goto out;
-
- txmsg_redir = 1;
- txmsg_ingress = 1;
- err = test_loop(cgrp);
- txmsg_redir = 0;
- txmsg_ingress = 0;
- if (err)
- goto out;
-out:
- txmsg_pass = 0;
- txmsg_redir = 0;
- txmsg_drop = 0;
- return err;
+ if (type == 0) {
+ test_start();
+ err = __test_exec(cgrp, SENDMSG, opt);
+ if (err)
+ test_fail();
+ } else {
+ test_start();
+ err = __test_exec(cgrp, SENDPAGE, opt);
+ if (err)
+ test_fail();
+ }
}
-static int test_send(struct sockmap_options *opt, int cgrp)
+static void test_send_one(struct sockmap_options *opt, int cgrp)
{
- int err;
-
opt->iov_length = 1;
opt->iov_count = 1;
opt->rate = 1;
- err = test_exec(cgrp, opt);
- if (err)
- goto out;
+ test_exec(cgrp, opt);
opt->iov_length = 1;
opt->iov_count = 1024;
opt->rate = 1;
- err = test_exec(cgrp, opt);
- if (err)
- goto out;
+ test_exec(cgrp, opt);
opt->iov_length = 1024;
opt->iov_count = 1;
opt->rate = 1;
- err = test_exec(cgrp, opt);
- if (err)
- goto out;
+ test_exec(cgrp, opt);
- opt->iov_length = 1;
+}
+
+static void test_send_many(struct sockmap_options *opt, int cgrp)
+{
+ opt->iov_length = 3;
opt->iov_count = 1;
opt->rate = 512;
- err = test_exec(cgrp, opt);
- if (err)
- goto out;
-
- opt->iov_length = 256;
- opt->iov_count = 1024;
- opt->rate = 2;
- err = test_exec(cgrp, opt);
- if (err)
- goto out;
+ test_exec(cgrp, opt);
opt->rate = 100;
opt->iov_count = 1;
opt->iov_length = 5;
- err = test_exec(cgrp, opt);
- if (err)
- goto out;
-out:
- sched_yield();
- return err;
+ test_exec(cgrp, opt);
}
-static int test_mixed(int cgrp)
+static void test_send_large(struct sockmap_options *opt, int cgrp)
{
- struct sockmap_options opt = {0};
- int err;
+ opt->iov_length = 256;
+ opt->iov_count = 1024;
+ opt->rate = 2;
+ test_exec(cgrp, opt);
+}
- txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
- txmsg_apply = txmsg_cork = 0;
- txmsg_start = txmsg_end = 0;
- txmsg_start_push = txmsg_end_push = 0;
- txmsg_start_pop = txmsg_pop = 0;
+static void test_send(struct sockmap_options *opt, int cgrp)
+{
+ test_send_one(opt, cgrp);
+ test_send_many(opt, cgrp);
+ test_send_large(opt, cgrp);
+ sched_yield();
+}
+static void test_txmsg_pass(int cgrp, struct sockmap_options *opt)
+{
/* Test small and large iov_count values with pass/redir/apply/cork */
txmsg_pass = 1;
- txmsg_redir = 0;
- txmsg_apply = 1;
- txmsg_cork = 0;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+ test_send(opt, cgrp);
+}
- txmsg_pass = 1;
- txmsg_redir = 0;
- txmsg_apply = 0;
- txmsg_cork = 1;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)
+{
+ txmsg_redir = 1;
+ test_send(opt, cgrp);
+}
- txmsg_pass = 1;
- txmsg_redir = 0;
- txmsg_apply = 1;
- txmsg_cork = 1;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+static void test_txmsg_drop(int cgrp, struct sockmap_options *opt)
+{
+ txmsg_drop = 1;
+ test_send(opt, cgrp);
+}
- txmsg_pass = 1;
- txmsg_redir = 0;
- txmsg_apply = 1024;
- txmsg_cork = 0;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt)
+{
+ txmsg_pass = txmsg_drop = 0;
+ txmsg_ingress = txmsg_redir = 1;
+ test_send(opt, cgrp);
+}
- txmsg_pass = 1;
- txmsg_redir = 0;
- txmsg_apply = 0;
- txmsg_cork = 1024;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
+{
+ bool data = opt->data_test;
+ int k = ktls;
+ opt->data_test = true;
+ ktls = 1;
+
+ txmsg_pass = txmsg_drop = 0;
+ txmsg_ingress = txmsg_redir = 0;
+ txmsg_ktls_skb = 1;
txmsg_pass = 1;
- txmsg_redir = 0;
- txmsg_apply = 1024;
- txmsg_cork = 1024;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+	/* Data verification is enabled, so ensure the iov layout is
+	 * what the test receiver expects, e.g. that it carries enough
+	 * bytes for the test pattern.
+	 */
+ opt->iov_length = 100;
+ opt->iov_count = 1;
+ opt->rate = 1;
+ test_exec(cgrp, opt);
+
+ txmsg_ktls_skb_drop = 1;
+ test_exec(cgrp, opt);
+
+ txmsg_ktls_skb_drop = 0;
+ txmsg_ktls_skb_redir = 1;
+ test_exec(cgrp, opt);
+ txmsg_ktls_skb_redir = 0;
+
+ /* Tests that omit skb_parser */
+ txmsg_omit_skb_parser = 1;
+ ktls = 0;
+ txmsg_ktls_skb = 0;
+ test_exec(cgrp, opt);
+
+ txmsg_ktls_skb_drop = 1;
+ test_exec(cgrp, opt);
+ txmsg_ktls_skb_drop = 0;
+
+ txmsg_ktls_skb_redir = 1;
+ test_exec(cgrp, opt);
+
+ ktls = 1;
+ test_exec(cgrp, opt);
+ txmsg_omit_skb_parser = 0;
+
+ opt->data_test = data;
+ ktls = k;
+}
+
+/* Test cork with hung data. This exercises poor usage patterns where
+ * cork can leave data on the ring if a buggy user program never
+ * flushes it. These tests take some time because they wait for a
+ * timeout. Test pass, redir and cork with apply logic. Use a cork
+ * size of 4097 with send_large to avoid aligning the cork size with
+ * the send size.
+ */
+static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt)
+{
txmsg_pass = 1;
txmsg_redir = 0;
- txmsg_cork = 4096;
- txmsg_apply = 4096;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
-
- txmsg_pass = 0;
- txmsg_redir = 1;
- txmsg_apply = 1;
- txmsg_cork = 0;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+ txmsg_cork = 4097;
+ txmsg_apply = 4097;
+ test_send_large(opt, cgrp);
txmsg_pass = 0;
txmsg_redir = 1;
txmsg_apply = 0;
- txmsg_cork = 1;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+ txmsg_cork = 4097;
+ test_send_large(opt, cgrp);
txmsg_pass = 0;
txmsg_redir = 1;
- txmsg_apply = 1024;
- txmsg_cork = 0;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+ txmsg_apply = 4097;
+ txmsg_cork = 4097;
+ test_send_large(opt, cgrp);
+}
- txmsg_pass = 0;
+static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)
+{
+ /* Test basic start/end */
+ txmsg_start = 1;
+ txmsg_end = 2;
+ test_send(opt, cgrp);
+
+ /* Test >4k pull */
+ txmsg_start = 4096;
+ txmsg_end = 9182;
+ test_send_large(opt, cgrp);
+
+ /* Test pull + redirect */
+ txmsg_redir = 0;
+ txmsg_start = 1;
+ txmsg_end = 2;
+ test_send(opt, cgrp);
+
+ /* Test pull + cork */
+ txmsg_redir = 0;
+ txmsg_cork = 512;
+ txmsg_start = 1;
+ txmsg_end = 2;
+ test_send_many(opt, cgrp);
+
+ /* Test pull + cork + redirect */
txmsg_redir = 1;
- txmsg_apply = 0;
- txmsg_cork = 1024;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+ txmsg_cork = 512;
+ txmsg_start = 1;
+ txmsg_end = 2;
+ test_send_many(opt, cgrp);
+}
- txmsg_pass = 0;
+static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
+{
+ /* Test basic pop */
+ txmsg_start_pop = 1;
+ txmsg_pop = 2;
+ test_send_many(opt, cgrp);
+
+ /* Test pop with >4k */
+ txmsg_start_pop = 4096;
+ txmsg_pop = 4096;
+ test_send_large(opt, cgrp);
+
+ /* Test pop + redirect */
txmsg_redir = 1;
- txmsg_apply = 1024;
- txmsg_cork = 1024;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+ txmsg_start_pop = 1;
+ txmsg_pop = 2;
+ test_send_many(opt, cgrp);
- txmsg_pass = 0;
+ /* Test pop + cork */
+ txmsg_redir = 0;
+ txmsg_cork = 512;
+ txmsg_start_pop = 1;
+ txmsg_pop = 2;
+ test_send_many(opt, cgrp);
+
+ /* Test pop + redirect + cork */
txmsg_redir = 1;
- txmsg_cork = 4096;
- txmsg_apply = 4096;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
-out:
- return err;
+ txmsg_cork = 4;
+ txmsg_start_pop = 1;
+ txmsg_pop = 2;
+ test_send_many(opt, cgrp);
}
-static int test_start_end(int cgrp)
+static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
{
- struct sockmap_options opt = {0};
- int err, i;
+ /* Test basic push */
+ txmsg_start_push = 1;
+ txmsg_end_push = 1;
+ test_send(opt, cgrp);
- /* Test basic start/end with lots of iov_count and iov_lengths */
- txmsg_start = 1;
- txmsg_end = 2;
+	/* Test >4k push */
+ txmsg_start_push = 4096;
+ txmsg_end_push = 4096;
+ test_send_large(opt, cgrp);
+
+ /* Test push + redirect */
+ txmsg_redir = 1;
txmsg_start_push = 1;
txmsg_end_push = 2;
- txmsg_start_pop = 1;
- txmsg_pop = 1;
- err = test_txmsg(cgrp);
- if (err)
- goto out;
+ test_send_many(opt, cgrp);
- /* Cut a byte of pushed data but leave reamining in place */
- txmsg_start = 1;
- txmsg_end = 2;
+ /* Test push + cork */
+ txmsg_redir = 0;
+ txmsg_cork = 512;
txmsg_start_push = 1;
- txmsg_end_push = 3;
- txmsg_start_pop = 1;
- txmsg_pop = 1;
- err = test_txmsg(cgrp);
- if (err)
- goto out;
-
- /* Test start/end with cork */
- opt.rate = 16;
- opt.iov_count = 1;
- opt.iov_length = 100;
- txmsg_cork = 1600;
-
- txmsg_start_pop = 0;
- txmsg_pop = 0;
-
- for (i = 99; i <= 1600; i += 500) {
- txmsg_start = 0;
- txmsg_end = i;
- txmsg_start_push = 0;
- txmsg_end_push = i;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
- }
-
- /* Test pop data in middle of cork */
- for (i = 99; i <= 1600; i += 500) {
- txmsg_start_pop = 10;
- txmsg_pop = i;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
- }
- txmsg_start_pop = 0;
- txmsg_pop = 0;
-
- /* Test start/end with cork but pull data in middle */
- for (i = 199; i <= 1600; i += 500) {
- txmsg_start = 100;
- txmsg_end = i;
- txmsg_start_push = 100;
- txmsg_end_push = i;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
- }
+ txmsg_end_push = 2;
+ test_send_many(opt, cgrp);
+}
- /* Test start/end with cork pulling last sg entry */
- txmsg_start = 1500;
- txmsg_end = 1600;
- txmsg_start_push = 1500;
- txmsg_end_push = 1600;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
+static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt)
+{
+ txmsg_start_push = 1;
+ txmsg_end_push = 10;
+ txmsg_start_pop = 5;
+ txmsg_pop = 4;
+ test_send_large(opt, cgrp);
+}
- /* Test pop with cork pulling last sg entry */
- txmsg_start_pop = 1500;
- txmsg_pop = 1600;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
- txmsg_start_pop = 0;
- txmsg_pop = 0;
-
- /* Test start/end pull of single byte in last page */
- txmsg_start = 1111;
- txmsg_end = 1112;
- txmsg_start_push = 1111;
- txmsg_end_push = 1112;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
+static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)
+{
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1;
+ txmsg_cork = 0;
+ test_send_one(opt, cgrp);
- /* Test pop of single byte in last page */
- txmsg_start_pop = 1111;
- txmsg_pop = 1112;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 1;
+ txmsg_cork = 0;
+ test_send_one(opt, cgrp);
- /* Test start/end with end < start */
- txmsg_start = 1111;
- txmsg_end = 0;
- txmsg_start_push = 1111;
- txmsg_end_push = 0;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1024;
+ txmsg_cork = 0;
+ test_send_large(opt, cgrp);
- /* Test start/end with end > data */
- txmsg_start = 0;
- txmsg_end = 1601;
- txmsg_start_push = 0;
- txmsg_end_push = 1601;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 1024;
+ txmsg_cork = 0;
+ test_send_large(opt, cgrp);
+}
- /* Test start/end with start > data */
- txmsg_start = 1601;
- txmsg_end = 1600;
- txmsg_start_push = 1601;
- txmsg_end_push = 1600;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
+static void test_txmsg_cork(int cgrp, struct sockmap_options *opt)
+{
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 0;
+ txmsg_cork = 1;
+ test_send(opt, cgrp);
- /* Test pop with start > data */
- txmsg_start_pop = 1601;
- txmsg_pop = 1;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1;
+ txmsg_cork = 1;
+ test_send(opt, cgrp);
+}
- /* Test pop with pop range > data */
- txmsg_start_pop = 1599;
- txmsg_pop = 10;
- err = test_exec(cgrp, &opt);
-out:
- txmsg_start = 0;
- txmsg_end = 0;
- sched_yield();
- return err;
+static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt)
+{
+ txmsg_pass = 1;
+ skb_use_parser = 512;
+ opt->iov_length = 256;
+ opt->iov_count = 1;
+ opt->rate = 2;
+ test_exec(cgrp, opt);
}
char *map_names[] = {
@@ -1589,11 +1685,13 @@ char *map_names[] = {
"sock_bytes",
"sock_redir_flags",
"sock_skb_opts",
+ "tls_sock_map",
};
int prog_attach_type[] = {
BPF_SK_SKB_STREAM_PARSER,
BPF_SK_SKB_STREAM_VERDICT,
+ BPF_SK_SKB_STREAM_VERDICT,
BPF_CGROUP_SOCK_OPS,
BPF_SK_MSG_VERDICT,
BPF_SK_MSG_VERDICT,
@@ -1607,6 +1705,7 @@ int prog_attach_type[] = {
int prog_type[] = {
BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_SOCK_OPS,
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_SK_MSG,
@@ -1662,73 +1761,118 @@ static int populate_progs(char *bpf_file)
return 0;
}
-static int __test_suite(int cg_fd, char *bpf_file)
+struct _test test[] = {
+ {"txmsg test passthrough", test_txmsg_pass},
+ {"txmsg test redirect", test_txmsg_redir},
+ {"txmsg test drop", test_txmsg_drop},
+ {"txmsg test ingress redirect", test_txmsg_ingress_redir},
+ {"txmsg test skb", test_txmsg_skb},
+ {"txmsg test apply", test_txmsg_apply},
+ {"txmsg test cork", test_txmsg_cork},
+ {"txmsg test hanging corks", test_txmsg_cork_hangs},
+ {"txmsg test push_data", test_txmsg_push},
+ {"txmsg test pull-data", test_txmsg_pull},
+ {"txmsg test pop-data", test_txmsg_pop},
+ {"txmsg test push/pop data", test_txmsg_push_pop},
+	{"txmsg test ingress parser", test_txmsg_ingress_parser},
+};
+
+static int check_whitelist(struct _test *t, struct sockmap_options *opt)
+{
+ char *entry, *ptr;
+
+ if (!opt->whitelist)
+ return 0;
+	ptr = strdup(opt->whitelist);
+	if (!ptr)
+		return -ENOMEM;
+	entry = strtok(ptr, ",");
+	while (entry) {
+		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
+		    strstr(opt->map, entry) != 0 ||
+		    strstr(t->title, entry) != 0) {
+			free(ptr);
+			return 0;
+		}
+		entry = strtok(NULL, ",");
+	}
+	free(ptr);
+	return -EINVAL;
+}
+
+static int check_blacklist(struct _test *t, struct sockmap_options *opt)
{
- int err, cleanup = cg_fd;
+ char *entry, *ptr;
- err = populate_progs(bpf_file);
+ if (!opt->blacklist)
+ return -EINVAL;
+	ptr = strdup(opt->blacklist);
+	if (!ptr)
+		return -ENOMEM;
+	entry = strtok(ptr, ",");
+	while (entry) {
+		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
+		    strstr(opt->map, entry) != 0 ||
+		    strstr(t->title, entry) != 0) {
+			free(ptr);
+			return 0;
+		}
+		entry = strtok(NULL, ",");
+	}
+	free(ptr);
+	return -EINVAL;
+}
+
+static int __test_selftests(int cg_fd, struct sockmap_options *opt)
+{
+ int i, err;
+
+ err = populate_progs(opt->map);
if (err < 0) {
fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
return err;
}
- if (cg_fd < 0) {
- if (setup_cgroup_environment()) {
- fprintf(stderr, "ERROR: cgroup env failed\n");
- return -EINVAL;
- }
+ /* Tests basic commands and APIs */
+ for (i = 0; i < sizeof(test)/sizeof(struct _test); i++) {
+ struct _test t = test[i];
- cg_fd = create_and_get_cgroup(CG_PATH);
- if (cg_fd < 0) {
- fprintf(stderr,
- "ERROR: (%i) open cg path failed: %s\n",
- cg_fd, optarg);
- return cg_fd;
- }
+ if (check_whitelist(&t, opt) != 0)
+ continue;
+ if (check_blacklist(&t, opt) == 0)
+ continue;
- if (join_cgroup(CG_PATH)) {
- fprintf(stderr, "ERROR: failed to join cgroup\n");
- return -EINVAL;
- }
+ test_start_subtest(&t, opt);
+ t.tester(cg_fd, opt);
+ test_end_subtest();
}
- /* Tests basic commands and APIs with range of iov values */
- txmsg_start = txmsg_end = txmsg_start_push = txmsg_end_push = 0;
- err = test_txmsg(cg_fd);
- if (err)
- goto out;
+ return err;
+}
- /* Tests interesting combinations of APIs used together */
- err = test_mixed(cg_fd);
- if (err)
- goto out;
+static void test_selftests_sockmap(int cg_fd, struct sockmap_options *opt)
+{
+ opt->map = BPF_SOCKMAP_FILENAME;
+ __test_selftests(cg_fd, opt);
+}
- /* Tests pull_data API using start/end API */
- err = test_start_end(cg_fd);
- if (err)
- goto out;
+static void test_selftests_sockhash(int cg_fd, struct sockmap_options *opt)
+{
+ opt->map = BPF_SOCKHASH_FILENAME;
+ __test_selftests(cg_fd, opt);
+}
-out:
- printf("Summary: %i PASSED %i FAILED\n", passed, failed);
- if (cleanup < 0) {
- cleanup_cgroup_environment();
- close(cg_fd);
- }
- return err;
+static void test_selftests_ktls(int cg_fd, struct sockmap_options *opt)
+{
+ opt->map = BPF_SOCKHASH_FILENAME;
+ opt->prepend = "ktls";
+ ktls = 1;
+ __test_selftests(cg_fd, opt);
+ ktls = 0;
}
-static int test_suite(int cg_fd)
+static int test_selftest(int cg_fd, struct sockmap_options *opt)
{
- int err;
- err = __test_suite(cg_fd, BPF_SOCKMAP_FILENAME);
- if (err)
- goto out;
- err = __test_suite(cg_fd, BPF_SOCKHASH_FILENAME);
-out:
- if (cg_fd > -1)
- close(cg_fd);
- return err;
+ test_selftests_sockmap(cg_fd, opt);
+ test_selftests_sockhash(cg_fd, opt);
+ test_selftests_ktls(cg_fd, opt);
+ test_print_results();
+ return 0;
}
int main(int argc, char **argv)
@@ -1737,12 +1881,10 @@ int main(int argc, char **argv)
struct sockmap_options options = {0};
int opt, longindex, err, cg_fd = 0;
char *bpf_file = BPF_SOCKMAP_FILENAME;
- int test = PING_PONG;
-
- if (argc < 2)
- return test_suite(-1);
+ int test = SELFTESTS;
+ bool cg_created = 0;
- while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:",
+ while ((opt = getopt_long(argc, argv, ":dhv:c:r:i:l:t:p:q:n:b:",
long_options, &longindex)) != -1) {
switch (opt) {
case 's':
@@ -1783,6 +1925,8 @@ int main(int argc, char **argv)
break;
case 'v':
options.verbose = 1;
+ if (optarg)
+ options.verbose = atoi(optarg);
break;
case 'i':
iov_count = atoi(optarg);
@@ -1809,6 +1953,15 @@ int main(int argc, char **argv)
return -1;
}
break;
+ case 'n':
+ options.whitelist = strdup(optarg);
+ if (!options.whitelist)
+ return -ENOMEM;
+ break;
+ case 'b':
+ options.blacklist = strdup(optarg);
+ if (!options.blacklist)
+			return -ENOMEM;
+		break;
case 0:
break;
case 'h':
@@ -1818,13 +1971,16 @@ int main(int argc, char **argv)
}
}
- if (argc <= 3 && cg_fd)
- return test_suite(cg_fd);
-
if (!cg_fd) {
- fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
- argv[0]);
- return -1;
+ cg_fd = cgroup_setup_and_join(CG_PATH);
+ if (cg_fd < 0)
+ return cg_fd;
+ cg_created = 1;
+ }
+
+ if (test == SELFTESTS) {
+ err = test_selftest(cg_fd, &options);
+ goto out;
}
err = populate_progs(bpf_file);
@@ -1843,6 +1999,13 @@ int main(int argc, char **argv)
options.rate = rate;
err = run_options(&options, cg_fd, test);
+out:
+ if (options.whitelist)
+ free(options.whitelist);
+ if (options.blacklist)
+ free(options.blacklist);
+ if (cg_created)
+ cleanup_cgroup_environment();
close(cg_fd);
return err;
}
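
The new --whitelist/--blacklist filters match each comma-separated entry as a
substring against the map name, the "ktls" prepend tag and the subtest title,
so e.g. ./test_sockmap --whitelist "ingress" -v 2 runs only the ingress
subtests with verbose output. A minimal sketch of the selection logic using
check_whitelist() from the patch above (the title and filter string here are
illustrative, not taken from the test suite):

  struct _test t = { .title = "txmsg test ingress redirect" };
  struct sockmap_options opt = {
          .map = BPF_SOCKMAP_FILENAME,
          .whitelist = "ingress,ktls",    /* as passed via --whitelist */
  };

  /* "ingress" is a substring of the subtest title, so the test is kept */
  if (check_whitelist(&t, &opt) == 0)
          printf("%s selected\n", t.title);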
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index d196e2a4a6e0..a20a919244c0 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -1619,16 +1619,10 @@ int main(int argc, char **argv)
int cgfd = -1;
int err = 0;
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
+ cgfd = cgroup_setup_and_join(CG_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CG_PATH))
- goto err;
-
if (run_tests(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh
new file mode 100755
index 000000000000..8868aa1ca902
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tc_redirect.sh
@@ -0,0 +1,216 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
+# between src and dst; the fwd netns has a veth link to each of src and dst. The
+# client is in src and the server in dst. The test installs a TC BPF program on
+# each host-facing veth in fwd which calls into i) bpf_redirect_neigh() to
+# populate the neighbor address and redirect, or ii) bpf_redirect_peer() to
+# switch namespaces from the ingress side; it also installs a checker prog on
+# the egress side to drop unexpected traffic.
+
+if [[ $EUID -ne 0 ]]; then
+ echo "This script must be run as root"
+ echo "FAIL"
+ exit 1
+fi
+
+# check that needed tools are present
+command -v nc >/dev/null 2>&1 || \
+ { echo >&2 "nc is not available"; exit 1; }
+command -v dd >/dev/null 2>&1 || \
+ { echo >&2 "dd is not available"; exit 1; }
+command -v timeout >/dev/null 2>&1 || \
+ { echo >&2 "timeout is not available"; exit 1; }
+command -v ping >/dev/null 2>&1 || \
+ { echo >&2 "ping is not available"; exit 1; }
+if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi
+command -v perl >/dev/null 2>&1 || \
+ { echo >&2 "perl is not available"; exit 1; }
+command -v jq >/dev/null 2>&1 || \
+ { echo >&2 "jq is not available"; exit 1; }
+command -v bpftool >/dev/null 2>&1 || \
+ { echo >&2 "bpftool is not available"; exit 1; }
+
+readonly GREEN='\033[0;92m'
+readonly RED='\033[0;31m'
+readonly NC='\033[0m' # No Color
+
+readonly PING_ARG="-c 3 -w 10 -q"
+
+readonly TIMEOUT=10
+
+readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
+readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)"
+readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
+
+readonly IP4_SRC="172.16.1.100"
+readonly IP4_DST="172.16.2.100"
+
+readonly IP6_SRC="::1:dead:beef:cafe"
+readonly IP6_DST="::2:dead:beef:cafe"
+
+readonly IP4_SLL="169.254.0.1"
+readonly IP4_DLL="169.254.0.2"
+readonly IP4_NET="169.254.0.0"
+
+netns_cleanup()
+{
+ ip netns del ${NS_SRC}
+ ip netns del ${NS_FWD}
+ ip netns del ${NS_DST}
+}
+
+netns_setup()
+{
+ ip netns add "${NS_SRC}"
+ ip netns add "${NS_FWD}"
+ ip netns add "${NS_DST}"
+
+ ip link add veth_src type veth peer name veth_src_fwd
+ ip link add veth_dst type veth peer name veth_dst_fwd
+
+ ip link set veth_src netns ${NS_SRC}
+ ip link set veth_src_fwd netns ${NS_FWD}
+
+ ip link set veth_dst netns ${NS_DST}
+ ip link set veth_dst_fwd netns ${NS_FWD}
+
+ ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src
+ ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst
+
+ # The fwd netns automatically get a v6 LL address / routes, but also
+	# The fwd netns automatically gets a v6 LL address / routes, but it
+	# also needs a v4 one in order to start ARP probing. The IP4_NET
+	# route is added to the endpoints so that ARP processing will reply.
+ ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd
+ ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd
+
+ ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad
+ ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad
+
+ ip -netns ${NS_SRC} link set dev veth_src up
+ ip -netns ${NS_FWD} link set dev veth_src_fwd up
+
+ ip -netns ${NS_DST} link set dev veth_dst up
+ ip -netns ${NS_FWD} link set dev veth_dst_fwd up
+
+ ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global
+ ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global
+ ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global
+
+ ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global
+ ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global
+
+ ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global
+ ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global
+ ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global
+
+ ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global
+ ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global
+
+ fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address)
+ fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address)
+
+ ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src
+ ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst
+
+ ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src
+ ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst
+}
+
+netns_test_connectivity()
+{
+ set +e
+
+ ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &"
+ ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &"
+
+ TEST="TCPv4 connectivity test"
+ ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004"
+ if [ $? -ne 0 ]; then
+ echo -e "${TEST}: ${RED}FAIL${NC}"
+ exit 1
+ fi
+ echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+ TEST="TCPv6 connectivity test"
+ ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006"
+ if [ $? -ne 0 ]; then
+ echo -e "${TEST}: ${RED}FAIL${NC}"
+ exit 1
+ fi
+ echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+ TEST="ICMPv4 connectivity test"
+ ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST}
+ if [ $? -ne 0 ]; then
+ echo -e "${TEST}: ${RED}FAIL${NC}"
+ exit 1
+ fi
+ echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+ TEST="ICMPv6 connectivity test"
+ ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST}
+ if [ $? -ne 0 ]; then
+ echo -e "${TEST}: ${RED}FAIL${NC}"
+ exit 1
+ fi
+ echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+ set -e
+}
+
+hex_mem_str()
+{
+ perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1
+}
+
+netns_setup_bpf()
+{
+ local obj=$1
+ local use_forwarding=${2:-0}
+
+ ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
+ ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress
+ ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress
+
+ ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
+ ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress
+ ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress
+
+ if [ "$use_forwarding" -eq "1" ]; then
+ # bpf_fib_lookup() checks if forwarding is enabled
+ ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1
+ ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1
+ ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1
+ return 0
+ fi
+
+ veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex)
+ veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex)
+
+ progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]')
+ for prog in $progs; do
+ map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]')
+ if [ ! -z "$map" ]; then
+ bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src)
+ bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst)
+ fi
+ done
+}
+
+trap netns_cleanup EXIT
+set -e
+
+netns_setup
+netns_setup_bpf test_tc_neigh.o
+netns_test_connectivity
+netns_cleanup
+netns_setup
+netns_setup_bpf test_tc_neigh_fib.o 1
+netns_test_connectivity
+netns_cleanup
+netns_setup
+netns_setup_bpf test_tc_peer.o
+netns_test_connectivity
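
For reference, a minimal sketch (not the actual test objects) of the kind of
TC ingress program the script attaches; the real logic lives in
test_tc_neigh.o/test_tc_peer.o, and the hard-coded ifindex below stands in for
the per-device config the test programs read from the map that
netns_setup_bpf() populates via bpftool:

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  SEC("src_ingress")
  int tc_src_ingress(struct __sk_buff *skb)
  {
          /* 42 is a hypothetical ifindex of the veth facing dst; on
           * success the helper returns TC_ACT_REDIRECT.
           */
          return bpf_redirect_peer(42, 0);
  }

  char __license[] SEC("license") = "GPL";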
diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
new file mode 100644
index 000000000000..6118e3ab61fc
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+
+#ifndef _TEST_TCP_HDR_OPTIONS_H
+#define _TEST_TCP_HDR_OPTIONS_H
+
+struct bpf_test_option {
+ __u8 flags;
+ __u8 max_delack_ms;
+ __u8 rand;
+} __attribute__((packed));
+
+enum {
+ OPTION_RESEND,
+ OPTION_MAX_DELACK_MS,
+ OPTION_RAND,
+ __NR_OPTION_FLAGS,
+};
+
+#define OPTION_F_RESEND (1 << OPTION_RESEND)
+#define OPTION_F_MAX_DELACK_MS (1 << OPTION_MAX_DELACK_MS)
+#define OPTION_F_RAND (1 << OPTION_RAND)
+#define OPTION_MASK ((1 << __NR_OPTION_FLAGS) - 1)
+
+#define TEST_OPTION_FLAGS(flags, option) (1 & ((flags) >> (option)))
+#define SET_OPTION_FLAGS(flags, option) ((flags) |= (1 << (option)))
+
+/* Store in bpf_sk_storage */
+struct hdr_stg {
+ bool active;
+ bool resend_syn; /* active side only */
+ bool syncookie; /* passive side only */
+ bool fastopen; /* passive side only */
+};
+
+struct linum_err {
+ unsigned int linum;
+ int err;
+};
+
+#define TCPHDR_FIN 0x01
+#define TCPHDR_SYN 0x02
+#define TCPHDR_RST 0x04
+#define TCPHDR_PSH 0x08
+#define TCPHDR_ACK 0x10
+#define TCPHDR_URG 0x20
+#define TCPHDR_ECE 0x40
+#define TCPHDR_CWR 0x80
+#define TCPHDR_SYNACK (TCPHDR_SYN | TCPHDR_ACK)
+
+#define TCPOPT_EOL 0
+#define TCPOPT_NOP 1
+#define TCPOPT_WINDOW 3
+#define TCPOPT_EXP 254
+
+#define TCP_BPF_EXPOPT_BASE_LEN 4
+#define MAX_TCP_HDR_LEN 60
+#define MAX_TCP_OPTION_SPACE 40
+
+#ifdef BPF_PROG_TEST_TCP_HDR_OPTIONS
+
+#define CG_OK 1
+#define CG_ERR 0
+
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
+struct tcp_exprm_opt {
+ __u8 kind;
+ __u8 len;
+ __u16 magic;
+ union {
+ __u8 data[4];
+ __u32 data32;
+ };
+} __attribute__((packed));
+
+struct tcp_opt {
+ __u8 kind;
+ __u8 len;
+ union {
+ __u8 data[4];
+ __u32 data32;
+ };
+} __attribute__((packed));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, struct linum_err);
+} lport_linum_map SEC(".maps");
+
+static inline unsigned int tcp_hdrlen(const struct tcphdr *th)
+{
+ return th->doff << 2;
+}
+
+static inline __u8 skops_tcp_flags(const struct bpf_sock_ops *skops)
+{
+ return skops->skb_tcp_flags;
+}
+
+static inline void clear_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags &
+ ~(BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG));
+}
+
+static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops, __u32 extra)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags |
+ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG |
+ extra);
+}
+static inline void
+clear_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags &
+ ~BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
+}
+
+static inline void
+set_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags |
+ BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
+}
+
+#define RET_CG_ERR(__err) ({ \
+ struct linum_err __linum_err; \
+ int __lport; \
+ \
+ __linum_err.linum = __LINE__; \
+ __linum_err.err = __err; \
+ __lport = skops->local_port; \
+ bpf_map_update_elem(&lport_linum_map, &__lport, &__linum_err, BPF_NOEXIST); \
+ clear_hdr_cb_flags(skops); \
+ clear_parse_all_hdr_cb_flags(skops); \
+ return CG_ERR; \
+})
+
+#endif /* BPF_PROG_TEST_TCP_HDR_OPTIONS */
+
+#endif /* _TEST_TCP_HDR_OPTIONS_H */
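
A short sketch of how the option-flag macros above compose (the values and
the use_rand_byte() consumer are illustrative):

  struct bpf_test_option opt = {};

  SET_OPTION_FLAGS(opt.flags, OPTION_RAND);       /* flags |= 1 << OPTION_RAND */
  opt.rand = 0x5a;

  if (TEST_OPTION_FLAGS(opt.flags, OPTION_RAND))  /* 1 & (flags >> OPTION_RAND) */
          use_rand_byte(opt.rand);                /* hypothetical consumer */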
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
index 3ae127620463..74a9e49988b6 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -102,16 +102,10 @@ int main(int argc, char **argv)
__u32 key = 0;
int rv;
- if (setup_cgroup_environment())
- goto err;
-
- cg_fd = create_and_get_cgroup(cg_path);
+ cg_fd = cgroup_setup_and_join(cg_path);
if (cg_fd < 0)
goto err;
- if (join_cgroup(cg_path))
- goto err;
-
if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
printf("FAILED: load_bpf_file failed for: %s\n", file);
goto err;
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index f9765ddf0761..73da7fe8c152 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -86,16 +86,10 @@ int main(int argc, char **argv)
CPU_SET(0, &cpuset);
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
- if (setup_cgroup_environment())
- goto err;
-
- cg_fd = create_and_get_cgroup(cg_path);
+ cg_fd = cgroup_setup_and_join(cg_path);
if (cg_fd < 0)
goto err;
- if (join_cgroup(cg_path))
- goto err;
-
if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
printf("FAILED: load_bpf_file failed for: %s\n", file);
goto err;
@@ -130,17 +124,24 @@ int main(int argc, char **argv)
sprintf(test_script,
"iptables -A INPUT -p tcp --dport %d -j DROP",
TESTPORT);
- system(test_script);
+ if (system(test_script)) {
+ printf("FAILED: execute command: %s, err %d\n", test_script, -errno);
+ goto err;
+ }
sprintf(test_script,
"nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ",
TESTPORT);
- system(test_script);
+ if (system(test_script))
+ printf("execute command: %s, err %d\n", test_script, -errno);
sprintf(test_script,
"iptables -D INPUT -p tcp --dport %d -j DROP",
TESTPORT);
- system(test_script);
+ if (system(test_script)) {
+ printf("FAILED: execute command: %s, err %d\n", test_script, -errno);
+ goto err;
+ }
rv = bpf_map_lookup_elem(bpf_map__fd(global_map), &key, &g);
if (rv != 0) {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 87eaa49609a0..9be395d9dc64 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -50,7 +50,7 @@
#define MAX_INSNS BPF_MAXINSNS
#define MAX_TEST_INSNS 1000000
#define MAX_FIXUPS 8
-#define MAX_NR_MAPS 19
+#define MAX_NR_MAPS 20
#define MAX_TEST_RUNS 8
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
@@ -86,6 +86,7 @@ struct bpf_test {
int fixup_map_array_small[MAX_FIXUPS];
int fixup_sk_storage_map[MAX_FIXUPS];
int fixup_map_event_output[MAX_FIXUPS];
+ int fixup_map_reuseport_array[MAX_FIXUPS];
const char *errstr;
const char *errstr_unpriv;
uint32_t insn_processed;
@@ -113,6 +114,7 @@ struct bpf_test {
bpf_testdata_struct_t retvals[MAX_TEST_RUNS];
};
enum bpf_attach_type expected_attach_type;
+ const char *kfunc;
};
/* Note we want this to be 64 bit aligned so that the end of our array is
@@ -637,6 +639,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
int *fixup_map_array_small = test->fixup_map_array_small;
int *fixup_sk_storage_map = test->fixup_sk_storage_map;
int *fixup_map_event_output = test->fixup_map_event_output;
+ int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
if (test->fill_helper) {
test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -806,12 +809,28 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
fixup_map_event_output++;
} while (*fixup_map_event_output);
}
+ if (*fixup_map_reuseport_array) {
+ map_fds[19] = __create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ sizeof(u32), sizeof(u64), 1, 0);
+ do {
+ prog[*fixup_map_reuseport_array].imm = map_fds[19];
+ fixup_map_reuseport_array++;
+ } while (*fixup_map_reuseport_array);
+ }
}
+struct libcap {
+ struct __user_cap_header_struct hdr;
+ struct __user_cap_data_struct data[2];
+};
+
static int set_admin(bool admin)
{
cap_t caps;
- const cap_value_t cap_val = CAP_SYS_ADMIN;
+ /* need CAP_BPF, CAP_NET_ADMIN, CAP_PERFMON to load progs */
+ const cap_value_t cap_net_admin = CAP_NET_ADMIN;
+ const cap_value_t cap_sys_admin = CAP_SYS_ADMIN;
+ struct libcap *cap;
int ret = -1;
caps = cap_get_proc();
@@ -819,11 +838,26 @@ static int set_admin(bool admin)
perror("cap_get_proc");
return -1;
}
- if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
+ cap = (struct libcap *)caps;
+ if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_sys_admin, CAP_CLEAR)) {
+ perror("cap_set_flag clear admin");
+ goto out;
+ }
+ if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_admin,
admin ? CAP_SET : CAP_CLEAR)) {
- perror("cap_set_flag");
+ perror("cap_set_flag set_or_clear net");
goto out;
}
+ /* libcap is likely old and simply ignores CAP_BPF and CAP_PERFMON,
+ * so update effective bits manually
+ */
+ if (admin) {
+ cap->data[1].effective |= 1 << (38 /* CAP_PERFMON */ - 32);
+ cap->data[1].effective |= 1 << (39 /* CAP_BPF */ - 32);
+ } else {
+ cap->data[1].effective &= ~(1 << (38 - 32));
+ cap->data[1].effective &= ~(1 << (39 - 32));
+ }
if (cap_set_proc(caps)) {
perror("cap_set_proc");
goto out;
@@ -943,11 +977,32 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
attr.insns = prog;
attr.insns_cnt = prog_len;
attr.license = "GPL";
- attr.log_level = verbose || expected_ret == VERBOSE_ACCEPT ? 1 : 4;
+ if (verbose)
+ attr.log_level = 1;
+ else if (expected_ret == VERBOSE_ACCEPT)
+ attr.log_level = 2;
+ else
+ attr.log_level = 4;
attr.prog_flags = pflags;
+ if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) {
+ attr.attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc,
+ attr.expected_attach_type);
+ if (attr.attach_btf_id < 0) {
+ printf("FAIL\nFailed to find BTF ID for '%s'!\n",
+ test->kfunc);
+ (*errors)++;
+ return;
+ }
+ }
+
fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
- if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) {
+
+ /* BPF_PROG_TYPE_TRACING requires more setup and
+ * bpf_probe_prog_type won't give correct answer
+ */
+ if (fd_prog < 0 && prog_type != BPF_PROG_TYPE_TRACING &&
+ !bpf_probe_prog_type(prog_type, 0)) {
printf("SKIP (unsupported program type %d)\n", prog_type);
skips++;
goto close_fds;
@@ -1052,9 +1107,11 @@ fail_log:
static bool is_admin(void)
{
+ cap_flag_value_t net_priv = CAP_CLEAR;
+ bool perfmon_priv = false;
+ bool bpf_priv = false;
+ struct libcap *cap;
cap_t caps;
- cap_flag_value_t sysadmin = CAP_CLEAR;
- const cap_value_t cap_val = CAP_SYS_ADMIN;
#ifdef CAP_IS_SUPPORTED
if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
@@ -1067,11 +1124,14 @@ static bool is_admin(void)
perror("cap_get_proc");
return false;
}
- if (cap_get_flag(caps, cap_val, CAP_EFFECTIVE, &sysadmin))
- perror("cap_get_flag");
+ cap = (struct libcap *)caps;
+ bpf_priv = cap->data[1].effective & (1 << (39/* CAP_BPF */ - 32));
+ perfmon_priv = cap->data[1].effective & (1 << (38/* CAP_PERFMON */ - 32));
+ if (cap_get_flag(caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &net_priv))
+ perror("cap_get_flag NET");
if (cap_free(caps))
perror("cap_free");
- return (sysadmin == CAP_SET);
+ return bpf_priv && perfmon_priv && net_priv == CAP_SET;
}
static void get_unpriv_disabled()
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
index c4b17e08d431..dd80f0c84afb 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -10,52 +10,72 @@
# | xdp forwarding |
# ------------------
-cleanup()
+ret=0
+
+setup()
{
- if [ "$?" = "0" ]; then
- echo "selftests: test_xdp_redirect [PASS]";
- else
- echo "selftests: test_xdp_redirect [FAILED]";
- fi
- set +e
+ local xdpmode=$1
+
+ ip netns add ns1
+ ip netns add ns2
+
+ ip link add veth1 index 111 type veth peer name veth11 netns ns1
+ ip link add veth2 index 222 type veth peer name veth22 netns ns2
+
+ ip link set veth1 up
+ ip link set veth2 up
+ ip -n ns1 link set dev veth11 up
+ ip -n ns2 link set dev veth22 up
+
+ ip -n ns1 addr add 10.1.1.11/24 dev veth11
+ ip -n ns2 addr add 10.1.1.22/24 dev veth22
+}
+
+cleanup()
+{
ip link del veth1 2> /dev/null
ip link del veth2 2> /dev/null
ip netns del ns1 2> /dev/null
ip netns del ns2 2> /dev/null
}
-ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
-if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
- exit 0
-fi
-set -e
-
-ip netns add ns1
-ip netns add ns2
+test_xdp_redirect()
+{
+ local xdpmode=$1
-trap cleanup 0 2 3 6 9
+ setup
-ip link add veth1 index 111 type veth peer name veth11
-ip link add veth2 index 222 type veth peer name veth22
+ ip link set dev veth1 $xdpmode off &> /dev/null
+ if [ $? -ne 0 ];then
+ echo "selftests: test_xdp_redirect $xdpmode [SKIP]"
+ return 0
+ fi
-ip link set veth11 netns ns1
-ip link set veth22 netns ns2
+ ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
+ ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
+ ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null
+ ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null
-ip link set veth1 up
-ip link set veth2 up
+ ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null
+ local ret1=$?
+ ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null
+ local ret2=$?
-ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth11
-ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth22
+ if [ $ret1 -eq 0 -a $ret2 -eq 0 ]; then
+ echo "selftests: test_xdp_redirect $xdpmode [PASS]";
+ else
+ ret=1
+ echo "selftests: test_xdp_redirect $xdpmode [FAILED]";
+ fi
-ip netns exec ns1 ip link set dev veth11 up
-ip netns exec ns2 ip link set dev veth22 up
+ cleanup
+}
-ip link set dev veth1 xdpgeneric obj test_xdp_redirect.o sec redirect_to_222
-ip link set dev veth2 xdpgeneric obj test_xdp_redirect.o sec redirect_to_111
+set -e
+trap cleanup 2 3 6 9
-ip netns exec ns1 ping -c 1 10.1.1.22
-ip netns exec ns2 ping -c 1 10.1.1.11
+test_xdp_redirect xdpgeneric
+test_xdp_redirect xdpdrv
-exit 0
+exit $ret
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
new file mode 100644
index 000000000000..800d503e5cb4
--- /dev/null
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (C) 2020 Facebook, Inc. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include "testing_helpers.h"
+
+int parse_num_list(const char *s, bool **num_set, int *num_set_len)
+{
+ int i, set_len = 0, new_len, num, start = 0, end = -1;
+ bool *set = NULL, *tmp, parsing_end = false;
+ char *next;
+
+ while (s[0]) {
+ errno = 0;
+ num = strtol(s, &next, 10);
+ if (errno)
+ return -errno;
+
+ if (parsing_end)
+ end = num;
+ else
+ start = num;
+
+ if (!parsing_end && *next == '-') {
+ s = next + 1;
+ parsing_end = true;
+ continue;
+ } else if (*next == ',') {
+ parsing_end = false;
+ s = next + 1;
+ end = num;
+ } else if (*next == '\0') {
+ parsing_end = false;
+ s = next;
+ end = num;
+ } else {
+ return -EINVAL;
+ }
+
+ if (start > end)
+ return -EINVAL;
+
+ if (end + 1 > set_len) {
+ new_len = end + 1;
+ tmp = realloc(set, new_len);
+ if (!tmp) {
+ free(set);
+ return -ENOMEM;
+ }
+ for (i = set_len; i < start; i++)
+ tmp[i] = false;
+ set = tmp;
+ set_len = new_len;
+ }
+ for (i = start; i <= end; i++)
+ set[i] = true;
+ }
+
+ if (!set)
+ return -EINVAL;
+
+ *num_set = set;
+ *num_set_len = set_len;
+
+ return 0;
+}
+
+__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
+{
+ __u32 info_len = sizeof(*info);
+ int err;
+
+ memset(info, 0, sizeof(*info));
+ err = bpf_obj_get_info_by_fd(bpf_link__fd(link), info, &info_len);
+ if (err) {
+ printf("failed to get link info: %d\n", -errno);
+ return 0;
+ }
+ return info->prog_id;
+}
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
new file mode 100644
index 000000000000..d4f8e749611b
--- /dev/null
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (C) 2020 Facebook, Inc. */
+#include <stdbool.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+int parse_num_list(const char *s, bool **set, int *set_len);
+__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info);
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 4d0e913bbb22..1bbd1d9830c8 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -90,6 +90,33 @@ long ksym_get_addr(const char *name)
return 0;
}
+/* Open kallsyms and read symbol addresses on the fly. Without caching
+ * all symbols, this is faster than load + find.
+ */
+int kallsyms_find(const char *sym, unsigned long long *addr)
+{
+ char type, name[500];
+ unsigned long long value;
+ int err = 0;
+ FILE *f;
+
+ f = fopen("/proc/kallsyms", "r");
+ if (!f)
+ return -EINVAL;
+
+ while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) {
+ if (strcmp(name, sym) == 0) {
+ *addr = value;
+ goto out;
+ }
+ }
+ err = -ENOENT;
+
+out:
+ fclose(f);
+ return err;
+}
+
void read_trace_pipe(void)
{
int trace_fd;
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 25ef597dd03f..f62fdef9e589 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -12,6 +12,10 @@ struct ksym {
int load_kallsyms(void);
struct ksym *ksym_search(long key);
long ksym_get_addr(const char *name);
+
+/* Open kallsyms and find addresses on the fly, faster than load + search. */
+int kallsyms_find(const char *sym, unsigned long long *addr);
+
void read_trace_pipe(void);
#endif
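
Typical use of the new kallsyms_find() is a few lines; the symbol name below
is only illustrative:

  unsigned long long addr;

  if (kallsyms_find("bpf_fentry_test1", &addr) == 0)
          printf("bpf_fentry_test1 is at 0x%llx\n", addr);
  else
          fprintf(stderr, "symbol not found in /proc/kallsyms\n");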
diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c
index e0fad1548737..ca8fdb1b3f01 100644
--- a/tools/testing/selftests/bpf/verifier/and.c
+++ b/tools/testing/selftests/bpf/verifier/and.c
@@ -15,7 +15,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R0 max value is outside of the array range",
+ .errstr = "R0 max value is outside of the allowed memory range",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
@@ -44,7 +44,23 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R0 max value is outside of the array range",
+ .errstr = "R0 max value is outside of the allowed memory range",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "check known subreg with unknown reg",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 32),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFFFF1234),
+ /* Upper bits stay unknown, but the AND clears the added 1, so the low 32 bits are known to be zero */
+ BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 1, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 512),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 0
+},
diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
index f3c33e128709..1c4b1939f5a8 100644
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ b/tools/testing/selftests/bpf/verifier/array_access.c
@@ -117,7 +117,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R0 min value is outside of the array range",
+ .errstr = "R0 min value is outside of the allowed memory range",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
@@ -137,7 +137,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
+ .errstr = "R0 unbounded memory access, make sure to bounds check any such access",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
diff --git a/tools/testing/selftests/bpf/verifier/basic.c b/tools/testing/selftests/bpf/verifier/basic.c
index b8d18642653a..de84f0d57082 100644
--- a/tools/testing/selftests/bpf/verifier/basic.c
+++ b/tools/testing/selftests/bpf/verifier/basic.c
@@ -2,7 +2,7 @@
"empty prog",
.insns = {
},
- .errstr = "unknown opcode 00",
+ .errstr = "last insn is not an exit or jmp",
.result = REJECT,
},
{
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index 58f4aa593b1b..dac40de3f868 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -20,7 +20,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
- .errstr = "R0 max value is outside of the array range",
+ .errstr = "R0 max value is outside of the allowed memory range",
.result = REJECT,
},
{
@@ -146,7 +146,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
- .errstr = "R0 min value is outside of the array range",
+ .errstr = "R0 min value is outside of the allowed memory range",
.result = REJECT
},
{
@@ -354,7 +354,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
- .errstr = "R0 max value is outside of the array range",
+ .errstr = "R0 max value is outside of the allowed memory range",
.result = REJECT
},
{
@@ -557,3 +557,149 @@
.result = ACCEPT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "bounds check for reg = 0, reg xor 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = ACCEPT,
+},
+{
+ "bounds check for reg32 = 0, reg32 xor 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 1),
+ BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = ACCEPT,
+},
+{
+ "bounds check for reg = 2, reg xor 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_1, 2),
+ BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = ACCEPT,
+},
+{
+ "bounds check for reg = any, reg xor 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = REJECT,
+ .errstr = "invalid access to map value",
+ .errstr_unpriv = "invalid access to map value",
+},
+{
+ "bounds check for reg32 = any, reg32 xor 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3),
+ BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = REJECT,
+ .errstr = "invalid access to map value",
+ .errstr_unpriv = "invalid access to map value",
+},
+{
+ "bounds check for reg > 0, reg xor 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_1, 0, 3),
+ BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = ACCEPT,
+},
+{
+ "bounds check for reg32 > 0, reg32 xor 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP32_IMM(BPF_JLE, BPF_REG_1, 0, 3),
+ BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3),
+ BPF_JMP32_IMM(BPF_JGE, BPF_REG_1, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 3 },
+ .result = ACCEPT,
+},
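
The new bounds.c entries check that the verifier tracks the exact result of
XOR, so a branch guarding an otherwise out-of-range map access is recognized
as never taken. A rough restricted-C analogue of the "reg = 0, reg xor 1"
case follows; it is only a sketch (map name and section are assumptions), and
the real tests are written in raw instructions precisely because a C compiler
would constant-fold this pattern away before the verifier ever saw it:

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	struct {
		__uint(type, BPF_MAP_TYPE_HASH);
		__uint(max_entries, 1);
		__type(key, __u64);
		__type(value, __u64);	/* value_size = 8, as in fixup_map_hash_8b */
	} map_hash_8b SEC(".maps");

	SEC("socket")
	int xor_bounds(void *ctx)
	{
		__u64 key = 0, r = 0;
		__u64 *val = bpf_map_lookup_elem(&map_hash_8b, &key);

		if (!val)
			return 0;
		r ^= 1;			/* verifier must know r == 1 exactly  */
		if (r == 0)		/* ...so this branch is provably dead */
			return (int)*(__u64 *)((char *)val + 8); /* off 8 past an 8-byte value */
		return 0;
	}

	char _license[] SEC("license") = "GPL";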
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 2d752c4f8d9d..c4f5d909e58a 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -19,7 +19,7 @@
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 1,
@@ -105,7 +105,7 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.fixup_map_hash_8b = { 16 },
.result = REJECT,
- .errstr = "R0 min value is outside of the array range",
+ .errstr = "R0 min value is outside of the allowed memory range",
},
{
"calls: overlapping caller/callee",
@@ -315,7 +315,7 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "allowed for root only",
+ .errstr_unpriv = "allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = POINTER_VALUE,
@@ -346,7 +346,7 @@
BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "allowed for root only",
+ .errstr_unpriv = "allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN,
@@ -397,7 +397,7 @@
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.fixup_map_hash_48b = { 3 },
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -647,13 +647,14 @@
.result = REJECT,
},
{
- "calls: ld_abs with changing ctx data in callee",
+ "calls: subprog call with ld_abs in main prog",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LD_ABS(BPF_B, 0),
BPF_LD_ABS(BPF_H, 0),
BPF_LD_ABS(BPF_W, 0),
BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
BPF_LD_ABS(BPF_B, 0),
@@ -666,8 +667,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed",
- .result = REJECT,
+ .result = ACCEPT,
},
{
"calls: two calls with bad fallthrough",
@@ -1064,7 +1064,7 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "allowed for root only",
+ .errstr_unpriv = "allowed for",
.result_unpriv = REJECT,
.errstr = "R0 !read_ok",
.result = REJECT,
@@ -1977,7 +1977,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
},
@@ -2003,7 +2003,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.errstr = "!read_ok",
.result = REJECT,
},
@@ -2028,7 +2028,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.errstr = "!read_ok",
.result = REJECT,
},
diff --git a/tools/testing/selftests/bpf/verifier/const_or.c b/tools/testing/selftests/bpf/verifier/const_or.c
index 84446dfc7c1d..6c214c58e8d4 100644
--- a/tools/testing/selftests/bpf/verifier/const_or.c
+++ b/tools/testing/selftests/bpf/verifier/const_or.c
@@ -6,7 +6,7 @@
BPF_MOV64_IMM(BPF_REG_2, 34),
BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 13),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
@@ -20,7 +20,7 @@
BPF_MOV64_IMM(BPF_REG_2, 34),
BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 24),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.errstr = "invalid stack type R1 off=-48 access_size=58",
@@ -36,7 +36,7 @@
BPF_MOV64_IMM(BPF_REG_4, 13),
BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
@@ -51,7 +51,7 @@
BPF_MOV64_IMM(BPF_REG_4, 24),
BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.errstr = "invalid stack type R1 off=-48 access_size=58",
diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
new file mode 100644
index 000000000000..2ad5f974451c
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
@@ -0,0 +1,492 @@
+{
+ "valid 1,2,4,8-byte reads from bpf_sk_lookup",
+ .insns = {
+ /* 1-byte read from family field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 3),
+ /* 2-byte read from family field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 2),
+ /* 4-byte read from family field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+
+ /* 1-byte read from protocol field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 3),
+ /* 2-byte read from protocol field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 2),
+ /* 4-byte read from protocol field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+
+ /* 1-byte read from remote_ip4 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 3),
+ /* 2-byte read from remote_ip4 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 2),
+ /* 4-byte read from remote_ip4 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+
+ /* 1-byte read from remote_ip6 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 5),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 6),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 9),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 10),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 11),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 13),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 14),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 15),
+ /* 2-byte read from remote_ip6 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 6),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 10),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 14),
+ /* 4-byte read from remote_ip6 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+
+ /* 1-byte read from remote_port field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 3),
+ /* 2-byte read from remote_port field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 2),
+ /* 4-byte read from remote_port field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+
+ /* 1-byte read from local_ip4 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 3),
+ /* 2-byte read from local_ip4 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 2),
+ /* 4-byte read from local_ip4 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+
+ /* 1-byte read from local_ip6 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 5),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 6),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 9),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 10),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 11),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 13),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 14),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 15),
+ /* 2-byte read from local_ip6 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 6),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 10),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 14),
+ /* 4-byte read from local_ip6 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+
+ /* 1-byte read from local_port field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 3),
+ /* 2-byte read from local_port field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 2),
+ /* 4-byte read from local_port field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+
+ /* 8-byte read from sk field */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+/* invalid 8-byte reads from 4-byte fields in bpf_sk_lookup */
+{
+ "invalid 8-byte read from bpf_sk_lookup family field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup protocol field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup remote_ip4 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup remote_ip6 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup remote_port field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup local_ip4 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup local_ip6 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup local_port field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+/* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */
+{
+ "invalid 4-byte read from bpf_sk_lookup sk field",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 2-byte read from bpf_sk_lookup sk field",
+ .insns = {
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 1-byte read from bpf_sk_lookup sk field",
+ .insns = {
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+/* out of bounds and unaligned reads from bpf_sk_lookup */
+{
+ "invalid 4-byte read past end of bpf_sk_lookup",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ sizeof(struct bpf_sk_lookup)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 4-byte unaligned read from bpf_sk_lookup at odd offset",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 4-byte unaligned read from bpf_sk_lookup at even offset",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+/* in-bounds and out-of-bounds writes to bpf_sk_lookup */
+{
+ "invalid 8-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 4-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 2-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 1-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 4-byte write past end of bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ sizeof(struct bpf_sk_lookup)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
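
Taken together, the ctx_sk_lookup.c entries pin down the context access rules
for BPF_PROG_TYPE_SK_LOOKUP: 1-, 2- and 4-byte reads of the 4-byte fields and
an 8-byte read of the sk pointer are accepted, while wider reads, misaligned
reads and all writes are rejected. A hedged sketch of a program that stays
within those rules (the SEC() name follows libbpf convention; the port number
is an arbitrary example):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("sk_lookup")
	int inspect_lookup(struct bpf_sk_lookup *ctx)
	{
		__u32 port = ctx->local_port;	/* 4-byte field: narrow reads OK   */
		struct bpf_sock *sk = ctx->sk;	/* pointer field: 8-byte read only */

		if (!sk && port == 53)
			return SK_DROP;
		return SK_PASS;
	}

	char _license[] SEC("license") = "GPL";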
diff --git a/tools/testing/selftests/bpf/verifier/d_path.c b/tools/testing/selftests/bpf/verifier/d_path.c
new file mode 100644
index 000000000000..b988396379a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/d_path.c
@@ -0,0 +1,37 @@
+{
+ "d_path accept",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
+ BPF_LD_IMM64(BPF_REG_3, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .kfunc = "dentry_open",
+},
+{
+ "d_path reject",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
+ BPF_LD_IMM64(BPF_REG_3, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "helper call is not allowed in probe",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .kfunc = "d_path",
+},
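
The d_path.c pair checks that bpf_d_path() is only usable from trampoline
programs attached to functions on the helper's allowlist: attaching to
dentry_open is accepted, while attaching to d_path itself is rejected with
"helper call is not allowed in probe". A hedged sketch of an fentry user of
the helper; filp_close is picked as an illustrative allowlisted attach point
and the buffer size is arbitrary:

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	SEC("fentry/filp_close")
	int BPF_PROG(trace_close, struct file *filp)
	{
		char path[64];

		bpf_d_path(&filp->f_path, path, sizeof(path));
		bpf_printk("closing %s", path);
		return 0;
	}

	char _license[] SEC("license") = "GPL";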
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index 50a8a63be4ac..5cf361d8eb1c 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -85,7 +85,7 @@
BPF_MOV64_IMM(BPF_REG_0, 12),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -103,7 +103,7 @@
BPF_MOV64_IMM(BPF_REG_0, 12),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -121,7 +121,7 @@
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -137,7 +137,7 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
@@ -152,7 +152,7 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
index 2c5fbe7bcd27..ae72536603fe 100644
--- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
@@ -529,7 +529,7 @@
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
- .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)",
+ .errstr = "invalid access to packet, off=0 size=8, R5(id=2,off=0,r=0)",
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
diff --git a/tools/testing/selftests/bpf/verifier/direct_value_access.c b/tools/testing/selftests/bpf/verifier/direct_value_access.c
index b9fb28e8e224..988f46a1a4c7 100644
--- a/tools/testing/selftests/bpf/verifier/direct_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_value_access.c
@@ -68,7 +68,7 @@
},
.fixup_map_array_48b = { 1 },
.result = REJECT,
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R1 min value is outside of the allowed memory range",
},
{
"direct map access, write test 7",
@@ -220,7 +220,7 @@
},
.fixup_map_array_small = { 1 },
.result = REJECT,
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R1 min value is outside of the allowed memory range",
},
{
"direct map access, write test 19",
diff --git a/tools/testing/selftests/bpf/verifier/event_output.c b/tools/testing/selftests/bpf/verifier/event_output.c
index 130553e19eca..c5e805980409 100644
--- a/tools/testing/selftests/bpf/verifier/event_output.c
+++ b/tools/testing/selftests/bpf/verifier/event_output.c
@@ -92,3 +92,28 @@
.result = ACCEPT,
.retval = 1,
},
+{
+ "perfevent for cgroup dev",
+ .insns = { __PERF_EVENT_INSNS__ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_DEVICE,
+ .fixup_map_event_output = { 4 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "perfevent for cgroup sysctl",
+ .insns = { __PERF_EVENT_INSNS__ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
+ .fixup_map_event_output = { 4 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "perfevent for cgroup sockopt",
+ .insns = { __PERF_EVENT_INSNS__ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+ .fixup_map_event_output = { 4 },
+ .result = ACCEPT,
+ .retval = 1,
+},
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
index 67ab12410050..87c4e7900083 100644
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
@@ -19,7 +19,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -36,7 +36,7 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.errstr = "invalid indirect read from stack off -64+0 size 64",
@@ -55,7 +55,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -84,7 +84,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -112,7 +112,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -132,7 +132,7 @@
BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -152,7 +152,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -171,7 +171,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -190,7 +190,7 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -208,7 +208,7 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -233,7 +233,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -259,7 +259,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -286,7 +286,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -313,12 +313,12 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 4 },
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R1 min value is outside of the allowed memory range",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -468,7 +468,7 @@
BPF_MOV64_IMM(BPF_REG_1, 0),
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.errstr = "R1 type=inv expected=fp",
@@ -481,7 +481,7 @@
BPF_MOV64_IMM(BPF_REG_1, 0),
BPF_MOV64_IMM(BPF_REG_2, 1),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.errstr = "R1 type=inv expected=fp",
@@ -495,7 +495,7 @@
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
@@ -513,7 +513,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
@@ -534,7 +534,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
@@ -554,7 +554,7 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
@@ -580,7 +580,7 @@
BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
BPF_EXIT_INSN(),
},
@@ -607,7 +607,7 @@
BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
BPF_EXIT_INSN(),
},
diff --git a/tools/testing/selftests/bpf/verifier/helper_value_access.c b/tools/testing/selftests/bpf/verifier/helper_value_access.c
index 7572e403ddb9..1c7882ddfa63 100644
--- a/tools/testing/selftests/bpf/verifier/helper_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/helper_value_access.c
@@ -10,7 +10,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -29,7 +29,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_MOV64_IMM(BPF_REG_2, 8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -67,7 +67,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -87,7 +87,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_MOV64_IMM(BPF_REG_2, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -109,7 +109,7 @@
BPF_MOV64_IMM(BPF_REG_2,
sizeof(struct test_val) - offsetof(struct test_val, foo)),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -129,7 +129,7 @@
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
BPF_MOV64_IMM(BPF_REG_2, 8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -170,7 +170,7 @@
BPF_MOV64_IMM(BPF_REG_2,
sizeof(struct test_val) - offsetof(struct test_val, foo) + 8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -191,7 +191,7 @@
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
BPF_MOV64_IMM(BPF_REG_2, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -212,7 +212,7 @@
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
BPF_MOV64_IMM(BPF_REG_2, -1),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -235,7 +235,7 @@
BPF_MOV64_IMM(BPF_REG_2,
sizeof(struct test_val) - offsetof(struct test_val, foo)),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -256,7 +256,7 @@
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
BPF_MOV64_IMM(BPF_REG_2, 8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -280,7 +280,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R1 min value is outside of the allowed memory range",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -300,7 +300,7 @@
sizeof(struct test_val) -
offsetof(struct test_val, foo) + 8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -322,7 +322,7 @@
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
BPF_MOV64_IMM(BPF_REG_2, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -344,7 +344,7 @@
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
BPF_MOV64_IMM(BPF_REG_2, -1),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -368,7 +368,7 @@
BPF_MOV64_IMM(BPF_REG_2,
sizeof(struct test_val) - offsetof(struct test_val, foo)),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -390,7 +390,7 @@
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
BPF_MOV64_IMM(BPF_REG_2, 8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -415,7 +415,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R1 min value is outside of the allowed memory range",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -433,7 +433,7 @@
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
BPF_MOV64_IMM(BPF_REG_2, 1),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -458,7 +458,7 @@
sizeof(struct test_val) -
offsetof(struct test_val, foo) + 1),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -926,7 +926,7 @@
},
.fixup_map_hash_16b = { 3, 10 },
.result = REJECT,
- .errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
+ .errstr = "R2 unbounded memory access, make sure to bounds check any such access",
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c
index 3856dba733e9..f9297900cea6 100644
--- a/tools/testing/selftests/bpf/verifier/ld_imm64.c
+++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c
@@ -51,14 +51,6 @@
.result = REJECT,
},
{
- "test5 ld_imm64",
- .insns = {
- BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
- },
- .errstr = "invalid bpf_ld_imm64 insn",
- .result = REJECT,
-},
-{
"test6 ld_imm64",
.insns = {
BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c
new file mode 100644
index 000000000000..637f9293bda8
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/map_ptr.c
@@ -0,0 +1,94 @@
+{
+ "bpf_map_ptr: read with negative offset rejected",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .result = REJECT,
+ .errstr = "R1 is bpf_array invalid negative access: off=-8",
+},
+{
+ "bpf_map_ptr: write rejected",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .result = REJECT,
+ .errstr = "only read from bpf_array is supported",
+},
+{
+ "bpf_map_ptr: read non-existent field rejected",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .result = REJECT,
+ .errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4",
+},
+{
+ "bpf_map_ptr: read ops field accepted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "bpf_map_ptr: r = 0, map_ptr = map_ptr + r",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 4 },
+ .result = ACCEPT,
+},
+{
+ "bpf_map_ptr: r = 0, r = r + map_ptr",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_LD_MAP_FD(BPF_REG_0, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 4 },
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
index cd26ee6b7b1d..1f2b8c4cb26d 100644
--- a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
+++ b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
@@ -56,7 +56,7 @@
.fixup_map_in_map = { 16 },
.fixup_map_array_48b = { 13 },
.result = REJECT,
- .errstr = "R0 invalid mem access 'map_ptr'",
+ .errstr = "only read from bpf_array is supported",
},
{
"cond: two branches returning different map pointers for lookup (tail, tail)",
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 02151f8c940f..6dc8003ffc70 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -31,14 +31,14 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
.fixup_map_array_48b = { 1 },
.result = VERBOSE_ACCEPT,
.errstr =
- "26: (85) call bpf_probe_read#4\
+ "26: (85) call bpf_probe_read_kernel#113\
last_idx 26 first_idx 20\
regs=4 stack=0 before 25\
regs=4 stack=0 before 24\
@@ -91,7 +91,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -99,7 +99,7 @@
.result = VERBOSE_ACCEPT,
.flags = BPF_F_TEST_STATE_FREQ,
.errstr =
- "26: (85) call bpf_probe_read#4\
+ "26: (85) call bpf_probe_read_kernel#113\
last_idx 26 first_idx 22\
regs=4 stack=0 before 25\
regs=4 stack=0 before 24\
diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
index da7a4b37cb98..fc4e301260f6 100644
--- a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
+++ b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
@@ -1,34 +1,4 @@
{
- "prevent map lookup in sockmap",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockmap = { 3 },
- .result = REJECT,
- .errstr = "cannot pass map_type 15 into func bpf_map_lookup_elem",
- .prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
-{
- "prevent map lookup in sockhash",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockhash = { 3 },
- .result = REJECT,
- .errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem",
- .prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
-{
"prevent map lookup in stack trace",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index 604b46151736..006b5bd99c08 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -821,3 +821,83 @@
.result = REJECT,
.errstr = "invalid mem access",
},
+{
+ "reference tracking: branch tracking valid pointer null comparison",
+ .insns = {
+ BPF_SK_LOOKUP(sk_lookup_tcp),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+},
+{
+ "reference tracking: branch tracking valid pointer value comparison",
+ .insns = {
+ BPF_SK_LOOKUP(sk_lookup_tcp),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 1234, 2),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+},
+{
+ "reference tracking: bpf_sk_release(btf_tcp_sock)",
+ .insns = {
+ BPF_SK_LOOKUP(sk_lookup_tcp),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "unknown func",
+},
+{
+ "reference tracking: use ptr from bpf_skc_to_tcp_sock() after release",
+ .insns = {
+ BPF_SK_LOOKUP(sk_lookup_tcp),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "invalid mem access",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "unknown func",
+},
diff --git a/tools/testing/selftests/bpf/verifier/regalloc.c b/tools/testing/selftests/bpf/verifier/regalloc.c
new file mode 100644
index 000000000000..4ad7e05de706
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/regalloc.c
@@ -0,0 +1,269 @@
+{
+ "regalloc basic",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 4),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc negative",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 24, 4),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=48 off=48 size=1",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc src_reg mark",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 5),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc src_reg negative",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 22, 5),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=48 off=44 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc and spill",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 7),
+ /* r0 has upper bound that should propagate into r2 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2),
+ /* r3 has lower and upper bounds */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc and spill negative",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 48, 7),
+ /* r0 has upper bound that should propagate into r2 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2),
+ /* r3 has lower and upper bounds */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=48 off=48 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc three regs",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 12, 5),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_4),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc after call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 20, 4),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_9, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_8),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_9),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc in callee",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 20, 5),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+ "regalloc, spill, JEQ",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), /* spill r0 */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),
+ /* The verifier will walk the rest twice with r0 == 0 and r0 == map_value */
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 20, 0),
+ /* The verifier will walk the rest two more times with r0 == 20 and r0 == unknown */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 with map_value */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 1), /* skip ldx if map_value == NULL */
+ /* Buggy verifier will think that r3 == 20 here */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), /* read from map_value */
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_48b = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index 9ed192e14f5f..ce13ece08d51 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -222,7 +222,7 @@
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, state)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, rx_queue_mapping)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -516,3 +516,143 @@
.prog_type = BPF_PROG_TYPE_XDP,
.result = ACCEPT,
},
+{
+ "bpf_map_lookup_elem(sockmap, &key)",
+ .insns = {
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_sockmap = { 3 },
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .result = REJECT,
+ .errstr = "Unreleased reference id=2 alloc_insn=5",
+},
+{
+ "bpf_map_lookup_elem(sockhash, &key)",
+ .insns = {
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_sockhash = { 3 },
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .result = REJECT,
+ .errstr = "Unreleased reference id=2 alloc_insn=5",
+},
+{
+ "bpf_map_lookup_elem(sockmap, &key); sk->type [fullsock field]; bpf_sk_release(sk)",
+ .insns = {
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_sockmap = { 3 },
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .result = ACCEPT,
+},
+{
+ "bpf_map_lookup_elem(sockhash, &key); sk->type [fullsock field]; bpf_sk_release(sk)",
+ .insns = {
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_sockhash = { 3 },
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .result = ACCEPT,
+},
+{
+ "bpf_sk_select_reuseport(ctx, reuseport_array, &key, flags)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_reuseport_array = { 4 },
+ .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+ .result = ACCEPT,
+},
+{
+ "bpf_sk_select_reuseport(ctx, sockmap, &key, flags)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_sockmap = { 4 },
+ .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+ .result = ACCEPT,
+},
+{
+ "bpf_sk_select_reuseport(ctx, sockhash, &key, flags)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_sockmap = { 4 },
+ .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+ .result = ACCEPT,
+},
+{
+ "mark null check on return value of bpf_skc_to helpers",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_request_sock),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_8, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "invalid mem access",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "unknown func",
+},
diff --git a/tools/testing/selftests/bpf/verifier/value_or_null.c b/tools/testing/selftests/bpf/verifier/value_or_null.c
index 860d4a71cd83..3ecb70a3d939 100644
--- a/tools/testing/selftests/bpf/verifier/value_or_null.c
+++ b/tools/testing/selftests/bpf/verifier/value_or_null.c
@@ -150,3 +150,22 @@
.result_unpriv = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "map lookup and null branch prediction",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_10, 10),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 4 },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index a53d99cebd9f..ed4e76b24649 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -50,7 +50,7 @@
.fixup_map_array_48b = { 8 },
.result = ACCEPT,
.result_unpriv = REJECT,
- .errstr_unpriv = "R0 min value is outside of the array range",
+ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
.retval = 1,
},
{
@@ -325,7 +325,7 @@
},
.fixup_map_array_48b = { 3 },
.result = REJECT,
- .errstr = "R0 min value is outside of the array range",
+ .errstr = "R0 min value is outside of the allowed memory range",
.result_unpriv = REJECT,
.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
},
@@ -601,7 +601,7 @@
},
.fixup_map_array_48b = { 3 },
.result = REJECT,
- .errstr = "R1 max value is outside of the array range",
+ .errstr = "R1 max value is outside of the allowed memory range",
.errstr_unpriv = "R1 pointer arithmetic of map value goes out of range",
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
@@ -726,7 +726,7 @@
},
.fixup_map_array_48b = { 3 },
.result = REJECT,
- .errstr = "R0 min value is outside of the array range",
+ .errstr = "R0 min value is outside of the allowed memory range",
},
{
"map access: value_ptr -= known scalar, 2",
@@ -836,3 +836,41 @@
.errstr = "R0 invalid mem access 'inv'",
.errstr_unpriv = "R0 pointer -= pointer prohibited",
},
+{
+ "32bit pkt_ptr -= scalar",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2),
+ BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_7),
+ BPF_ALU32_REG(BPF_SUB, BPF_REG_6, BPF_REG_4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "32bit scalar -= pkt_ptr",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2),
+ BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_6),
+ BPF_ALU32_REG(BPF_SUB, BPF_REG_4, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
index b3ead29c6089..2cf6f10ab7c4 100644
--- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -47,7 +47,7 @@ void child(int cpu)
_exit(0);
}
-bool run_test(int cpu)
+int run_test(int cpu)
{
int status;
pid_t pid = fork();
@@ -55,7 +55,7 @@ bool run_test(int cpu)
if (pid < 0) {
ksft_print_msg("fork() failed: %s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (pid == 0)
child(cpu);
@@ -63,67 +63,68 @@ bool run_test(int cpu)
wpid = waitpid(pid, &status, __WALL);
if (wpid != pid) {
ksft_print_msg("waitpid() failed: %s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (!WIFSTOPPED(status)) {
ksft_print_msg("child did not stop: %s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (WSTOPSIG(status) != SIGSTOP) {
ksft_print_msg("child did not stop with SIGSTOP: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) < 0) {
if (errno == EIO) {
- ksft_exit_skip(
+ ksft_print_msg(
"ptrace(PTRACE_SINGLESTEP) not supported on this architecture: %s\n",
strerror(errno));
+ return KSFT_SKIP;
}
ksft_print_msg("ptrace(PTRACE_SINGLESTEP) failed: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
wpid = waitpid(pid, &status, __WALL);
if (wpid != pid) {
ksft_print_msg("waitpid() failed: $s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (WIFEXITED(status)) {
ksft_print_msg("child did not single-step: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (!WIFSTOPPED(status)) {
ksft_print_msg("child did not stop: %s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (WSTOPSIG(status) != SIGTRAP) {
ksft_print_msg("child did not stop with SIGTRAP: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
ksft_print_msg("ptrace(PTRACE_CONT) failed: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
wpid = waitpid(pid, &status, __WALL);
if (wpid != pid) {
ksft_print_msg("waitpid() failed: %s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (!WIFEXITED(status)) {
ksft_print_msg("child did not exit after PTRACE_CONT: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
- return true;
+ return KSFT_PASS;
}
void suspend(void)
@@ -183,32 +184,38 @@ int main(int argc, char **argv)
}
}
+ err = sched_getaffinity(0, sizeof(available_cpus), &available_cpus);
+ if (err < 0)
+ ksft_exit_fail_msg("sched_getaffinity() failed\n");
+
for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
if (!CPU_ISSET(cpu, &available_cpus))
continue;
tests++;
}
- ksft_set_plan(tests);
if (do_suspend)
suspend();
- err = sched_getaffinity(0, sizeof(available_cpus), &available_cpus);
- if (err < 0)
- ksft_exit_fail_msg("sched_getaffinity() failed\n");
-
+ ksft_set_plan(tests);
for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
- bool test_success;
+ int test_success;
if (!CPU_ISSET(cpu, &available_cpus))
continue;
test_success = run_test(cpu);
- if (test_success) {
+ switch (test_success) {
+ case KSFT_PASS:
ksft_test_result_pass("CPU %d\n", cpu);
- } else {
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("CPU %d\n", cpu);
+ break;
+ case KSFT_FAIL:
ksft_test_result_fail("CPU %d\n", cpu);
succeeded = false;
+ break;
}
}
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index aa6de65b0838..84cfcabea838 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -2,3 +2,4 @@
test_memcontrol
test_core
test_freezer
+test_kmem
\ No newline at end of file
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 967f268fde74..f027d933595b 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -6,11 +6,13 @@ all:
TEST_FILES := with_stress.sh
TEST_PROGS := test_stress.sh
TEST_GEN_PROGS = test_memcontrol
+TEST_GEN_PROGS += test_kmem
TEST_GEN_PROGS += test_core
TEST_GEN_PROGS += test_freezer
include ../lib.mk
$(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h
+$(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h
$(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h
$(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 8a637ca7d73a..05853b0b8831 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -106,7 +106,7 @@ int cg_read_strcmp(const char *cgroup, const char *control,
/* Handle the case of comparing against empty string */
if (!expected)
- size = 32;
+ return -1;
else
size = strlen(expected) + 1;
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
new file mode 100644
index 000000000000..0941aa16157e
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <sys/sysinfo.h>
+#include <pthread.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+
+/*
+ * Memory cgroup charging and vmstat data aggregation is performed using
+ * percpu batches 32 pages big (look at MEMCG_CHARGE_BATCH). So the maximum
+ * discrepancy between charge and vmstat entries is number of cpus multiplied
+ * by 32 pages multiplied by 2.
+ */
+#define MAX_VMSTAT_ERROR (4096 * 32 * 2 * get_nprocs())
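+/*
+ * A worked example of that bound (illustration only, assuming an
+ * 8-CPU machine): 4096 * 32 * 2 * 8 = 2097152 bytes, i.e. 2 MiB of
+ * allowed drift between memory.current and the summed vmstat counters.
+ */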
+
+
+static int alloc_dcache(const char *cgroup, void *arg)
+{
+ unsigned long i;
+ struct stat st;
+ char buf[128];
+
+ for (i = 0; i < (unsigned long)arg; i++) {
+ snprintf(buf, sizeof(buf),
+ "/something-non-existent-with-a-long-name-%64lu-%d",
+ i, getpid());
+ stat(buf, &st);
+ }
+
+ return 0;
+}
+
+/*
+ * This test allocates 100,000 negative dentries with long names.
+ * It then checks that "slab" in memory.stat is larger than 1M,
+ * sets memory.high to 1M, and checks that at least half of the
+ * slab memory has been reclaimed.
+ */
+static int test_kmem_basic(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cg = NULL;
+ long slab0, slab1, current;
+
+ cg = cg_name(root, "kmem_basic_test");
+ if (!cg)
+ goto cleanup;
+
+ if (cg_create(cg))
+ goto cleanup;
+
+ if (cg_run(cg, alloc_dcache, (void *)100000))
+ goto cleanup;
+
+ slab0 = cg_read_key_long(cg, "memory.stat", "slab ");
+ if (slab0 < (1 << 20))
+ goto cleanup;
+
+ cg_write(cg, "memory.high", "1M");
+ slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
+ if (slab1 <= 0)
+ goto cleanup;
+
+ current = cg_read_long(cg, "memory.current");
+ if (current <= 0)
+ goto cleanup;
+
+ if (slab1 < slab0 / 2 && current < slab0 / 2)
+ ret = KSFT_PASS;
+cleanup:
+ cg_destroy(cg);
+ free(cg);
+
+ return ret;
+}
+
+static void *alloc_kmem_fn(void *arg)
+{
+ alloc_dcache(NULL, (void *)100);
+ return NULL;
+}
+
+static int alloc_kmem_smp(const char *cgroup, void *arg)
+{
+ int nr_threads = 2 * get_nprocs();
+ pthread_t *tinfo;
+ unsigned long i;
+ int ret = -1;
+
+ tinfo = calloc(nr_threads, sizeof(pthread_t));
+ if (tinfo == NULL)
+ return -1;
+
+ for (i = 0; i < nr_threads; i++) {
+ if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn,
+ (void *)i)) {
+ free(tinfo);
+ return -1;
+ }
+ }
+
+ for (i = 0; i < nr_threads; i++) {
+ ret = pthread_join(tinfo[i], NULL);
+ if (ret)
+ break;
+ }
+
+ free(tinfo);
+ return ret;
+}
+
+static int cg_run_in_subcgroups(const char *parent,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg, int times)
+{
+ char *child;
+ int i;
+
+ for (i = 0; i < times; i++) {
+ child = cg_name_indexed(parent, "child", i);
+ if (!child)
+ return -1;
+
+ if (cg_create(child)) {
+ cg_destroy(child);
+ free(child);
+ return -1;
+ }
+
+ if (cg_run(child, fn, NULL)) {
+ cg_destroy(child);
+ free(child);
+ return -1;
+ }
+
+ cg_destroy(child);
+ free(child);
+ }
+
+ return 0;
+}
+
+/*
+ * The test creates and destroys a large number of cgroups. In each cgroup it
+ * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
+ * threads. Then it checks that the numbers at the parent level are
+ * consistent: the total size of the cgroups should be roughly equal to
+ * anon + file + slab + kernel_stack.
+ */
+static int test_kmem_memcg_deletion(const char *root)
+{
+ long current, slab, anon, file, kernel_stack, sum;
+ int ret = KSFT_FAIL;
+ char *parent;
+
+ parent = cg_name(root, "kmem_memcg_deletion_test");
+ if (!parent)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))
+ goto cleanup;
+
+ current = cg_read_long(parent, "memory.current");
+ slab = cg_read_key_long(parent, "memory.stat", "slab ");
+ anon = cg_read_key_long(parent, "memory.stat", "anon ");
+ file = cg_read_key_long(parent, "memory.stat", "file ");
+ kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack ");
+ if (current < 0 || slab < 0 || anon < 0 || file < 0 ||
+ kernel_stack < 0)
+ goto cleanup;
+
+ sum = slab + anon + file + kernel_stack;
+ if (abs(sum - current) < MAX_VMSTAT_ERROR) {
+ ret = KSFT_PASS;
+ } else {
+ printf("memory.current = %ld\n", current);
+ printf("slab + anon + file + kernel_stack = %ld\n", sum);
+ printf("slab = %ld\n", slab);
+ printf("anon = %ld\n", anon);
+ printf("file = %ld\n", file);
+ printf("kernel_stack = %ld\n", kernel_stack);
+ }
+
+cleanup:
+ cg_destroy(parent);
+ free(parent);
+
+ return ret;
+}
+
+/*
+ * The test reads the entire /proc/kpagecgroup. If the read completes
+ * successfully (and the kernel didn't panic), the test is treated as passed.
+ */
+static int test_kmem_proc_kpagecgroup(const char *root)
+{
+ unsigned long buf[128];
+ int ret = KSFT_FAIL;
+ ssize_t len;
+ int fd;
+
+ fd = open("/proc/kpagecgroup", O_RDONLY);
+ if (fd < 0)
+ return ret;
+
+ do {
+ len = read(fd, buf, sizeof(buf));
+ } while (len > 0);
+
+ if (len == 0)
+ ret = KSFT_PASS;
+
+ close(fd);
+ return ret;
+}
+
+static void *pthread_wait_fn(void *arg)
+{
+ sleep(100);
+ return NULL;
+}
+
+static int spawn_1000_threads(const char *cgroup, void *arg)
+{
+ int nr_threads = 1000;
+ pthread_t *tinfo;
+ unsigned long i;
+ long stack;
+ int ret = -1;
+
+ tinfo = calloc(nr_threads, sizeof(pthread_t));
+ if (tinfo == NULL)
+ return -1;
+
+ for (i = 0; i < nr_threads; i++) {
+ if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn,
+ (void *)i)) {
+ free(tinfo);
+			return -1;
+ }
+ }
+
+ stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");
+ if (stack >= 4096 * 1000)
+ ret = 0;
+
+ free(tinfo);
+ return ret;
+}
+
+/*
+ * The test spawns a process, which spawns 1000 threads. Then it checks
+ * that memory.stat's kernel_stack is at least 1000 pages large.
+ */
+static int test_kmem_kernel_stacks(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cg = NULL;
+
+ cg = cg_name(root, "kmem_kernel_stacks_test");
+ if (!cg)
+ goto cleanup;
+
+ if (cg_create(cg))
+ goto cleanup;
+
+ if (cg_run(cg, spawn_1000_threads, NULL))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+cleanup:
+ cg_destroy(cg);
+ free(cg);
+
+ return ret;
+}
+
+/*
+ * This test sequentially creates 30 child cgroups, allocates some
+ * kernel memory in each of them, and deletes them. Then it checks
+ * that the number of dying cgroups on the parent level is 0.
+ */
+static int test_kmem_dead_cgroups(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent;
+ long dead;
+ int i;
+
+ parent = cg_name(root, "kmem_dead_cgroups_test");
+ if (!parent)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
+ goto cleanup;
+
+ for (i = 0; i < 5; i++) {
+ dead = cg_read_key_long(parent, "cgroup.stat",
+ "nr_dying_descendants ");
+ if (dead == 0) {
+ ret = KSFT_PASS;
+ break;
+ }
+ /*
+ * Reclaiming cgroups might take some time,
+ * let's wait a bit and repeat.
+ */
+ sleep(1);
+ }
+
+cleanup:
+ cg_destroy(parent);
+ free(parent);
+
+ return ret;
+}
+
+/*
+ * This test creates a sub-tree with 1000 memory cgroups.
+ * Then it checks that memory.current on the parent level
+ * is greater than 0 and approximately matches the percpu value
+ * from memory.stat.
+ */
+static int test_percpu_basic(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent, *child;
+ long current, percpu;
+ int i;
+
+ parent = cg_name(root, "percpu_basic_test");
+ if (!parent)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ for (i = 0; i < 1000; i++) {
+ child = cg_name_indexed(parent, "child", i);
+ if (!child)
+ return -1;
+
+ if (cg_create(child))
+ goto cleanup_children;
+
+ free(child);
+ }
+
+ current = cg_read_long(parent, "memory.current");
+ percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
+
+ if (current > 0 && percpu > 0 && abs(current - percpu) <
+ MAX_VMSTAT_ERROR)
+ ret = KSFT_PASS;
+ else
+ printf("memory.current %ld\npercpu %ld\n",
+ current, percpu);
+
+cleanup_children:
+ for (i = 0; i < 1000; i++) {
+ child = cg_name_indexed(parent, "child", i);
+ cg_destroy(child);
+ free(child);
+ }
+
+cleanup:
+ cg_destroy(parent);
+ free(parent);
+
+ return ret;
+}
+
+#define T(x) { x, #x }
+struct kmem_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_kmem_basic),
+ T(test_kmem_memcg_deletion),
+ T(test_kmem_proc_kpagecgroup),
+ T(test_kmem_kernel_stacks),
+ T(test_kmem_dead_cgroups),
+ T(test_percpu_basic),
+};
+#undef T
+
+int main(int argc, char **argv)
+{
+ char root[PATH_MAX];
+ int i, ret = EXIT_SUCCESS;
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ /*
+ * Check that memory controller is available:
+ * memory is listed in cgroup.controllers
+ */
+ if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+ ksft_exit_skip("memory controller isn't available\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
+ if (cg_write(root, "cgroup.subtree_control", "+memory"))
+ ksft_exit_skip("Failed to set memory controller\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ return ret;
+}
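
The kmem tests above pull individual counters such as "slab ", "percpu " and "kernel_stack " out of memory.stat through cg_read_key_long(), which comes from cgroup_util.c (linked in by the Makefile change above) rather than from this file. A rough, minimal sketch of that kind of lookup, assuming the cgroup v2 "key value" per-line format of memory.stat and an example mount point of /sys/fs/cgroup; the real helper may differ:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Return the value following `key` in a "key value" stat file, or -1. */
static long read_stat_key(const char *path, const char *key)
{
	char line[256];
	long val = -1;
	FILE *f = fopen(path, "r");

	if (!f)
		return -1;

	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, key, strlen(key))) {
			val = atol(line + strlen(key));
			break;
		}
	}

	fclose(f);
	return val;
}

int main(void)
{
	printf("slab = %ld\n",
	       read_stat_key("/sys/fs/cgroup/memory.stat", "slab "));
	return 0;
}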
diff --git a/tools/testing/selftests/clone3/.gitignore b/tools/testing/selftests/clone3/.gitignore
index a81085742d40..83c0f6246055 100644
--- a/tools/testing/selftests/clone3/.gitignore
+++ b/tools/testing/selftests/clone3/.gitignore
@@ -2,3 +2,4 @@
clone3
clone3_clear_sighand
clone3_set_tid
+clone3_cap_checkpoint_restore
diff --git a/tools/testing/selftests/clone3/Makefile b/tools/testing/selftests/clone3/Makefile
index cf976c732906..ef7564cb7abe 100644
--- a/tools/testing/selftests/clone3/Makefile
+++ b/tools/testing/selftests/clone3/Makefile
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -g -I../../../../usr/include/
+LDLIBS += -lcap
-TEST_GEN_PROGS := clone3 clone3_clear_sighand clone3_set_tid
+TEST_GEN_PROGS := clone3 clone3_clear_sighand clone3_set_tid \
+ clone3_cap_checkpoint_restore
include ../lib.mk
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index f14c269a5a18..42be3b925830 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -20,13 +20,6 @@
#include "../kselftest.h"
#include "clone3_selftests.h"
-/*
- * Different sizes of struct clone_args
- */
-#ifndef CLONE3_ARGS_SIZE_V0
-#define CLONE3_ARGS_SIZE_V0 64
-#endif
-
enum test_mode {
CLONE3_ARGS_NO_TEST,
CLONE3_ARGS_ALL_0,
@@ -38,13 +31,13 @@ enum test_mode {
static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
{
- struct clone_args args = {
+ struct __clone_args args = {
.flags = flags,
.exit_signal = SIGCHLD,
};
struct clone_args_extended {
- struct clone_args args;
+ struct __clone_args args;
__aligned_u64 excess_space[2];
} args_ext;
@@ -52,11 +45,11 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
int status;
memset(&args_ext, 0, sizeof(args_ext));
- if (size > sizeof(struct clone_args))
+ if (size > sizeof(struct __clone_args))
args_ext.excess_space[1] = 1;
if (size == 0)
- size = sizeof(struct clone_args);
+ size = sizeof(struct __clone_args);
switch (test_mode) {
case CLONE3_ARGS_ALL_0:
@@ -77,9 +70,9 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
break;
}
- memcpy(&args_ext.args, &args, sizeof(struct clone_args));
+ memcpy(&args_ext.args, &args, sizeof(struct __clone_args));
- pid = sys_clone3((struct clone_args *)&args_ext, size);
+ pid = sys_clone3((struct __clone_args *)&args_ext, size);
if (pid < 0) {
ksft_print_msg("%s - Failed to create new process\n",
strerror(errno));
@@ -131,9 +124,9 @@ int main(int argc, char *argv[])
uid_t uid = getuid();
- test_clone3_supported();
ksft_print_header();
ksft_set_plan(17);
+ test_clone3_supported();
/* Just a simple clone3() should return 0.*/
test_clone3(0, 0, 0, CLONE3_ARGS_NO_TEST);
@@ -144,14 +137,14 @@ int main(int argc, char *argv[])
else
ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0. */
- test_clone3(0, CLONE3_ARGS_SIZE_V0, 0, CLONE3_ARGS_NO_TEST);
+ /* Do a clone3() with CLONE_ARGS_SIZE_VER0. */
+ test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST);
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 */
- test_clone3(0, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
+ /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */
+ test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
/* Do a clone3() with sizeof(struct clone_args) + 8 */
- test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
+ test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
/* Do a clone3() with exit_signal having highest 32 bits non-zero */
test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG);
@@ -165,31 +158,31 @@ int main(int argc, char *argv[])
/* Do a clone3() with NSIG < exit_signal < CSIG */
test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG);
- test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
+ test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
- test_clone3(0, sizeof(struct clone_args) + 16, -E2BIG,
+ test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG,
CLONE3_ARGS_ALL_0);
- test_clone3(0, sizeof(struct clone_args) * 2, -E2BIG,
+ test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG,
CLONE3_ARGS_ALL_0);
/* Do a clone3() with > page size */
test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST);
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0 in a new PID NS. */
+ /* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. */
if (uid == 0)
- test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0, 0,
+ test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0,
CLONE3_ARGS_NO_TEST);
else
ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 in a new PID NS */
- test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL,
+ /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */
+ test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL,
CLONE3_ARGS_NO_TEST);
/* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */
if (uid == 0)
- test_clone3(CLONE_NEWPID, sizeof(struct clone_args) + 8, 0,
+ test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0,
CLONE3_ARGS_NO_TEST);
else
ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
new file mode 100644
index 000000000000..55bd387ce7ec
--- /dev/null
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Based on Christian Brauner's clone3() example.
+ * These tests assume they are running in the host's
+ * PID namespace.
+ */
+
+/* capabilities related code based on selftests/bpf/test_verifier.c */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/capability.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sched.h>
+
+#include "../kselftest_harness.h"
+#include "clone3_selftests.h"
+
+#ifndef MAX_PID_NS_LEVEL
+#define MAX_PID_NS_LEVEL 32
+#endif
+
+static void child_exit(int ret)
+{
+ fflush(stdout);
+ fflush(stderr);
+ _exit(ret);
+}
+
+static int call_clone3_set_tid(struct __test_metadata *_metadata,
+ pid_t *set_tid, size_t set_tid_size)
+{
+ int status;
+ pid_t pid = -1;
+
+ struct __clone_args args = {
+ .exit_signal = SIGCHLD,
+ .set_tid = ptr_to_u64(set_tid),
+ .set_tid_size = set_tid_size,
+ };
+
+ pid = sys_clone3(&args, sizeof(args));
+ if (pid < 0) {
+ TH_LOG("%s - Failed to create new process", strerror(errno));
+ return -errno;
+ }
+
+ if (pid == 0) {
+ int ret;
+ char tmp = 0;
+
+ TH_LOG("I am the child, my PID is %d (expected %d)", getpid(), set_tid[0]);
+
+ if (set_tid[0] != getpid())
+ child_exit(EXIT_FAILURE);
+ child_exit(EXIT_SUCCESS);
+ }
+
+ TH_LOG("I am the parent (%d). My child's pid is %d", getpid(), pid);
+
+ if (waitpid(pid, &status, 0) < 0) {
+ TH_LOG("Child returned %s", strerror(errno));
+ return -errno;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static int test_clone3_set_tid(struct __test_metadata *_metadata,
+ pid_t *set_tid, size_t set_tid_size)
+{
+ int ret;
+
+ TH_LOG("[%d] Trying clone3() with CLONE_SET_TID to %d", getpid(), set_tid[0]);
+ ret = call_clone3_set_tid(_metadata, set_tid, set_tid_size);
+ TH_LOG("[%d] clone3() with CLONE_SET_TID %d says:%d", getpid(), set_tid[0], ret);
+ return ret;
+}
+
+struct libcap {
+ struct __user_cap_header_struct hdr;
+ struct __user_cap_data_struct data[2];
+};
+
+static int set_capability(void)
+{
+ cap_value_t cap_values[] = { CAP_SETUID, CAP_SETGID };
+ struct libcap *cap;
+ int ret = -1;
+ cap_t caps;
+
+ caps = cap_get_proc();
+ if (!caps) {
+ perror("cap_get_proc");
+ return -1;
+ }
+
+ /* Drop all capabilities */
+ if (cap_clear(caps)) {
+ perror("cap_clear");
+ goto out;
+ }
+
+ cap_set_flag(caps, CAP_EFFECTIVE, 2, cap_values, CAP_SET);
+ cap_set_flag(caps, CAP_PERMITTED, 2, cap_values, CAP_SET);
+
+ cap = (struct libcap *) caps;
+
+ /* 40 -> CAP_CHECKPOINT_RESTORE */
+ cap->data[1].effective |= 1 << (40 - 32);
+ cap->data[1].permitted |= 1 << (40 - 32);
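+	/*
+	 * Capabilities 32-63 live in data[1], so capability 40 sits at
+	 * bit 40 - 32 = 8 there; the mask ORed in above is 1 << 8 == 0x100.
+	 */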
+
+ if (cap_set_proc(caps)) {
+ perror("cap_set_proc");
+ goto out;
+ }
+ ret = 0;
+out:
+ if (cap_free(caps))
+ perror("cap_free");
+ return ret;
+}
+
+TEST(clone3_cap_checkpoint_restore)
+{
+ pid_t pid;
+ int status;
+ int ret = 0;
+ pid_t set_tid[1];
+
+ test_clone3_supported();
+
+ EXPECT_EQ(getuid(), 0)
+ XFAIL(return, "Skipping all tests as non-root\n");
+
+ memset(&set_tid, 0, sizeof(set_tid));
+
+ /* Find the current active PID */
+ pid = fork();
+ if (pid == 0) {
+ TH_LOG("Child has PID %d", getpid());
+ child_exit(EXIT_SUCCESS);
+ }
+ ASSERT_GT(waitpid(pid, &status, 0), 0)
+ TH_LOG("Waiting for child %d failed", pid);
+
+ /* After the child has finished, its PID should be free. */
+ set_tid[0] = pid;
+
+ ASSERT_EQ(set_capability(), 0)
+ TH_LOG("Could not set CAP_CHECKPOINT_RESTORE");
+
+ ASSERT_EQ(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), 0);
+
+ EXPECT_EQ(setgid(65534), 0)
+ TH_LOG("Failed to setgid(65534)");
+ ASSERT_EQ(setuid(65534), 0);
+
+ set_tid[0] = pid;
+ /* This would fail without CAP_CHECKPOINT_RESTORE */
+ ASSERT_EQ(test_clone3_set_tid(_metadata, set_tid, 1), -EPERM);
+ ASSERT_EQ(set_capability(), 0)
+ TH_LOG("Could not set CAP_CHECKPOINT_RESTORE");
+ /* This should work as we have CAP_CHECKPOINT_RESTORE as non-root */
+ ASSERT_EQ(test_clone3_set_tid(_metadata, set_tid, 1), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
index 9e1af8aa7698..47a8c0fc3676 100644
--- a/tools/testing/selftests/clone3/clone3_clear_sighand.c
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -47,7 +47,7 @@ static void test_clone3_clear_sighand(void)
{
int ret;
pid_t pid;
- struct clone_args args = {};
+ struct __clone_args args = {};
struct sigaction act;
/*
@@ -119,9 +119,8 @@ static void test_clone3_clear_sighand(void)
int main(int argc, char **argv)
{
ksft_print_header();
- test_clone3_supported();
-
ksft_set_plan(1);
+ test_clone3_supported();
test_clone3_clear_sighand();
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
index 91c1a78ddb39..e81ffaaee02b 100644
--- a/tools/testing/selftests/clone3/clone3_selftests.h
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -19,13 +19,11 @@
#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
#endif
-#ifndef CLONE_ARGS_SIZE_VER0
-#define CLONE_ARGS_SIZE_VER0 64
-#endif
-
#ifndef __NR_clone3
#define __NR_clone3 -1
-struct clone_args {
+#endif
+
+struct __clone_args {
__aligned_u64 flags;
__aligned_u64 pidfd;
__aligned_u64 child_tid;
@@ -34,15 +32,21 @@ struct clone_args {
__aligned_u64 stack;
__aligned_u64 stack_size;
__aligned_u64 tls;
-#define CLONE_ARGS_SIZE_VER1 80
+#ifndef CLONE_ARGS_SIZE_VER0
+#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
+#endif
__aligned_u64 set_tid;
__aligned_u64 set_tid_size;
-#define CLONE_ARGS_SIZE_VER2 88
+#ifndef CLONE_ARGS_SIZE_VER1
+#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
+#endif
__aligned_u64 cgroup;
+#ifndef CLONE_ARGS_SIZE_VER2
+#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
+#endif
};
-#endif /* __NR_clone3 */
-static pid_t sys_clone3(struct clone_args *args, size_t size)
+static pid_t sys_clone3(struct __clone_args *args, size_t size)
{
fflush(stdout);
fflush(stderr);
@@ -52,7 +56,7 @@ static pid_t sys_clone3(struct clone_args *args, size_t size)
static inline void test_clone3_supported(void)
{
pid_t pid;
- struct clone_args args = {};
+ struct __clone_args args = {};
if (__NR_clone3 < 0)
ksft_exit_skip("clone3() syscall is not supported\n");
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
index 25beb22f35b5..0229e9ebb995 100644
--- a/tools/testing/selftests/clone3/clone3_set_tid.c
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -46,14 +46,14 @@ static int call_clone3_set_tid(pid_t *set_tid,
int status;
pid_t pid = -1;
- struct clone_args args = {
+ struct __clone_args args = {
.flags = flags,
.exit_signal = SIGCHLD,
.set_tid = ptr_to_u64(set_tid),
.set_tid_size = set_tid_size,
};
- pid = sys_clone3(&args, sizeof(struct clone_args));
+ pid = sys_clone3(&args, sizeof(args));
if (pid < 0) {
ksft_print_msg("%s - Failed to create new process\n",
strerror(errno));
@@ -157,8 +157,8 @@ int main(int argc, char *argv[])
pid_t set_tid[MAX_PID_NS_LEVEL * 2];
ksft_print_header();
- test_clone3_supported();
ksft_set_plan(29);
+ test_clone3_supported();
if (pipe(pipe_1) < 0 || pipe(pipe_2) < 0)
ksft_exit_fail_msg("pipe() failed\n");
diff --git a/tools/testing/selftests/core/.gitignore b/tools/testing/selftests/core/.gitignore
new file mode 100644
index 000000000000..6e6712ce5817
--- /dev/null
+++ b/tools/testing/selftests/core/.gitignore
@@ -0,0 +1 @@
+close_range_test
diff --git a/tools/testing/selftests/core/Makefile b/tools/testing/selftests/core/Makefile
new file mode 100644
index 000000000000..f6f2d6f473c6
--- /dev/null
+++ b/tools/testing/selftests/core/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+CFLAGS += -g -I../../../../usr/include/
+
+TEST_GEN_PROGS := close_range_test
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
new file mode 100644
index 000000000000..c99b98b0d461
--- /dev/null
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/kernel.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+#include "../clone3/clone3_selftests.h"
+
+#ifndef __NR_close_range
+#define __NR_close_range -1
+#endif
+
+#ifndef CLOSE_RANGE_UNSHARE
+#define CLOSE_RANGE_UNSHARE (1U << 1)
+#endif
+
+static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
+ unsigned int flags)
+{
+ return syscall(__NR_close_range, fd, max_fd, flags);
+}
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+TEST(close_range)
+{
+ int i, ret;
+ int open_fds[101];
+
+ for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
+ int fd;
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0) {
+ if (errno == ENOENT)
+ XFAIL(return, "Skipping test since /dev/null does not exist");
+ }
+
+ open_fds[i] = fd;
+ }
+
+ EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) {
+ if (errno == ENOSYS)
+ XFAIL(return, "close_range() syscall not supported");
+ }
+
+ EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
+
+ for (i = 0; i <= 50; i++)
+ EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
+
+ for (i = 51; i <= 100; i++)
+ EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
+
+ /* create a couple of gaps */
+ close(57);
+ close(78);
+ close(81);
+ close(82);
+ close(84);
+ close(90);
+
+ EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0));
+
+ for (i = 51; i <= 92; i++)
+ EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
+
+ for (i = 93; i <= 100; i++)
+ EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
+
+ /* test that the kernel caps and still closes all fds */
+ EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0));
+
+ for (i = 93; i <= 99; i++)
+ EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
+
+ EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
+
+ EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0));
+
+ EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL));
+}
+
+TEST(close_range_unshare)
+{
+ int i, ret, status;
+ pid_t pid;
+ int open_fds[101];
+ struct clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+
+ for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
+ int fd;
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0) {
+ if (errno == ENOENT)
+ XFAIL(return, "Skipping test since /dev/null does not exist");
+ }
+
+ open_fds[i] = fd;
+ }
+
+ pid = sys_clone3(&args, sizeof(args));
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ ret = sys_close_range(open_fds[0], open_fds[50],
+ CLOSE_RANGE_UNSHARE);
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ for (i = 0; i <= 50; i++)
+ if (fcntl(open_fds[i], F_GETFL) != -1)
+ exit(EXIT_FAILURE);
+
+ for (i = 51; i <= 100; i++)
+ if (fcntl(open_fds[i], F_GETFL) == -1)
+ exit(EXIT_FAILURE);
+
+ /* create a couple of gaps */
+ close(57);
+ close(78);
+ close(81);
+ close(82);
+ close(84);
+ close(90);
+
+ ret = sys_close_range(open_fds[51], open_fds[92],
+ CLOSE_RANGE_UNSHARE);
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ for (i = 51; i <= 92; i++)
+ if (fcntl(open_fds[i], F_GETFL) != -1)
+ exit(EXIT_FAILURE);
+
+ for (i = 93; i <= 100; i++)
+ if (fcntl(open_fds[i], F_GETFL) == -1)
+ exit(EXIT_FAILURE);
+
+ /* test that the kernel caps and still closes all fds */
+ ret = sys_close_range(open_fds[93], open_fds[99],
+ CLOSE_RANGE_UNSHARE);
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ for (i = 93; i <= 99; i++)
+ if (fcntl(open_fds[i], F_GETFL) != -1)
+ exit(EXIT_FAILURE);
+
+ if (fcntl(open_fds[100], F_GETFL) == -1)
+ exit(EXIT_FAILURE);
+
+ ret = sys_close_range(open_fds[100], open_fds[100],
+ CLOSE_RANGE_UNSHARE);
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ if (fcntl(open_fds[100], F_GETFL) != -1)
+ exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+TEST(close_range_unshare_capped)
+{
+ int i, ret, status;
+ pid_t pid;
+ int open_fds[101];
+ struct clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+
+ for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
+ int fd;
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0) {
+ if (errno == ENOENT)
+ XFAIL(return, "Skipping test since /dev/null does not exist");
+ }
+
+ open_fds[i] = fd;
+ }
+
+ pid = sys_clone3(&args, sizeof(args));
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ ret = sys_close_range(open_fds[0], UINT_MAX,
+ CLOSE_RANGE_UNSHARE);
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ for (i = 0; i <= 100; i++)
+ if (fcntl(open_fds[i], F_GETFL) != -1)
+ exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+TEST_HARNESS_MAIN
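
Outside of the kselftest harness, the same syscall can be exercised in a few lines; a minimal standalone sketch, assuming headers that define __NR_close_range (the test above falls back to -1 when they do not):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	/* Open two descriptors, then close the whole [first, last] range. */
	int first = open("/dev/null", O_RDONLY);
	int last = open("/dev/null", O_RDONLY);

	if (first < 0 || last < 0)
		return 1;

	if (syscall(__NR_close_range, first, last, 0) < 0) {
		perror("close_range");
		return 1;
	}

	/* Both fds are gone: fcntl() now fails with EBADF. */
	printf("fcntl(first) = %d\n", fcntl(first, F_GETFL));
	return 0;
}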
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
index 26044e397157..b32ba5fec59d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
@@ -107,7 +107,7 @@ ingress_flow_action_drop_test()
RET=0
- devlink_trap_drop_test ingress_flow_action_drop acl_drops $swp2 101
+ devlink_trap_drop_test ingress_flow_action_drop $swp2 101
log_test "ingress_flow_action_drop"
@@ -132,7 +132,7 @@ egress_flow_action_drop_test()
RET=0
- devlink_trap_drop_test egress_flow_action_drop acl_drops $swp2 102
+ devlink_trap_drop_test egress_flow_action_drop $swp2 102
log_test "egress_flow_action_drop"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
new file mode 100755
index 000000000000..a37273473c1b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
@@ -0,0 +1,688 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap control trap functionality over mlxsw. Each registered
+# control packet trap is tested to make sure it is triggered under the right
+# conditions.
+#
+# +---------------------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 192.0.2.1/24 |
+# | | 2001:db8:1::1/64 |
+# | | |
+# | | default via 192.0.2.2 |
+# | | default via 2001:db8:1::2 |
+# +----|----------------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | + $rp1 |
+# | 192.0.2.2/24 |
+# | 2001:db8:1::2/64 |
+# | |
+# | 2001:db8:2::2/64 |
+# | 198.51.100.2/24 |
+# | + $rp2 |
+# | | |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|----------------------------+
+# | | default via 198.51.100.2 |
+# | | default via 2001:db8:2::2 |
+# | | |
+# | | 2001:db8:2::1/64 |
+# | | 198.51.100.1/24 |
+# | + $h2 |
+# | H2 (vrf) |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ stp_test
+ lacp_test
+ lldp_test
+ igmp_query_test
+ igmp_v1_report_test
+ igmp_v2_report_test
+ igmp_v3_report_test
+ igmp_v2_leave_test
+ mld_query_test
+ mld_v1_report_test
+ mld_v2_report_test
+ mld_v1_done_test
+ ipv4_dhcp_test
+ ipv6_dhcp_test
+ arp_request_test
+ arp_response_test
+ ipv6_neigh_solicit_test
+ ipv6_neigh_advert_test
+ ipv4_bfd_test
+ ipv6_bfd_test
+ ipv4_ospf_test
+ ipv6_ospf_test
+ ipv4_bgp_test
+ ipv6_bgp_test
+ ipv4_vrrp_test
+ ipv6_vrrp_test
+ ipv4_pim_test
+ ipv6_pim_test
+ uc_loopback_test
+ local_route_test
+ external_route_test
+ ipv6_uc_dip_link_local_scope_test
+ ipv4_router_alert_test
+ ipv6_router_alert_test
+ ipv6_dip_all_nodes_test
+ ipv6_dip_all_routers_test
+ ipv6_router_solicit_test
+ ipv6_router_advert_test
+ ipv6_redirect_test
+ ptp_event_test
+ ptp_general_test
+ flow_action_sample_test
+ flow_action_trap_test
+"
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+
+ ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
+ ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64
+
+ ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+ ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+ ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
+ ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+ simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+ __addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+ __addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64
+ __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ router_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ router_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+stp_test()
+{
+ devlink_trap_stats_test "STP" "stp" $MZ $h1 -c 1 -t bpdu -q
+}
+
+lacp_payload_get()
+{
+ local source_mac=$1; shift
+ local p
+
+ p=$(:
+ )"01:80:C2:00:00:02:"$( : ETH daddr
+ )"$source_mac:"$( : ETH saddr
+ )"88:09:"$( : ETH type
+ )
+ echo $p
+}
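+
+# For example, "lacp_payload_get 00:11:22:33:44:55" yields
+# "01:80:C2:00:00:02:00:11:22:33:44:55:88:09:", i.e. a raw Ethernet
+# header with the slow-protocols multicast DMAC and EtherType 0x8809.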
+
+lacp_test()
+{
+ local h1mac=$(mac_get $h1)
+
+ devlink_trap_stats_test "LACP" "lacp" $MZ $h1 -c 1 \
+ $(lacp_payload_get $h1mac) -p 100 -q
+}
+
+lldp_payload_get()
+{
+ local source_mac=$1; shift
+ local p
+
+ p=$(:
+ )"01:80:C2:00:00:0E:"$( : ETH daddr
+ )"$source_mac:"$( : ETH saddr
+ )"88:CC:"$( : ETH type
+ )
+ echo $p
+}
+
+lldp_test()
+{
+ local h1mac=$(mac_get $h1)
+
+ devlink_trap_stats_test "LLDP" "lldp" $MZ $h1 -c 1 \
+ $(lldp_payload_get $h1mac) -p 100 -q
+}
+
+igmp_query_test()
+{
+ # IGMP (IP Protocol 2) Membership Query (Type 0x11)
+ devlink_trap_stats_test "IGMP Membership Query" "igmp_query" \
+ $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+ -A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=11 -p 100 -q
+}
+
+igmp_v1_report_test()
+{
+ # IGMP (IP Protocol 2) Version 1 Membership Report (Type 0x12)
+ devlink_trap_stats_test "IGMP Version 1 Membership Report" \
+ "igmp_v1_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=12 -p 100 -q
+}
+
+igmp_v2_report_test()
+{
+ # IGMP (IP Protocol 2) Version 2 Membership Report (Type 0x16)
+ devlink_trap_stats_test "IGMP Version 2 Membership Report" \
+ "igmp_v2_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=16 -p 100 -q
+}
+
+igmp_v3_report_test()
+{
+ # IGMP (IP Protocol 2) Version 3 Membership Report (Type 0x22)
+ devlink_trap_stats_test "IGMP Version 3 Membership Report" \
+ "igmp_v3_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=22 -p 100 -q
+}
+
+igmp_v2_leave_test()
+{
+ # IGMP (IP Protocol 2) Version 2 Leave Group (Type 0x17)
+ devlink_trap_stats_test "IGMP Version 2 Leave Group" \
+ "igmp_v2_leave" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:02 \
+ -A 192.0.2.1 -B 224.0.0.2 -t ip proto=2,p=17 -p 100 -q
+}
+
+mld_payload_get()
+{
+ local type=$1; shift
+ local p
+
+ type=$(printf "%x" $type)
+ p=$(:
+ )"3A:"$( : Next Header - ICMPv6
+ )"00:"$( : Hdr Ext Len
+ )"00:00:00:00:00:00:"$( : Options and Padding
+ )"$type:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"00:"$( : ICMPv6.checksum
+ )
+ echo $p
+}
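+
+# For instance, mld_payload_get 130 is expected to expand to
+# "3A:00:00:00:00:00:00:00:82:00:00:", i.e. a Hop-by-Hop extension header
+# (Next Header 0x3A = ICMPv6, options zero-filled) followed by the first bytes
+# of an ICMPv6 header of the requested type, with code and checksum left zero.
+# The callers attach it with "-t ip hop=1,next=0,payload=...", where next=0
+# selects the Hop-by-Hop header.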
+
+mld_query_test()
+{
+ # MLD Multicast Listener Query (Type 130)
+ devlink_trap_stats_test "MLD Multicast Listener Query" "mld_query" \
+ $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1 \
+ -t ip hop=1,next=0,payload=$(mld_payload_get 130) -p 100 -q
+}
+
+mld_v1_report_test()
+{
+ # MLD Version 1 Multicast Listener Report (Type 131)
+ devlink_trap_stats_test "MLD Version 1 Multicast Listener Report" \
+ "mld_v1_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+ -t ip hop=1,next=0,payload=$(mld_payload_get 131) -p 100 -q
+}
+
+mld_v2_report_test()
+{
+ # MLD Version 2 Multicast Listener Report (Type 143)
+ devlink_trap_stats_test "MLD Version 2 Multicast Listener Report" \
+ "mld_v2_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+ -t ip hop=1,next=0,payload=$(mld_payload_get 143) -p 100 -q
+}
+
+mld_v1_done_test()
+{
+ # MLD Version 1 Multicast Listener Done (Type 132)
+ devlink_trap_stats_test "MLD Version 1 Multicast Listener Done" \
+ "mld_v1_done" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+ -t ip hop=1,next=0,payload=$(mld_payload_get 132) -p 100 -q
+}
+
+ipv4_dhcp_test()
+{
+ devlink_trap_stats_test "IPv4 DHCP Port 67" "ipv4_dhcp" \
+ $MZ $h1 -c 1 -a own -b bcast -A 0.0.0.0 -B 255.255.255.255 \
+ -t udp sp=68,dp=67 -p 100 -q
+
+ devlink_trap_stats_test "IPv4 DHCP Port 68" "ipv4_dhcp" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) -A 192.0.2.1 \
+ -B 255.255.255.255 -t udp sp=67,dp=68 -p 100 -q
+}
+
+ipv6_dhcp_test()
+{
+ devlink_trap_stats_test "IPv6 DHCP Port 547" "ipv6_dhcp" \
+ $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=546,dp=547 \
+ -p 100 -q
+
+ devlink_trap_stats_test "IPv6 DHCP Port 546" "ipv6_dhcp" \
+ $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=547,dp=546 \
+ -p 100 -q
+}
+
+arp_request_test()
+{
+ devlink_trap_stats_test "ARP Request" "arp_request" \
+ $MZ $h1 -c 1 -a own -b bcast -t arp request -p 100 -q
+}
+
+arp_response_test()
+{
+ devlink_trap_stats_test "ARP Response" "arp_response" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) -t arp reply -p 100 -q
+}
+
+icmpv6_header_get()
+{
+ local type=$1; shift
+ local p
+
+ type=$(printf "%x" $type)
+ p=$(:
+ )"$type:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"00:"$( : ICMPv6.checksum
+ )
+ echo $p
+}
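+
+# E.g. icmpv6_header_get 135 yields "87:00:00:", the type, code and first
+# checksum byte of a Neighbour Solicitation; the remaining fields come from
+# the zero padding added by "-p 100".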
+
+ipv6_neigh_solicit_test()
+{
+ devlink_trap_stats_test "IPv6 Neighbour Solicitation" \
+ "ipv6_neigh_solicit" $MZ $h1 -6 -c 1 \
+ -A fe80::1 -B ff02::1:ff00:02 \
+ -t ip hop=1,next=58,payload=$(icmpv6_header_get 135) -p 100 -q
+}
+
+ipv6_neigh_advert_test()
+{
+ devlink_trap_stats_test "IPv6 Neighbour Advertisement" \
+ "ipv6_neigh_advert" $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A fe80::1 -B 2001:db8:1::2 \
+ -t ip hop=1,next=58,payload=$(icmpv6_header_get 136) -p 100 -q
+}
+
+ipv4_bfd_test()
+{
+ devlink_trap_stats_test "IPv4 BFD Control - Port 3784" "ipv4_bfd" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3784 -p 100 -q
+
+ devlink_trap_stats_test "IPv4 BFD Echo - Port 3785" "ipv4_bfd" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3785 -p 100 -q
+}
+
+ipv6_bfd_test()
+{
+ devlink_trap_stats_test "IPv6 BFD Control - Port 3784" "ipv6_bfd" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A 2001:db8:1::1 -B 2001:db8:1::2 \
+ -t udp sp=49153,dp=3784 -p 100 -q
+
+ devlink_trap_stats_test "IPv6 BFD Echo - Port 3785" "ipv6_bfd" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A 2001:db8:1::1 -B 2001:db8:1::2 \
+ -t udp sp=49153,dp=3785 -p 100 -q
+}
+
+ipv4_ospf_test()
+{
+ devlink_trap_stats_test "IPv4 OSPF - Multicast" "ipv4_ospf" \
+ $MZ $h1 -c 1 -a own -b 01:00:5e:00:00:05 \
+ -A 192.0.2.1 -B 224.0.0.5 -t ip proto=89 -p 100 -q
+
+ devlink_trap_stats_test "IPv4 OSPF - Unicast" "ipv4_ospf" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 192.0.2.2 -t ip proto=89 -p 100 -q
+}
+
+ipv6_ospf_test()
+{
+ devlink_trap_stats_test "IPv6 OSPF - Multicast" "ipv6_ospf" \
+ $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:05 \
+ -A fe80::1 -B ff02::5 -t ip next=89 -p 100 -q
+
+ devlink_trap_stats_test "IPv6 OSPF - Unicast" "ipv6_ospf" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A 2001:db8:1::1 -B 2001:db8:1::2 -t ip next=89 -p 100 -q
+}
+
+ipv4_bgp_test()
+{
+ devlink_trap_stats_test "IPv4 BGP" "ipv4_bgp" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 192.0.2.2 -t tcp sp=54321,dp=179,flags=rst \
+ -p 100 -q
+}
+
+ipv6_bgp_test()
+{
+ devlink_trap_stats_test "IPv6 BGP" "ipv6_bgp" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A 2001:db8:1::1 -B 2001:db8:1::2 \
+ -t tcp sp=54321,dp=179,flags=rst -p 100 -q
+}
+
+ipv4_vrrp_test()
+{
+ devlink_trap_stats_test "IPv4 VRRP" "ipv4_vrrp" \
+ $MZ $h1 -c 1 -a own -b 01:00:5e:00:00:12 \
+ -A 192.0.2.1 -B 224.0.0.18 -t ip proto=112 -p 100 -q
+}
+
+ipv6_vrrp_test()
+{
+ devlink_trap_stats_test "IPv6 VRRP" "ipv6_vrrp" \
+ $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:12 \
+ -A fe80::1 -B ff02::12 -t ip next=112 -p 100 -q
+}
+
+ipv4_pim_test()
+{
+ devlink_trap_stats_test "IPv4 PIM - Multicast" "ipv4_pim" \
+ $MZ $h1 -c 1 -a own -b 01:00:5e:00:00:0d \
+ -A 192.0.2.1 -B 224.0.0.13 -t ip proto=103 -p 100 -q
+
+ devlink_trap_stats_test "IPv4 PIM - Unicast" "ipv4_pim" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 192.0.2.2 -t ip proto=103 -p 100 -q
+}
+
+ipv6_pim_test()
+{
+ devlink_trap_stats_test "IPv6 PIM - Multicast" "ipv6_pim" \
+ $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:0d \
+ -A fe80::1 -B ff02::d -t ip next=103 -p 100 -q
+
+ devlink_trap_stats_test "IPv6 PIM - Unicast" "ipv6_pim" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A fe80::1 -B 2001:db8:1::2 -t ip next=103 -p 100 -q
+}
+
+uc_loopback_test()
+{
+ # Add neighbours to the fake destination IPs, so that the packets are
+ # routed in the device and not trapped due to an unresolved neighbour
+ # exception.
+ ip -4 neigh add 192.0.2.3 lladdr 00:11:22:33:44:55 nud permanent \
+ dev $rp1
+ ip -6 neigh add 2001:db8:1::3 lladdr 00:11:22:33:44:55 nud permanent \
+ dev $rp1
+
+ devlink_trap_stats_test "IPv4 Unicast Loopback" "uc_loopback" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 192.0.2.3 -t udp sp=54321,dp=12345 -p 100 -q
+
+ devlink_trap_stats_test "IPv6 Unicast Loopback" "uc_loopback" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A 2001:db8:1::1 -B 2001:db8:1::3 -t udp sp=54321,dp=12345 \
+ -p 100 -q
+
+ ip -6 neigh del 2001:db8:1::3 dev $rp1
+ ip -4 neigh del 192.0.2.3 dev $rp1
+}
+
+local_route_test()
+{
+ # Use a fake source IP to prevent the trap from being triggered twice
+ # when the router sends back a port unreachable message.
+ devlink_trap_stats_test "IPv4 Local Route" "local_route" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.3 -B 192.0.2.2 -t udp sp=54321,dp=12345 -p 100 -q
+
+ devlink_trap_stats_test "IPv6 Local Route" "local_route" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::3 -B 2001:db8:1::2 -t udp sp=54321,dp=12345 \
+ -p 100 -q
+}
+
+external_route_test()
+{
+ # Add a dummy device through which the incoming packets should be
+ # routed.
+ ip link add name dummy10 up type dummy
+ ip address add 203.0.113.1/24 dev dummy10
+ ip -6 address add 2001:db8:10::1/64 dev dummy10
+
+ devlink_trap_stats_test "IPv4 External Route" "external_route" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 203.0.113.2 -t udp sp=54321,dp=12345 -p 100 -q
+
+ devlink_trap_stats_test "IPv6 External Route" "external_route" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:10::2 -t udp sp=54321,dp=12345 \
+ -p 100 -q
+
+ ip -6 address del 2001:db8:10::1/64 dev dummy10
+ ip address del 203.0.113.1/24 dev dummy10
+ ip link del dev dummy10
+}
+
+ipv6_uc_dip_link_local_scope_test()
+{
+ # Add a dummy link-local prefix route to allow the packet to be routed.
+ ip -6 route add fe80:1::/64 dev $rp2
+
+ devlink_trap_stats_test \
+ "IPv6 Unicast Destination IP With Link-Local Scope" \
+ "ipv6_uc_dip_link_local_scope" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B fe80:1::2 -t udp sp=54321,dp=12345 \
+ -p 100 -q
+
+ ip -6 route del fe80:1::/64 dev $rp2
+}
+
+ipv4_router_alert_get()
+{
+ local p
+
+ # https://en.wikipedia.org/wiki/IPv4#Options
+ p=$(:
+ )"94:"$( : Option Number
+ )"04:"$( : Option Length
+ )"00:00:"$( : Option Data
+ )
+ echo $p
+}
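+
+# Decoded, per the reference above: option type 0x94 is Router Alert (copied
+# flag set, class 0, option number 20), length 4, with the value 0x0000
+# meaning "router shall examine packet".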
+
+ipv4_router_alert_test()
+{
+ devlink_trap_stats_test "IPv4 Router Alert" "ipv4_router_alert" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 198.51.100.3 \
+ -t ip option=$(ipv4_router_alert_get) -p 100 -q
+}
+
+ipv6_router_alert_get()
+{
+ local p
+
+ # https://en.wikipedia.org/wiki/IPv6_packet#Hop-by-hop_options_and_destination_options
+ # https://tools.ietf.org/html/rfc2711#section-2.1
+ p=$(:
+ )"11:"$( : Next Header - UDP
+ )"00:"$( : Hdr Ext Len
+ )"05:02:00:00:00:00:"$( : Option Data
+ )
+ echo $p
+}
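+
+# Decoded: a Hop-by-Hop header whose Next Header is 0x11 (UDP), carrying a
+# Router Alert option (type 5, length 2, value 0x0000, which RFC 2711 assigns
+# to MLD) followed by two Pad1 options that fill the 8-byte header.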
+
+ipv6_router_alert_test()
+{
+ devlink_trap_stats_test "IPv6 Router Alert" "ipv6_router_alert" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A 2001:db8:1::1 -B 2001:db8:1::3 \
+ -t ip next=0,payload=$(ipv6_router_alert_get) -p 100 -q
+}
+
+ipv6_dip_all_nodes_test()
+{
+ devlink_trap_stats_test "IPv6 Destination IP \"All Nodes Address\"" \
+ "ipv6_dip_all_nodes" \
+ $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \
+ -A 2001:db8:1::1 -B ff02::1 -t udp sp=12345,dp=54321 -p 100 -q
+}
+
+ipv6_dip_all_routers_test()
+{
+ devlink_trap_stats_test "IPv6 Destination IP \"All Routers Address\"" \
+ "ipv6_dip_all_routers" \
+ $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \
+ -A 2001:db8:1::1 -B ff02::2 -t udp sp=12345,dp=54321 -p 100 -q
+}
+
+ipv6_router_solicit_test()
+{
+ devlink_trap_stats_test "IPv6 Router Solicitation" \
+ "ipv6_router_solicit" \
+ $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \
+ -A fe80::1 -B ff02::2 \
+ -t ip hop=1,next=58,payload=$(icmpv6_header_get 133) -p 100 -q
+}
+
+ipv6_router_advert_test()
+{
+ devlink_trap_stats_test "IPv6 Router Advertisement" \
+ "ipv6_router_advert" \
+ $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \
+ -A fe80::1 -B ff02::1 \
+ -t ip hop=1,next=58,payload=$(icmpv6_header_get 134) -p 100 -q
+}
+
+ipv6_redirect_test()
+{
+ devlink_trap_stats_test "IPv6 Redirect Message" \
+ "ipv6_redirect" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A fe80::1 -B 2001:db8:1::2 \
+ -t ip hop=1,next=58,payload=$(icmpv6_header_get 137) -p 100 -q
+}
+
+ptp_event_test()
+{
+ # PTP is only supported on Spectrum-1, for now.
+ [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+
+ # PTP Sync (0)
+ devlink_trap_stats_test "PTP Time-Critical Event Message" "ptp_event" \
+ $MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \
+ -A 192.0.2.1 -B 224.0.1.129 \
+ -t udp sp=12345,dp=319,payload=10 -p 100 -q
+}
+
+ptp_general_test()
+{
+ # PTP is only supported on Spectrum-1, for now.
+ [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+
+ # PTP Announce (b)
+ devlink_trap_stats_test "PTP General Message" "ptp_general" \
+ $MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \
+ -A 192.0.2.1 -B 224.0.1.129 \
+ -t udp sp=12345,dp=320,payload=1b -p 100 -q
+}
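+
+# In both PTP tests the single payload byte is the first octet of the PTP
+# header, whose low nibble is messageType: 0x10 encodes a Sync and 0x1b an
+# Announce, consistent with the event (UDP 319) and general (UDP 320) ports
+# used above.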
+
+flow_action_sample_test()
+{
+ # Install a filter that samples every incoming packet.
+ tc qdisc add dev $rp1 clsact
+ tc filter add dev $rp1 ingress proto all pref 1 handle 101 matchall \
+ skip_sw action sample rate 1 group 1
+
+ devlink_trap_stats_test "Flow Sampling" "flow_action_sample" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q
+
+ tc filter del dev $rp1 ingress proto all pref 1 handle 101 matchall
+ tc qdisc del dev $rp1 clsact
+}
+
+flow_action_trap_test()
+{
+ # Install a filter that traps a specific flow.
+ tc qdisc add dev $rp1 clsact
+ tc filter add dev $rp1 ingress proto ip pref 1 handle 101 flower \
+ skip_sw ip_proto udp src_port 12345 dst_port 54321 action trap
+
+ devlink_trap_stats_test "Flow Trapping (Logging)" "flow_action_trap" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q
+
+ tc filter del dev $rp1 ingress proto ip pref 1 handle 101 flower
+ tc qdisc del dev $rp1 clsact
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
index e7aecb065409..a4c2812e9807 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
@@ -96,7 +96,6 @@ source_mac_is_multicast_test()
{
local trap_name="source_mac_is_multicast"
local smac=01:02:03:04:05:06
- local group_name="l2_drops"
local mz_pid
tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
@@ -107,7 +106,7 @@ source_mac_is_multicast_test()
RET=0
- devlink_trap_drop_test $trap_name $group_name $swp2 101
+ devlink_trap_drop_test $trap_name $swp2 101
log_test "Source MAC is multicast"
@@ -118,7 +117,6 @@ __vlan_tag_mismatch_test()
{
local trap_name="vlan_tag_mismatch"
local dmac=de:ad:be:ef:13:37
- local group_name="l2_drops"
local opt=$1; shift
local mz_pid
@@ -132,7 +130,7 @@ __vlan_tag_mismatch_test()
$MZ $h1 "$opt" -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2 101
+ devlink_trap_drop_test $trap_name $swp2 101
# Add PVID and make sure packets are no longer dropped.
bridge vlan add vid 1 dev $swp1 pvid untagged master
@@ -140,7 +138,7 @@ __vlan_tag_mismatch_test()
devlink_trap_stats_idle_test $trap_name
check_err $? "Trap stats not idle when packets should not be dropped"
- devlink_trap_group_stats_idle_test $group_name
+ devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
check_err $? "Trap group stats not idle with when packets should not be dropped"
tc_check_packets "dev $swp2 egress" 101 0
@@ -179,7 +177,6 @@ ingress_vlan_filter_test()
{
local trap_name="ingress_vlan_filter"
local dmac=de:ad:be:ef:13:37
- local group_name="l2_drops"
local mz_pid
local vid=10
@@ -193,7 +190,7 @@ ingress_vlan_filter_test()
$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2 101
+ devlink_trap_drop_test $trap_name $swp2 101
# Add the VLAN on the bridge port and make sure packets are no longer
# dropped.
@@ -202,7 +199,7 @@ ingress_vlan_filter_test()
devlink_trap_stats_idle_test $trap_name
check_err $? "Trap stats not idle when packets should not be dropped"
- devlink_trap_group_stats_idle_test $group_name
+ devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
check_err $? "Trap group stats not idle with when packets should not be dropped"
tc_check_packets "dev $swp2 egress" 101 0
@@ -222,7 +219,6 @@ __ingress_stp_filter_test()
{
local trap_name="ingress_spanning_tree_filter"
local dmac=de:ad:be:ef:13:37
- local group_name="l2_drops"
local state=$1; shift
local mz_pid
local vid=20
@@ -237,7 +233,7 @@ __ingress_stp_filter_test()
$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2 101
+ devlink_trap_drop_test $trap_name $swp2 101
# Change STP state to forwarding and make sure packets are no longer
# dropped.
@@ -246,7 +242,7 @@ __ingress_stp_filter_test()
devlink_trap_stats_idle_test $trap_name
check_err $? "Trap stats not idle when packets should not be dropped"
- devlink_trap_group_stats_idle_test $group_name
+ devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
check_err $? "Trap group stats not idle with when packets should not be dropped"
tc_check_packets "dev $swp2 egress" 101 0
@@ -292,7 +288,6 @@ port_list_is_empty_uc_test()
{
local trap_name="port_list_is_empty"
local dmac=de:ad:be:ef:13:37
- local group_name="l2_drops"
local mz_pid
# Disable unicast flooding on both ports, so that packets cannot egress
@@ -308,7 +303,7 @@ port_list_is_empty_uc_test()
$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2 101
+ devlink_trap_drop_test $trap_name $swp2 101
# Allow packets to be flooded to one port.
ip link set dev $swp2 type bridge_slave flood on
@@ -316,7 +311,7 @@ port_list_is_empty_uc_test()
devlink_trap_stats_idle_test $trap_name
check_err $? "Trap stats not idle when packets should not be dropped"
- devlink_trap_group_stats_idle_test $group_name
+ devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
check_err $? "Trap group stats not idle with when packets should not be dropped"
tc_check_packets "dev $swp2 egress" 101 0
@@ -335,7 +330,6 @@ port_list_is_empty_mc_test()
{
local trap_name="port_list_is_empty"
local dmac=01:00:5e:00:00:01
- local group_name="l2_drops"
local dip=239.0.0.1
local mz_pid
@@ -354,7 +348,7 @@ port_list_is_empty_mc_test()
$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2 101
+ devlink_trap_drop_test $trap_name $swp2 101
# Allow packets to be flooded to one port.
ip link set dev $swp2 type bridge_slave mcast_flood on
@@ -362,7 +356,7 @@ port_list_is_empty_mc_test()
devlink_trap_stats_idle_test $trap_name
check_err $? "Trap stats not idle when packets should not be dropped"
- devlink_trap_group_stats_idle_test $group_name
+ devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
check_err $? "Trap group stats not idle with when packets should not be dropped"
tc_check_packets "dev $swp2 egress" 101 0
@@ -387,7 +381,6 @@ port_loopback_filter_uc_test()
{
local trap_name="port_loopback_filter"
local dmac=de:ad:be:ef:13:37
- local group_name="l2_drops"
local mz_pid
# Make sure packets can only egress the input port.
@@ -401,7 +394,7 @@ port_loopback_filter_uc_test()
$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2 101
+ devlink_trap_drop_test $trap_name $swp2 101
# Allow packets to be flooded.
ip link set dev $swp2 type bridge_slave flood on
@@ -409,7 +402,7 @@ port_loopback_filter_uc_test()
devlink_trap_stats_idle_test $trap_name
check_err $? "Trap stats not idle when packets should not be dropped"
- devlink_trap_group_stats_idle_test $group_name
+ devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
check_err $? "Trap group stats not idle with when packets should not be dropped"
tc_check_packets "dev $swp2 egress" 101 0
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
index 616f47d86a61..f5abb1ebd392 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -161,7 +161,6 @@ ping_check()
non_ip_test()
{
local trap_name="non_ip"
- local group_name="l3_drops"
local mz_pid
RET=0
@@ -176,7 +175,7 @@ non_ip_test()
00:00 de:ad:be:ef" &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2 101
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "Non IP"
@@ -190,7 +189,6 @@ __uc_dip_over_mc_dmac_test()
local dip=$1; shift
local flags=${1:-""}; shift
local trap_name="uc_dip_over_mc_dmac"
- local group_name="l3_drops"
local dmac=01:02:03:04:05:06
local mz_pid
@@ -206,7 +204,7 @@ __uc_dip_over_mc_dmac_test()
-B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2 101
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "Unicast destination IP over multicast destination MAC: $desc"
@@ -227,7 +225,6 @@ __sip_is_loopback_test()
local dip=$1; shift
local flags=${1:-""}; shift
local trap_name="sip_is_loopback_address"
- local group_name="l3_drops"
local mz_pid
RET=0
@@ -242,7 +239,7 @@ __sip_is_loopback_test()
-b $rp1mac -B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2 101
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "Source IP is loopback address: $desc"
@@ -262,7 +259,6 @@ __dip_is_loopback_test()
local dip=$1; shift
local flags=${1:-""}; shift
local trap_name="dip_is_loopback_address"
- local group_name="l3_drops"
local mz_pid
RET=0
@@ -277,7 +273,7 @@ __dip_is_loopback_test()
-B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2 101
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "Destination IP is loopback address: $desc"
@@ -298,7 +294,6 @@ __sip_is_mc_test()
local dip=$1; shift
local flags=${1:-""}; shift
local trap_name="sip_is_mc"
- local group_name="l3_drops"
local mz_pid
RET=0
@@ -313,7 +308,7 @@ __sip_is_mc_test()
-b $rp1mac -B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2 101
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "Source IP is multicast: $desc"
@@ -329,7 +324,6 @@ sip_is_mc_test()
ipv4_sip_is_limited_bc_test()
{
local trap_name="ipv4_sip_is_limited_bc"
- local group_name="l3_drops"
local sip=255.255.255.255
local mz_pid
@@ -345,7 +339,7 @@ ipv4_sip_is_limited_bc_test()
-B $h2_ipv4 -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2 101
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "IPv4 source IP is limited broadcast"
@@ -382,7 +376,6 @@ __ipv4_header_corrupted_test()
local ihl=$1; shift
local checksum=$1; shift
local trap_name="ip_header_corrupted"
- local group_name="l3_drops"
local payload
local mz_pid
@@ -399,7 +392,7 @@ __ipv4_header_corrupted_test()
$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2 101
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "IP header corrupted: $desc: IPv4"
@@ -429,7 +422,6 @@ __ipv6_header_corrupted_test()
local desc=$1; shift
local ipver=$1; shift
local trap_name="ip_header_corrupted"
- local group_name="l3_drops"
local payload
local mz_pid
@@ -446,7 +438,7 @@ __ipv6_header_corrupted_test()
$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2 101
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "IP header corrupted: $desc: IPv6"
@@ -469,7 +461,6 @@ ip_header_corrupted_test()
ipv6_mc_dip_reserved_scope_test()
{
local trap_name="ipv6_mc_dip_reserved_scope"
- local group_name="l3_drops"
local dip=FF00::
local mz_pid
@@ -485,7 +476,7 @@ ipv6_mc_dip_reserved_scope_test()
"33:33:00:00:00:00" -B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2 101
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "IPv6 multicast destination IP reserved scope"
@@ -495,7 +486,6 @@ ipv6_mc_dip_reserved_scope_test()
ipv6_mc_dip_interface_local_scope_test()
{
local trap_name="ipv6_mc_dip_interface_local_scope"
- local group_name="l3_drops"
local dip=FF01::
local mz_pid
@@ -511,7 +501,7 @@ ipv6_mc_dip_interface_local_scope_test()
"33:33:00:00:00:00" -B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2 101
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "IPv6 multicast destination IP interface-local scope"
@@ -526,7 +516,6 @@ __blackhole_route_test()
local dip=$1; shift
local ip_proto=${1:-"icmp"}; shift
local trap_name="blackhole_route"
- local group_name="l3_drops"
local mz_pid
RET=0
@@ -542,7 +531,7 @@ __blackhole_route_test()
-B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2 101
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "Blackhole route: IPv$flags"
devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
@@ -558,7 +547,6 @@ blackhole_route_test()
irif_disabled_test()
{
local trap_name="irif_disabled"
- local group_name="l3_drops"
local t0_packets t0_bytes
local t1_packets t1_bytes
local mz_pid
@@ -613,7 +601,6 @@ irif_disabled_test()
erif_disabled_test()
{
local trap_name="erif_disabled"
- local group_name="l3_drops"
local t0_packets t0_bytes
local t1_packets t1_bytes
local mz_pid
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
index 2bc6df42d597..1fedfc9da434 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -169,7 +169,6 @@ trap_action_check()
mtu_value_is_too_small_test()
{
local trap_name="mtu_value_is_too_small"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -191,7 +190,7 @@ mtu_value_is_too_small_test()
-B 198.51.100.1 -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets_hitting "dev $h1 ingress" 101
check_err $? "Packets were not received to h1"
@@ -208,7 +207,6 @@ __ttl_value_is_too_small_test()
{
local ttl_val=$1; shift
local trap_name="ttl_value_is_too_small"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -227,7 +225,7 @@ __ttl_value_is_too_small_test()
-b $rp1mac -B 198.51.100.1 -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets_hitting "dev $h1 ingress" 101
check_err $? "Packets were not received to h1"
@@ -271,7 +269,6 @@ __mc_reverse_path_forwarding_test()
local proto=$1; shift
local flags=${1:-""}; shift
local trap_name="mc_reverse_path_forwarding"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -292,7 +289,7 @@ __mc_reverse_path_forwarding_test()
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets "dev $rp2 egress" 101 0
check_err $? "Packets were not dropped"
@@ -322,7 +319,6 @@ __reject_route_test()
local unreachable=$1; shift
local flags=${1:-""}; shift
local trap_name="reject_route"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -341,7 +337,7 @@ __reject_route_test()
-B $dst_ip -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets_hitting "dev $h1 ingress" 101
check_err $? "ICMP packet was not received to h1"
@@ -370,7 +366,6 @@ __host_miss_test()
local desc=$1; shift
local dip=$1; shift
local trap_name="unresolved_neigh"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -405,7 +400,6 @@ __invalid_nexthop_test()
local subnet=$1; shift
local via_add=$1; shift
local trap_name="unresolved_neigh"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -494,7 +488,6 @@ vrf_without_routes_destroy()
ipv4_lpm_miss_test()
{
local trap_name="ipv4_lpm_miss"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -511,7 +504,7 @@ ipv4_lpm_miss_test()
-B 203.0.113.1 -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
log_test "LPM miss: IPv4"
@@ -522,7 +515,6 @@ ipv4_lpm_miss_test()
ipv6_lpm_miss_test()
{
local trap_name="ipv6_lpm_miss"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -539,7 +531,7 @@ ipv6_lpm_miss_test()
-B 2001:db8::1 -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
log_test "LPM miss: IPv6"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
index 47edf099a17e..508a702f0021 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
@@ -207,7 +207,7 @@ __rate_test()
RET=0
- devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 16
+ devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512
devlink trap group set $DEVLINK_DEV group l3_drops policer $id
# Send packets at highest possible rate and make sure they are dropped
@@ -220,8 +220,8 @@ __rate_test()
rate=$(trap_rate_get)
pct=$((100 * (rate - 1000) / 1000))
- ((-5 <= pct && pct <= 5))
- check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-5%"
+ ((-10 <= pct && pct <= 10))
+ check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-10%"
log_info "Expected rate 1000 pps, measured rate $rate pps"
drop_rate=$(policer_drop_rate_get $id)
@@ -288,35 +288,12 @@ __burst_test()
RET=0
- devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 32
+ devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512
devlink trap group set $DEVLINK_DEV group l3_drops policer $id
- # Send a burst of 64 packets and make sure that about 32 are received
- # and the rest are dropped by the policer
- log_info "=== Tx burst size: 64, Policer burst size: 32 pps ==="
-
- t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
- t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
-
- start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64
-
- t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
- t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
-
- rx=$((t1_rx - t0_rx))
- pct=$((100 * (rx - 32) / 32))
- ((-20 <= pct && pct <= 20))
- check_err $? "Expected burst size of 32 packets, got $rx packets, which is $pct% off. Required accuracy is +-20%"
- log_info "Expected burst size of 32 packets, measured burst size of $rx packets"
-
- drop=$((t1_drop - t0_drop))
- (( drop > 0 ))
- check_err $? "Expected non-zero policer drops, got 0"
- log_info "Measured policer drops of $drop packets"
-
# Send a burst of 16 packets and make sure that 16 are received
# and that none are dropped by the policer
- log_info "=== Tx burst size: 16, Policer burst size: 32 pps ==="
+ log_info "=== Tx burst size: 16, Policer burst size: 512 ==="
t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
index 039629bb92a3..8817851da7a9 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
@@ -140,7 +140,6 @@ ecn_payload_get()
ecn_decap_test()
{
local trap_name="decap_error"
- local group_name="tunnel_drops"
local desc=$1; shift
local ecn_desc=$1; shift
local outer_tos=$1; shift
@@ -161,7 +160,7 @@ ecn_decap_test()
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets "dev $swp1 egress" 101 0
check_err $? "Packets were not dropped"
@@ -200,7 +199,6 @@ ipip_payload_get()
no_matching_tunnel_test()
{
local trap_name="decap_error"
- local group_name="tunnel_drops"
local desc=$1; shift
local sip=$1; shift
local mz_pid
@@ -218,7 +216,7 @@ no_matching_tunnel_test()
-A $sip -B 192.0.2.65 -t ip len=48,proto=47,p=$payload -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets "dev $swp1 egress" 101 0
check_err $? "Packets were not dropped"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
index e11a416323cf..10e0f3dbc930 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
@@ -159,7 +159,6 @@ ecn_payload_get()
ecn_decap_test()
{
local trap_name="decap_error"
- local group_name="tunnel_drops"
local desc=$1; shift
local ecn_desc=$1; shift
local outer_tos=$1; shift
@@ -177,7 +176,7 @@ ecn_decap_test()
-t udp sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets "dev $swp1 egress" 101 0
check_err $? "Packets were not dropped"
@@ -228,7 +227,6 @@ short_payload_get()
corrupted_packet_test()
{
local trap_name="decap_error"
- local group_name="tunnel_drops"
local desc=$1; shift
local payload_get=$1; shift
local mz_pid
@@ -246,7 +244,7 @@ corrupted_packet_test()
-B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets "dev $swp1 egress" 101 0
check_err $? "Packets were not dropped"
@@ -297,7 +295,6 @@ mc_smac_payload_get()
overlay_smac_is_mc_test()
{
local trap_name="overlay_smac_is_mc"
- local group_name="tunnel_drops"
local mz_pid
RET=0
@@ -314,7 +311,7 @@ overlay_smac_is_mc_test()
-B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp1 101
+ devlink_trap_drop_test $trap_name $swp1 101
log_test "Overlay source MAC is multicast"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
index 6d1790b5de7a..e9f8718af979 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
@@ -147,17 +147,26 @@ switch_create()
# Make sure that ingress quotas are smaller than egress so that there is
# room for both streams of traffic to be admitted to shared buffer.
+ devlink_pool_size_thtype_save 0
devlink_pool_size_thtype_set 0 dynamic 10000000
+ devlink_pool_size_thtype_save 4
devlink_pool_size_thtype_set 4 dynamic 10000000
+ devlink_port_pool_th_save $swp1 0
devlink_port_pool_th_set $swp1 0 6
+ devlink_tc_bind_pool_th_save $swp1 1 ingress
devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6
+ devlink_port_pool_th_save $swp2 0
devlink_port_pool_th_set $swp2 0 6
+ devlink_tc_bind_pool_th_save $swp2 2 ingress
devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6
+ devlink_tc_bind_pool_th_save $swp3 1 egress
devlink_tc_bind_pool_th_set $swp3 1 egress 4 7
+ devlink_tc_bind_pool_th_save $swp3 2 egress
devlink_tc_bind_pool_th_set $swp3 2 egress 4 7
+ devlink_port_pool_th_save $swp3 4
devlink_port_pool_th_set $swp3 4 7
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
new file mode 100755
index 000000000000..27de3d9ed08e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
@@ -0,0 +1,379 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ test_defaults
+ test_dcb_ets
+ test_mtu
+ test_pfc
+ test_int_buf
+ test_tc_priomap
+ test_tc_mtu
+ test_tc_sizes
+ test_tc_int_buf
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+swp=$NETIF_NO_CABLE
+
+cleanup()
+{
+ pre_cleanup
+}
+
+get_prio_pg()
+{
+ __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
+ grep buffer | sed 's/ \+/ /g' | cut -d' ' -f 2-
+}
+
+get_prio_pfc()
+{
+ __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
+ grep enabled | sed 's/ \+/ /g' | cut -d' ' -f 2-
+}
+
+get_prio_tc()
+{
+ __mlnx_qos -i $swp | sed -n '/^tc/,$p' |
+ awk '/^tc/ { TC = $2 }
+ /priority:/ { PRIO[$2]=TC }
+ END {
+ for (i in PRIO)
+ printf("%d ", PRIO[i])
+ }'
+}
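+
+# The getters above scrape the output of mlnx_qos, which is assumed to look
+# roughly like:
+#
+#	PFC configuration:
+#		priority    0   1   2   3   4   5   6   7
+#		enabled     0   0   0   0   0   0   0   0
+#		buffer      0   0   0   0   0   0   0   0
+#	tc: 0 ratelimit: unlimited, tsa: vendor
+#		 priority:  0
+#		 priority:  1
+#	...
+#
+# get_prio_pg() and get_prio_pfc() pick the "buffer" and "enabled" rows of the
+# PFC section, and get_prio_tc() attributes each "priority:" line to the most
+# recent "tc:" line to reconstruct the prio->TC map.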
+
+get_buf_size()
+{
+ local idx=$1; shift
+
+ __mlnx_qos -i $swp | grep Receive | sed 's/.*: //' | cut -d, -f $((idx + 1))
+}
+
+get_tot_size()
+{
+ __mlnx_qos -i $swp | grep Receive | sed 's/.*total_size=//'
+}
+
+check_prio_pg()
+{
+ local expect=$1; shift
+
+ local current=$(get_prio_pg)
+ test "$current" = "$expect"
+ check_err $? "prio2buffer is '$current', expected '$expect'"
+}
+
+check_prio_pfc()
+{
+ local expect=$1; shift
+
+ local current=$(get_prio_pfc)
+ test "$current" = "$expect"
+ check_err $? "prio PFC is '$current', expected '$expect'"
+}
+
+check_prio_tc()
+{
+ local expect=$1; shift
+
+ local current=$(get_prio_tc)
+ test "$current" = "$expect"
+ check_err $? "prio_tc is '$current', expected '$expect'"
+}
+
+__check_buf_size()
+{
+ local idx=$1; shift
+ local expr=$1; shift
+ local what=$1; shift
+
+ local current=$(get_buf_size $idx)
+ ((current $expr))
+ check_err $? "${what}buffer $idx size is '$current', expected '$expr'"
+ echo $current
+}
+
+check_buf_size()
+{
+ __check_buf_size "$@" > /dev/null
+}
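+
+# For example, buf0size=$(__check_buf_size 0 "> 0") both asserts that buffer 0
+# is non-empty (the expression string is expanded inside (( ... ))) and
+# captures the measured size for later comparisons; check_buf_size is the
+# silent variant used when only the assertion is needed.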
+
+test_defaults()
+{
+ RET=0
+
+ check_prio_pg "0 0 0 0 0 0 0 0 "
+ check_prio_tc "0 0 0 0 0 0 0 0 "
+ check_prio_pfc "0 0 0 0 0 0 0 0 "
+
+ log_test "Default headroom configuration"
+}
+
+test_dcb_ets()
+{
+ RET=0
+
+ __mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null
+
+ check_prio_pg "0 2 4 6 1 3 5 7 "
+ check_prio_tc "0 2 4 6 1 3 5 7 "
+ check_prio_pfc "0 0 0 0 0 0 0 0 "
+
+ __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+
+ check_prio_pg "0 0 0 0 0 0 0 0 "
+ check_prio_tc "0 0 0 0 0 0 0 0 "
+
+ __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null
+ check_fail $? "prio2buffer accepted in DCB mode"
+
+ log_test "Configuring headroom through ETS"
+}
+
+test_mtu()
+{
+ local what=$1; shift
+ local buf0size_2
+ local buf0size
+
+ RET=0
+ buf0size=$(__check_buf_size 0 "> 0")
+
+ mtu_set $swp 3000
+ buf0size_2=$(__check_buf_size 0 "> $buf0size" "MTU 3000: ")
+ mtu_restore $swp
+
+ mtu_set $swp 6000
+ check_buf_size 0 "> $buf0size_2" "MTU 6000: "
+ mtu_restore $swp
+
+ check_buf_size 0 "== $buf0size"
+
+ log_test "${what}MTU impacts buffer size"
+}
+
+test_tc_mtu()
+{
+	# In TC mode, MTU still impacts the threshold below which a buffer size
+	# is not permitted to go.
+
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+ test_mtu "TC: "
+ tc qdisc delete dev $swp root
+}
+
+test_pfc()
+{
+ RET=0
+
+ __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null
+
+ local buf0size=$(get_buf_size 0)
+ local buf1size=$(get_buf_size 1)
+ local buf2size=$(get_buf_size 2)
+ local buf3size=$(get_buf_size 3)
+ check_buf_size 0 "> 0"
+ check_buf_size 1 "> 0"
+ check_buf_size 2 "> 0"
+ check_buf_size 3 "> 0"
+ check_buf_size 4 "== 0"
+ check_buf_size 5 "== 0"
+ check_buf_size 6 "== 0"
+ check_buf_size 7 "== 0"
+
+ log_test "Buffer size sans PFC"
+
+ RET=0
+
+ __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null
+
+ check_prio_pg "0 0 0 0 0 1 2 3 "
+ check_prio_pfc "0 0 0 0 0 1 1 1 "
+ check_buf_size 0 "== $buf0size"
+ check_buf_size 1 "> $buf1size"
+ check_buf_size 2 "> $buf2size"
+ check_buf_size 3 "> $buf3size"
+
+ local buf1size=$(get_buf_size 1)
+ check_buf_size 2 "== $buf1size"
+ check_buf_size 3 "== $buf1size"
+
+ log_test "PFC: Cable length 0"
+
+ RET=0
+
+ __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null
+
+ check_buf_size 0 "== $buf0size"
+ check_buf_size 1 "> $buf1size"
+ check_buf_size 2 "> $buf1size"
+ check_buf_size 3 "> $buf1size"
+
+ log_test "PFC: Cable length 1000"
+
+ RET=0
+
+ __mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null
+ __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+
+ check_prio_pg "0 0 0 0 0 0 0 0 "
+ check_prio_tc "0 0 0 0 0 0 0 0 "
+ check_buf_size 0 "> 0"
+ check_buf_size 1 "== 0"
+ check_buf_size 2 "== 0"
+ check_buf_size 3 "== 0"
+ check_buf_size 4 "== 0"
+ check_buf_size 5 "== 0"
+ check_buf_size 6 "== 0"
+ check_buf_size 7 "== 0"
+
+ log_test "PFC: Restore defaults"
+}
+
+test_tc_priomap()
+{
+ RET=0
+
+ __mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null
+ check_prio_pg "0 1 2 3 4 5 6 7 "
+
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+ check_prio_pg "0 0 0 0 0 0 0 0 "
+
+ __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null
+ check_prio_pg "1 3 5 7 0 2 4 6 "
+
+ tc qdisc delete dev $swp root
+ check_prio_pg "0 1 2 3 4 5 6 7 "
+
+ # Clean up.
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+ __mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null
+ tc qdisc delete dev $swp root
+ __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+
+ log_test "TC: priomap"
+}
+
+test_tc_sizes()
+{
+ local cell_size=$(devlink_cell_size_get)
+ local size=$((cell_size * 1000))
+
+ RET=0
+
+ __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+ check_fail $? "buffer_size should fail before qdisc is added"
+
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+
+ __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ check_err $? "buffer_size should pass after qdisc is added"
+ check_buf_size 0 "== $size" "set size: "
+
+ mtu_set $swp 6000
+ check_buf_size 0 "== $size" "set MTU: "
+ mtu_restore $swp
+
+ __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+
+	# After replacing the qdisc with another of the same kind, buffer_size
+	# still has to work.
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1M
+
+ __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ check_buf_size 0 "== $size" "post replace, set size: "
+
+ __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+
+	# Likewise after replacing with a qdisc of a different kind.
+ tc qdisc replace dev $swp root handle 2: prio bands 8
+
+ __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ check_buf_size 0 "== $size" "post replace different kind, set size: "
+
+ tc qdisc delete dev $swp root
+
+ __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+ check_fail $? "buffer_size should fail after qdisc is deleted"
+
+ log_test "TC: buffer size"
+}
+
+test_int_buf()
+{
+ local what=$1; shift
+
+ RET=0
+
+ local buf0size=$(get_buf_size 0)
+ local tot_size=$(get_tot_size)
+
+ # Size of internal buffer and buffer 9.
+ local dsize=$((tot_size - buf0size))
+
+ tc qdisc add dev $swp clsact
+ tc filter add dev $swp egress matchall skip_sw action mirred egress mirror dev $swp
+
+ local buf0size_2=$(get_buf_size 0)
+ local tot_size_2=$(get_tot_size)
+ local dsize_2=$((tot_size_2 - buf0size_2))
+
+	# Egress SPAN should have increased the "invisible" buffer allocation.
+ ((dsize_2 > dsize))
+ check_err $? "Invisible buffers account for '$dsize_2', expected '> $dsize'"
+
+ mtu_set $swp 3000
+
+ local buf0size_3=$(get_buf_size 0)
+ local tot_size_3=$(get_tot_size)
+ local dsize_3=$((tot_size_3 - buf0size_3))
+
+	# An MTU change might change buffer 0, which will show up in the total,
+	# but the hidden buffers should stay the same size.
+ ((dsize_3 == dsize_2))
+ check_err $? "MTU change: Invisible buffers account for '$dsize_3', expected '== $dsize_2'"
+
+ mtu_restore $swp
+ tc qdisc del dev $swp clsact
+
+ # After SPAN removal, hidden buffers should be back to the original sizes.
+ local buf0size_4=$(get_buf_size 0)
+ local tot_size_4=$(get_tot_size)
+ local dsize_4=$((tot_size_4 - buf0size_4))
+ ((dsize_4 == dsize))
+ check_err $? "SPAN removed: Invisible buffers account for '$dsize_4', expected '== $dsize'"
+
+ log_test "${what}internal buffer size"
+}
+
+test_tc_int_buf()
+{
+ local cell_size=$(devlink_cell_size_get)
+ local size=$((cell_size * 1000))
+
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+ test_int_buf "TC: "
+
+ __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ test_int_buf "TC+buffsize: "
+
+ __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+ tc qdisc delete dev $swp root
+}
+
+trap cleanup EXIT
+
+bail_on_lldpad
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
index faa51012cdac..0bf76f13c030 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
@@ -82,3 +82,17 @@ bail_on_lldpad()
fi
fi
}
+
+__mlnx_qos()
+{
+ local err
+
+ mlnx_qos "$@" 2>/dev/null
+ err=$?
+
+ if ((err)); then
+ echo "Error ($err) in mlnx_qos $@" >/dev/stderr
+ fi
+
+ return $err
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index b025daea062d..8f164c80e215 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -145,12 +145,17 @@ switch_create()
# Make sure that ingress quotas are smaller than egress so that there is
# room for both streams of traffic to be admitted to shared buffer.
+ devlink_port_pool_th_save $swp1 0
devlink_port_pool_th_set $swp1 0 5
+ devlink_tc_bind_pool_th_save $swp1 0 ingress
devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5
+ devlink_port_pool_th_save $swp2 0
devlink_port_pool_th_set $swp2 0 5
+ devlink_tc_bind_pool_th_save $swp2 1 ingress
devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5
+ devlink_port_pool_th_save $swp3 4
devlink_port_pool_th_set $swp3 4 12
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
new file mode 100755
index 000000000000..4d900bc1f76c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -0,0 +1,403 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority
+# of 1. This stream is consistently prioritized as priority 1, is put to PG
+# buffer 1, and scheduled at TC 1.
+#
+# - the stream first ingresses through $swp1, where it is forwarded to $swp3
+#
+# - then it ingresses through $swp4. Here it is put into a lossless buffer and
+#   admitted to a small pool ("PFC pool"). The traffic is forwarded to $swp2,
+#   which is shaped, so the PFC pool eventually fills, the headroom fills in
+#   turn, and $swp3 is paused.
+#
+# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept in
+#   a pool ("overflow pool"). The overflow pool needs to be large enough to
+#   contain the whole burst.
+#
+# - eventually the PFC pool drains a bit, the headroom can therefore push some
+#   traffic into the pool, and $swp3 is unpaused again. This way the traffic is
+#   gradually forwarded from the overflow pool, through the PFC pool, out of
+#   $swp2, and eventually to $h2.
+#
+# - if PFC works, all lossless flow packets that ingress through $swp1 should
+#   also be seen ingressing $h2. If it doesn't, there will be drops due to the
+#   mismatch between the ingress rate at $swp1 and the shaped egress rate
+#   towards $h2.
+#
+# - it should all play out relatively quickly, so that SLL and HLL will not
+# cause drops.
+#
+# +-----------------------+
+# | H1 |
+# | + $h1.111 |
+# | | 192.0.2.33/28 |
+# | | |
+# | + $h1 |
+# +---|-------------------+ +--------------------+
+# | | |
+# +---|----------------------|--------------------|---------------------------+
+# | + $swp1 $swp3 + + $swp4 |
+# | | iPOOL1 iPOOL0 | | iPOOL2 |
+# | | ePOOL4 ePOOL5 | | ePOOL4 |
+# | | 1Gbps | | 1Gbps |
+# | | PFC:enabled=1 | | PFC:enabled=1 |
+# | +-|----------------------|-+ +-|------------------------+ |
+# | | + $swp1.111 $swp3.111 + | | + $swp4.111 | |
+# | | | | | |
+# | | BR1 | | BR2 | |
+# | | | | | |
+# | | | | + $swp2.111 | |
+# | +--------------------------+ +---------|----------------+ |
+# | | |
+# | iPOOL0: 500KB dynamic | |
+# | iPOOL1: 10MB static | |
+# | iPOOL2: 1MB static + $swp2 |
+# | ePOOL4: 500KB dynamic | iPOOL0 |
+# | ePOOL5: 10MB static | ePOOL6 |
+# | ePOOL6: "infinite" static | 200Mbps shaper |
+# +-------------------------------------------------------|-------------------+
+# |
+# +---|-------------------+
+# | + $h2 H2 |
+# | | |
+# | + $h2.111 |
+# | 192.0.2.34/28 |
+# +-----------------------+
+#
+# iPOOL0+ePOOL4 is a helper pool for control traffic etc.
+# iPOOL1+ePOOL5 are overflow pools.
+# iPOOL2+ePOOL6 are PFC pools.
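+#
+# Rough arithmetic behind these sizes, assuming the 1Gbps port speed and
+# 200Mbps shaper shown in the diagram: the 10MB burst takes on the order of
+# 80ms to ingress at $swp1 but roughly 400ms to drain through the shaper at
+# $swp2, so nearly the whole burst has to be parked in the 10MB overflow pool,
+# while the 1MB PFC pool and $swp4's headroom only ever hold the small
+# in-flight excess.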
+
+ALL_TESTS="
+ ping_ipv4
+ test_qos_pfc
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+_1KB=1000
+_100KB=$((100 * _1KB))
+_500KB=$((500 * _1KB))
+_1MB=$((1000 * _1KB))
+_10MB=$((10 * _1MB))
+
+h1_create()
+{
+ simple_if_init $h1
+ mtu_set $h1 10000
+
+ vlan_create $h1 111 v$h1 192.0.2.33/28
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 111
+
+ mtu_restore $h1
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ mtu_set $h2 10000
+
+ vlan_create $h2 111 v$h2 192.0.2.34/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 111
+
+ mtu_restore $h2
+ simple_if_fini $h2
+}
+
+switch_create()
+{
+ # pools
+ # -----
+
+ devlink_pool_size_thtype_save 0
+ devlink_pool_size_thtype_save 4
+ devlink_pool_size_thtype_save 1
+ devlink_pool_size_thtype_save 5
+ devlink_pool_size_thtype_save 2
+ devlink_pool_size_thtype_save 6
+
+ devlink_port_pool_th_save $swp1 1
+ devlink_port_pool_th_save $swp2 6
+ devlink_port_pool_th_save $swp3 5
+ devlink_port_pool_th_save $swp4 2
+
+ devlink_tc_bind_pool_th_save $swp1 1 ingress
+ devlink_tc_bind_pool_th_save $swp2 1 egress
+ devlink_tc_bind_pool_th_save $swp3 1 egress
+ devlink_tc_bind_pool_th_save $swp4 1 ingress
+
+ # Control traffic pools. Just reduce the size. Keep them dynamic so that
+ # we don't need to change all the uninteresting quotas.
+ devlink_pool_size_thtype_set 0 dynamic $_500KB
+ devlink_pool_size_thtype_set 4 dynamic $_500KB
+
+ # Overflow pools.
+ devlink_pool_size_thtype_set 1 static $_10MB
+ devlink_pool_size_thtype_set 5 static $_10MB
+
+	# PFC pools. As per the writ, the size of the egress PFC pool should be
+	# infinite, but in practice it just needs to be large enough to not
+	# matter, so reuse the 10MB limit.
+ devlink_pool_size_thtype_set 2 static $_1MB
+ devlink_pool_size_thtype_set 6 static $_10MB
+
+ # $swp1
+ # -----
+
+ ip link set dev $swp1 up
+ mtu_set $swp1 10000
+ vlan_create $swp1 111
+ ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp1 1 $_10MB
+ devlink_tc_bind_pool_th_set $swp1 1 ingress 1 $_10MB
+
+ # Configure qdisc so that we can configure PG and therefore pool
+ # assignment.
+ tc qdisc replace dev $swp1 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+ __mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
+
+ # $swp2
+ # -----
+
+ ip link set dev $swp2 up
+ mtu_set $swp2 10000
+ vlan_create $swp2 111
+ ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp2 6 $_10MB
+ devlink_tc_bind_pool_th_set $swp2 1 egress 6 $_10MB
+
+ # prio 0->TC0 (band 7), 1->TC1 (band 6). TC1 is shaped.
+ tc qdisc replace dev $swp2 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+ tc qdisc replace dev $swp2 parent 1:7 handle 17: \
+ tbf rate 200Mbit burst 131072 limit 1M
+
+ # $swp3
+ # -----
+
+ ip link set dev $swp3 up
+ mtu_set $swp3 10000
+ vlan_create $swp3 111
+ ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp3 5 $_10MB
+ devlink_tc_bind_pool_th_set $swp3 1 egress 5 $_10MB
+
+ # prio 0->TC0 (band 7), 1->TC1 (band 6)
+ tc qdisc replace dev $swp3 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+
+ # Need to enable PFC so that PAUSE takes effect. Therefore need to put
+ # the lossless prio into a buffer of its own. Don't bother with buffer
+ # sizes though, there is not going to be any pressure in the "backward"
+ # direction.
+ __mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
+ __mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+
+ # $swp4
+ # -----
+
+ ip link set dev $swp4 up
+ mtu_set $swp4 10000
+ vlan_create $swp4 111
+ ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp4 2 $_1MB
+ devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $_1MB
+
+ # Configure qdisc so that we can hand-tune headroom.
+ tc qdisc replace dev $swp4 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+ __mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
+ __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+	# PG0 will get autoconfigured to Xoff. Give PG1 an arbitrary 100K,
+	# which after subtracting 2*MTU leaves about 80K of delay provision.
+	__mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null
+
+ # bridges
+ # -------
+
+ ip link add name br1 type bridge vlan_filtering 0
+ ip link set dev $swp1.111 master br1
+ ip link set dev $swp3.111 master br1
+ ip link set dev br1 up
+
+ ip link add name br2 type bridge vlan_filtering 0
+ ip link set dev $swp2.111 master br2
+ ip link set dev $swp4.111 master br2
+ ip link set dev br2 up
+}
+
+switch_destroy()
+{
+ # Do this first so that we can reset the limits to values that are only
+ # valid for the original static / dynamic setting.
+ devlink_pool_size_thtype_restore 6
+ devlink_pool_size_thtype_restore 5
+ devlink_pool_size_thtype_restore 4
+ devlink_pool_size_thtype_restore 2
+ devlink_pool_size_thtype_restore 1
+ devlink_pool_size_thtype_restore 0
+
+ # bridges
+ # -------
+
+ ip link set dev br2 down
+ ip link set dev $swp4.111 nomaster
+ ip link set dev $swp2.111 nomaster
+ ip link del dev br2
+
+ ip link set dev br1 down
+ ip link set dev $swp3.111 nomaster
+ ip link set dev $swp1.111 nomaster
+ ip link del dev br1
+
+ # $swp4
+ # -----
+
+ __mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null
+ __mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null
+ __mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ tc qdisc del dev $swp4 root
+
+ devlink_tc_bind_pool_th_restore $swp4 1 ingress
+ devlink_port_pool_th_restore $swp4 2
+
+ vlan_destroy $swp4 111
+ mtu_restore $swp4
+ ip link set dev $swp4 down
+
+ # $swp3
+ # -----
+
+ __mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null
+ __mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ tc qdisc del dev $swp3 root
+
+ devlink_tc_bind_pool_th_restore $swp3 1 egress
+ devlink_port_pool_th_restore $swp3 5
+
+ vlan_destroy $swp3 111
+ mtu_restore $swp3
+ ip link set dev $swp3 down
+
+ # $swp2
+ # -----
+
+ tc qdisc del dev $swp2 parent 1:7
+ tc qdisc del dev $swp2 root
+
+ devlink_tc_bind_pool_th_restore $swp2 1 egress
+ devlink_port_pool_th_restore $swp2 6
+
+ vlan_destroy $swp2 111
+ mtu_restore $swp2
+ ip link set dev $swp2 down
+
+ # $swp1
+ # -----
+
+ __mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ tc qdisc del dev $swp1 root
+
+ devlink_tc_bind_pool_th_restore $swp1 1 ingress
+ devlink_port_pool_th_restore $swp1 1
+
+ vlan_destroy $swp1 111
+ mtu_restore $swp1
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ swp4=${NETIFS[p6]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.34
+}
+
+test_qos_pfc()
+{
+ RET=0
+
+ # 10M pool, each packet is 8K of payload + headers
+ local pkts=$((_10MB / 8050))
+ local size=$((pkts * 8050))
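+	# With an 8000-byte UDP payload each frame is roughly 8050 bytes on the
+	# wire (payload plus Ethernet/VLAN/IP/UDP overhead), so pkts comes out
+	# to about 1242 and size to just under 10MB, one overflow pool's worth.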
+ local in0=$(ethtool_stats_get $swp1 rx_octets_prio_1)
+ local out0=$(ethtool_stats_get $swp2 tx_octets_prio_1)
+
+ $MZ $h1 -p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34 \
+ -a own -b $h2mac -c $pkts -t udp -q
+ sleep 2
+
+ local in1=$(ethtool_stats_get $swp1 rx_octets_prio_1)
+ local out1=$(ethtool_stats_get $swp2 tx_octets_prio_1)
+
+ local din=$((in1 - in0))
+ local dout=$((out1 - out0))
+
+ local pct_in=$((din * 100 / size))
+
+ ((pct_in > 95 && pct_in < 105))
+ check_err $? "Relative ingress out of expected bounds, $pct_in% should be 100%"
+
+ ((dout == din))
+ check_err $? "$((din - dout)) bytes out of $din ingressed got lost"
+
+ log_test "PFC"
+}
+
+trap cleanup EXIT
+
+bail_on_lldpad
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
index 94c37124a840..af64bc9ea8ab 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
@@ -27,11 +27,17 @@ switch_create()
# amount of traffic that is admitted to the shared buffers. This makes
# sure that there is always enough traffic of all types to select from
# for the DWRR process.
+ devlink_port_pool_th_save $swp1 0
devlink_port_pool_th_set $swp1 0 12
+ devlink_tc_bind_pool_th_save $swp1 0 ingress
devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12
+ devlink_port_pool_th_save $swp2 4
devlink_port_pool_th_set $swp2 4 12
+ devlink_tc_bind_pool_th_save $swp2 7 egress
devlink_tc_bind_pool_th_set $swp2 7 egress 4 5
+ devlink_tc_bind_pool_th_save $swp2 6 egress
devlink_tc_bind_pool_th_set $swp2 6 egress 4 5
+ devlink_tc_bind_pool_th_save $swp2 5 egress
devlink_tc_bind_pool_th_set $swp2 5 egress 4 5
# Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
index 0d347d48c112..b0cb1aaffdda 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -121,6 +121,7 @@ h1_destroy()
h2_create()
{
host_create $h2 2
+ tc qdisc add dev $h2 clsact
# Some of the tests in this suite use multicast traffic. As this traffic
# enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus
@@ -141,6 +142,7 @@ h2_create()
h2_destroy()
{
ethtool -s $h2 autoneg on
+ tc qdisc del dev $h2 clsact
host_destroy $h2
}
@@ -206,6 +208,7 @@ switch_create()
ip link set dev br2_11 up
local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
+ devlink_port_pool_th_save $swp3 8
devlink_port_pool_th_set $swp3 8 $size
}
@@ -336,6 +339,17 @@ get_qdisc_npackets()
qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .packets
}
+send_packets()
+{
+ local vlan=$1; shift
+ local proto=$1; shift
+ local pkts=$1; shift
+
+ $MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
+ -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \
+ -t $proto -q -c $pkts "$@"
+}
+
# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
# success. After 10 failed attempts it bails out and returns 1. It dumps the
# backlog size to stdout.
@@ -364,9 +378,7 @@ build_backlog()
return 1
fi
- $MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
- -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \
- -t $proto -q -c $pkts "$@"
+ send_packets $vlan $proto $pkts "$@"
done
}
@@ -531,3 +543,115 @@ do_mc_backlog_test()
log_test "TC $((vlan - 10)): Qdisc reports MC backlog"
}
+
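+# do_drop_test() checks that the action installed by qevent_rule_install_$subtest
+# (mirror or trap) observes the packets dropped by the qevent under test, and
+# nothing else. $trigger names that qevent for the log message, and
+# $fetch_counter is a command that prints the current count seen by the action.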
+do_drop_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local trigger=$1; shift
+ local subtest=$1; shift
+ local fetch_counter=$1; shift
+ local backlog
+ local base
+ local now
+ local pct
+
+ RET=0
+
+ start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) $h3_mac
+
+ # Create a bit of a backlog and observe no mirroring due to drops.
+ qevent_rule_install_$subtest
+ base=$($fetch_counter)
+
+ build_backlog $vlan $((2 * limit / 3)) udp >/dev/null
+
+ busywait 1100 until_counter_is ">= $((base + 1))" $fetch_counter >/dev/null
+ check_fail $? "Spurious packets observed without buffer pressure"
+
+ # Push to the queue until it's at the limit. The configured limit is
+ # rounded by the qdisc and then by the driver, so this is the best we
+ # can do to get to the real limit of the system.
+ build_backlog $vlan $((3 * limit / 2)) udp >/dev/null
+
+ base=$($fetch_counter)
+ send_packets $vlan udp 11
+
+ now=$(busywait 1100 until_counter_is ">= $((base + 10))" $fetch_counter)
+ check_err $? "Dropped packets not observed: 11 expected, $((now - base)) seen"
+
+ # When no extra traffic is injected, there should be no mirroring.
+ busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
+ check_fail $? "Spurious packets observed"
+
+ # When the rule is uninstalled, there should be no mirroring.
+ qevent_rule_uninstall_$subtest
+ send_packets $vlan udp 11
+ busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
+ check_fail $? "Spurious packets observed after uninstall"
+
+ log_test "TC $((vlan - 10)): ${trigger}ped packets $subtest'd"
+
+ stop_traffic
+ sleep 1
+}
+
+qevent_rule_install_mirror()
+{
+ tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+ action mirred egress mirror dev $swp2 hw_stats disabled
+}
+
+qevent_rule_uninstall_mirror()
+{
+ tc filter del block 10 pref 1234 handle 102 matchall
+}
+
+qevent_counter_fetch_mirror()
+{
+ tc_rule_handle_stats_get "dev $h2 ingress" 101
+}
+
+do_drop_mirror_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local qevent_name=$1; shift
+
+ tc filter add dev $h2 ingress pref 1 handle 101 prot ip \
+ flower skip_sw ip_proto udp \
+ action drop
+
+ do_drop_test "$vlan" "$limit" "$qevent_name" mirror \
+ qevent_counter_fetch_mirror
+
+ tc filter del dev $h2 ingress pref 1 handle 101 flower
+}
+
+qevent_rule_install_trap()
+{
+ tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+ action trap hw_stats disabled
+}
+
+qevent_rule_uninstall_trap()
+{
+ tc filter del block 10 pref 1234 handle 102 matchall
+}
+
+qevent_counter_fetch_trap()
+{
+ local trap_name=$1; shift
+
+ devlink_trap_rx_packets_get "$trap_name"
+}
+
+do_drop_trap_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local trap_name=$1; shift
+
+ do_drop_test "$vlan" "$limit" "$trap_name" trap \
+ "qevent_counter_fetch_trap $trap_name"
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
index 1c36c576613b..3f007c5f8361 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -7,6 +7,8 @@ ALL_TESTS="
ecn_nodrop_test
red_test
mc_backlog_test
+ red_mirror_test
+ red_trap_test
"
: ${QDISC:=ets}
source sch_red_core.sh
@@ -83,6 +85,26 @@ mc_backlog_test()
uninstall_qdisc
}
+red_mirror_test()
+{
+ install_qdisc qevent early_drop block 10
+
+ do_drop_mirror_test 10 $BACKLOG1 early_drop
+ do_drop_mirror_test 11 $BACKLOG2 early_drop
+
+ uninstall_qdisc
+}
+
+red_trap_test()
+{
+ install_qdisc qevent early_drop block 10
+
+ do_drop_trap_test 10 $BACKLOG1 early_drop
+ do_drop_trap_test 11 $BACKLOG2 early_drop
+
+ uninstall_qdisc
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
index 558667ea11ec..ede9c38d3eff 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -7,6 +7,7 @@ ALL_TESTS="
ecn_nodrop_test
red_test
mc_backlog_test
+ red_mirror_test
"
source sch_red_core.sh
@@ -57,6 +58,13 @@ mc_backlog_test()
uninstall_qdisc
}
+red_mirror_test()
+{
+ install_qdisc qevent early_drop block 10
+ do_drop_mirror_test 10 $BACKLOG
+ uninstall_qdisc
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
index 58f3a05f08af..7d9e73a43a49 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
@@ -15,7 +15,7 @@ source mlxsw_lib.sh
SB_POOL_ING=0
SB_POOL_EGR_CPU=10
-SB_ITC_CPU_IP=3
+SB_ITC_CPU_IP=2
SB_ITC_CPU_ARP=2
SB_ITC=0
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index fd583a171db7..d7cf33a3f18d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -28,7 +28,7 @@ cleanup()
trap cleanup EXIT
-ALL_TESTS="router tc_flower mirror_gre"
+ALL_TESTS="router tc_flower mirror_gre tc_police"
for current_test in ${TESTS:-$ALL_TESTS}; do
source ${current_test}_scale.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh
new file mode 100644
index 000000000000..e79ac0dad1f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_police_scale.sh
+
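+# Report how many policers to instantiate: the devlink-reported capacity of
+# single-rate policers for the positive test, or one more than that when the
+# test is expected to fail.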
+tc_police_get_target()
+{
+ local should_fail=$1; shift
+ local target
+
+ target=$(devlink_resource_size_get global_policers single_rate_policers)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index 43ba1b438f6d..43f662401bc3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -22,7 +22,7 @@ cleanup()
devlink_sp_read_kvd_defaults
trap cleanup EXIT
-ALL_TESTS="router tc_flower mirror_gre"
+ALL_TESTS="router tc_flower mirror_gre tc_police"
for current_test in ${TESTS:-$ALL_TESTS}; do
source ${current_test}_scale.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh
new file mode 100644
index 000000000000..e79ac0dad1f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_police_scale.sh
+
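+# Report how many policers to instantiate: the devlink-reported capacity of
+# single-rate policers for the positive test, or one more than that when the
+# test is expected to fail.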
+tc_police_get_target()
+{
+ local should_fail=$1; shift
+ local target
+
+ target=$(devlink_resource_size_get global_policers single_rate_policers)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh
deleted file mode 100755
index 68c80d0ec1ec..000000000000
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh
+++ /dev/null
@@ -1,186 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-lib_dir=$(dirname $0)/../../../net/forwarding
-
-ALL_TESTS="
- shared_block_drop_test
- egress_redirect_test
- multi_mirror_test
-"
-NUM_NETIFS=2
-
-source $lib_dir/tc_common.sh
-source $lib_dir/lib.sh
-
-switch_create()
-{
- simple_if_init $swp1 192.0.2.1/24
- simple_if_init $swp2 192.0.2.2/24
-}
-
-switch_destroy()
-{
- simple_if_fini $swp2 192.0.2.2/24
- simple_if_fini $swp1 192.0.2.1/24
-}
-
-shared_block_drop_test()
-{
- RET=0
-
- # It is forbidden in mlxsw driver to have mixed-bound
- # shared block with a drop rule.
-
- tc qdisc add dev $swp1 ingress_block 22 clsact
- check_err $? "Failed to create clsact with ingress block"
-
- tc filter add block 22 protocol ip pref 1 handle 101 flower \
- skip_sw dst_ip 192.0.2.2 action drop
- check_err $? "Failed to add drop rule to ingress bound block"
-
- tc qdisc add dev $swp2 ingress_block 22 clsact
- check_err $? "Failed to create another clsact with ingress shared block"
-
- tc qdisc del dev $swp2 clsact
-
- tc qdisc add dev $swp2 egress_block 22 clsact
- check_fail $? "Incorrect success to create another clsact with egress shared block"
-
- tc filter del block 22 protocol ip pref 1 handle 101 flower
-
- tc qdisc add dev $swp2 egress_block 22 clsact
- check_err $? "Failed to create another clsact with egress shared block after blocker drop rule removed"
-
- tc filter add block 22 protocol ip pref 1 handle 101 flower \
- skip_sw dst_ip 192.0.2.2 action drop
- check_fail $? "Incorrect success to add drop rule to mixed bound block"
-
- tc qdisc del dev $swp1 clsact
-
- tc qdisc add dev $swp1 egress_block 22 clsact
- check_err $? "Failed to create another clsact with egress shared block"
-
- tc filter add block 22 protocol ip pref 1 handle 101 flower \
- skip_sw dst_ip 192.0.2.2 action drop
- check_err $? "Failed to add drop rule to egress bound shared block"
-
- tc filter del block 22 protocol ip pref 1 handle 101 flower
-
- tc qdisc del dev $swp2 clsact
- tc qdisc del dev $swp1 clsact
-
- log_test "shared block drop"
-}
-
-egress_redirect_test()
-{
- RET=0
-
- # It is forbidden in mlxsw driver to have mirred redirect on
- # egress-bound block.
-
- tc qdisc add dev $swp1 ingress_block 22 clsact
- check_err $? "Failed to create clsact with ingress block"
-
- tc filter add block 22 protocol ip pref 1 handle 101 flower \
- skip_sw dst_ip 192.0.2.2 \
- action mirred egress redirect dev $swp2
- check_err $? "Failed to add redirect rule to ingress bound block"
-
- tc qdisc add dev $swp2 ingress_block 22 clsact
- check_err $? "Failed to create another clsact with ingress shared block"
-
- tc qdisc del dev $swp2 clsact
-
- tc qdisc add dev $swp2 egress_block 22 clsact
- check_fail $? "Incorrect success to create another clsact with egress shared block"
-
- tc filter del block 22 protocol ip pref 1 handle 101 flower
-
- tc qdisc add dev $swp2 egress_block 22 clsact
- check_err $? "Failed to create another clsact with egress shared block after blocker redirect rule removed"
-
- tc filter add block 22 protocol ip pref 1 handle 101 flower \
- skip_sw dst_ip 192.0.2.2 \
- action mirred egress redirect dev $swp2
- check_fail $? "Incorrect success to add redirect rule to mixed bound block"
-
- tc qdisc del dev $swp1 clsact
-
- tc qdisc add dev $swp1 egress_block 22 clsact
- check_err $? "Failed to create another clsact with egress shared block"
-
- tc filter add block 22 protocol ip pref 1 handle 101 flower \
- skip_sw dst_ip 192.0.2.2 \
- action mirred egress redirect dev $swp2
- check_fail $? "Incorrect success to add redirect rule to egress bound shared block"
-
- tc qdisc del dev $swp2 clsact
-
- tc filter add block 22 protocol ip pref 1 handle 101 flower \
- skip_sw dst_ip 192.0.2.2 \
- action mirred egress redirect dev $swp2
- check_fail $? "Incorrect success to add redirect rule to egress bound block"
-
- tc qdisc del dev $swp1 clsact
-
- log_test "shared block drop"
-}
-
-multi_mirror_test()
-{
- RET=0
-
- # It is forbidden in mlxsw driver to have multiple mirror
- # actions in a single rule.
-
- tc qdisc add dev $swp1 clsact
-
- tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
- skip_sw dst_ip 192.0.2.2 \
- action mirred egress mirror dev $swp2
- check_err $? "Failed to add rule with single mirror action"
-
- tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
-
- tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
- skip_sw dst_ip 192.0.2.2 \
- action mirred egress mirror dev $swp2 \
- action mirred egress mirror dev $swp1
- check_fail $? "Incorrect success to add rule with two mirror actions"
-
- tc qdisc del dev $swp1 clsact
-
- log_test "multi mirror"
-}
-
-setup_prepare()
-{
- swp1=${NETIFS[p1]}
- swp2=${NETIFS[p2]}
-
- vrf_prepare
-
- switch_create
-}
-
-cleanup()
-{
- pre_cleanup
-
- switch_destroy
-
- vrf_cleanup
-}
-
-check_tc_shblock_support
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh
new file mode 100755
index 000000000000..448b75c1545a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that policers shared by different tc filters are correctly reference
+# counted by observing policers' occupancy via devlink-resource.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ tc_police_occ_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+switch_create()
+{
+ simple_if_init $swp1
+ tc qdisc add dev $swp1 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ h1_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+tc_police_occ_get()
+{
+ devlink_resource_occ_get global_policers single_rate_policers
+}
+
+tc_police_occ_test()
+{
+ RET=0
+
+ local occ=$(tc_police_occ_get)
+
+ tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+ flower skip_sw \
+ action police rate 100mbit burst 100k conform-exceed drop/ok
+ (( occ + 1 == $(tc_police_occ_get) ))
+ check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+ tc filter del dev $swp1 ingress pref 1 handle 101 flower
+ (( occ == $(tc_police_occ_get) ))
+ check_err $? "Got occupancy $(tc_police_occ_get), expected $occ"
+
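+ # Two filters referencing the same policer (index 10) should consume only
+ # one policer entry, which stays allocated until the last reference goes
+ # away.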
+ tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+ flower skip_sw \
+ action police rate 100mbit burst 100k conform-exceed drop/ok \
+ index 10
+ tc filter add dev $swp1 ingress pref 2 handle 102 proto ip \
+ flower skip_sw action police index 10
+
+ (( occ + 1 == $(tc_police_occ_get) ))
+ check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+ tc filter del dev $swp1 ingress pref 2 handle 102 flower
+ (( occ + 1 == $(tc_police_occ_get) ))
+ check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+ tc filter del dev $swp1 ingress pref 1 handle 101 flower
+ (( occ == $(tc_police_occ_get) ))
+ check_err $? "Got occupancy $(tc_police_occ_get), expected $occ"
+
+ log_test "tc police occupancy"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
new file mode 100644
index 000000000000..3e3e06ea5703
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
@@ -0,0 +1,100 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TC_POLICE_NUM_NETIFS=2
+
+tc_police_h1_create()
+{
+ simple_if_init $h1
+}
+
+tc_police_h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+tc_police_switch_create()
+{
+ simple_if_init $swp1
+ tc qdisc add dev $swp1 clsact
+}
+
+tc_police_switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1
+}
+
+tc_police_addr()
+{
+ local num=$1; shift
+
+ printf "2001:db8:1::%x" $num
+}
+
+tc_police_rules_create()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
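+ # Build a tc batch file with $count rules, each matching a different IPv6
+ # address and carrying its own police action, so every rule consumes a
+ # separate policer.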
+ TC_POLICE_BATCH_FILE="$(mktemp)"
+
+ for ((i = 0; i < count; ++i)); do
+ cat >> $TC_POLICE_BATCH_FILE <<-EOF
+ filter add dev $swp1 ingress \
+ prot ipv6 \
+ pref 1000 \
+ flower skip_sw dst_ip $(tc_police_addr $i) \
+ action police rate 10mbit burst 100k \
+ conform-exceed drop/ok
+ EOF
+ done
+
+ tc -b $TC_POLICE_BATCH_FILE
+ check_err_fail $should_fail $? "Rule insertion"
+}
+
+__tc_police_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ tc_police_rules_create $count $should_fail
+
+ offload_count=$(tc filter show dev $swp1 ingress | grep in_hw | wc -l)
+ ((offload_count == count))
+ check_err_fail $should_fail $? "tc police offload count"
+}
+
+tc_police_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ if ! tc_offload_check $TC_POLICE_NUM_NETIFS; then
+ check_err 1 "Could not test offloaded functionality"
+ return
+ fi
+
+ __tc_police_test $count $should_fail
+}
+
+tc_police_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ tc_police_h1_create
+ tc_police_switch_create
+}
+
+tc_police_cleanup()
+{
+ pre_cleanup
+
+ tc_police_switch_destroy
+ tc_police_h1_destroy
+
+ vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
new file mode 100755
index 000000000000..553cb9fad508
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -0,0 +1,394 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ shared_block_drop_test
+ egress_redirect_test
+ multi_mirror_test
+ matchall_sample_egress_test
+ matchall_mirror_behind_flower_ingress_test
+ matchall_sample_behind_flower_ingress_test
+ matchall_mirror_behind_flower_egress_test
+ police_limits_test
+ multi_police_test
+"
+NUM_NETIFS=2
+
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.1/24
+ simple_if_init $swp2 192.0.2.2/24
+}
+
+switch_destroy()
+{
+ simple_if_fini $swp2 192.0.2.2/24
+ simple_if_fini $swp1 192.0.2.1/24
+}
+
+shared_block_drop_test()
+{
+ RET=0
+
+ # It is forbidden in the mlxsw driver to have a mixed-bound
+ # shared block with a drop rule.
+
+ tc qdisc add dev $swp1 ingress_block 22 clsact
+ check_err $? "Failed to create clsact with ingress block"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_err $? "Failed to add drop rule to ingress bound block"
+
+ tc qdisc add dev $swp2 ingress_block 22 clsact
+ check_err $? "Failed to create another clsact with ingress shared block"
+
+ tc qdisc del dev $swp2 clsact
+
+ tc qdisc add dev $swp2 egress_block 22 clsact
+ check_fail $? "Incorrect success to create another clsact with egress shared block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ tc qdisc add dev $swp2 egress_block 22 clsact
+ check_err $? "Failed to create another clsact with egress shared block after blocker drop rule removed"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_fail $? "Incorrect success to add drop rule to mixed bound block"
+
+ tc qdisc del dev $swp1 clsact
+
+ tc qdisc add dev $swp1 egress_block 22 clsact
+ check_err $? "Failed to create another clsact with egress shared block"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_err $? "Failed to add drop rule to egress bound shared block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ log_test "shared block drop"
+}
+
+egress_redirect_test()
+{
+ RET=0
+
+ # It is forbidden in the mlxsw driver to have a mirred redirect on
+ # an egress-bound block.
+
+ tc qdisc add dev $swp1 ingress_block 22 clsact
+ check_err $? "Failed to create clsact with ingress block"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress redirect dev $swp2
+ check_err $? "Failed to add redirect rule to ingress bound block"
+
+ tc qdisc add dev $swp2 ingress_block 22 clsact
+ check_err $? "Failed to create another clsact with ingress shared block"
+
+ tc qdisc del dev $swp2 clsact
+
+ tc qdisc add dev $swp2 egress_block 22 clsact
+ check_fail $? "Incorrect success to create another clsact with egress shared block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ tc qdisc add dev $swp2 egress_block 22 clsact
+ check_err $? "Failed to create another clsact with egress shared block after blocker redirect rule removed"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress redirect dev $swp2
+ check_fail $? "Incorrect success to add redirect rule to mixed bound block"
+
+ tc qdisc del dev $swp1 clsact
+
+ tc qdisc add dev $swp1 egress_block 22 clsact
+ check_err $? "Failed to create another clsact with egress shared block"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress redirect dev $swp2
+ check_fail $? "Incorrect success to add redirect rule to egress bound shared block"
+
+ tc qdisc del dev $swp2 clsact
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress redirect dev $swp2
+ check_fail $? "Incorrect success to add redirect rule to egress bound block"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "egress redirect"
+}
+
+multi_mirror_test()
+{
+ RET=0
+
+ # It is forbidden in the mlxsw driver to have multiple mirror
+ # actions in a single rule.
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress mirror dev $swp2
+ check_err $? "Failed to add rule with single mirror action"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress mirror dev $swp2 \
+ action mirred egress mirror dev $swp1
+ check_fail $? "Incorrect success to add rule with two mirror actions"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "multi mirror"
+}
+
+matchall_sample_egress_test()
+{
+ RET=0
+
+ # It is forbidden in the mlxsw driver to have a matchall with sample action
+ # bound on egress.
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress protocol all pref 1 handle 101 \
+ matchall skip_sw action sample rate 100 group 1
+ check_err $? "Failed to add rule with sample action on ingress"
+
+ tc filter del dev $swp1 ingress protocol all pref 1 handle 101 matchall
+
+ tc filter add dev $swp1 egress protocol all pref 1 handle 101 \
+ matchall skip_sw action sample rate 100 group 1
+ check_fail $? "Incorrect success to add rule with sample action on egress"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "matchall sample egress"
+}
+
+matchall_behind_flower_ingress_test()
+{
+ local action=$1
+ local action_args=$2
+
+ RET=0
+
+ # On ingress, all matchall-mirror and matchall-sample
+ # rules have to be in front of the flower rules
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+
+ tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \
+ matchall skip_sw action $action_args
+ check_err $? "Failed to add matchall rule in front of a flower rule"
+
+ tc filter del dev $swp1 ingress protocol all pref 9 handle 102 matchall
+
+ tc filter add dev $swp1 ingress protocol all pref 11 handle 102 \
+ matchall skip_sw action $action_args
+ check_fail $? "Incorrect success to add matchall rule behind a flower rule"
+
+ tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower
+
+ tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \
+ matchall skip_sw action $action_args
+
+ tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_err $? "Failed to add flower rule behind a matchall rule"
+
+ tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower
+
+ tc filter add dev $swp1 ingress protocol ip pref 8 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_fail $? "Incorrect success to add flower rule in front of a matchall rule"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "matchall $action flower ingress"
+}
+
+matchall_mirror_behind_flower_ingress_test()
+{
+ matchall_behind_flower_ingress_test "mirror" "mirred egress mirror dev $swp2"
+}
+
+matchall_sample_behind_flower_ingress_test()
+{
+ matchall_behind_flower_ingress_test "sample" "sample rate 100 group 1"
+}
+
+matchall_behind_flower_egress_test()
+{
+ local action=$1
+ local action_args=$2
+
+ RET=0
+
+ # On egress, all matchall-mirror rules have to be behind the flower rules
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+
+ tc filter add dev $swp1 egress protocol all pref 11 handle 102 \
+ matchall skip_sw action $action_args
+ check_err $? "Failed to add matchall rule in front of a flower rule"
+
+ tc filter del dev $swp1 egress protocol all pref 11 handle 102 matchall
+
+ tc filter add dev $swp1 egress protocol all pref 9 handle 102 \
+ matchall skip_sw action $action_args
+ check_fail $? "Incorrect success to add matchall rule behind a flower rule"
+
+ tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower
+
+ tc filter add dev $swp1 egress protocol all pref 11 handle 102 \
+ matchall skip_sw action $action_args
+
+ tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_err $? "Failed to add flower rule behind a matchall rule"
+
+ tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower
+
+ tc filter add dev $swp1 egress protocol ip pref 12 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_fail $? "Incorrect success to add flower rule in front of a matchall rule"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "matchall $action flower egress"
+}
+
+matchall_mirror_behind_flower_egress_test()
+{
+ matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2"
+}
+
+police_limits_test()
+{
+ RET=0
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 0.5kbit burst 1m conform-exceed drop/ok
+ check_fail $? "Incorrect success to add police action with too low rate"
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 2.5tbit burst 1g conform-exceed drop/ok
+ check_fail $? "Incorrect success to add police action with too high rate"
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 1.5kbit burst 1m conform-exceed drop/ok
+ check_err $? "Failed to add police action with low rate"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 1.9tbit burst 1g conform-exceed drop/ok
+ check_err $? "Failed to add police action with high rate"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 1.5kbit burst 512b conform-exceed drop/ok
+ check_fail $? "Incorrect success to add police action with too low burst size"
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 1.5kbit burst 2k conform-exceed drop/ok
+ check_err $? "Failed to add police action with low burst size"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "police rate and burst limits"
+}
+
+multi_police_test()
+{
+ RET=0
+
+ # It is forbidden in the mlxsw driver to have multiple police
+ # actions in a single rule.
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+ flower skip_sw \
+ action police rate 100mbit burst 100k conform-exceed drop/ok
+ check_err $? "Failed to add rule with single police action"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+ flower skip_sw \
+ action police rate 100mbit burst 100k conform-exceed drop/pipe \
+ action police rate 200mbit burst 200k conform-exceed drop/ok
+ check_fail $? "Incorrect success to add rule with two police actions"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "multi police"
+}
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+
+ vrf_prepare
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ vrf_cleanup
+}
+
+check_tc_shblock_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 9f9741444549..40909c254365 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -23,6 +23,27 @@ fw_flash_test()
devlink dev flash $DL_HANDLE file dummy
check_err $? "Failed to flash with status updates on"
+ devlink dev flash $DL_HANDLE file dummy component fw.mgmt
+ check_err $? "Failed to flash with component attribute"
+
+ devlink dev flash $DL_HANDLE file dummy overwrite settings
+ check_fail $? "Flash with overwrite settings should be rejected"
+
+ echo "1"> $DEBUGFS_DIR/fw_update_overwrite_mask
+ check_err $? "Failed to change allowed overwrite mask"
+
+ devlink dev flash $DL_HANDLE file dummy overwrite settings
+ check_err $? "Failed to flash with settings overwrite enabled"
+
+ devlink dev flash $DL_HANDLE file dummy overwrite identifiers
+ check_fail $? "Flash with overwrite identifiers should be rejected"
+
+ echo "3"> $DEBUGFS_DIR/fw_update_overwrite_mask
+ check_err $? "Failed to change allowed overwrite mask"
+
+ devlink dev flash $DL_HANDLE file dummy overwrite identifiers overwrite settings
+ check_err $? "Failed to flash with settings and identifiers overwrite enabled"
+
echo "n"> $DEBUGFS_DIR/fw_update_status
check_err $? "Failed to disable status updates"
@@ -146,11 +167,39 @@ regions_test()
check_region_snapshot_count dummy post-first-request 3
+ devlink region dump $DL_HANDLE/dummy snapshot 25 >> /dev/null
+ check_err $? "Failed to dump snapshot with id 25"
+
+ devlink region read $DL_HANDLE/dummy snapshot 25 addr 0 len 1 >> /dev/null
+ check_err $? "Failed to read snapshot with id 25 (1 byte)"
+
+ devlink region read $DL_HANDLE/dummy snapshot 25 addr 128 len 128 >> /dev/null
+ check_err $? "Failed to read snapshot with id 25 (128 bytes)"
+
+ devlink region read $DL_HANDLE/dummy snapshot 25 addr 128 len $((1<<32)) >> /dev/null
+ check_err $? "Failed to read snapshot with id 25 (oversized)"
+
+ devlink region read $DL_HANDLE/dummy snapshot 25 addr $((1<<32)) len 128 >> /dev/null 2>&1
+ check_fail $? "Bad read of snapshot with id 25 did not fail"
+
devlink region del $DL_HANDLE/dummy snapshot 25
check_err $? "Failed to delete snapshot with id 25"
check_region_snapshot_count dummy post-second-delete 2
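+ # Ask for a snapshot without an id and let jq unwrap the nested JSON
+ # reply to recover the id the kernel allocated.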
+ sid=$(devlink -j region new $DL_HANDLE/dummy | jq '.[][][][]')
+ check_err $? "Failed to create a new snapshot with id allocated by the kernel"
+
+ check_region_snapshot_count dummy post-first-request 3
+
+ devlink region dump $DL_HANDLE/dummy snapshot $sid >> /dev/null
+ check_err $? "Failed to dump a snapshot with id allocated by the kernel"
+
+ devlink region del $DL_HANDLE/dummy snapshot $sid
+ check_err $? "Failed to delete snapshot with id allocated by the kernel"
+
+ check_region_snapshot_count dummy post-first-request 2
+
log_test "regions test"
}
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
new file mode 100755
index 000000000000..25c896b9e2eb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+NSIM_ID=$((RANDOM % 1024))
+NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
+NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0
+NSIM_NETDEV=
+num_passes=0
+num_errors=0
+
+function cleanup_nsim {
+ if [ -e $NSIM_DEV_SYS ]; then
+ echo $NSIM_ID > /sys/bus/netdevsim/del_device
+ fi
+}
+
+function cleanup {
+ cleanup_nsim
+}
+
+trap cleanup EXIT
+
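+# Print the name of the netdev that exists now but is not in the snapshot of
+# interface names whose variable name is passed as $1.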
+function get_netdev_name {
+ local -n old=$1
+
+ new=$(ls /sys/class/net)
+
+ for netdev in $new; do
+ for check in $old; do
+ [ $netdev == $check ] && break
+ done
+
+ if [ $netdev != $check ]; then
+ echo $netdev
+ break
+ fi
+ done
+}
+
+function check {
+ local code=$1
+ local str=$2
+ local exp_str=$3
+
+ if [ $code -ne 0 ]; then
+ ((num_errors++))
+ return
+ fi
+
+ if [ "$str" != "$exp_str" ]; then
+ echo -e "Expected: '$exp_str', got '$str'"
+ ((num_errors++))
+ return
+ fi
+
+ ((num_passes++))
+}
+
+# Bail if ethtool is too old
+if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then
+ echo "SKIP: No --include-statistics support in ethtool"
+ exit 4
+fi
+
+# Make a netdevsim
+old_netdevs=$(ls /sys/class/net)
+
+modprobe netdevsim
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+set -o pipefail
+
+echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx
+echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx
+
+s=$(ethtool --json -a $NSIM_NETDEV | jq '.[].statistics')
+check $? "$s" "null"
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics')
+check $? "$s" "{}"
+
+echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length')
+check $? "$s" "1"
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames')
+check $? "$s" "2"
+
+echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length')
+check $? "$s" "2"
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.rx_pause_frames')
+check $? "$s" "1"
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames')
+check $? "$s" "2"
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $((num_passes)) checks"
+ exit 0
+else
+ echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
new file mode 100755
index 000000000000..1b08e042cf94
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -0,0 +1,953 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+VNI_GEN=$RANDOM
+NSIM_ID=$((RANDOM % 1024))
+NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
+NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID
+NSIM_NETDEV=
+HAS_ETHTOOL=
+STATIC_ENTRIES=
+EXIT_STATUS=0
+num_cases=0
+num_errors=0
+
+clean_up_devs=( )
+
+function err_cnt {
+ echo "ERROR:" $@
+ EXIT_STATUS=1
+ ((num_errors++))
+ ((num_cases++))
+}
+
+function pass_cnt {
+ ((num_cases++))
+}
+
+function cleanup_tuns {
+ for dev in "${clean_up_devs[@]}"; do
+ [ -e /sys/class/net/$dev ] && ip link del dev $dev
+ done
+ clean_up_devs=( )
+}
+
+function cleanup_nsim {
+ if [ -e $NSIM_DEV_SYS ]; then
+ echo $NSIM_ID > /sys/bus/netdevsim/del_device
+ fi
+}
+
+function cleanup {
+ cleanup_tuns
+ cleanup_nsim
+}
+
+trap cleanup EXIT
+
+function new_vxlan {
+ local dev=$1
+ local dstport=$2
+ local lower=$3
+ local ipver=$4
+ local flags=$5
+
+ local group ipfl
+
+ [ "$ipver" != '6' ] && group=239.1.1.1 || group=fff1::1
+ [ "$ipver" != '6' ] || ipfl="-6"
+
+ [[ ! "$flags" =~ "external" ]] && flags="$flags id $((VNI_GEN++))"
+
+ ip $ipfl link add $dev type vxlan \
+ group $group \
+ dev $lower \
+ dstport $dstport \
+ $flags
+
+ ip link set dev $dev up
+
+ clean_up_devs=("${clean_up_devs[@]}" $dev)
+
+ check_tables
+}
+
+function new_geneve {
+ local dev=$1
+ local dstport=$2
+ local ipver=$3
+ local flags=$4
+
+ local remote ipfl
+
+ [ "$ipver" != '6' ] && remote=1.1.1.2 || remote=::2
+ [ "$ipver" != '6' ] || ipfl="-6"
+
+ [[ ! "$flags" =~ "external" ]] && flags="$flags vni $((VNI_GEN++))"
+
+ ip $ipfl link add $dev type geneve \
+ remote $remote \
+ dstport $dstport \
+ $flags
+
+ ip link set dev $dev up
+
+ clean_up_devs=("${clean_up_devs[@]}" $dev)
+
+ check_tables
+}
+
+function del_dev {
+ local dev=$1
+
+ ip link del dev $dev
+ check_tables
+}
+
+# Helpers for netdevsim port/type encoding
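+# mke packs a (port, tunnel type) pair into a single integer, port in the
+# upper 16 bits and type in the lower 16; pre and pre_ethtool print such a
+# value back in human-readable form.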
+function mke {
+ local port=$1
+ local type=$2
+
+ echo $((port << 16 | type))
+}
+
+function pre {
+ local val=$1
+
+ echo -e "port: $((val >> 16))\ttype: $((val & 0xffff))"
+}
+
+function pre_ethtool {
+ local val=$1
+ local port=$((val >> 16))
+ local type=$((val & 0xffff))
+
+ case $type in
+ 1)
+ type_name="vxlan"
+ ;;
+ 2)
+ type_name="geneve"
+ ;;
+ 4)
+ type_name="vxlan-gpe"
+ ;;
+ *)
+ type_name="bit X"
+ ;;
+ esac
+
+ echo "port $port, $type_name"
+}
+
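+# Compare the contents of udp_ports_table$1 in debugfs (and, when ethtool
+# supports it, the ethtool --show-tunnels output) against the expected array.
+# A mismatch makes the caller retry unless $last is non-zero, in which case
+# it is counted as an error.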
+function check_table {
+ local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+ local -n expected=$2
+ local last=$3
+
+ read -a have < $path
+
+ if [ ${#expected[@]} -ne ${#have[@]} ]; then
+ echo "check_table: BAD NUMBER OF ITEMS"
+ return 0
+ fi
+
+ for i in "${!expected[@]}"; do
+ if [ -n "$HAS_ETHTOOL" -a ${expected[i]} -ne 0 ]; then
+ pp_expected=`pre_ethtool ${expected[i]}`
+ ethtool --show-tunnels $NSIM_NETDEV | grep "$pp_expected" >/dev/null
+ if [ $? -ne 0 -a $last -ne 0 ]; then
+ err_cnt "ethtool table $1 on port $port: $pfx - $msg"
+ echo " check_table: ethtool does not contain '$pp_expected'"
+ ethtool --show-tunnels $NSIM_NETDEV
+ return 0
+
+ fi
+ fi
+
+ if [ ${expected[i]} != ${have[i]} ]; then
+ if [ $last -ne 0 ]; then
+ err_cnt "table $1 on port $port: $pfx - $msg"
+ echo " check_table: wrong entry $i"
+ echo " expected: `pre ${expected[i]}`"
+ echo " have: `pre ${have[i]}`"
+ return 0
+ fi
+ return 1
+ fi
+ done
+
+ pass_cnt
+ return 0
+}
+
+function check_tables {
+ # Need retries in case we have workqueue making the changes
+ local retries=10
+
+ while ! check_table 0 exp0 $((retries == 0)); do
+ sleep 0.02
+ ((retries--))
+ done
+ while ! check_table 1 exp1 $((retries == 0)); do
+ sleep 0.02
+ ((retries--))
+ done
+
+ if [ -n "$HAS_ETHTOOL" -a -n "${STATIC_ENTRIES[0]}" ]; then
+ fail=0
+ for i in "${!STATIC_ENTRIES[@]}"; do
+ pp_expected=`pre_ethtool ${STATIC_ENTRIES[i]}`
+ cnt=$(ethtool --show-tunnels $NSIM_NETDEV | grep -c "$pp_expected")
+ if [ $cnt -ne 1 ]; then
+ err_cnt "ethtool static entry: $pfx - $msg"
+ echo " check_table: ethtool does not contain '$pp_expected'"
+ ethtool --show-tunnels $NSIM_NETDEV
+ fail=1
+ fi
+ done
+ [ $fail == 0 ] && pass_cnt
+ fi
+}
+
+function print_table {
+ local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+ read -a have < $path
+
+ tree $NSIM_DEV_DFS/
+
+ echo "Port $port table $1:"
+
+ for i in "${!have[@]}"; do
+ echo " `pre ${have[i]}`"
+ done
+
+}
+
+function print_tables {
+ print_table 0
+ print_table 1
+}
+
+function get_netdev_name {
+ local -n old=$1
+
+ new=$(ls /sys/class/net)
+
+ for netdev in $new; do
+ for check in $old; do
+ [ $netdev == $check ] && break
+ done
+
+ if [ $netdev != $check ]; then
+ echo $netdev
+ break
+ fi
+ done
+}
+
+###
+### Code start
+###
+
+# Probe ethtool support
+ethtool -h | grep show-tunnels 2>&1 >/dev/null && HAS_ETHTOOL=y
+
+modprobe netdevsim
+
+# Basic test
+pfx="basic"
+
+for port in 0 1; do
+ old_netdevs=$(ls /sys/class/net)
+ if [ $port -eq 0 ]; then
+ echo $NSIM_ID > /sys/bus/netdevsim/new_device
+ else
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ echo 1 > $NSIM_DEV_SYS/new_port
+ fi
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+ msg="new NIC device created"
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ check_tables
+
+ msg="VxLAN v4 devices"
+ exp0=( `mke 4789 1` 0 0 0 )
+ new_vxlan vxlan0 4789 $NSIM_NETDEV
+ new_vxlan vxlan1 4789 $NSIM_NETDEV
+
+ msg="VxLAN v4 devices go down"
+ exp0=( 0 0 0 0 )
+ ifconfig vxlan1 down
+ ifconfig vxlan0 down
+ check_tables
+
+ msg="VxLAN v6 devices"
+ exp0=( `mke 4789 1` 0 0 0 )
+ new_vxlan vxlanA 4789 $NSIM_NETDEV 6
+
+ for ifc in vxlan0 vxlan1; do
+ ifconfig $ifc up
+ done
+
+ new_vxlan vxlanB 4789 $NSIM_NETDEV 6
+
+ msg="another VxLAN v6 device"
+ exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+ new_vxlan vxlanC 4790 $NSIM_NETDEV 6
+
+ msg="Geneve device"
+ exp1=( `mke 6081 2` 0 0 0 )
+ new_geneve gnv0 6081
+
+ msg="NIC device goes down"
+ ifconfig $NSIM_NETDEV down
+ if [ $port -eq 1 ]; then
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ fi
+ check_tables
+ msg="NIC device goes up again"
+ ifconfig $NSIM_NETDEV up
+ exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+ exp1=( `mke 6081 2` 0 0 0 )
+ check_tables
+
+ cleanup_tuns
+
+ msg="tunnels destroyed"
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ check_tables
+
+ modprobe -r geneve
+ modprobe -r vxlan
+ modprobe -r udp_tunnel
+
+ check_tables
+done
+
+modprobe -r netdevsim
+
+# Module tests
+pfx="module tests"
+
+if modinfo netdevsim | grep udp_tunnel >/dev/null; then
+ err_cnt "netdevsim depends on udp_tunnel"
+else
+ pass_cnt
+fi
+
+modprobe netdevsim
+
+old_netdevs=$(ls /sys/class/net)
+port=0
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep
+echo 0 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+msg="create VxLANs"
+exp0=( 0 0 0 0 ) # sleep is longer than our wait
+new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+modprobe -r vxlan
+modprobe -r udp_tunnel
+
+msg="remove tunnels"
+exp0=( 0 0 0 0 )
+check_tables
+
+msg="create VxLANs"
+exp0=( 0 0 0 0 ) # sleep is longer than our wait
+new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+exp0=( 0 0 0 0 )
+
+modprobe -r netdevsim
+modprobe netdevsim
+
+# Overflow the table
+
+function overflow_table0 {
+ local pfx=$1
+
+ msg="create VxLANs 1/5"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="create VxLANs 2/5"
+ exp0=( `mke 10000 1` `mke 10001 1` 0 0 )
+ new_vxlan vxlan1 10001 $NSIM_NETDEV
+
+ msg="create VxLANs 3/5"
+ exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` 0 )
+ new_vxlan vxlan2 10002 $NSIM_NETDEV
+
+ msg="create VxLANs 4/5"
+ exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` `mke 10003 1` )
+ new_vxlan vxlan3 10003 $NSIM_NETDEV
+
+ msg="create VxLANs 5/5"
+ new_vxlan vxlan4 10004 $NSIM_NETDEV
+}
+
+function overflow_table1 {
+ local pfx=$1
+
+ msg="create GENEVE 1/5"
+ exp1=( `mke 20000 2` 0 0 0 )
+ new_geneve gnv0 20000
+
+ msg="create GENEVE 2/5"
+ exp1=( `mke 20000 2` `mke 20001 2` 0 0 )
+ new_geneve gnv1 20001
+
+ msg="create GENEVE 3/5"
+ exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` 0 )
+ new_geneve gnv2 20002
+
+ msg="create GENEVE 4/5"
+ exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` `mke 20003 2` )
+ new_geneve gnv3 20003
+
+ msg="create GENEVE 5/5"
+ new_geneve gnv4 20004
+}
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ overflow_table0 "overflow NIC table"
+ overflow_table1 "overflow NIC table"
+
+ msg="replace VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+ del_dev vxlan1
+
+ msg="vacate VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+ del_dev vxlan2
+
+ msg="replace GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` )
+ del_dev gnv1
+
+ msg="vacate GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` )
+ del_dev gnv2
+
+ msg="table sharing - share"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` )
+ new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external"
+
+ msg="table sharing - overflow"
+ new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external"
+ msg="table sharing - overflow v6"
+ new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external"
+
+ exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` )
+ del_dev gnv4
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Sync all
+pfx="sync all"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ overflow_table0 "overflow NIC table"
+ overflow_table1 "overflow NIC table"
+
+ msg="replace VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+ del_dev vxlan1
+
+ msg="vacate VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+ del_dev vxlan2
+
+ msg="replace GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` )
+ del_dev gnv1
+
+ msg="vacate GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` )
+ del_dev gnv2
+
+ msg="table sharing - share"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` )
+ new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external"
+
+ msg="table sharing - overflow"
+ new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external"
+ msg="table sharing - overflow v6"
+ new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external"
+
+ exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` )
+ del_dev gnv4
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Destroy full NIC
+pfx="destroy full"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ overflow_table0 "destroy NIC"
+ overflow_table1 "destroy NIC"
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# IPv4 only
+pfx="IPv4 only"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ msg="create VxLANs v6"
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="create VxLANs v6"
+ new_vxlan vxlanA1 10000 $NSIM_NETDEV 6
+
+ ip link set dev vxlanA0 down
+ ip link set dev vxlanA0 up
+ check_tables
+
+ msg="create VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="down VxLANs v4"
+ exp0=( 0 0 0 0 )
+ ip link set dev vxlan0 down
+ check_tables
+
+ msg="up VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ ip link set dev vxlan0 up
+ check_tables
+
+ msg="destroy VxLANs v4"
+ exp0=( 0 0 0 0 )
+ del_dev vxlan0
+
+ msg="recreate VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ del_dev vxlanA0
+ del_dev vxlanA1
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Failures
+pfx="error injection"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+
+ msg="1 - create VxLANs v6"
+ exp0=( 0 0 0 0 )
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="1 - create VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="1 - remove VxLANs v4"
+ del_dev vxlan0
+
+ msg="1 - remove VxLANs v6"
+ exp0=( 0 0 0 0 )
+ del_dev vxlanA0
+
+ msg="2 - create GENEVE"
+ exp1=( `mke 20000 2` 0 0 0 )
+ new_geneve gnv0 20000
+
+ msg="2 - destroy GENEVE"
+ echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+ exp1=( `mke 20000 2` 0 0 0 )
+ del_dev gnv0
+
+ msg="2 - create second GENEVE"
+ exp1=( 0 `mke 20001 2` 0 0 )
+ new_geneve gnv0 20001
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# netdev flags
+pfx="netdev flags"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ msg="create VxLANs v6"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="create VxLANs v4"
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="turn off"
+ exp0=( 0 0 0 0 )
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+ check_tables
+
+ msg="turn on"
+ exp0=( `mke 10000 1` 0 0 0 )
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+ check_tables
+
+ msg="remove both"
+ del_dev vxlanA0
+ exp0=( 0 0 0 0 )
+ del_dev vxlan0
+ check_tables
+
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+
+ msg="create VxLANs v4 - off"
+ exp0=( 0 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="created off - turn on"
+ exp0=( `mke 10000 1` 0 0 0 )
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+ check_tables
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# device initiated reset
+pfx="reset notification"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ msg="create VxLANs v6"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="create VxLANs v4"
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ check_tables
+
+ msg="NIC device goes down"
+ ifconfig $NSIM_NETDEV down
+ if [ $port -eq 1 ]; then
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ fi
+ check_tables
+
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ check_tables
+
+ msg="NIC device goes up again"
+ ifconfig $NSIM_NETDEV up
+ exp0=( `mke 10000 1` 0 0 0 )
+ check_tables
+
+ msg="remove both"
+ del_dev vxlanA0
+ exp0=( 0 0 0 0 )
+ del_dev vxlan0
+ check_tables
+
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ check_tables
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# shared port tables
+pfx="table sharing"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+echo 0 > $NSIM_DEV_DFS/udp_ports_open_only
+echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+echo 1 > $NSIM_DEV_DFS/udp_ports_shared
+
+old_netdevs=$(ls /sys/class/net)
+echo 1 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+old_netdevs=$(ls /sys/class/net)
+echo 2 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV2=`get_netdev_name old_netdevs`
+
+msg="VxLAN v4 devices"
+exp0=( `mke 4789 1` 0 0 0 )
+exp1=( 0 0 0 0 )
+new_vxlan vxlan0 4789 $NSIM_NETDEV
+new_vxlan vxlan1 4789 $NSIM_NETDEV2
+
+msg="VxLAN v4 devices go down"
+exp0=( 0 0 0 0 )
+ifconfig vxlan1 down
+ifconfig vxlan0 down
+check_tables
+
+for ifc in vxlan0 vxlan1; do
+ ifconfig $ifc up
+done
+
+msg="VxLAN v6 device"
+exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+new_vxlan vxlanC 4790 $NSIM_NETDEV 6
+
+msg="Geneve device"
+exp1=( `mke 6081 2` 0 0 0 )
+new_geneve gnv0 6081
+
+msg="NIC device goes down"
+ifconfig $NSIM_NETDEV down
+check_tables
+
+msg="NIC device goes up again"
+ifconfig $NSIM_NETDEV up
+check_tables
+
+for i in `seq 2`; do
+ msg="turn feature off - 1, rep $i"
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+ check_tables
+
+ msg="turn feature off - 2, rep $i"
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload off
+ check_tables
+
+ msg="turn feature on - 1, rep $i"
+ exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+ exp1=( `mke 6081 2` 0 0 0 )
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+ check_tables
+
+ msg="turn feature on - 2, rep $i"
+ ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload on
+ check_tables
+done
+
+msg="tunnels destroyed 1"
+cleanup_tuns
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+overflow_table0 "overflow NIC table"
+
+msg="re-add a port"
+
+echo 2 > $NSIM_DEV_SYS/del_port
+echo 2 > $NSIM_DEV_SYS/new_port
+check_tables
+
+msg="replace VxLAN in overflow table"
+exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+del_dev vxlan1
+
+msg="vacate VxLAN in overflow table"
+exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+del_dev vxlan2
+
+echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+check_tables
+
+msg="tunnels destroyed 2"
+cleanup_tuns
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+echo 1 > $NSIM_DEV_SYS/del_port
+echo 2 > $NSIM_DEV_SYS/del_port
+
+cleanup_nsim
+
+# Static IANA port
+pfx="static IANA vxlan"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+echo 1 > $NSIM_DEV_DFS/udp_ports_static_iana_vxlan
+STATIC_ENTRIES=( `mke 4789 1` )
+
+port=1
+old_netdevs=$(ls /sys/class/net)
+echo $port > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+msg="check empty"
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+msg="add on static port"
+new_vxlan vxlan0 4789 $NSIM_NETDEV
+new_vxlan vxlan1 4789 $NSIM_NETDEV
+
+msg="add on different port"
+exp0=( `mke 4790 1` 0 0 0 )
+new_vxlan vxlan2 4790 $NSIM_NETDEV
+
+cleanup_tuns
+
+msg="tunnels destroyed"
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+msg="different type"
+new_geneve gnv0 4789
+
+cleanup_tuns
+cleanup_nsim
+
+# END
+
+modprobe -r netdevsim
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $num_cases checks"
+else
+ echo "FAILED $num_errors/$num_cases checks"
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
new file mode 100755
index 000000000000..beee0d5646a6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -0,0 +1,316 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2020 NXP Semiconductors
+
+WAIT_TIME=1
+NUM_NETIFS=4
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+require_command tcpdump
+
+#
+# +---------------------------------------------+
+# | DUT ports Generator ports |
+# | +--------+ +--------+ +--------+ +--------+ |
+# | | | | | | | | | |
+# | | eth0 | | eth1 | | eth2 | | eth3 | |
+# | | | | | | | | | |
+# +-+--------+-+--------+-+--------+-+--------+-+
+# | | | |
+# | | | |
+# | +-----------+ |
+# | |
+# +--------------------------------+
+
+eth0=${NETIFS[p1]}
+eth1=${NETIFS[p2]}
+eth2=${NETIFS[p3]}
+eth3=${NETIFS[p4]}
+
+eth0_mac="de:ad:be:ef:00:00"
+eth1_mac="de:ad:be:ef:00:01"
+eth2_mac="de:ad:be:ef:00:02"
+eth3_mac="de:ad:be:ef:00:03"
+
+# Helpers to map a VCAP IS1 and VCAP IS2 lookup and policy to a chain number
+# used by the kernel driver. The numbers are:
+# VCAP IS1 lookup 0: 10000
+# VCAP IS1 lookup 1: 11000
+# VCAP IS1 lookup 2: 12000
+# VCAP IS2 lookup 0 policy 0: 20000
+# VCAP IS2 lookup 0 policy 1: 20001
+# VCAP IS2 lookup 0 policy 255: 20255
+# VCAP IS2 lookup 1 policy 0: 21000
+# VCAP IS2 lookup 1 policy 1: 21001
+# VCAP IS2 lookup 1 policy 255: 21255
+IS1()
+{
+ local lookup=$1
+
+ echo $((10000 + 1000 * lookup))
+}
+
+IS2()
+{
+ local lookup=$1
+ local pag=$2
+
+ echo $((20000 + 1000 * lookup + pag))
+}
+
+ES0()
+{
+ echo 0
+}
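+
+# Sanity examples for the helpers above (illustrative only):
+#   $(IS1 2)   is chain 12000 (IS1 lookup 2)
+#   $(IS2 1 3) is chain 21003 (IS2 lookup 1, PAG 3)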
+
+# The Ocelot switches have a fixed ingress pipeline composed of:
+#
+# +----------------------------------------------+ +-----------------------------------------+
+# | VCAP IS1 | | VCAP IS2 |
+# | | | |
+# | +----------+ +----------+ +----------+ | | +----------+ +----------+ |
+# | | Lookup 0 | | Lookup 1 | | Lookup 2 | | --+------> PAG 0: | Lookup 0 | -> | Lookup 1 | |
+# | +----------+ -> +----------+ -> +----------+ | | | +----------+ +----------+ |
+# | |key&action| |key&action| |key&action| | | | |key&action| |key&action| |
+# | |key&action| |key&action| |key&action| | | | | .. | | .. | |
+# | | .. | | .. | | .. | | | | +----------+ +----------+ |
+# | +----------+ +----------+ +----------+ | | | |
+# | selects PAG | | | +----------+ +----------+ |
+# +----------------------------------------------+ +------> PAG 1: | Lookup 0 | -> | Lookup 1 | |
+# | | +----------+ +----------+ |
+# | | |key&action| |key&action| |
+# | | | .. | | .. | |
+# | | +----------+ +----------+ |
+# | | ... |
+# | | |
+# | | +----------+ +----------+ |
+# +----> PAG 254: | Lookup 0 | -> | Lookup 1 | |
+# | | +----------+ +----------+ |
+# | | |key&action| |key&action| |
+# | | | .. | | .. | |
+# | | +----------+ +----------+ |
+# | | |
+# | | +----------+ +----------+ |
+# +----> PAG 255: | Lookup 0 | -> | Lookup 1 | |
+# | +----------+ +----------+ |
+# | |key&action| |key&action| |
+# | | .. | | .. | |
+# | +----------+ +----------+ |
+# +-----------------------------------------+
+#
+# Both the VCAP IS1 (Ingress Stage 1) and IS2 (Ingress Stage 2) are indexed
+# (looked up) multiple times: IS1 3 times, and IS2 2 times. Each filter
+# (key and action pair) can be configured to only match during the first, or
+# second, etc., lookup.
+#
+# During one TCAM lookup, the filter processing stops at the first entry that
+# matches, then the pipeline jumps to the next lookup.
+# The driver maps each individual lookup of each individual ingress TCAM to a
+# separate chain number. For correct rule offloading, it is mandatory that each
+# filter installed in one TCAM is terminated by a non-optional GOTO action to
+# the next lookup from the fixed pipeline.
+#
+# A chain can only be used if there is a GOTO action correctly set up from the
+# prior lookup in the processing pipeline. Setting up all chains is not
+# mandatory.
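+#
+# For example, an IS1 lookup 0 rule must end in a GOTO to lookup 1 to be
+# offloadable (hypothetical rule, not installed by this test):
+#
+#   tc filter add dev $eth0 ingress chain $(IS1 0) pref 1 flower skip_sw \
+#           dst_ip 10.0.0.1 action skbedit priority 3 \
+#           action goto chain $(IS1 1)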
+
+# NOTE: VCAP IS1 currently uses only S1_NORMAL half keys and VCAP IS2
+# dynamically chooses between MAC_ETYPE, ARP, IP4_TCP_UDP, IP4_OTHER, which are
+# all half keys as well.
+
+create_tcam_skeleton()
+{
+ local eth=$1
+
+ tc qdisc add dev $eth clsact
+
+ # VCAP IS1 is the Ingress Classification TCAM and can offload the
+ # following actions:
+ # - skbedit priority
+ # - vlan pop
+ # - vlan modify
+ # - goto (only in lookup 2, the last IS1 lookup)
+ tc filter add dev $eth ingress chain 0 pref 49152 flower \
+ skip_sw action goto chain $(IS1 0)
+ tc filter add dev $eth ingress chain $(IS1 0) pref 49152 \
+ flower skip_sw action goto chain $(IS1 1)
+ tc filter add dev $eth ingress chain $(IS1 1) pref 49152 \
+ flower skip_sw action goto chain $(IS1 2)
+ tc filter add dev $eth ingress chain $(IS1 2) pref 49152 \
+ flower skip_sw action goto chain $(IS2 0 0)
+
+ # VCAP IS2 is the Security Enforcement ingress TCAM and can offload the
+ # following actions:
+ # - trap
+ # - drop
+ # - police
+ # The two VCAP IS2 lookups can be segmented into up to 256 groups of
+ # rules, called Policies. A Policy is selected through the Policy
+ # Association Group (PAG) action of VCAP IS1 (which is the
+ # GOTO offload).
+ tc filter add dev $eth ingress chain $(IS2 0 0) pref 49152 \
+ flower skip_sw action goto chain $(IS2 1 0)
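+	# (Not exercised here: pointing the IS1 lookup 2 GOTO above at e.g.
+	# $(IS2 0 5) instead of $(IS2 0 0) would steer matching packets into
+	# PAG 5 for the IS2 lookups.)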
+}
+
+setup_prepare()
+{
+ create_tcam_skeleton $eth0
+
+ ip link add br0 type bridge
+ ip link set $eth0 master br0
+ ip link set $eth1 master br0
+ ip link set br0 up
+
+ ip link add link $eth3 name $eth3.100 type vlan id 100
+ ip link set $eth3.100 up
+
+ ip link add link $eth3 name $eth3.200 type vlan id 200
+ ip link set $eth3.200 up
+
+ tc filter add dev $eth0 ingress chain $(IS1 1) pref 1 \
+ protocol 802.1Q flower skip_sw vlan_id 100 \
+ action vlan pop \
+ action goto chain $(IS1 2)
+
+ tc filter add dev $eth0 egress chain $(ES0) pref 1 \
+ flower skip_sw indev $eth1 \
+ action vlan push protocol 802.1Q id 100
+
+ tc filter add dev $eth0 ingress chain $(IS1 0) pref 2 \
+ protocol ipv4 flower skip_sw src_ip 10.1.1.2 \
+ action skbedit priority 7 \
+ action goto chain $(IS1 1)
+
+ tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \
+ protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \
+ action police rate 50mbit burst 64k \
+ action goto chain $(IS2 1 0)
+}
+
+cleanup()
+{
+ ip link del $eth3.200
+ ip link del $eth3.100
+ tc qdisc del dev $eth0 clsact
+ ip link del br0
+}
+
+test_vlan_pop()
+{
+ printf "Testing VLAN pop.. "
+
+ tcpdump_start $eth2
+
+ # Work around Mausezahn VLAN builder bug
+ # (https://github.com/netsniff-ng/netsniff-ng/issues/225) by using
+ # an 8021q upper
+ $MZ $eth3.100 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip
+
+ sleep 1
+
+ tcpdump_stop
+
+ if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"; then
+ echo "OK"
+ else
+ echo "FAIL"
+ fi
+
+ tcpdump_cleanup
+}
+
+test_vlan_push()
+{
+ printf "Testing VLAN push.. "
+
+ tcpdump_start $eth3.100
+
+ $MZ $eth2 -q -c 1 -p 64 -a $eth2_mac -b $eth3_mac -t ip
+
+ sleep 1
+
+ tcpdump_stop
+
+ if tcpdump_show | grep -q "$eth2_mac > $eth3_mac"; then
+ echo "OK"
+ else
+ echo "FAIL"
+ fi
+
+ tcpdump_cleanup
+}
+
+test_vlan_modify()
+{
+ printf "Testing VLAN modification.. "
+
+ ip link set br0 type bridge vlan_filtering 1
+ bridge vlan add dev $eth0 vid 200
+ bridge vlan add dev $eth0 vid 300
+ bridge vlan add dev $eth1 vid 300
+
+ tc filter add dev $eth0 ingress chain $(IS1 2) pref 3 \
+ protocol 802.1Q flower skip_sw vlan_id 200 \
+ action vlan modify id 300 \
+ action goto chain $(IS2 0 0)
+
+ tcpdump_start $eth2
+
+ $MZ $eth3.200 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip
+
+ sleep 1
+
+ tcpdump_stop
+
+ if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
+ echo "OK"
+ else
+ echo "FAIL"
+ fi
+
+ tcpdump_cleanup
+
+ tc filter del dev $eth0 ingress chain $(IS1 2) pref 3
+
+ bridge vlan del dev $eth0 vid 200
+ bridge vlan del dev $eth0 vid 300
+ bridge vlan del dev $eth1 vid 300
+ ip link set br0 type bridge vlan_filtering 0
+}
+
+test_skbedit_priority()
+{
+ local num_pkts=100
+
+ printf "Testing frame prioritization.. "
+
+ before=$(ethtool_stats_get $eth0 'rx_green_prio_7')
+
+ $MZ $eth3 -q -c $num_pkts -p 64 -a $eth3_mac -b $eth2_mac -t ip -A 10.1.1.2
+
+ after=$(ethtool_stats_get $eth0 'rx_green_prio_7')
+
+ if [ $((after - before)) = $num_pkts ]; then
+ echo "OK"
+ else
+ echo "FAIL"
+ fi
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+ test_vlan_pop
+ test_vlan_push
+ test_vlan_modify
+ test_skbedit_priority
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
index c078ece12ff0..9e2f00343f15 100644
--- a/tools/testing/selftests/exec/.gitignore
+++ b/tools/testing/selftests/exec/.gitignore
@@ -7,5 +7,8 @@ execveat.moved
execveat.path.ephemeral
execveat.ephemeral
execveat.denatured
+/load_address_*
/recursion-depth
xxxxxxxx*
+pipe
+S_I*.test
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 33339e31e365..cf69b2fcce59 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -3,14 +3,16 @@ CFLAGS = -Wall
CFLAGS += -Wno-nonnull
CFLAGS += -D_GNU_SOURCE
-TEST_GEN_PROGS := execveat
-TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
+TEST_PROGS := binfmt_script non-regular
+TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216
+TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe
# Makefile is a run-time dependency, since it's accessed by the execveat test
TEST_FILES := Makefile
TEST_GEN_PROGS += recursion-depth
-EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx*
+EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* \
+ $(OUTPUT)/S_I*.test
include ../lib.mk
@@ -25,4 +27,9 @@ $(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
$(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
cp $< $@
chmod -x $@
-
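+# -Wl,-z,max-page-size sets the ELF segment alignment (p_align) baked into
+# each PIE; load_address.c verifies the loader honored it (4K, 2M, 16M).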
+$(OUTPUT)/load_address_4096: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@
+$(OUTPUT)/load_address_2097152: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@
+$(OUTPUT)/load_address_16777216: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@
diff --git a/tools/testing/selftests/exec/binfmt_script b/tools/testing/selftests/exec/binfmt_script
new file mode 100755
index 000000000000..05f94a741c7a
--- /dev/null
+++ b/tools/testing/selftests/exec/binfmt_script
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that truncation of bprm->buf doesn't cause unexpected exec paths, along
+# with various other pathological cases.
+import os, subprocess
+
+# Relevant commits
+#
+# b5372fe5dc84 ("exec: load_script: Do not exec truncated interpreter path")
+# 6eb3c3d0a52d ("exec: increase BINPRM_BUF_SIZE to 256")
+
+# BINPRM_BUF_SIZE
+SIZE=256
+
+NAME_MAX=int(subprocess.check_output(["getconf", "NAME_MAX", "."]))
+
+test_num=0
+
+code='''#!/usr/bin/perl
+print "Executed interpreter! Args:\n";
+print "0 : '$0'\n";
+$counter = 1;
+foreach my $a (@ARGV) {
+ print "$counter : '$a'\n";
+ $counter++;
+}
+'''
+
+##
+# test - produce a binfmt_script hashbang line for testing
+#
+# @size: bytes for bprm->buf line, including hashbang but not newline
+# @good: whether this script is expected to execute correctly
+# @hashbang: the special 2 bytes for running binfmt_script
+# @leading: any leading whitespace before the executable path
+# @root: start of executable pathname
+# @target: end of executable pathname
+# @arg: bytes following the executable pathname
+# @fill: character to fill between @root and @target to reach @size bytes
+# @newline: character to use as newline, not counted towards @size
+# ...
+def test(name, size, good=True, leading="", root="./", target="/perl",
+ fill="A", arg="", newline="\n", hashbang="#!"):
+ global test_num, tests, NAME_MAX
+ test_num += 1
+ if test_num > tests:
+ raise ValueError("more binfmt_script tests than expected! (want %d, expected %d)"
+ % (test_num, tests))
+
+ middle = ""
+ remaining = size - len(hashbang) - len(leading) - len(root) - len(target) - len(arg)
+ # The middle of the pathname must not exceed NAME_MAX
+ while remaining >= NAME_MAX:
+ middle += fill * (NAME_MAX - 1)
+ middle += '/'
+ remaining -= NAME_MAX
+ middle += fill * remaining
+
+ dirpath = root + middle
+ binary = dirpath + target
+ if len(target):
+ os.makedirs(dirpath, mode=0o755, exist_ok=True)
+ open(binary, "w").write(code)
+ os.chmod(binary, 0o755)
+
+ buf=hashbang + leading + root + middle + target + arg + newline
+ if len(newline) > 0:
+ buf += 'echo this is not really perl\n'
+
+ script = "binfmt_script-%s" % (name)
+ open(script, "w").write(buf)
+ os.chmod(script, 0o755)
+
+ proc = subprocess.Popen(["./%s" % (script)], shell=True,
+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ stdout = proc.communicate()[0]
+
+ if proc.returncode == 0 and b'Executed interpreter' in stdout:
+ if good:
+ print("ok %d - binfmt_script %s (successful good exec)"
+ % (test_num, name))
+ else:
+ print("not ok %d - binfmt_script %s succeeded when it should have failed"
+ % (test_num, name))
+ else:
+ if good:
+ print("not ok %d - binfmt_script %s failed when it should have succeeded (rc:%d)"
+ % (test_num, name, proc.returncode))
+ else:
+ print("ok %d - binfmt_script %s (correctly failed bad exec)"
+ % (test_num, name))
+
+ # Clean up crazy binaries
+ os.unlink(script)
+ if len(target):
+ elements = binary.split('/')
+ os.unlink(binary)
+ elements.pop()
+ while len(elements) > 1:
+ os.rmdir("/".join(elements))
+ elements.pop()
+
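+# For example (illustrative only), test(name="demo", size=20) creates an
+# interpreter at ./AAAAAAAAAAA/perl and a script whose first line is
+# "#!./AAAAAAAAAAA/perl": 2 + 2 + 11 + 5 = 20 bytes before the newline.
+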
+tests=27
+print("TAP version 1.3")
+print("1..%d" % (tests))
+
+### FAIL (8 tests)
+
+# Entire path is well past BINPRM_BUF_SIZE.
+test(name="too-big", size=SIZE+80, good=False)
+# Path is right at max size, making it impossible to tell if it was truncated.
+test(name="exact", size=SIZE, good=False)
+# Same as above, but with leading whitespace.
+test(name="exact-space", size=SIZE, good=False, leading=" ")
+# Huge buffer of only whitespace.
+test(name="whitespace-too-big", size=SIZE+71, good=False, root="",
+ fill=" ", target="")
+# A good path, but it gets truncated due to leading whitespace.
+test(name="truncated", size=SIZE+17, good=False, leading=" " * 19)
+# Entirely empty except for #!
+test(name="empty", size=2, good=False, root="",
+ fill="", target="", newline="")
+# Within size, but entirely spaces
+test(name="spaces", size=SIZE-1, good=False, root="", fill=" ",
+ target="", newline="")
+# Newline before binary.
+test(name="newline-prefix", size=SIZE-1, good=False, leading="\n",
+ root="", fill=" ", target="")
+
+### ok (19 tests)
+
+# The original test case that was broken by commit:
+# 8099b047ecc4 ("exec: load_script: don't blindly truncate shebang string")
+test(name="test.pl", size=439, leading=" ",
+ root="./nix/store/bwav8kz8b3y471wjsybgzw84mrh4js9-perl-5.28.1/bin",
+ arg=" -I/nix/store/x6yyav38jgr924nkna62q3pkp0dgmzlx-perl5.28.1-File-Slurp-9999.25/lib/perl5/site_perl -I/nix/store/ha8v67sl8dac92r9z07vzr4gv1y9nwqz-perl5.28.1-Net-DBus-1.1.0/lib/perl5/site_perl -I/nix/store/dcrkvnjmwh69ljsvpbdjjdnqgwx90a9d-perl5.28.1-XML-Parser-2.44/lib/perl5/site_perl -I/nix/store/rmji88k2zz7h4zg97385bygcydrf2q8h-perl5.28.1-XML-Twig-3.52/lib/perl5/site_perl")
+# One byte under size, leaving newline visible.
+test(name="one-under", size=SIZE-1)
+# Two bytes under size, leaving newline visible.
+test(name="two-under", size=SIZE-2)
+# Exact size, but trailing whitespace visible instead of newline.
+test(name="exact-trunc-whitespace", size=SIZE, arg=" ")
+# Exact size, but trailing space and first arg char visible instead of newline.
+test(name="exact-trunc-arg", size=SIZE, arg=" f")
+# One byte under, with confirmed non-truncated arg since newline is now visible.
+test(name="one-under-full-arg", size=SIZE-1, arg=" f")
+# Short read buffer by one byte.
+test(name="one-under-no-nl", size=SIZE-1, newline="")
+# Short read buffer by half buffer size.
+test(name="half-under-no-nl", size=int(SIZE/2), newline="")
+# One byte under with whitespace arg, leaving newline visible.
+test(name="one-under-trunc-arg", size=SIZE-1, arg=" ")
+# One byte under with leading whitespace, leaving newline visible.
+test(name="one-under-leading", size=SIZE-1, leading=" ")
+# One byte under with leading whitespace and whitespace arg, leaving newline visible.
+test(name="one-under-leading-trunc-arg", size=SIZE-1, leading=" ", arg=" ")
+# Same as above, but with 2 bytes under
+test(name="two-under-no-nl", size=SIZE-2, newline="")
+test(name="two-under-trunc-arg", size=SIZE-2, arg=" ")
+test(name="two-under-leading", size=SIZE-2, leading=" ")
+test(name="two-under-leading-trunc-arg", size=SIZE-2, leading=" ", arg=" ")
+# Same as above, but with buffer half filled
+test(name="two-under-no-nl", size=int(SIZE/2), newline="")
+test(name="two-under-trunc-arg", size=int(SIZE/2), arg=" ")
+test(name="two-under-leading", size=int(SIZE/2), leading=" ")
+test(name="two-under-lead-trunc-arg", size=int(SIZE/2), leading=" ", arg=" ")
+
+if test_num != tests:
+ raise ValueError("fewer binfmt_script tests than expected! (ran %d, expected %d"
+ % (test_num, tests))
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index cbb6efbdb786..67bf7254a48f 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -5,7 +5,9 @@
* Selftests for execveat(2).
*/
+#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* to get O_PATH, AT_EMPTY_PATH */
+#endif
#include <sys/sendfile.h>
#include <sys/stat.h>
#include <sys/syscall.h>
@@ -311,6 +313,10 @@ static int run_tests(void)
fail += check_execveat_fail(AT_FDCWD, fullname_symlink,
AT_SYMLINK_NOFOLLOW, ELOOP);
+	/* Non-regular file (a FIFO) must fail to exec with EACCES: */
+ fail += check_execveat_fail(dot_dfd, "pipe", 0, EACCES);
+ unlink("pipe");
+
/* Shell script wrapping executable file: */
/* dfd + path */
fail += check_execveat(subdir_dfd, "../script", 0);
@@ -384,6 +390,8 @@ static void prerequisites(void)
fd = open("subdir.ephemeral/script", O_RDWR|O_CREAT|O_TRUNC, 0755);
write(fd, script, strlen(script));
close(fd);
+
+ mkfifo("pipe", 0755);
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c
new file mode 100644
index 000000000000..d487c2f6a615
--- /dev/null
+++ b/tools/testing/selftests/exec/load_address.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <link.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+struct Statistics {
+ unsigned long long load_address;
+ unsigned long long alignment;
+};
+
+int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data)
+{
+ struct Statistics *stats = (struct Statistics *) data;
+ int i;
+
+ if (info->dlpi_name != NULL && info->dlpi_name[0] != '\0') {
+		// Anything with a name is not the main executable; stop iterating.
+ return 2;
+ }
+
+ stats->load_address = (unsigned long long) info->dlpi_addr;
+ stats->alignment = 0;
+
+ for (i = 0; i < info->dlpi_phnum; i++) {
+ if (info->dlpi_phdr[i].p_type != PT_LOAD)
+ continue;
+
+ if (info->dlpi_phdr[i].p_align > stats->alignment)
+ stats->alignment = info->dlpi_phdr[i].p_align;
+ }
+
+ return 1; // Terminate dl_iterate_phdr.
+}
+
+int main(int argc, char **argv)
+{
+ struct Statistics extracted;
+ unsigned long long misalign;
+ int ret;
+
+ ret = dl_iterate_phdr(ExtractStatistics, &extracted);
+ if (ret != 1) {
+ fprintf(stderr, "FAILED\n");
+ return 1;
+ }
+
+ if (extracted.alignment == 0) {
+ fprintf(stderr, "No alignment found\n");
+ return 1;
+ } else if (extracted.alignment & (extracted.alignment - 1)) {
+ fprintf(stderr, "Alignment is not a power of 2\n");
+ return 1;
+ }
+
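+	/*
+	 * Since alignment is a power of 2, an aligned load address has all
+	 * bits below it clear: e.g. for alignment 0x200000 the mask below is
+	 * 0x1fffff (illustrative value).
+	 */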
+ misalign = extracted.load_address & (extracted.alignment - 1);
+ if (misalign) {
+ printf("alignment = %llu, load_address = %llu\n",
+ extracted.alignment, extracted.load_address);
+ fprintf(stderr, "FAILED\n");
+ return 1;
+ }
+
+ fprintf(stderr, "PASS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/exec/non-regular.c b/tools/testing/selftests/exec/non-regular.c
new file mode 100644
index 000000000000..cd3a34aca93e
--- /dev/null
+++ b/tools/testing/selftests/exec/non-regular.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+#include "../kselftest_harness.h"
+
+/* Remove a file, ignoring the result if it didn't exist. */
+void rm(struct __test_metadata *_metadata, const char *pathname,
+ int is_dir)
+{
+ int rc;
+
+ if (is_dir)
+ rc = rmdir(pathname);
+ else
+ rc = unlink(pathname);
+
+ if (rc < 0) {
+ ASSERT_EQ(errno, ENOENT) {
+ TH_LOG("Not ENOENT: %s", pathname);
+ }
+ } else {
+ ASSERT_EQ(rc, 0) {
+ TH_LOG("Failed to remove: %s", pathname);
+ }
+ }
+}
+
+FIXTURE(file) {
+ char *pathname;
+ int is_dir;
+};
+
+FIXTURE_VARIANT(file)
+{
+ const char *name;
+ int expected;
+ int is_dir;
+ void (*setup)(struct __test_metadata *_metadata,
+ FIXTURE_DATA(file) *self,
+ const FIXTURE_VARIANT(file) *variant);
+ int major, minor, mode; /* for mknod() */
+};
+
+void setup_link(struct __test_metadata *_metadata,
+ FIXTURE_DATA(file) *self,
+ const FIXTURE_VARIANT(file) *variant)
+{
+ const char * const paths[] = {
+ "/bin/true",
+ "/usr/bin/true",
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(paths); i++) {
+ if (access(paths[i], X_OK) == 0) {
+ ASSERT_EQ(symlink(paths[i], self->pathname), 0);
+ return;
+ }
+ }
+ ASSERT_EQ(1, 0) {
+ TH_LOG("Could not find viable 'true' binary");
+ }
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFLNK)
+{
+ .name = "S_IFLNK",
+ .expected = ELOOP,
+ .setup = setup_link,
+};
+
+void setup_dir(struct __test_metadata *_metadata,
+ FIXTURE_DATA(file) *self,
+ const FIXTURE_VARIANT(file) *variant)
+{
+ ASSERT_EQ(mkdir(self->pathname, 0755), 0);
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFDIR)
+{
+ .name = "S_IFDIR",
+ .is_dir = 1,
+ .expected = EACCES,
+ .setup = setup_dir,
+};
+
+void setup_node(struct __test_metadata *_metadata,
+ FIXTURE_DATA(file) *self,
+ const FIXTURE_VARIANT(file) *variant)
+{
+ dev_t dev;
+ int rc;
+
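+	/* Build the dev_t from the variant's major/minor (e.g. 1:5 for /dev/zero). */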
+ dev = makedev(variant->major, variant->minor);
+ rc = mknod(self->pathname, 0755 | variant->mode, dev);
+ ASSERT_EQ(rc, 0) {
+ if (errno == EPERM)
+ SKIP(return, "Please run as root; cannot mknod(%s)",
+ variant->name);
+ }
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFBLK)
+{
+ .name = "S_IFBLK",
+ .expected = EACCES,
+ .setup = setup_node,
+ /* /dev/loop0 */
+ .major = 7,
+ .minor = 0,
+ .mode = S_IFBLK,
+};
+
+FIXTURE_VARIANT_ADD(file, S_IFCHR)
+{
+ .name = "S_IFCHR",
+ .expected = EACCES,
+ .setup = setup_node,
+ /* /dev/zero */
+ .major = 1,
+ .minor = 5,
+ .mode = S_IFCHR,
+};
+
+void setup_fifo(struct __test_metadata *_metadata,
+ FIXTURE_DATA(file) *self,
+ const FIXTURE_VARIANT(file) *variant)
+{
+ ASSERT_EQ(mkfifo(self->pathname, 0755), 0);
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFIFO)
+{
+ .name = "S_IFIFO",
+ .expected = EACCES,
+ .setup = setup_fifo,
+};
+
+FIXTURE_SETUP(file)
+{
+ ASSERT_GT(asprintf(&self->pathname, "%s.test", variant->name), 6);
+ self->is_dir = variant->is_dir;
+
+ rm(_metadata, self->pathname, variant->is_dir);
+ variant->setup(_metadata, self, variant);
+}
+
+FIXTURE_TEARDOWN(file)
+{
+ rm(_metadata, self->pathname, self->is_dir);
+}
+
+TEST_F(file, exec_errno)
+{
+ char * const argv[2] = { (char * const)self->pathname, NULL };
+
+ EXPECT_LT(execv(argv[0], argv), 0);
+ EXPECT_EQ(errno, variant->expected);
+}
+
+/* S_IFSOCK */
+FIXTURE(sock)
+{
+ int fd;
+};
+
+FIXTURE_SETUP(sock)
+{
+ self->fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(self->fd, 0);
+}
+
+FIXTURE_TEARDOWN(sock)
+{
+ if (self->fd >= 0)
+ ASSERT_EQ(close(self->fd), 0);
+}
+
+TEST_F(sock, exec_errno)
+{
+ char * const argv[2] = { " magic socket ", NULL };
+ char * const envp[1] = { NULL };
+
+ EXPECT_LT(fexecve(self->fd, argv, envp), 0);
+ EXPECT_EQ(errno, EACCES);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 8a6b507e34a8..1d27f52c61e6 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -21,7 +21,6 @@
#include <linux/android/binder.h>
#include <linux/android/binderfs.h>
-#include "../../kselftest.h"
#include "../../kselftest_harness.h"
#define DEFAULT_THREADS 4
@@ -37,37 +36,26 @@
fd = -EBADF; \
}
-#define log_exit(format, ...) \
- ({ \
- fprintf(stderr, format "\n", ##__VA_ARGS__); \
- exit(EXIT_FAILURE); \
- })
-
-static void change_mountns(void)
+static void change_mountns(struct __test_metadata *_metadata)
{
int ret;
ret = unshare(CLONE_NEWNS);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to unshare mount namespace\n",
- strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to unshare mount namespace",
+ strerror(errno));
+ }
ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to mount / as private\n",
- strerror(errno));
-}
-
-static void rmdir_protect_errno(const char *dir)
-{
- int saved_errno = errno;
- (void)rmdir(dir);
- errno = saved_errno;
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to mount / as private",
+ strerror(errno));
+ }
}
-static int __do_binderfs_test(void)
+static int __do_binderfs_test(struct __test_metadata *_metadata)
{
- int fd, ret, saved_errno;
+ int fd, ret, saved_errno, result = 1;
size_t len;
ssize_t wret;
struct binderfs_device device = { 0 };
@@ -75,113 +63,107 @@ static int __do_binderfs_test(void)
char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX",
device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME];
- change_mountns();
+ change_mountns(_metadata);
- if (!mkdtemp(binderfs_mntpt))
- ksft_exit_fail_msg(
- "%s - Failed to create binderfs mountpoint\n",
+ EXPECT_NE(mkdtemp(binderfs_mntpt), NULL) {
+ TH_LOG("%s - Failed to create binderfs mountpoint",
strerror(errno));
+ goto out;
+ }
ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
- if (ret < 0) {
- if (errno != ENODEV)
- ksft_exit_fail_msg("%s - Failed to mount binderfs\n",
- strerror(errno));
-
- rmdir_protect_errno(binderfs_mntpt);
- return 1;
+ EXPECT_EQ(ret, 0) {
+ if (errno == ENODEV)
+ XFAIL(goto out, "binderfs missing");
+ TH_LOG("%s - Failed to mount binderfs", strerror(errno));
+ goto rmdir;
}
- /* binderfs mount test passed */
- ksft_inc_pass_cnt();
+ /* success: binderfs mounted */
memcpy(device.name, "my-binder", strlen("my-binder"));
snprintf(device_path, sizeof(device_path), "%s/binder-control", binderfs_mntpt);
fd = open(device_path, O_RDONLY | O_CLOEXEC);
- if (fd < 0)
- ksft_exit_fail_msg(
- "%s - Failed to open binder-control device\n",
+ EXPECT_GE(fd, 0) {
+ TH_LOG("%s - Failed to open binder-control device",
strerror(errno));
+ goto umount;
+ }
ret = ioctl(fd, BINDER_CTL_ADD, &device);
saved_errno = errno;
close(fd);
errno = saved_errno;
- if (ret < 0) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg(
- "%s - Failed to allocate new binder device\n",
+ EXPECT_GE(ret, 0) {
+ TH_LOG("%s - Failed to allocate new binder device",
strerror(errno));
+ goto umount;
}
- ksft_print_msg(
- "Allocated new binder device with major %d, minor %d, and name %s\n",
+ TH_LOG("Allocated new binder device with major %d, minor %d, and name %s",
device.major, device.minor, device.name);
- /* binder device allocation test passed */
- ksft_inc_pass_cnt();
+ /* success: binder device allocation */
snprintf(device_path, sizeof(device_path), "%s/my-binder", binderfs_mntpt);
fd = open(device_path, O_CLOEXEC | O_RDONLY);
- if (fd < 0) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg("%s - Failed to open my-binder device\n",
- strerror(errno));
+ EXPECT_GE(fd, 0) {
+ TH_LOG("%s - Failed to open my-binder device",
+ strerror(errno));
+ goto umount;
}
ret = ioctl(fd, BINDER_VERSION, &version);
saved_errno = errno;
close(fd);
errno = saved_errno;
- if (ret < 0) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg(
- "%s - Failed to open perform BINDER_VERSION request\n",
+ EXPECT_GE(ret, 0) {
+ TH_LOG("%s - Failed to open perform BINDER_VERSION request",
strerror(errno));
+ goto umount;
}
- ksft_print_msg("Detected binder version: %d\n",
- version.protocol_version);
+ TH_LOG("Detected binder version: %d", version.protocol_version);
- /* binder transaction with binderfs binder device passed */
- ksft_inc_pass_cnt();
+ /* success: binder transaction with binderfs binder device */
ret = unlink(device_path);
- if (ret < 0) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg("%s - Failed to delete binder device\n",
- strerror(errno));
+ EXPECT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to delete binder device",
+ strerror(errno));
+ goto umount;
}
- /* binder device removal passed */
- ksft_inc_pass_cnt();
+ /* success: binder device removal */
snprintf(device_path, sizeof(device_path), "%s/binder-control", binderfs_mntpt);
ret = unlink(device_path);
- if (!ret) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg("Managed to delete binder-control device\n");
- } else if (errno != EPERM) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg(
- "%s - Failed to delete binder-control device but exited with unexpected error code\n",
+ EXPECT_NE(ret, 0) {
+ TH_LOG("Managed to delete binder-control device");
+ goto umount;
+ }
+ EXPECT_EQ(errno, EPERM) {
+ TH_LOG("%s - Failed to delete binder-control device but exited with unexpected error code",
strerror(errno));
+ goto umount;
}
- /* binder-control device removal failed as expected */
- ksft_inc_xfail_cnt();
+ /* success: binder-control device removal failed as expected */
+ result = 0;
-on_error:
+umount:
ret = umount2(binderfs_mntpt, MNT_DETACH);
- rmdir_protect_errno(binderfs_mntpt);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to unmount binderfs\n",
- strerror(errno));
-
- /* binderfs unmount test passed */
- ksft_inc_pass_cnt();
- return 0;
+ EXPECT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to unmount binderfs", strerror(errno));
+ }
+rmdir:
+ ret = rmdir(binderfs_mntpt);
+ EXPECT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to rmdir binderfs mount", strerror(errno));
+ }
+out:
+ return result;
}
static int wait_for_pid(pid_t pid)
@@ -291,7 +273,7 @@ static int write_id_mapping(enum idmap_type type, pid_t pid, const char *buf,
return 0;
}
-static void change_userns(int syncfds[2])
+static void change_userns(struct __test_metadata *_metadata, int syncfds[2])
{
int ret;
char buf;
@@ -299,25 +281,29 @@ static void change_userns(int syncfds[2])
close_prot_errno_disarm(syncfds[1]);
ret = unshare(CLONE_NEWUSER);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to unshare user namespace\n",
- strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to unshare user namespace",
+ strerror(errno));
+ }
ret = write_nointr(syncfds[0], "1", 1);
- if (ret != 1)
- ksft_exit_fail_msg("write_nointr() failed\n");
+ ASSERT_EQ(ret, 1) {
+ TH_LOG("write_nointr() failed");
+ }
ret = read_nointr(syncfds[0], &buf, 1);
- if (ret != 1)
- ksft_exit_fail_msg("read_nointr() failed\n");
+ ASSERT_EQ(ret, 1) {
+ TH_LOG("read_nointr() failed");
+ }
close_prot_errno_disarm(syncfds[0]);
- if (setid_userns_root())
- ksft_exit_fail_msg("setid_userns_root() failed");
+ ASSERT_EQ(setid_userns_root(), 0) {
+ TH_LOG("setid_userns_root() failed");
+ }
}
-static void change_idmaps(int syncfds[2], pid_t pid)
+static void change_idmaps(struct __test_metadata *_metadata, int syncfds[2], pid_t pid)
{
int ret;
char buf;
@@ -326,35 +312,42 @@ static void change_idmaps(int syncfds[2], pid_t pid)
close_prot_errno_disarm(syncfds[0]);
ret = read_nointr(syncfds[1], &buf, 1);
- if (ret != 1)
- ksft_exit_fail_msg("read_nointr() failed\n");
+ ASSERT_EQ(ret, 1) {
+ TH_LOG("read_nointr() failed");
+ }
snprintf(id_map, sizeof(id_map), "0 %d 1\n", getuid());
ret = write_id_mapping(UID_MAP, pid, id_map, strlen(id_map));
- if (ret)
- ksft_exit_fail_msg("write_id_mapping(UID_MAP) failed");
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("write_id_mapping(UID_MAP) failed");
+ }
snprintf(id_map, sizeof(id_map), "0 %d 1\n", getgid());
ret = write_id_mapping(GID_MAP, pid, id_map, strlen(id_map));
- if (ret)
- ksft_exit_fail_msg("write_id_mapping(GID_MAP) failed");
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("write_id_mapping(GID_MAP) failed");
+ }
ret = write_nointr(syncfds[1], "1", 1);
- if (ret != 1)
- ksft_exit_fail_msg("write_nointr() failed");
+ ASSERT_EQ(ret, 1) {
+ TH_LOG("write_nointr() failed");
+ }
close_prot_errno_disarm(syncfds[1]);
}
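+/* Harness metadata stashed globally so the version threads can use TH_LOG(). */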
+struct __test_metadata *_thread_metadata;
static void *binder_version_thread(void *data)
{
+ struct __test_metadata *_metadata = _thread_metadata;
int fd = PTR_TO_INT(data);
struct binder_version version = { 0 };
int ret;
ret = ioctl(fd, BINDER_VERSION, &version);
if (ret < 0)
- ksft_print_msg("%s - Failed to open perform BINDER_VERSION request\n", strerror(errno));
+ TH_LOG("%s - Failed to open perform BINDER_VERSION request\n",
+ strerror(errno));
pthread_exit(data);
}
@@ -377,68 +370,79 @@ TEST(binderfs_stress)
device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME];
ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, syncfds);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to create socket pair", strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to create socket pair", strerror(errno));
+ }
pid = fork();
- if (pid < 0) {
+ ASSERT_GE(pid, 0) {
+ TH_LOG("%s - Failed to fork", strerror(errno));
close_prot_errno_disarm(syncfds[0]);
close_prot_errno_disarm(syncfds[1]);
- ksft_exit_fail_msg("%s - Failed to fork", strerror(errno));
}
if (pid == 0) {
int i, j, k, nthreads;
pthread_attr_t attr;
pthread_t threads[DEFAULT_THREADS];
- change_userns(syncfds);
- change_mountns();
+ change_userns(_metadata, syncfds);
+ change_mountns(_metadata);
- if (!mkdtemp(binderfs_mntpt))
- log_exit("%s - Failed to create binderfs mountpoint\n",
- strerror(errno));
+ ASSERT_NE(mkdtemp(binderfs_mntpt), NULL) {
+ TH_LOG("%s - Failed to create binderfs mountpoint",
+ strerror(errno));
+ }
ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
- if (ret < 0)
- log_exit("%s - Failed to mount binderfs\n", strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to mount binderfs", strerror(errno));
+ }
for (int i = 0; i < ARRAY_SIZE(fds); i++) {
snprintf(device_path, sizeof(device_path),
"%s/binder-control", binderfs_mntpt);
fd = open(device_path, O_RDONLY | O_CLOEXEC);
- if (fd < 0)
- log_exit("%s - Failed to open binder-control device\n", strerror(errno));
+ ASSERT_GE(fd, 0) {
+ TH_LOG("%s - Failed to open binder-control device",
+ strerror(errno));
+ }
memset(&device, 0, sizeof(device));
snprintf(device.name, sizeof(device.name), "%d", i);
ret = ioctl(fd, BINDER_CTL_ADD, &device);
close_prot_errno_disarm(fd);
- if (ret < 0)
- log_exit("%s - Failed to allocate new binder device\n", strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to allocate new binder device",
+ strerror(errno));
+ }
snprintf(device_path, sizeof(device_path), "%s/%d",
binderfs_mntpt, i);
fds[i] = open(device_path, O_RDONLY | O_CLOEXEC);
- if (fds[i] < 0)
- log_exit("%s - Failed to open binder device\n", strerror(errno));
+ ASSERT_GE(fds[i], 0) {
+ TH_LOG("%s - Failed to open binder device", strerror(errno));
+ }
}
ret = umount2(binderfs_mntpt, MNT_DETACH);
- rmdir_protect_errno(binderfs_mntpt);
- if (ret < 0)
- log_exit("%s - Failed to unmount binderfs\n", strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to unmount binderfs", strerror(errno));
+ rmdir(binderfs_mntpt);
+ }
nthreads = get_nprocs_conf();
if (nthreads > DEFAULT_THREADS)
nthreads = DEFAULT_THREADS;
+ _thread_metadata = _metadata;
pthread_attr_init(&attr);
for (k = 0; k < ARRAY_SIZE(fds); k++) {
for (i = 0; i < nthreads; i++) {
ret = pthread_create(&threads[i], &attr, binder_version_thread, INT_TO_PTR(fds[k]));
if (ret) {
- ksft_print_msg("%s - Failed to create thread %d\n", strerror(errno), i);
+ TH_LOG("%s - Failed to create thread %d",
+ strerror(errno), i);
break;
}
}
@@ -448,7 +452,8 @@ TEST(binderfs_stress)
ret = pthread_join(threads[j], &fdptr);
if (ret)
- ksft_print_msg("%s - Failed to join thread %d for fd %d\n", strerror(errno), j, PTR_TO_INT(fdptr));
+ TH_LOG("%s - Failed to join thread %d for fd %d",
+ strerror(errno), j, PTR_TO_INT(fdptr));
}
}
pthread_attr_destroy(&attr);
@@ -459,11 +464,12 @@ TEST(binderfs_stress)
exit(EXIT_SUCCESS);
}
- change_idmaps(syncfds, pid);
+ change_idmaps(_metadata, syncfds, pid);
ret = wait_for_pid(pid);
- if (ret)
- ksft_exit_fail_msg("wait_for_pid() failed");
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("wait_for_pid() failed");
+ }
}
TEST(binderfs_test_privileged)
@@ -471,7 +477,7 @@ TEST(binderfs_test_privileged)
if (geteuid() != 0)
XFAIL(return, "Tests are not run as root. Skipping privileged tests");
- if (__do_binderfs_test() == 1)
+ if (__do_binderfs_test(_metadata))
XFAIL(return, "The Android binderfs filesystem is not available");
}
@@ -482,31 +488,33 @@ TEST(binderfs_test_unprivileged)
pid_t pid;
ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, syncfds);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to create socket pair", strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to create socket pair", strerror(errno));
+ }
pid = fork();
- if (pid < 0) {
+ ASSERT_GE(pid, 0) {
close_prot_errno_disarm(syncfds[0]);
close_prot_errno_disarm(syncfds[1]);
- ksft_exit_fail_msg("%s - Failed to fork", strerror(errno));
+ TH_LOG("%s - Failed to fork", strerror(errno));
}
if (pid == 0) {
- change_userns(syncfds);
- if (__do_binderfs_test() == 1)
+ change_userns(_metadata, syncfds);
+ if (__do_binderfs_test(_metadata))
exit(2);
exit(EXIT_SUCCESS);
}
- change_idmaps(syncfds, pid);
+ change_idmaps(_metadata, syncfds, pid);
ret = wait_for_pid(pid);
if (ret) {
if (ret == 2)
XFAIL(return, "The Android binderfs filesystem is not available");
- else
- ksft_exit_fail_msg("wait_for_pid() failed");
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("wait_for_pid() failed");
+ }
}
}
diff --git a/tools/testing/selftests/firmware/.gitignore b/tools/testing/selftests/firmware/.gitignore
new file mode 100644
index 000000000000..62abc92a94c4
--- /dev/null
+++ b/tools/testing/selftests/firmware/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+fw_namespace
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index fcc281373b4d..c2a2a100114b 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -149,6 +149,26 @@ config_unset_into_buf()
echo 0 > $DIR/config_into_buf
}
+config_set_buf_size()
+{
+ echo $1 > $DIR/config_buf_size
+}
+
+config_set_file_offset()
+{
+ echo $1 > $DIR/config_file_offset
+}
+
+config_set_partial()
+{
+ echo 1 > $DIR/config_partial
+}
+
+config_unset_partial()
+{
+ echo 0 > $DIR/config_partial
+}
+
config_set_sync_direct()
{
echo 1 > $DIR/config_sync_direct
@@ -207,6 +227,35 @@ read_firmwares()
done
}
+read_partial_firmwares()
+{
+ if [ "$(cat $DIR/config_into_buf)" == "1" ]; then
+ fwfile="${FW_INTO_BUF}"
+ else
+ fwfile="${FW}"
+ fi
+
+ if [ "$1" = "xzonly" ]; then
+ fwfile="${fwfile}-orig"
+ fi
+
+ # Strip fwfile down to match partial offset and length
+ partial_data="$(cat $fwfile)"
+ partial_data="${partial_data:$2:$3}"
+
+ for i in $(seq 0 3); do
+ config_set_read_fw_idx $i
+
+ read_firmware="$(cat $DIR/read_firmware)"
+
+ # Verify the contents are what we expect.
+		if [ "$read_firmware" != "$partial_data" ]; then
+ echo "request #$i: partial firmware was not loaded" >&2
+ exit 1
+ fi
+ done
+}
+
read_firmwares_expect_nofile()
{
for i in $(seq 0 3); do
@@ -242,6 +291,21 @@ test_batched_request_firmware_into_buf_nofile()
echo "OK"
}
+test_request_partial_firmware_into_buf_nofile()
+{
+ echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2 nofile: "
+ config_reset
+ config_set_name nope-test-firmware.bin
+ config_set_into_buf
+ config_set_partial
+ config_set_buf_size $2
+ config_set_file_offset $1
+ config_trigger_sync
+ read_firmwares_expect_nofile
+ release_all_firmware
+ echo "OK"
+}
+
test_batched_request_firmware_direct_nofile()
{
echo -n "Batched request_firmware_direct() nofile try #$1: "
@@ -356,6 +420,21 @@ test_request_firmware_nowait_custom()
echo "OK"
}
+test_request_partial_firmware_into_buf()
+{
+ echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2: "
+ config_reset
+ config_set_name $TEST_FIRMWARE_INTO_BUF_FILENAME
+ config_set_into_buf
+ config_set_partial
+ config_set_buf_size $2
+ config_set_file_offset $1
+ config_trigger_sync
+ read_partial_firmwares normal $1 $2
+ release_all_firmware
+ echo "OK"
+}
+
# Only continue if batched request triggers are present on the
# test-firmware driver
test_config_present
@@ -383,6 +462,12 @@ for i in $(seq 1 5); do
test_request_firmware_nowait_custom $i normal
done
+# Partial loads cannot use fallback, so do not repeat tests.
+test_request_partial_firmware_into_buf 0 10
+test_request_partial_firmware_into_buf 0 5
+test_request_partial_firmware_into_buf 1 6
+test_request_partial_firmware_into_buf 2 10
+
# Test for file not found, errors are expected, the failure would be
# a hung task, which would require a hard reset.
echo
@@ -407,6 +492,12 @@ for i in $(seq 1 5); do
test_request_firmware_nowait_custom_nofile $i
done
+# Partial loads cannot use fallback, so do not repeat tests.
+test_request_partial_firmware_into_buf_nofile 0 10
+test_request_partial_firmware_into_buf_nofile 0 5
+test_request_partial_firmware_into_buf_nofile 1 6
+test_request_partial_firmware_into_buf_nofile 2 10
+
test "$HAS_FW_LOADER_COMPRESS" != "yes" && exit 0
# test with both files present
diff --git a/tools/testing/selftests/firmware/settings b/tools/testing/selftests/firmware/settings
new file mode 100644
index 000000000000..085e664ee093
--- /dev/null
+++ b/tools/testing/selftests/firmware/settings
@@ -0,0 +1,8 @@
+# The async firmware timeout is set to 1 second (but ends up being effectively
+# 2 seconds). There are 3 test configs, each done with and without firmware
+# present, each with 2 "nowait" functions tested 5 times. Expected time for a
+# normal execution should be 2 * 3 * 2 * 2 * 5 = 120 seconds for those alone.
+# Additionally, fw_fallback may take 5 seconds for internal timeouts in each
+# of the 3 configs, so at least another 15 seconds are needed. Add another
+# 10 seconds for each of the 3 testing configs: 120 + 15 + 30 = 165 seconds.
+timeout=165
diff --git a/tools/testing/selftests/fpu/.gitignore b/tools/testing/selftests/fpu/.gitignore
new file mode 100644
index 000000000000..d6d12ac1d9c3
--- /dev/null
+++ b/tools/testing/selftests/fpu/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0+
+test_fpu
diff --git a/tools/testing/selftests/fpu/Makefile b/tools/testing/selftests/fpu/Makefile
new file mode 100644
index 000000000000..ea62c176ede7
--- /dev/null
+++ b/tools/testing/selftests/fpu/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+LDLIBS := -lm
+
+TEST_GEN_PROGS := test_fpu
+
+TEST_PROGS := run_test_fpu.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/fpu/run_test_fpu.sh b/tools/testing/selftests/fpu/run_test_fpu.sh
new file mode 100755
index 000000000000..d77be93ec139
--- /dev/null
+++ b/tools/testing/selftests/fpu/run_test_fpu.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Load kernel module for FPU tests
+
+uid=$(id -u)
+if [ $uid -ne 0 ]; then
+ echo "$0: Must be run as root"
+ exit 1
+fi
+
+if ! which modprobe > /dev/null 2>&1; then
+ echo "$0: You need modprobe installed"
+ exit 4
+fi
+
+if ! modinfo test_fpu > /dev/null 2>&1; then
+ echo "$0: You must have the following enabled in your kernel:"
+ echo "CONFIG_TEST_FPU=m"
+ exit 4
+fi
+
+NR_CPUS=$(getconf _NPROCESSORS_ONLN)
+if [ -z "$NR_CPUS" ]; then
+ NR_CPUS=1
+fi
+
+modprobe test_fpu
+
+if [ ! -e /sys/kernel/debug/selftest_helpers/test_fpu ]; then
+ mount -t debugfs none /sys/kernel/debug
+
+ if [ ! -e /sys/kernel/debug/selftest_helpers/test_fpu ]; then
+ echo "$0: Error mounting debugfs"
+ exit 4
+ fi
+fi
+
+echo "Running 1000 iterations on all CPUs... "
+for i in $(seq 1 1000); do
+ for c in $(seq 1 $NR_CPUS); do
+ ./test_fpu &
+ done
+done
+
+rmmod test_fpu
diff --git a/tools/testing/selftests/fpu/test_fpu.c b/tools/testing/selftests/fpu/test_fpu.c
new file mode 100644
index 000000000000..200238522a9d
--- /dev/null
+++ b/tools/testing/selftests/fpu/test_fpu.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* This testcase operates with the test_fpu kernel driver.
+ * It modifies the FPU control register in user mode and calls the kernel
+ * module to perform floating point operations in the kernel. The control
+ * register value should be independent between kernel and user mode.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <fenv.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+const char *test_fpu_path = "/sys/kernel/debug/selftest_helpers/test_fpu";
+
+int main(void)
+{
+ char dummy[1];
+ int fd = open(test_fpu_path, O_RDONLY);
+
+ if (fd < 0) {
+ printf("[SKIP]\tcan't access %s: %s\n",
+ test_fpu_path, strerror(errno));
+ return 0;
+ }
+
+ if (read(fd, dummy, 1) < 0) {
+ printf("[FAIL]\taccess with default rounding mode failed\n");
+ return 1;
+ }
+
+ fesetround(FE_DOWNWARD);
+ if (read(fd, dummy, 1) < 0) {
+ printf("[FAIL]\taccess with downward rounding mode failed\n");
+ return 2;
+ }
+ if (fegetround() != FE_DOWNWARD) {
+ printf("[FAIL]\tusermode rounding mode clobbered\n");
+ return 3;
+ }
+
+ /* Note: the tests up to this point are quite safe and will only return
+ * an error. But the exception mask setting can cause a misbehaving kernel
+ * to crash.
+ */
+ feclearexcept(FE_ALL_EXCEPT);
+ feenableexcept(FE_ALL_EXCEPT);
+ if (read(fd, dummy, 1) < 0) {
+ printf("[FAIL]\taccess with fpu exceptions unmasked failed\n");
+ return 4;
+ }
+ if (fegetexcept() != FE_ALL_EXCEPT) {
+ printf("[FAIL]\tusermode fpu exception mask clobbered\n");
+ return 5;
+ }
+
+ printf("[OK]\ttest_fpu\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index a4605b5ee66d..8ec1922e974e 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -263,10 +263,16 @@ CASENO=0
testcase() { # testfile
CASENO=$((CASENO+1))
- desc=`grep "^#[ \t]*description:" $1 | cut -f2 -d:`
+ desc=`grep "^#[ \t]*description:" $1 | cut -f2- -d:`
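+  # (-f2- rather than -f2 preserves any ':' inside the description text)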
prlog -n "[$CASENO]$INSTANCE$desc"
}
+checkreq() { # testfile
+ requires=`grep "^#[ \t]*requires:" $1 | cut -f2- -d:`
+ # Use eval to pass quoted-patterns correctly.
+ eval check_requires "$requires"
+}
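+# e.g. the line '# requires: kprobe_events "place: [<module>:]<symbol>":README'
+# expands via eval to: check_requires kprobe_events "place: [<module>:]<symbol>":README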
+
test_on_instance() { # testfile
grep -q "^#[ \t]*flags:.*instance" $1
}
@@ -356,7 +362,8 @@ trap 'SIG_RESULT=$XFAIL' $SIG_XFAIL
__run_test() { # testfile
# setup PID and PPID, $$ is not updated.
- (cd $TRACING_DIR; read PID _ < /proc/self/stat; set -e; set -x; initialize_ftrace; . $1)
+ (cd $TRACING_DIR; read PID _ < /proc/self/stat; set -e; set -x;
+ checkreq $1; initialize_ftrace; . $1)
[ $? -ne 0 ] && kill -s $SIG_FAIL $SIG_PID
}
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc
index 3b1f45e13a2e..13b4dabcf46e 100644
--- a/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc
@@ -1,9 +1,8 @@
#!/bin/sh
# description: Snapshot and tracing setting
+# requires: snapshot
# flags: instance
-[ ! -f snapshot ] && exit_unsupported
-
echo "Set tracing off"
echo 0 > tracing_on
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc b/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc
index 5058fbcfd90f..435d07b13407 100644
--- a/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc
@@ -1,10 +1,9 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: trace_pipe and trace_marker
+# requires: trace_marker
# flags: instance
-[ ! -f trace_marker ] && exit_unsupported
-
echo "test input 1" > trace_marker
: "trace interface never consume the ring buffer"
diff --git a/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc b/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc
index 801ecb63e84c..e52e470a1f8f 100644
--- a/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc
+++ b/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Test ftrace direct functions against kprobes
+# requires: kprobe_events
rmmod ftrace-direct ||:
if ! modprobe ftrace-direct ; then
@@ -8,11 +9,6 @@ if ! modprobe ftrace-direct ; then
exit_unresolved;
fi
-if [ ! -f kprobe_events ]; then
- echo "No kprobe_events file -please build CONFIG_KPROBE_EVENTS"
- exit_unresolved;
-fi
-
echo "Let the module run a little"
sleep 1
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
index c6d8387dbbb8..3bcd4c3624ee 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
@@ -1,16 +1,12 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Generic dynamic event - add/remove kprobe events
-
-[ -f dynamic_events ] || exit_unsupported
-
-grep -q "place: \[<module>:\]<symbol>" README || exit_unsupported
-grep -q "place (kretprobe): \[<module>:\]<symbol>" README || exit_unsupported
+# requires: dynamic_events "place: [<module>:]<symbol>":README "place (kretprobe): [<module>:]<symbol>":README
echo 0 > events/enable
echo > dynamic_events
-PLACE=_do_fork
+PLACE=kernel_clone
echo "p:myevent1 $PLACE" >> dynamic_events
echo "r:myevent2 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc
index 62b77b5941d0..2b94611e1a28 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc
@@ -1,10 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Generic dynamic event - add/remove synthetic events
-
-[ -f dynamic_events ] || exit_unsupported
-
-grep -q "s:\[synthetic/\]" README || exit_unsupported
+# requires: dynamic_events "s:[synthetic/]":README
echo 0 > events/enable
echo > dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
index e0842109cb57..438961971b7e 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
@@ -1,21 +1,12 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Generic dynamic event - selective clear (compatibility)
-
-[ -f dynamic_events ] || exit_unsupported
-
-grep -q "place: \[<module>:\]<symbol>" README || exit_unsupported
-grep -q "place (kretprobe): \[<module>:\]<symbol>" README || exit_unsupported
-
-grep -q "s:\[synthetic/\]" README || exit_unsupported
-
-[ -f synthetic_events ] || exit_unsupported
-[ -f kprobe_events ] || exit_unsupported
+# requires: dynamic_events kprobe_events synthetic_events "place: [<module>:]<symbol>":README "place (kretprobe): [<module>:]<symbol>":README "s:[synthetic/]":README
echo 0 > events/enable
echo > dynamic_events
-PLACE=_do_fork
+PLACE=kernel_clone
setup_events() {
echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
index 901922e97878..a8603bd23e0d 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
@@ -1,18 +1,12 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Generic dynamic event - generic clear event
-
-[ -f dynamic_events ] || exit_unsupported
-
-grep -q "place: \[<module>:\]<symbol>" README || exit_unsupported
-grep -q "place (kretprobe): \[<module>:\]<symbol>" README || exit_unsupported
-
-grep -q "s:\[synthetic/\]" README || exit_unsupported
+# requires: dynamic_events "place: [<module>:]<symbol>":README "place (kretprobe): [<module>:]<symbol>":README "s:[synthetic/]":README
echo 0 > events/enable
echo > dynamic_events
-PLACE=_do_fork
+PLACE=kernel_clone
setup_events() {
echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
index dfb0d5122f7b..cfe5bd2d4267 100644
--- a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event tracing - enable/disable with event level files
+# requires: set_event events/sched
# flags: instance
do_reset() {
@@ -13,11 +14,6 @@ fail() { #msg
exit_fail
}
-if [ ! -f set_event -o ! -d events/sched ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
echo 'sched:sched_switch' > set_event
yield
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc
index f0f366f18d0c..e6eb78f0b954 100644
--- a/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event tracing - restricts events based on pid notrace filtering
+# requires: set_event events/sched set_event_pid set_event_notrace_pid
# flags: instance
do_reset() {
@@ -56,16 +57,6 @@ enable_events() {
echo 1 > tracing_on
}
-if [ ! -f set_event -o ! -d events/sched ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f set_event_pid -o ! -f set_event_notrace_pid ]; then
- echo "event pid notrace filtering is not supported"
- exit_unsupported
-fi
-
echo 0 > options/event-fork
do_reset
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc
index f9cb214220b1..7f5f97dffdc3 100644
--- a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event tracing - restricts events based on pid
+# requires: set_event set_event_pid events/sched
# flags: instance
do_reset() {
@@ -16,16 +17,6 @@ fail() { #msg
exit_fail
}
-if [ ! -f set_event -o ! -d events/sched ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f set_event_pid ]; then
- echo "event pid filtering is not supported"
- exit_unsupported
-fi
-
echo 0 > options/event-fork
echo 1 > events/sched/sched_switch/enable
diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
index 83a8c571e93a..b1ede6249866 100644
--- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event tracing - enable/disable with subsystem level files
+# requires: set_event events/sched/enable
# flags: instance
do_reset() {
@@ -13,11 +14,6 @@ fail() { #msg
exit_fail
}
-if [ ! -f set_event -o ! -d events/sched ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
echo 'sched:*' > set_event
yield
diff --git a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
index 84d7bda08d2a..93c10ea42a68 100644
--- a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event tracing - enable/disable with top level files
+# requires: available_events set_event events/enable
do_reset() {
echo > set_event
@@ -12,11 +13,6 @@ fail() { #msg
exit_fail
}
-if [ ! -f available_events -o ! -f set_event -o ! -d events ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
echo '*:*' > set_event
yield
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
index f59853857ad2..cf3ea42b12b0 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
@@ -1,17 +1,11 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: ftrace - function graph filters with stack tracer
+# requires: stack_trace set_ftrace_filter function_graph:tracer
# Make sure that function graph filtering works, and is not
# affected by other tracers enabled (like stack tracer)
-if ! grep -q function_graph available_tracers; then
- echo "no function graph tracer configured"
- exit_unsupported
-fi
-
-check_filter_file set_ftrace_filter
-
do_reset() {
if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then
echo 0 > /proc/sys/kernel/stack_tracer_enabled
@@ -37,12 +31,6 @@ fi
echo function_graph > current_tracer
-if [ ! -f stack_trace ]; then
- echo "Stack tracer not configured"
- do_reset
- exit_unsupported;
-fi
-
echo "Now testing with stack tracer"
echo 1 > /proc/sys/kernel/stack_tracer_enabled
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
index d610f47edd90..b3ccdaec2a61 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
@@ -1,16 +1,10 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: ftrace - function graph filters
+# requires: set_ftrace_filter function_graph:tracer
# Make sure that function graph filtering works
-if ! grep -q function_graph available_tracers; then
- echo "no function graph tracer configured"
- exit_unsupported
-fi
-
-check_filter_file set_ftrace_filter
-
fail() { # msg
echo $1
exit_fail
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
index 28936f434ee5..4b994b6df5ac 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
@@ -1,16 +1,10 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: ftrace - function glob filters
+# requires: set_ftrace_filter function:tracer
# Make sure that function glob matching filter works.
-if ! grep -q function available_tracers; then
- echo "no function tracer configured"
- exit_unsupported
-fi
-
-check_filter_file set_ftrace_filter
-
disable_tracing
clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc
index 71db68a7975f..acb17ce543d2 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc
@@ -1,22 +1,11 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: ftrace - function pid notrace filters
+# requires: set_ftrace_notrace_pid set_ftrace_filter function:tracer
# flags: instance
# Make sure that function pid matching filter with notrace works.
-if ! grep -q function available_tracers; then
- echo "no function tracer configured"
- exit_unsupported
-fi
-
-if [ ! -f set_ftrace_notrace_pid ]; then
- echo "set_ftrace_notrace_pid not found? Is function tracer not set?"
- exit_unsupported
-fi
-
-check_filter_file set_ftrace_filter
-
do_function_fork=1
if [ ! -f options/function-fork ]; then
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
index d58403c4b7cd..9f0a9687c773 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -1,23 +1,12 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: ftrace - function pid filters
+# requires: set_ftrace_pid set_ftrace_filter function:tracer
# flags: instance
# Make sure that function pid matching filter works.
# Also test it on an instance directory
-if ! grep -q function available_tracers; then
- echo "no function tracer configured"
- exit_unsupported
-fi
-
-if [ ! -f set_ftrace_pid ]; then
- echo "set_ftrace_pid not found? Is function tracer not set?"
- exit_unsupported
-fi
-
-check_filter_file set_ftrace_filter
-
do_function_fork=1
if [ ! -f options/function-fork ]; then
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
index b2aff786c1a2..98305d76bd04 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
@@ -1,13 +1,12 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: ftrace - stacktrace filter command
+# requires: set_ftrace_filter
# flags: instance
-check_filter_file set_ftrace_filter
+echo kernel_clone:stacktrace >> set_ftrace_filter
-echo _do_fork:stacktrace >> set_ftrace_filter
-
-grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter
+grep -q "kernel_clone:stacktrace:unlimited" set_ftrace_filter
(echo "forked"; sleep 1)
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc
index 71fa3f49e35e..0c6cf7725110 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: ftrace - function trace with cpumask
+# requires: function:tracer
if ! which nproc ; then
nproc() {
@@ -15,11 +16,6 @@ if [ $NP -eq 1 ] ;then
exit_unresolved
fi
-if ! grep -q "function" available_tracers ; then
- echo "Function trace is not enabled"
- exit_unsupported
-fi
-
ORIG_CPUMASK=`cat tracing_cpumask`
do_reset() {
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
index e9b1fd534e96..3145b0f1835c 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
@@ -3,15 +3,14 @@
# description: ftrace - test for function event triggers
# flags: instance
#
+# The triggers are set within the set_ftrace_filter file
+# requires: set_ftrace_filter
+#
# Ftrace allows adding triggers to functions, such as enabling or disabling
# tracing, enabling or disabling trace events, or recording a stack trace
# within the ring buffer.
#
# This test is designed to test event triggers
-#
-
-# The triggers are set within the set_ftrace_filter file
-check_filter_file set_ftrace_filter
do_reset() {
reset_ftrace_filter
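The triggers this test sets through set_ftrace_filter follow ftrace's function-trigger syntax; a hedged sketch of such a command, using kernel_clone purely as an illustrative target function:

  # enable the sched:sched_switch event the first time kernel_clone is hit
  echo 'kernel_clone:enable_event:sched:sched_switch:1' >> set_ftrace_filter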
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc
index 1a4b4a442d33..37c8feb9078b 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc
@@ -1,8 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: ftrace - function trace on module
-
-check_filter_file set_ftrace_filter
+# requires: set_ftrace_filter
: "mod: allows to filter a non exist function"
echo 'non_exist_func:mod:non_exist_module' > set_ftrace_filter
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc
index 0d501058aa75..4daeffb02fd8 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc
@@ -1,8 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: ftrace - function profiling
-
-[ ! -f function_profile_enabled ] && exit_unsupported
+# requires: function_profile_enabled
: "Enable function profile"
echo 1 > function_profile_enabled
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
index a3dadb6b93b4..1dbd766c0cd2 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: ftrace - function profiler with function tracing
+# requires: function_profile_enabled set_ftrace_filter function_graph:tracer
# There was a bug after a rewrite of the ftrace infrastructure that
# caused the function_profiler not to be able to run with the function
@@ -13,17 +14,6 @@
# This test triggers those bugs on those kernels.
#
# We need function_graph and profiling to run this test
-if ! grep -q function_graph available_tracers; then
- echo "no function graph tracer configured"
- exit_unsupported;
-fi
-
-check_filter_file set_ftrace_filter
-
-if [ ! -f function_profile_enabled ]; then
- echo "function_profile_enabled not found, function profiling enabled?"
- exit_unsupported
-fi
fail() { # mesg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
index 70bad441fa7d..e96e279e0533 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
@@ -2,6 +2,9 @@
# SPDX-License-Identifier: GPL-2.0
# description: ftrace - test reading of set_ftrace_filter
#
+# The triggers are set within the set_ftrace_filter file
+# requires: set_ftrace_filter
+#
# The set_ftrace_filter file of ftrace is used to list functions as well as
# triggers (probes) attached to functions. The code to read this file is not
# straight forward and has had various bugs in the past. This test is designed
@@ -9,9 +12,6 @@
# file in various ways (cat vs dd).
#
-# The triggers are set within the set_ftrace_filter file
-check_filter_file set_ftrace_filter
-
fail() { # mesg
echo $1
exit_fail
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc
index 51e9e80bc0e6..61264e422699 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc
@@ -1,15 +1,9 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: ftrace - Max stack tracer
+# requires: stack_trace stack_trace_filter
# Test the basic function of max-stack usage tracing
-if [ ! -f stack_trace ]; then
- echo "Max stack tracer is not supported - please make CONFIG_STACK_TRACER=y"
- exit_unsupported
-fi
-
-check_filter_file stack_trace_filter
-
echo > stack_trace_filter
echo 0 > stack_max_size
echo 1 > /proc/sys/kernel/stack_tracer_enabled
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
index 3ed173f2944a..aee22289536b 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
@@ -3,6 +3,9 @@
# description: ftrace - test for function traceon/off triggers
# flags: instance
#
+# The triggers are set within the set_ftrace_filter file
+# requires: set_ftrace_filter
+#
# Ftrace allows adding triggers to functions, such as enabling or disabling
# tracing, enabling or disabling trace events, or recording a stack trace
# within the ring buffer.
@@ -10,9 +13,6 @@
# This test is designed to test enabling and disabling tracing triggers
#
-# The triggers are set within the set_ftrace_filter file
-check_filter_file set_ftrace_filter
-
fail() { # mesg
echo $1
exit_fail
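The traceon/traceoff triggers exercised here use the same set_ftrace_filter syntax; a minimal sketch, again with kernel_clone as an example target:

  # stop tracing once, the first time kernel_clone is called
  echo 'kernel_clone:traceoff:1' >> set_ftrace_filter
  # turn tracing back on from another function (unlimited count)
  echo 'schedule:traceon' >> set_ftrace_filter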
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc b/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc
index 021c03fd885d..6c190620db47 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc
@@ -1,19 +1,15 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: ftrace - test tracing error log support
+# event tracing is currently the only ftrace tracer that uses the
+# tracing error_log, hence this check
+# requires: set_event error_log
fail() { #msg
echo $1
exit_fail
}
-# event tracing is currently the only ftrace tracer that uses the
-# tracing error_log, hence this check
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
ftrace_errlog_check 'event filter parse error' '((sig >= 10 && sig < 15) || dsig ^== 17) && comm != bash' 'events/signal/signal_generate/filter'
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index 61a3c7e2634d..c5dec55b7d95 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -1,10 +1,3 @@
-check_filter_file() { # check filter file introduced by dynamic ftrace
- if [ ! -f "$1" ]; then
- echo "$1 not found? Is dynamic ftrace not set?"
- exit_unsupported
- fi
-}
-
clear_trace() { # reset trace output
echo > trace
}
@@ -113,18 +106,41 @@ initialize_ftrace() { # Reset ftrace to initial-state
enable_tracing
}
+check_requires() { # Check required files and tracers
+ for i in "$@" ; do
+ r=${i%:README}
+ t=${i%:tracer}
+ if [ $t != $i ]; then
+ if ! grep -wq $t available_tracers ; then
+ echo "Required tracer $t is not configured."
+ exit_unsupported
+ fi
+ elif [ $r != $i ]; then
+ if ! grep -Fq "$r" README ; then
+ echo "Required feature pattern \"$r\" is not in README."
+ exit_unsupported
+ fi
+ elif [ ! -e $i ]; then
+ echo "Required feature interface $i doesn't exist."
+ exit_unsupported
+ fi
+ done
+}
+
LOCALHOST=127.0.0.1
yield() {
ping $LOCALHOST -c 1 || sleep .001 || usleep 1 || sleep 1
}
+# Since probe event command may include backslash, explicitly use printf "%s"
+# to NOT interpret it.
ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file
- pos=$(echo -n "${2%^*}" | wc -c) # error position
- command=$(echo "$2" | tr -d ^)
+ pos=$(printf "%s" "${2%^*}" | wc -c) # error position
+ command=$(printf "%s" "$2" | tr -d ^)
echo "Test command: $command"
echo > error_log
- (! echo "$command" >> "$3" ) 2> /dev/null
+ (! printf "%s" "$command" >> "$3" ) 2> /dev/null
grep "$1: error:" -A 3 error_log
N=$(tail -n 1 error_log | wc -c)
# " Command: " and "^\n" => 13
diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
index 4fa0f79144f4..0eb47fbb3f44 100644
--- a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
+++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
@@ -1,11 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Test creation and deletion of trace instances while setting an event
-
-if [ ! -d instances ] ; then
- echo "no instance directory with this kernel"
- exit_unsupported;
-fi
+# requires: instances
fail() { # mesg
rmdir foo 2>/dev/null
diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance.tc b/tools/testing/selftests/ftrace/test.d/instances/instance.tc
index b84651283bf3..607521d2592b 100644
--- a/tools/testing/selftests/ftrace/test.d/instances/instance.tc
+++ b/tools/testing/selftests/ftrace/test.d/instances/instance.tc
@@ -1,11 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Test creation and deletion of trace instances
-
-if [ ! -d instances ] ; then
- echo "no instance directory with this kernel"
- exit_unsupported;
-fi
+# requires: instances
fail() { # mesg
rmdir x y z 2>/dev/null
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
index bb1eb5a7c64e..9737cd0578a7 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
@@ -1,10 +1,9 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe dynamic event - adding and removing
+# requires: kprobe_events
-[ -f kprobe_events ] || exit_unsupported # this is configurable
-
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent kernel_clone > kprobe_events
grep myevent kprobe_events
test -d events/kprobes/myevent
echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
index 442c1a8c5edf..f9a40af76888 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
@@ -1,10 +1,9 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe dynamic event - busy event check
+# requires: kprobe_events
-[ -f kprobe_events ] || exit_unsupported
-
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent kernel_clone > kprobe_events
test -d events/kprobes/myevent
echo 1 > events/kprobes/myevent/enable
echo > kprobe_events && exit_fail # this must fail
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
index bcdecf80a8f1..eb543d3cfe5f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
@@ -1,16 +1,15 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe dynamic event with arguments
+# requires: kprobe_events
-[ -f kprobe_events ] || exit_unsupported # this is configurable
-
-echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events
+echo 'p:testprobe kernel_clone $stack $stack0 +0($stack)' > kprobe_events
grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)'
test -d events/kprobes/testprobe
echo 1 > events/kprobes/testprobe/enable
( echo "forked")
-grep testprobe trace | grep '_do_fork' | \
+grep testprobe trace | grep 'kernel_clone' | \
grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$'
echo 0 > events/kprobes/testprobe/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
index 15c1f70fcaf9..4e5b63be51c9 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
@@ -1,12 +1,11 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe event with comm arguments
-
-[ -f kprobe_events ] || exit_unsupported # this is configurable
+# requires: kprobe_events
grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old
-echo 'p:testprobe _do_fork comm=$comm ' > kprobe_events
+echo 'p:testprobe kernel_clone comm=$comm ' > kprobe_events
grep testprobe kprobe_events | grep -q 'comm=$comm'
test -d events/kprobes/testprobe
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index 46e7744f8358..a1d70588ab21 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -1,8 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe event string type argument
-
-[ -f kprobe_events ] || exit_unsupported # this is configurable
+# requires: kprobe_events
case `uname -m` in
x86_64)
@@ -31,13 +30,13 @@ esac
: "Test get argument (1)"
echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
-echo "p:test _do_fork" >> kprobe_events
+echo "p:test kernel_clone" >> kprobe_events
grep -qe "testprobe.* arg1=\"test\"" trace
echo 0 > events/kprobes/testprobe/enable
: "Test get argument (2)"
echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
-echo "p:test _do_fork" >> kprobe_events
+echo "p:test kernel_clone" >> kprobe_events
grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
index 2b6dd33f9076..bd25dd0ba0d0 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
@@ -1,8 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe event symbol argument
-
-[ -f kprobe_events ] || exit_unsupported # this is configurable
+# requires: kprobe_events
SYMBOL="linux_proc_banner"
@@ -15,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then
fi
: "Test get basic types symbol argument"
-echo "p:testprobe_u _do_fork arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
-echo "p:testprobe_s _do_fork arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
+echo "p:testprobe_u kernel_clone arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
+echo "p:testprobe_s kernel_clone arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
if grep -q "x8/16/32/64" README; then
- echo "p:testprobe_x _do_fork arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
+ echo "p:testprobe_x kernel_clone arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
fi
-echo "p:testprobe_bf _do_fork arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
+echo "p:testprobe_bf kernel_clone arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
echo 1 > events/kprobes/enable
(echo "forked")
echo 0 > events/kprobes/enable
@@ -28,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace
grep "testprobe_bf:.* arg1=.*" trace
: "Test get string symbol argument"
-echo "p:testprobe_str _do_fork arg1=@linux_proc_banner:string" > kprobe_events
+echo "p:testprobe_str kernel_clone arg1=@linux_proc_banner:string" > kprobe_events
echo 1 > events/kprobes/enable
(echo "forked")
echo 0 > events/kprobes/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
index 6f0f19953193..474ca1a9a088 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
@@ -1,10 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe event argument syntax
-
-[ -f kprobe_events ] || exit_unsupported # this is configurable
-
-grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue
+# requires: kprobe_events "x8/16/32/64":README
PROBEFUNC="vfs_read"
GOODREG=
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
index 81490ecaaa92..91fcce1c241c 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
@@ -1,13 +1,10 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobes event arguments with types
-
-[ -f kprobe_events ] || exit_unsupported # this is configurable
-
-grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue
+# requires: kprobe_events "x8/16/32/64":README
gen_event() { # Bitsize
- echo "p:testprobe _do_fork \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
+ echo "p:testprobe kernel_clone \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
}
check_types() { # s-type u-type x-type bf-type width
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
index 0f60087583d8..a30a9c07290d 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
@@ -1,10 +1,8 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe event user-memory access
+# requires: kprobe_events '$arg<N>':README
-[ -f kprobe_events ] || exit_unsupported # this is configurable
-
-grep -q '\$arg<N>' README || exit_unresolved # depends on arch
grep -A10 "fetcharg:" README | grep -q 'ustring' || exit_unsupported
grep -A10 "fetcharg:" README | grep -q '\[u\]<offset>' || exit_unsupported
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
index 3ff236719b6e..1f6981ef7afa 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
@@ -1,8 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe event auto/manual naming
-
-[ -f kprobe_events ] || exit_unsupported # this is configurable
+# requires: kprobe_events
:;: "Add an event on function without name" ;:
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
index df5072815b87..0d179094191f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -1,37 +1,33 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe dynamic event with function tracer
-
-[ -f kprobe_events ] || exit_unsupported # this is configurable
-grep "function" available_tracers || exit_unsupported # this is configurable
-
-check_filter_file set_ftrace_filter
+# requires: kprobe_events stack_trace_filter function:tracer
# prepare
echo nop > current_tracer
-echo _do_fork > set_ftrace_filter
-echo 'p:testprobe _do_fork' > kprobe_events
+echo kernel_clone > set_ftrace_filter
+echo 'p:testprobe kernel_clone' > kprobe_events
# kprobe on / ftrace off
echo 1 > events/kprobes/testprobe/enable
echo > trace
( echo "forked")
grep testprobe trace
-! grep '_do_fork <-' trace
+! grep 'kernel_clone <-' trace
# kprobe on / ftrace on
echo function > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-grep '_do_fork <-' trace
+grep 'kernel_clone <-' trace
# kprobe off / ftrace on
echo 0 > events/kprobes/testprobe/enable
echo > trace
( echo "forked")
! grep testprobe trace
-grep '_do_fork <-' trace
+grep 'kernel_clone <-' trace
# kprobe on / ftrace on
echo 1 > events/kprobes/testprobe/enable
@@ -39,11 +35,11 @@ echo function > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-grep '_do_fork <-' trace
+grep 'kernel_clone <-' trace
# kprobe on / ftrace off
echo nop > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-! grep '_do_fork <-' trace
+! grep 'kernel_clone <-' trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
index d861bd776c5e..7e74ee11edf9 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
@@ -1,8 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe dynamic event - probing module
-
-[ -f kprobe_events ] || exit_unsupported # this is configurable
+# requires: kprobe_events
rmmod trace-printk ||:
if ! modprobe trace-printk ; then
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
index 44494bac86d1..45d90b6c763d 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
@@ -1,13 +1,10 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Create/delete multiprobe on kprobe event
-
-[ -f kprobe_events ] || exit_unsupported
-
-grep -q "Create/append/" README || exit_unsupported
+# requires: kprobe_events "Create/append/":README
# Choose 2 symbols for target
-SYM1=_do_fork
+SYM1=kernel_clone
SYM2=do_exit
EVENT_NAME=kprobes/testevent
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index ef1e9bafb098..1b5550ef8a9b 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -1,10 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe event parser error log check
-
-[ -f kprobe_events ] || exit_unsupported # this is configurable
-
-[ -f error_log ] || exit_unsupported
+# requires: kprobe_events error_log
check_error() { # command-with-error-pos-by-^
ftrace_errlog_check 'trace_kprobe' "$1" 'kprobe_events'
@@ -89,13 +86,21 @@ esac
# multiprobe errors
if grep -q "Create/append/" README && grep -q "imm-value" README; then
-echo 'p:kprobes/testevent _do_fork' > kprobe_events
+echo 'p:kprobes/testevent kernel_clone' > kprobe_events
check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE
-echo 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events
-check_error 'p:kprobes/testevent _do_fork ^bcd=\1' # DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE
-check_error '^p:kprobes/testevent _do_fork abcd=\1' # SAME_PROBE
+
+# Explicitly use printf "%s" to not interpret \1
+printf "%s" 'p:kprobes/testevent kernel_clone abcd=\1' > kprobe_events
+check_error 'p:kprobes/testevent kernel_clone ^bcd=\1' # DIFF_ARG_TYPE
+check_error 'p:kprobes/testevent kernel_clone ^abcd=\1:u8' # DIFF_ARG_TYPE
+check_error 'p:kprobes/testevent kernel_clone ^abcd=\"foo"' # DIFF_ARG_TYPE
+check_error '^p:kprobes/testevent kernel_clone abcd=\1' # SAME_PROBE
+fi
+
+# %return suffix errors
+if grep -q "place (kretprobe): .*%return.*" README; then
+check_error 'p vfs_read^%hoge' # BAD_ADDR_SUFFIX
+check_error 'p ^vfs_read+10%return' # BAD_RETPROBE
fi
exit 0
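The switch from echo to printf "%s" above matters because the probe definition carries a literal \1, and whether echo preserves such backslashes is shell-dependent. A small illustration outside any test:

  echo 'abcd=\1'            # an XSI-style echo may interpret backslash escapes
  printf '%s\n' 'abcd=\1'   # printf "%s" always emits the argument verbatim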
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
index ac9ab4a12e53..7ae492c204a4 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
@@ -1,18 +1,17 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kretprobe dynamic event with arguments
-
-[ -f kprobe_events ] || exit_unsupported # this is configurable
+# requires: kprobe_events
# Add new kretprobe event
-echo 'r:testprobe2 _do_fork $retval' > kprobe_events
+echo 'r:testprobe2 kernel_clone $retval' > kprobe_events
grep testprobe2 kprobe_events | grep -q 'arg1=\$retval'
test -d events/kprobes/testprobe2
echo 1 > events/kprobes/testprobe2/enable
( echo "forked")
-cat trace | grep testprobe2 | grep -q '<- _do_fork'
+cat trace | grep testprobe2 | grep -q '<- kernel_clone'
echo 0 > events/kprobes/testprobe2/enable
echo '-:testprobe2' >> kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc
index 8e05b178519a..4f0b268c1233 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc
@@ -1,9 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kretprobe dynamic event with maxactive
-
-[ -f kprobe_events ] || exit_unsupported # this is configurable
-grep -q 'r\[maxactive\]' README || exit_unsupported # this is older version
+# requires: kprobe_events 'r[maxactive]':README
# Test if we successfully reject unknown messages
if echo 'a:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc
new file mode 100644
index 000000000000..f07bd15cc033
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc
@@ -0,0 +1,21 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kretprobe %%return suffix test
+# requires: kprobe_events '<symbol>[+<offset>]%return':README
+
+# Test for kretprobe by "r"
+echo 'r:myprobeaccept vfs_read' > kprobe_events
+RESULT1=`cat kprobe_events`
+
+# Test for kretprobe by "%return"
+echo 'p:myprobeaccept vfs_read%return' > kprobe_events
+RESULT2=`cat kprobe_events`
+
+if [ "$RESULT1" != "$RESULT2" ]; then
+ echo "Error: %return suffix didn't make a return probe."
+ echo "r-command: $RESULT1"
+ echo "%return: $RESULT2"
+ exit_fail
+fi
+
+echo > kprobe_events
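A probe defined with the %return suffix behaves like one defined with the r command, so it should accept the same fetch arguments; a hedged one-liner mirroring the kretprobe_args.tc probe (assuming the kernel advertises the suffix in README, as the requires line checks):

  echo 'p:testprobe2 kernel_clone%return $retval' > kprobe_events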
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index 6e3dbe5f96b7..312d23780096 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -1,8 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Register/unregister many kprobe events
-
-[ -f kprobe_events ] || exit_unsupported # this is configurable
+# requires: kprobe_events
# ftrace fentry skip size depends on the machine architecture.
# Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
index a902aa0aaabc..624269c8d534 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
@@ -1,8 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe events - probe points
-
-[ -f kprobe_events ] || exit_unsupported # this is configurable
+# requires: kprobe_events
TARGET_FUNC=tracefs_create_dir
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
index 0384b525cdee..c4093fc1a773 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
@@ -1,11 +1,10 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Kprobe dynamic event - adding and removing
-
-[ -f kprobe_events ] || exit_unsupported # this is configurable
+# requires: kprobe_events
! grep -q 'myevent' kprobe_profile
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent kernel_clone > kprobe_events
grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile
echo 1 > events/kprobes/myevent/enable
( echo "forked" )
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
index 14229d5778a0..f5e3f9e4a01f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
@@ -1,10 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Uprobe event parser error log check
-
-[ -f uprobe_events ] || exit_unsupported # this is configurable
-
-[ -f error_log ] || exit_unsupported
+# requires: uprobe_events error_log
check_error() { # command-with-error-pos-by-^
ftrace_errlog_check 'trace_uprobe' "$1" 'uprobe_events'
@@ -20,4 +17,10 @@ check_error 'p /bin/sh:10(10)^a' # BAD_REFCNT_SUFFIX
check_error 'p /bin/sh:10 ^@+ab' # BAD_FILE_OFFS
check_error 'p /bin/sh:10 ^@symbol' # SYM_ON_UPROBE
+# %return suffix error
+if grep -q "place (uprobe): .*%return.*" README; then
+check_error 'p /bin/sh:10^%hoge' # BAD_ADDR_SUFFIX
+check_error 'p /bin/sh:10(10)^%return' # BAD_REFCNT_SUFFIX
+fi
+
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
index 2b82c80edf69..22bff122b933 100644
--- a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
+++ b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: test for the preemptirqsoff tracer
+# requires: preemptoff:tracer irqsoff:tracer
MOD=preemptirq_delay_test
@@ -27,9 +28,6 @@ unres() { #msg
modprobe $MOD || unres "$MOD module not available"
rmmod $MOD
-grep -q "preemptoff" available_tracers || unsup "preemptoff tracer not enabled"
-grep -q "irqsoff" available_tracers || unsup "irqsoff tracer not enabled"
-
reset_tracer
# Simulate preemptoff section for half a second couple of times
diff --git a/tools/testing/selftests/ftrace/test.d/template b/tools/testing/selftests/ftrace/test.d/template
index e1a5d14c4eaf..2cd8947edf72 100644
--- a/tools/testing/selftests/ftrace/test.d/template
+++ b/tools/testing/selftests/ftrace/test.d/template
@@ -1,6 +1,10 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: %HERE DESCRIBE WHAT THIS DOES%
+# requires: %HERE LIST THE REQUIRED FILES, TRACERS OR README-STRINGS%
+# The required tracer needs :tracer suffix, e.g. function:tracer
+# The required README string needs :README suffix, e.g. "x8/16/32/64":README
+# and the README string is treated as a fixed-string instead of regexp pattern.
# you have to add ".tc" extension for your testcase file
# Note that all tests are run with "errexit" option.
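Filled in, a header following these rules might look like the following (values borrowed from tests elsewhere in this series):

  # requires: kprobe_events function:tracer "x8/16/32/64":README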
diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc
index b0893d7edda3..11be10e1bf96 100644
--- a/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc
+++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc
@@ -1,17 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Test wakeup tracer
+# requires: wakeup:tracer
if ! which chrt ; then
echo "chrt is not found. This test requires nice command."
exit_unresolved
fi
-if ! grep -wq "wakeup" available_tracers ; then
- echo "wakeup tracer is not supported"
- exit_unsupported
-fi
-
echo wakeup > current_tracer
echo 1 > tracing_on
echo 0 > tracing_max_latency
diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc
index b9b6669a623b..3a77198b3c69 100644
--- a/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc
+++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc
@@ -1,17 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Test wakeup RT tracer
+# requires: wakeup_rt:tracer
if ! which chrt ; then
echo "chrt is not found. This test requires chrt command."
exit_unresolved
fi
-if ! grep -wq "wakeup_rt" available_tracers ; then
- echo "wakeup_rt tracer is not supported"
- exit_unsupported
-fi
-
echo wakeup_rt > current_tracer
echo 1 > tracing_on
echo 0 > tracing_max_latency
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
index 3f2aee115f6e..1590d6bfb857 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
@@ -1,24 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger expected fail actions
+# requires: set_event snapshot "snapshot()":README
fail() { #msg
echo $1
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f snapshot ]; then
- echo "snapshot is not supported"
- exit_unsupported
-fi
-
-grep -q "snapshot()" README || exit_unsupported # version issue
-
echo "Test expected snapshot action failure"
echo 'hist:keys=comm:onmatch(sched.sched_wakeup).snapshot()' >> events/sched/sched_waking/trigger && exit_fail
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
index 77be6e1f6e7b..41119e0440e9 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
@@ -1,22 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test field variable support
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist
fail() { #msg
echo $1
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f synthetic_events ]; then
- echo "synthetic event is not supported"
- exit_unsupported
-fi
-
echo "Test field variable support"
echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
index f3eb8aacec0e..9098f1e7433f 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -1,22 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event combined histogram trigger
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist
fail() { #msg
echo $1
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f synthetic_events ]; then
- echo "synthetic event is not supported"
- exit_unsupported
-fi
-
echo "Test create synthetic event"
echo 'waking_latency u64 lat pid_t pid' > synthetic_events
@@ -34,12 +25,12 @@ echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events
echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger
echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger
-echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
-echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
-echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
+echo 'waking_plus_wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking_plus_wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking_plus_wakeup_latency/trigger
ping $LOCALHOST -c 3
-if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
+if ! grep -q "pid:" events/synthetic/waking_plus_wakeup_latency/hist; then
fail "Failed to create combined histogram"
fi
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
index d281f056f980..3ad6e3fd8ac9 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
@@ -1,22 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test multiple actions on hist trigger
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist
fail() { #msg
echo $1
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f synthetic_events ]; then
- echo "synthetic event is not supported"
- exit_unsupported
-fi
-
echo "Test multiple actions on hist trigger"
echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events
TRIGGER1=events/sched/sched_wakeup/trigger
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
index c80007aa9f86..adaabb873ed4 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
@@ -1,19 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger onchange action
+# requires: set_event "onchange(var)":README
fail() { #msg
echo $1
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-grep -q "onchange(var)" README || exit_unsupported # version issue
-
echo "Test onchange action"
echo 'hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio) if comm=="ping"' >> events/sched/sched_waking/trigger
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
index a708f0e7858a..20e39471052e 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
@@ -1,22 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger onmatch action
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist
fail() { #msg
echo $1
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f synthetic_events ]; then
- echo "synthetic event is not supported"
- exit_unsupported
-fi
-
echo "Test create synthetic event"
echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
index dfce6932d8be..f4b03ab7c287 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
@@ -1,22 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger onmatch-onmax action
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist
fail() { #msg
echo $1
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f synthetic_events ]; then
- echo "synthetic event is not supported"
- exit_unsupported
-fi
-
echo "Test create synthetic event"
echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
index 0035995c2194..71c9b5911c70 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
@@ -1,22 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger onmax action
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist
fail() { #msg
echo $1
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f synthetic_events ]; then
- echo "synthetic event is not supported"
- exit_unsupported
-fi
-
echo "Test create synthetic event"
echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
index f546c1b66a9b..67fa328b830f 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
@@ -1,26 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger snapshot action
+# requires: set_event snapshot events/sched/sched_process_fork/hist "onchange(var)":README "snapshot()":README
fail() { #msg
echo $1
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f snapshot ]; then
- echo "snapshot is not supported"
- exit_unsupported
-fi
-
-grep -q "onchange(var)" README || exit_unsupported # version issue
-
-grep -q "snapshot()" README || exit_unsupported # version issue
-
echo "Test snapshot action"
echo 1 > events/sched/enable
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
index df44b14724a4..a152b558b40a 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
@@ -1,22 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test synthetic event create remove
+# requires: set_event synthetic_events
fail() { #msg
echo $1
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f synthetic_events ]; then
- echo "synthetic event is not supported"
- exit_unsupported
-fi
-
echo "Test create synthetic event"
echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
new file mode 100644
index 000000000000..3d65c856eca3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test inter-event histogram trigger trace action with dynamic string param
+# requires: set_event synthetic_events events/sched/sched_process_exec/hist "char name[]' >> synthetic_events":README
+
+fail() { #msg
+ echo $1
+ exit_fail
+}
+
+echo "Test create synthetic event"
+
+echo 'ping_test_latency u64 lat; char filename[]' > synthetic_events
+if [ ! -d events/synthetic/ping_test_latency ]; then
+ fail "Failed to create ping_test_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event using trace action and dynamic strings"
+echo "Test histogram dynamic string variables,simple expression support and trace action"
+
+echo 'hist:key=pid:filenamevar=filename:ts0=common_timestamp.usecs' > events/sched/sched_process_exec/trigger
+echo 'hist:key=pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_process_exec).ping_test_latency($lat,$filenamevar) if comm == "ping"' > events/sched/sched_process_exit/trigger
+echo 'hist:keys=filename,lat:sort=filename,lat' > events/synthetic/ping_test_latency/trigger
+
+ping $LOCALHOST -c 5
+
+if ! grep -q "ping" events/synthetic/ping_test_latency/hist; then
+ fail "Failed to create dynamic string trace action inter-event histogram"
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
index 88e6c3f43006..59216f3cfb12 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test synthetic_events syntax parser
+# requires: set_event synthetic_events
do_reset() {
reset_trigger
@@ -14,16 +15,6 @@ fail() { #msg
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f synthetic_events ]; then
- echo "synthetic event is not supported"
- exit_unsupported
-fi
-
reset_tracer
do_reset
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
new file mode 100644
index 000000000000..ada594fe16cb
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test synthetic_events syntax parser errors
+# requires: synthetic_events error_log
+
+check_error() { # command-with-error-pos-by-^
+ ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events'
+}
+
+check_error 'myevent ^chr arg' # INVALID_TYPE
+check_error 'myevent ^char str[];; int v' # INVALID_TYPE
+check_error 'myevent char ^str]; int v' # INVALID_NAME
+check_error 'myevent char ^str;[]' # INVALID_NAME
+check_error 'myevent ^char str[; int v' # INVALID_TYPE
+check_error '^mye;vent char str[]' # BAD_NAME
+check_error 'myevent char str[]; ^int' # INVALID_FIELD
+check_error '^myevent' # INCOMPLETE_CMD
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc
index 8021d60aafec..c126d2350a6d 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc
@@ -1,24 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger trace action
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist "trace(<synthetic_event>":README
fail() { #msg
echo $1
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f synthetic_events ]; then
- echo "synthetic event is not supported"
- exit_unsupported
-fi
-
-grep -q "trace(<synthetic_event>" README || exit_unsupported # version issue
-
echo "Test create synthetic event"
echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc
index eddb51e1fbf7..c226acee74bf 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test event enable/disable trigger
+# requires: set_event events/sched/sched_process_fork/trigger
# flags: instance
fail() { #msg
@@ -8,16 +9,6 @@ fail() { #msg
exit_fail
}
-if [ ! -f set_event -o ! -d events/sched ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/sched/sched_process_fork/trigger ]; then
- echo "event trigger is not supported"
- exit_unsupported
-fi
-
FEATURE=`grep enable_event events/sched/sched_process_fork/trigger`
if [ -z "$FEATURE" ]; then
echo "event enable/disable trigger is not supported"
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc
index 2dcc2296ebdd..d9a198cb0f81 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test trigger filter
+# requires: set_event events/sched/sched_process_fork/trigger
# flags: instance
fail() { #msg
@@ -8,16 +9,6 @@ fail() { #msg
exit_fail
}
-if [ ! -f set_event -o ! -d events/sched ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/sched/sched_process_fork/trigger ]; then
- echo "event trigger is not supported"
- exit_unsupported
-fi
-
echo "Test trigger filter"
echo 1 > tracing_on
echo 'traceoff if child_pid == 0' > events/sched/sched_process_fork/trigger
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
index fab4431639d3..4562e13cb26b 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test histogram modifiers
+# requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist
# flags: instance
fail() { #msg
@@ -8,21 +9,6 @@ fail() { #msg
exit_fail
}
-if [ ! -f set_event -o ! -d events/sched ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/sched/sched_process_fork/trigger ]; then
- echo "event trigger is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/sched/sched_process_fork/hist ]; then
- echo "hist trigger is not supported"
- exit_unsupported
-fi
-
echo "Test histogram with execname modifier"
echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc
index d44087a2f3d1..52cfe7828e8a 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc
@@ -1,23 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test histogram parser errors
-
-if [ ! -f set_event -o ! -d events/kmem ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/kmem/kmalloc/trigger ]; then
- echo "event trigger is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/kmem/kmalloc/hist ]; then
- echo "hist trigger is not supported"
- exit_unsupported
-fi
-
-[ -f error_log ] || exit_unsupported
+# requires: set_event events/kmem/kmalloc/trigger events/kmem/kmalloc/hist error_log
check_error() { # command-with-error-pos-by-^
ftrace_errlog_check 'hist:kmem:kmalloc' "$1" 'events/kmem/kmalloc/trigger'
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
index 177e8d4c4744..2950bfbc6fce 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test histogram trigger
+# requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist
# flags: instance
fail() { #msg
@@ -8,22 +9,7 @@ fail() { #msg
exit_fail
}
-if [ ! -f set_event -o ! -d events/sched ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/sched/sched_process_fork/trigger ]; then
- echo "event trigger is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/sched/sched_process_fork/hist ]; then
- echo "hist trigger is not supported"
- exit_unsupported
-fi
-
-echo "Test histogram basic tigger"
+echo "Test histogram basic trigger"
echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
index 68ff3f45c720..7129b52da947 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test multiple histogram triggers
+# requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist
# flags: instance
fail() { #msg
@@ -8,21 +9,6 @@ fail() { #msg
exit_fail
}
-if [ ! -f set_event -o ! -d events/sched ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/sched/sched_process_fork/trigger ]; then
- echo "event trigger is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/sched/sched_process_fork/hist ]; then
- echo "hist trigger is not supported"
- exit_unsupported
-fi
-
echo "Test histogram multiple triggers"
echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
index ac738500d17f..33f5bdee387f 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
@@ -1,27 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test snapshot-trigger
+# requires: set_event events/sched/sched_process_fork/trigger snapshot
fail() { #msg
echo $1
exit_fail
}
-if [ ! -f set_event -o ! -d events/sched ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/sched/sched_process_fork/trigger ]; then
- echo "event trigger is not supported"
- exit_unsupported
-fi
-
-if [ ! -f snapshot ]; then
- echo "snapshot is not supported"
- exit_unsupported
-fi
-
FEATURE=`grep snapshot events/sched/sched_process_fork/trigger`
if [ -z "$FEATURE" ]; then
echo "snapshot trigger is not supported"
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc
index 398c05c4d2a7..320ea9b3c6cd 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc
@@ -1,29 +1,20 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test stacktrace-trigger
+# requires: set_event events/sched/sched_process_fork/trigger
fail() { #msg
echo $1
exit_fail
}
-if [ ! -f set_event -o ! -d events/sched ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/sched/sched_process_fork/trigger ]; then
- echo "event trigger is not supported"
- exit_unsupported
-fi
-
FEATURE=`grep stacktrace events/sched/sched_process_fork/trigger`
if [ -z "$FEATURE" ]; then
echo "stacktrace trigger is not supported"
exit_unsupported
fi
-echo "Test stacktrace tigger"
+echo "Test stacktrace trigger"
echo 0 > trace
echo 0 > options/stacktrace
echo 'stacktrace' > events/sched/sched_process_fork/trigger
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
index ab6bedb25736..68f3af9d9910 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: trace_marker trigger - test histogram trigger
+# requires: set_event events/ftrace/print/trigger events/ftrace/print/hist
# flags: instance
fail() { #msg
@@ -8,27 +9,7 @@ fail() { #msg
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -d events/ftrace/print ]; then
- echo "event trace_marker is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/ftrace/print/trigger ]; then
- echo "event trigger is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/ftrace/print/hist ]; then
- echo "hist trigger is not supported"
- exit_unsupported
-fi
-
-echo "Test histogram trace_marker tigger"
+echo "Test histogram trace_marker trigger"
echo 'hist:keys=common_pid' > events/ftrace/print/trigger
for i in `seq 1 10` ; do echo "hello" > trace_marker; done
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
index df246e505af7..27da2dba9b9e 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: trace_marker trigger - test snapshot trigger
+# requires: set_event snapshot events/ftrace/print/trigger
# flags: instance
fail() { #msg
@@ -8,26 +9,6 @@ fail() { #msg
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f snapshot ]; then
- echo "snapshot is not supported"
- exit_unsupported
-fi
-
-if [ ! -d events/ftrace/print ]; then
- echo "event trace_marker is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/ftrace/print/trigger ]; then
- echo "event trigger is not supported"
- exit_unsupported
-fi
-
test_trace() {
file=$1
x=$2
@@ -46,7 +27,7 @@ test_trace() {
done
}
-echo "Test snapshot trace_marker tigger"
+echo "Test snapshot trace_marker trigger"
echo 'snapshot' > events/ftrace/print/trigger
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
index 18b4d1c2807e..531139f41e94 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: trace_marker trigger - test histogram with synthetic event against kernel event
+# requires: set_event synthetic_events events/sched/sched_waking events/ftrace/print/trigger events/ftrace/print/hist
# flags:
fail() { #msg
@@ -8,36 +9,6 @@ fail() { #msg
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f synthetic_events ]; then
- echo "synthetic events not supported"
- exit_unsupported
-fi
-
-if [ ! -d events/ftrace/print ]; then
- echo "event trace_marker is not supported"
- exit_unsupported
-fi
-
-if [ ! -d events/sched/sched_waking ]; then
- echo "event sched_waking is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/ftrace/print/trigger ]; then
- echo "event trigger is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/ftrace/print/hist ]; then
- echo "hist trigger is not supported"
- exit_unsupported
-fi
-
echo "Test histogram kernel event to trace_marker latency histogram trigger"
echo 'latency u64 lat' > synthetic_events
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
index dd262d6d0db6..cc99cbb06d5e 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
@@ -1,6 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: trace_marker trigger - test histogram with synthetic event
+# requires: set_event synthetic_events events/ftrace/print/trigger events/ftrace/print/hist
# flags:
fail() { #msg
@@ -8,31 +9,6 @@ fail() { #msg
exit_fail
}
-if [ ! -f set_event ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f synthetic_events ]; then
- echo "synthetic events not supported"
- exit_unsupported
-fi
-
-if [ ! -d events/ftrace/print ]; then
- echo "event trace_marker is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/ftrace/print/trigger ]; then
- echo "event trigger is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/ftrace/print/hist ]; then
- echo "hist trigger is not supported"
- exit_unsupported
-fi
-
echo "Test histogram trace_marker to trace_marker latency histogram trigger"
echo 'latency u64 lat' > synthetic_events
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc
index d5d2dcbc9cab..9ca04678f4da 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc
@@ -1,22 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test traceon/off trigger
+# requires: set_event events/sched/sched_process_fork/trigger
fail() { #msg
echo $1
exit_fail
}
-if [ ! -f set_event -o ! -d events/sched ]; then
- echo "event tracing is not supported"
- exit_unsupported
-fi
-
-if [ ! -f events/sched/sched_process_fork/trigger ]; then
- echo "event trigger is not supported"
- exit_unsupported
-fi
-
echo "Test traceoff trigger"
echo 1 > tracing_on
echo 'traceoff' > events/sched/sched_process_fork/trigger
diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh
index 8b2b6088540d..4a974bc03385 100755
--- a/tools/testing/selftests/gen_kselftest_tar.sh
+++ b/tools/testing/selftests/gen_kselftest_tar.sh
@@ -49,6 +49,11 @@ main()
# directory
./kselftest_install.sh "$install_dir"
(cd "$install_work"; tar $copts "$dest"/kselftest${ext} $install_name)
+
+ # Don't put the message at the actual end as people may be parsing the
+ # "archive created" line in their scripts.
+ echo -e "\nConsider using 'make gen_tar' instead of this script\n"
+
echo "Kselftest archive kselftest${ext} created!"
# clean up top-level install work directory
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index 3702dbcc90a7..afd42387e8b2 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -63,6 +63,8 @@ ALL_TESTS="$ALL_TESTS 0008:150:1"
ALL_TESTS="$ALL_TESTS 0009:150:1"
ALL_TESTS="$ALL_TESTS 0010:1:1"
ALL_TESTS="$ALL_TESTS 0011:1:1"
+ALL_TESTS="$ALL_TESTS 0012:1:1"
+ALL_TESTS="$ALL_TESTS 0013:1:1"
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
@@ -128,7 +130,7 @@ test_reqs()
if [[ $KMOD_VERSION -le 19 ]]; then
echo "$0: You need at least kmod 20" >&2
echo "kmod <= 19 is buggy, for details see:" >&2
- echo "http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2
+ echo "https://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2
exit $ksft_skip
fi
@@ -341,7 +343,7 @@ kmod_test_0001_driver()
kmod_defaults_driver
config_num_threads 1
- printf '\000' >"$DIR"/config_test_driver
+ printf $NAME >"$DIR"/config_test_driver
config_trigger ${FUNCNAME[0]}
config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND
}
@@ -352,7 +354,7 @@ kmod_test_0001_fs()
kmod_defaults_fs
config_num_threads 1
- printf '\000' >"$DIR"/config_test_fs
+ printf $NAME >"$DIR"/config_test_fs
config_trigger ${FUNCNAME[0]}
config_expect_result ${FUNCNAME[0]} -EINVAL
}
@@ -470,6 +472,38 @@ kmod_test_0011()
echo "$MODPROBE" > /proc/sys/kernel/modprobe
}
+kmod_check_visibility()
+{
+ local name="$1"
+ local cmd="$2"
+
+ modprobe $DEFAULT_KMOD_DRIVER
+
+ local priv=$(eval $cmd)
+ local unpriv=$(capsh --drop=CAP_SYSLOG -- -c "$cmd")
+
+ if [ "$priv" = "$unpriv" ] || \
+ [ "${priv:0:3}" = "0x0" ] || \
+ [ "${unpriv:0:3}" != "0x0" ] ; then
+ echo "${FUNCNAME[0]}: FAIL, $name visible to unpriv: '$priv' vs '$unpriv'" >&2
+ exit 1
+ else
+ echo "${FUNCNAME[0]}: OK!"
+ fi
+}
+
+kmod_test_0012()
+{
+ kmod_check_visibility /proc/modules \
+ "grep '^${DEFAULT_KMOD_DRIVER}\b' /proc/modules | awk '{print \$NF}'"
+}
+
+kmod_test_0013()
+{
+ kmod_check_visibility '/sys/module/*/sections/*' \
+ "cat /sys/module/${DEFAULT_KMOD_DRIVER}/sections/.*text | head -n1"
+}
+
list_tests()
{
echo "Test ID list:"
@@ -489,6 +523,8 @@ list_tests()
echo "0009 x $(get_test_count 0009) - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()"
echo "0010 x $(get_test_count 0010) - test nonexistent modprobe path"
echo "0011 x $(get_test_count 0011) - test completely disabling module autoloading"
+ echo "0012 x $(get_test_count 0012) - test /proc/modules address visibility under CAP_SYSLOG"
+ echo "0013 x $(get_test_count 0013) - test /sys/module/*/sections/* visibility under CAP_SYSLOG"
}
usage()
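
Tests 0012 and 0013 check that module load addresses are hidden from processes lacking CAP_SYSLOG: the privileged read must differ from the capability-dropped read, and only the latter may be all zeros. Assuming the script's existing -t option for selecting a single test ID (not shown in this hunk) and that capsh from libcap is installed, a targeted run would look like:

# as root, on a kernel that can load the test_kmod driver
./kmod.sh -t 0012    # /proc/modules visibility under CAP_SYSLOG
./kmod.sh -t 0013    # /sys/module/*/sections/* visibility under CAP_SYSLOG
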
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 0ac49d91a260..8d50483fe204 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -1,11 +1,43 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * kselftest.h: kselftest framework return codes to include from
- * selftests.
+ * kselftest.h: low-level kselftest framework to include from
+ * selftest programs. When possible, please use
+ * kselftest_harness.h instead.
*
* Copyright (c) 2014 Shuah Khan <shuahkh@osg.samsung.com>
* Copyright (c) 2014 Samsung Electronics Co., Ltd.
*
+ * Using this API consists of first counting how many tests your code
+ * has to run, and then starting up the reporting:
+ *
+ * ksft_print_header();
+ * ksft_set_plan(total_number_of_tests);
+ *
+ * For each test, report any progress, debugging, etc with:
+ *
+ * ksft_print_msg(fmt, ...);
+ *
+ * and finally report the pass/fail/skip/xfail state of the test with one of:
+ *
+ * ksft_test_result(condition, fmt, ...);
+ * ksft_test_result_pass(fmt, ...);
+ * ksft_test_result_fail(fmt, ...);
+ * ksft_test_result_skip(fmt, ...);
+ * ksft_test_result_xfail(fmt, ...);
+ * ksft_test_result_error(fmt, ...);
+ *
+ * When all tests are finished, clean up and exit the program with one of:
+ *
+ * ksft_exit(condition);
+ * ksft_exit_pass();
+ * ksft_exit_fail();
+ *
+ * If the program wants to report details on why the entire program has
+ * failed, it can instead exit with a message (this is usually done when
+ * the program is aborting before finishing all tests):
+ *
+ * ksft_exit_fail_msg(fmt, ...);
+ *
*/
#ifndef __KSELFTEST_H
#define __KSELFTEST_H
@@ -36,7 +68,7 @@ struct ksft_count {
static struct ksft_count ksft_cnt;
static unsigned int ksft_plan;
-static inline int ksft_test_num(void)
+static inline unsigned int ksft_test_num(void)
{
return ksft_cnt.ksft_pass + ksft_cnt.ksft_fail +
ksft_cnt.ksft_xfail + ksft_cnt.ksft_xpass +
@@ -74,7 +106,7 @@ static inline void ksft_print_cnts(void)
if (ksft_plan != ksft_test_num())
printf("# Planned tests != run tests (%u != %u)\n",
ksft_plan, ksft_test_num());
- printf("# Pass %d Fail %d Xfail %d Xpass %d Skip %d Error %d\n",
+ printf("# Totals: pass:%d fail:%d xfail:%d xpass:%d skip:%d error:%d\n",
ksft_cnt.ksft_pass, ksft_cnt.ksft_fail,
ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass,
ksft_cnt.ksft_xskip, ksft_cnt.ksft_error);
@@ -120,6 +152,32 @@ static inline void ksft_test_result_fail(const char *msg, ...)
va_end(args);
}
+/**
+ * ksft_test_result() - Report test success based on truth of condition
+ *
+ * @condition: if true, report test success, otherwise failure.
+ */
+#define ksft_test_result(condition, fmt, ...) do { \
+ if (!!(condition)) \
+ ksft_test_result_pass(fmt, ##__VA_ARGS__);\
+ else \
+ ksft_test_result_fail(fmt, ##__VA_ARGS__);\
+ } while (0)
+
+static inline void ksft_test_result_xfail(const char *msg, ...)
+{
+ int saved_errno = errno;
+ va_list args;
+
+ ksft_cnt.ksft_xfail++;
+
+ va_start(args, msg);
+ printf("ok %d # XFAIL ", ksft_test_num());
+ errno = saved_errno;
+ vprintf(msg, args);
+ va_end(args);
+}
+
static inline void ksft_test_result_skip(const char *msg, ...)
{
int saved_errno = errno;
@@ -128,12 +186,13 @@ static inline void ksft_test_result_skip(const char *msg, ...)
ksft_cnt.ksft_xskip++;
va_start(args, msg);
- printf("not ok %d # SKIP ", ksft_test_num());
+ printf("ok %d # SKIP ", ksft_test_num());
errno = saved_errno;
vprintf(msg, args);
va_end(args);
}
+/* TODO: how does "error" differ from "fail" or "skip"? */
static inline void ksft_test_result_error(const char *msg, ...)
{
int saved_errno = errno;
@@ -156,11 +215,22 @@ static inline int ksft_exit_pass(void)
static inline int ksft_exit_fail(void)
{
- printf("Bail out!\n");
ksft_print_cnts();
exit(KSFT_FAIL);
}
+/**
+ * ksft_exit() - Exit selftest based on truth of condition
+ *
+ * @condition: if true, exit self test with success, otherwise fail.
+ */
+#define ksft_exit(condition) do { \
+ if (!!(condition)) \
+ ksft_exit_pass(); \
+ else \
+ ksft_exit_fail(); \
+ } while (0)
+
static inline int ksft_exit_fail_msg(const char *msg, ...)
{
int saved_errno = errno;
@@ -190,18 +260,30 @@ static inline int ksft_exit_xpass(void)
static inline int ksft_exit_skip(const char *msg, ...)
{
- if (msg) {
- int saved_errno = errno;
- va_list args;
+ int saved_errno = errno;
+ va_list args;
+
+ va_start(args, msg);
- va_start(args, msg);
- printf("not ok %d # SKIP ", 1 + ksft_test_num());
+ /*
+ * FIXME: several tests misuse ksft_exit_skip so produce
+ * something sensible if some tests have already been run
+ * or a plan has been printed. Those tests should use
+ * ksft_test_result_skip or ksft_exit_fail_msg instead.
+ */
+ if (ksft_plan || ksft_test_num()) {
+ ksft_cnt.ksft_xskip++;
+ printf("ok %d # SKIP ", 1 + ksft_test_num());
+ } else {
+ printf("1..0 # SKIP ");
+ }
+ if (msg) {
errno = saved_errno;
vprintf(msg, args);
va_end(args);
- } else {
- ksft_print_cnts();
}
+ if (ksft_test_num())
+ ksft_print_cnts();
exit(KSFT_SKIP);
}
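
Putting the kselftest.h changes together: ksft_test_result() and ksft_exit() wrap the pass/fail pairs behind a condition, ksft_test_result_xfail() gains its own counter and TAP line, and skips now report "ok ... # SKIP" as TAP expects. A minimal, hypothetical consumer of the updated API:

#include "kselftest.h"

int main(void)
{
	ksft_print_header();
	ksft_set_plan(3);				/* three results reported below */

	ksft_test_result(1 + 1 == 2, "arithmetic\n");	/* pass/fail from a condition */
	ksft_test_result_skip("optional feature not present\n");
	ksft_test_result_xfail("known-broken case, expected to fail\n");

	/* Counters are local to the including file; print totals and exit. */
	ksft_exit(ksft_cnt.ksft_fail == 0);
	/* not reached */
}
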
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index 676b3a8b114d..cc9c846585f0 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -53,6 +53,10 @@ run_one()
settings="$BASE_DIR/$DIR/settings"
if [ -r "$settings" ] ; then
while read line ; do
+ # Skip comments.
+ if echo "$line" | grep -q '^#'; then
+ continue
+ fi
field=$(echo "$line" | cut -d= -f1)
value=$(echo "$line" | cut -d= -f2-)
eval "kselftest_$field"="$value"
@@ -77,10 +81,10 @@ run_one()
echo "ok $test_num $TEST_HDR_MSG") ||
(rc=$?; \
if [ $rc -eq $skip_rc ]; then \
- echo "not ok $test_num $TEST_HDR_MSG # SKIP"
+ echo "ok $test_num $TEST_HDR_MSG # SKIP"
elif [ $rc -eq $timeout_rc ]; then \
echo "#"
- echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT"
+ echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT $kselftest_timeout seconds"
else
echo "not ok $test_num $TEST_HDR_MSG # exit=$rc"
fi)
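
Two small runner behaviors change here: per-directory settings files may now carry comment lines, and a timed-out test is reported with the configured limit, e.g. "not ok N ... # TIMEOUT 120 seconds". A hypothetical settings file (path illustrative) exercising both:

# tools/testing/selftests/<collection>/settings
# Comment lines like this one are now skipped by run_one().
timeout=120
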
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 2bb8c81fc0b4..f19804df244c 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -50,7 +50,9 @@
#ifndef __KSELFTEST_HARNESS_H
#define __KSELFTEST_HARNESS_H
+#ifndef _GNU_SOURCE
#define _GNU_SOURCE
+#endif
#include <asm/types.h>
#include <errno.h>
#include <stdbool.h>
@@ -58,10 +60,13 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
+#include "kselftest.h"
+
#define TEST_TIMEOUT_DEFAULT 30
/* Utilities exposed to the test definitions */
@@ -104,26 +109,28 @@
/* Unconditional logger for internal use. */
#define __TH_LOG(fmt, ...) \
- fprintf(TH_LOG_STREAM, "%s:%d:%s:" fmt "\n", \
+ fprintf(TH_LOG_STREAM, "# %s:%d:%s:" fmt "\n", \
__FILE__, __LINE__, _metadata->name, ##__VA_ARGS__)
/**
- * XFAIL(statement, fmt, ...)
+ * SKIP(statement, fmt, ...)
*
- * @statement: statement to run after reporting XFAIL
+ * @statement: statement to run after reporting SKIP
* @fmt: format string
* @...: optional arguments
*
- * This forces a "pass" after reporting a failure with an XFAIL prefix,
+ * This forces a "pass" after reporting why something is being skipped
* and runs "statement", which is usually "return" or "goto skip".
*/
-#define XFAIL(statement, fmt, ...) do { \
+#define SKIP(statement, fmt, ...) do { \
+ snprintf(_metadata->results->reason, \
+ sizeof(_metadata->results->reason), fmt, ##__VA_ARGS__); \
if (TH_LOG_ENABLED) { \
- fprintf(TH_LOG_STREAM, "[ XFAIL! ] " fmt "\n", \
- ##__VA_ARGS__); \
+ fprintf(TH_LOG_STREAM, "# SKIP %s\n", \
+ _metadata->results->reason); \
} \
- /* TODO: find a way to pass xfail to test runner process. */ \
_metadata->passed = 1; \
+ _metadata->skip = 1; \
_metadata->trigger = 0; \
statement; \
} while (0)
@@ -168,9 +175,17 @@
#define __TEST_IMPL(test_name, _signal) \
static void test_name(struct __test_metadata *_metadata); \
+ static inline void wrapper_##test_name( \
+ struct __test_metadata *_metadata, \
+ struct __fixture_variant_metadata *variant) \
+ { \
+ test_name(_metadata); \
+ } \
static struct __test_metadata _##test_name##_object = \
- { .name = "global." #test_name, \
- .fn = &test_name, .termsig = _signal, \
+ { .name = #test_name, \
+ .fn = &wrapper_##test_name, \
+ .fixture = &_fixture_global, \
+ .termsig = _signal, \
.timeout = TEST_TIMEOUT_DEFAULT, }; \
static void __attribute__((constructor)) _register_##test_name(void) \
{ \
@@ -187,8 +202,9 @@
*
* .. code-block:: c
*
- * FIXTURE_DATA(datatype name)
+ * FIXTURE_DATA(datatype_name)
*
+ * Almost always, you want just FIXTURE() instead (see below).
* This call may be used when the type of the fixture data
* is needed. In general, this should not be needed unless
* the *self* is being passed to a helper directly.
@@ -203,7 +219,7 @@
*
* .. code-block:: c
*
- * FIXTURE(datatype name) {
+ * FIXTURE(fixture_name) {
* type property1;
* ...
* };
@@ -212,10 +228,13 @@
* populated and cleaned up using FIXTURE_SETUP() and FIXTURE_TEARDOWN().
*/
#define FIXTURE(fixture_name) \
+ FIXTURE_VARIANT(fixture_name); \
+ static struct __fixture_metadata _##fixture_name##_fixture_object = \
+ { .name = #fixture_name, }; \
static void __attribute__((constructor)) \
_register_##fixture_name##_data(void) \
{ \
- __fixture_count++; \
+ __register_fixture(&_##fixture_name##_fixture_object); \
} \
FIXTURE_DATA(fixture_name)
@@ -227,7 +246,7 @@
*
* .. code-block:: c
*
- * FIXTURE_SETUP(fixture name) { implementation }
+ * FIXTURE_SETUP(fixture_name) { implementation }
*
* Populates the required "setup" function for a fixture. An instance of the
* datatype defined with FIXTURE_DATA() will be exposed as *self* for the
@@ -241,7 +260,10 @@
#define FIXTURE_SETUP(fixture_name) \
void fixture_name##_setup( \
struct __test_metadata __attribute__((unused)) *_metadata, \
- FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+ const FIXTURE_VARIANT(fixture_name) \
+ __attribute__((unused)) *variant)
+
/**
* FIXTURE_TEARDOWN(fixture_name)
* *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
@@ -250,7 +272,7 @@
*
* .. code-block:: c
*
- * FIXTURE_TEARDOWN(fixture name) { implementation }
+ * FIXTURE_TEARDOWN(fixture_name) { implementation }
*
* Populates the required "teardown" function for a fixture. An instance of the
* datatype defined with FIXTURE_DATA() will be exposed as *self* for the
@@ -264,6 +286,59 @@
FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
/**
+ * FIXTURE_VARIANT(fixture_name) - Optionally called once per fixture
+ * to declare fixture variant
+ *
+ * @fixture_name: fixture name
+ *
+ * .. code-block:: c
+ *
+ * FIXTURE_VARIANT(fixture_name) {
+ * type property1;
+ * ...
+ * };
+ *
+ * Defines type of constant parameters provided to FIXTURE_SETUP() and TEST_F()
+ * as *variant*. Variants allow the same tests to be run with different
+ * arguments.
+ */
+#define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name
+
+/**
+ * FIXTURE_VARIANT_ADD(fixture_name, variant_name) - Called once per fixture
+ * variant to setup and register the data
+ *
+ * @fixture_name: fixture name
+ * @variant_name: name of the parameter set
+ *
+ * .. code-block:: c
+ *
+ * FIXTURE_VARIANT_ADD(fixture_name, variant_name) {
+ * .property1 = val1,
+ * ...
+ * };
+ *
+ * Defines a variant of the test fixture, provided to FIXTURE_SETUP() and
+ * TEST_F() as *variant*. Tests of each fixture will be run once for each
+ * variant.
+ */
+#define FIXTURE_VARIANT_ADD(fixture_name, variant_name) \
+ extern FIXTURE_VARIANT(fixture_name) \
+ _##fixture_name##_##variant_name##_variant; \
+ static struct __fixture_variant_metadata \
+ _##fixture_name##_##variant_name##_object = \
+ { .name = #variant_name, \
+ .data = &_##fixture_name##_##variant_name##_variant}; \
+ static void __attribute__((constructor)) \
+ _register_##fixture_name##_##variant_name(void) \
+ { \
+ __register_fixture_variant(&_##fixture_name##_fixture_object, \
+ &_##fixture_name##_##variant_name##_object); \
+ } \
+ FIXTURE_VARIANT(fixture_name) \
+ _##fixture_name##_##variant_name##_variant =
+
+/**
* TEST_F(fixture_name, test_name) - Emits test registration and helpers for
* fixture-based test cases
*
@@ -293,24 +368,27 @@
#define __TEST_F_IMPL(fixture_name, test_name, signal, tmout) \
static void fixture_name##_##test_name( \
struct __test_metadata *_metadata, \
- FIXTURE_DATA(fixture_name) *self); \
+ FIXTURE_DATA(fixture_name) *self, \
+ const FIXTURE_VARIANT(fixture_name) *variant); \
static inline void wrapper_##fixture_name##_##test_name( \
- struct __test_metadata *_metadata) \
+ struct __test_metadata *_metadata, \
+ struct __fixture_variant_metadata *variant) \
{ \
/* fixture data is alloced, setup, and torn down per call. */ \
FIXTURE_DATA(fixture_name) self; \
memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
- fixture_name##_setup(_metadata, &self); \
+ fixture_name##_setup(_metadata, &self, variant->data); \
/* Let setup failure terminate early. */ \
if (!_metadata->passed) \
return; \
- fixture_name##_##test_name(_metadata, &self); \
+ fixture_name##_##test_name(_metadata, &self, variant->data); \
fixture_name##_teardown(_metadata, &self); \
} \
static struct __test_metadata \
_##fixture_name##_##test_name##_object = { \
- .name = #fixture_name "." #test_name, \
+ .name = #test_name, \
.fn = &wrapper_##fixture_name##_##test_name, \
+ .fixture = &_##fixture_name##_fixture_object, \
.termsig = signal, \
.timeout = tmout, \
}; \
@@ -321,7 +399,9 @@
} \
static void fixture_name##_##test_name( \
struct __test_metadata __attribute__((unused)) *_metadata, \
- FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+ const FIXTURE_VARIANT(fixture_name) \
+ __attribute__((unused)) *variant)
/**
* TEST_HARNESS_MAIN - Simple wrapper to run the test harness
@@ -600,20 +680,53 @@
__bail(_assert, _metadata->no_print, _metadata->step))
#define __INC_STEP(_metadata) \
- if (_metadata->passed && _metadata->step < 255) \
+ /* Keep "step" below 255 (which is used for "SKIP" reporting). */ \
+ if (_metadata->passed && _metadata->step < 253) \
_metadata->step++;
+#define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1))
+
#define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \
/* Avoid multiple evaluation of the cases */ \
__typeof__(_expected) __exp = (_expected); \
__typeof__(_seen) __seen = (_seen); \
if (_assert) __INC_STEP(_metadata); \
if (!(__exp _t __seen)) { \
- unsigned long long __exp_print = (uintptr_t)__exp; \
- unsigned long long __seen_print = (uintptr_t)__seen; \
- __TH_LOG("Expected %s (%llu) %s %s (%llu)", \
- _expected_str, __exp_print, #_t, \
- _seen_str, __seen_print); \
+ /* Report with actual signedness to avoid weird output. */ \
+ switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \
+ case 0: { \
+ unsigned long long __exp_print = (uintptr_t)__exp; \
+ unsigned long long __seen_print = (uintptr_t)__seen; \
+ __TH_LOG("Expected %s (%llu) %s %s (%llu)", \
+ _expected_str, __exp_print, #_t, \
+ _seen_str, __seen_print); \
+ break; \
+ } \
+ case 1: { \
+ unsigned long long __exp_print = (uintptr_t)__exp; \
+ long long __seen_print = (intptr_t)__seen; \
+ __TH_LOG("Expected %s (%llu) %s %s (%lld)", \
+ _expected_str, __exp_print, #_t, \
+ _seen_str, __seen_print); \
+ break; \
+ } \
+ case 2: { \
+ long long __exp_print = (intptr_t)__exp; \
+ unsigned long long __seen_print = (uintptr_t)__seen; \
+ __TH_LOG("Expected %s (%lld) %s %s (%llu)", \
+ _expected_str, __exp_print, #_t, \
+ _seen_str, __seen_print); \
+ break; \
+ } \
+ case 3: { \
+ long long __exp_print = (intptr_t)__exp; \
+ long long __seen_print = (intptr_t)__seen; \
+ __TH_LOG("Expected %s (%lld) %s %s (%lld)", \
+ _expected_str, __exp_print, #_t, \
+ _seen_str, __seen_print); \
+ break; \
+ } \
+ } \
_metadata->passed = 0; \
/* Ensure the optional handler is triggered */ \
_metadata->trigger = 1; \
@@ -631,30 +744,90 @@
} \
} while (0); OPTIONAL_HANDLER(_assert)
+/* List helpers */
+#define __LIST_APPEND(head, item) \
+{ \
+ /* Circular linked list where only prev is circular. */ \
+ if (head == NULL) { \
+ head = item; \
+ item->next = NULL; \
+ item->prev = item; \
+ return; \
+ } \
+ if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) { \
+ item->next = NULL; \
+ item->prev = head->prev; \
+ item->prev->next = item; \
+ head->prev = item; \
+ } else { \
+ item->next = head; \
+ item->next->prev = item; \
+ item->prev = item; \
+ head = item; \
+ } \
+}
+
+struct __test_results {
+ char reason[1024]; /* Reason for test result */
+};
+
+struct __test_metadata;
+struct __fixture_variant_metadata;
+
+/* Contains all the information about a fixture. */
+struct __fixture_metadata {
+ const char *name;
+ struct __test_metadata *tests;
+ struct __fixture_variant_metadata *variant;
+ struct __fixture_metadata *prev, *next;
+} _fixture_global __attribute__((unused)) = {
+ .name = "global",
+ .prev = &_fixture_global,
+};
+
+static struct __fixture_metadata *__fixture_list = &_fixture_global;
+static int __constructor_order;
+
+#define _CONSTRUCTOR_ORDER_FORWARD 1
+#define _CONSTRUCTOR_ORDER_BACKWARD -1
+
+static inline void __register_fixture(struct __fixture_metadata *f)
+{
+ __LIST_APPEND(__fixture_list, f);
+}
+
+struct __fixture_variant_metadata {
+ const char *name;
+ const void *data;
+ struct __fixture_variant_metadata *prev, *next;
+};
+
+static inline void
+__register_fixture_variant(struct __fixture_metadata *f,
+ struct __fixture_variant_metadata *variant)
+{
+ __LIST_APPEND(f->variant, variant);
+}
+
/* Contains all the information for test execution and status checking. */
struct __test_metadata {
const char *name;
- void (*fn)(struct __test_metadata *);
+ void (*fn)(struct __test_metadata *,
+ struct __fixture_variant_metadata *);
pid_t pid; /* pid of test when being run */
+ struct __fixture_metadata *fixture;
int termsig;
int passed;
+ int skip; /* did SKIP get used? */
int trigger; /* extra handler after the evaluation */
int timeout; /* seconds to wait for test timeout */
bool timed_out; /* did this test timeout instead of exiting? */
__u8 step;
bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
+ struct __test_results *results;
struct __test_metadata *prev, *next;
};
-/* Storage for the (global) tests to be run. */
-static struct __test_metadata *__test_list;
-static unsigned int __test_count;
-static unsigned int __fixture_count;
-static int __constructor_order;
-
-#define _CONSTRUCTOR_ORDER_FORWARD 1
-#define _CONSTRUCTOR_ORDER_BACKWARD -1
-
/*
* Since constructors are called in reverse order, reverse the test
* list so tests are run in source declaration order.
@@ -666,25 +839,7 @@ static int __constructor_order;
*/
static inline void __register_test(struct __test_metadata *t)
{
- __test_count++;
- /* Circular linked list where only prev is circular. */
- if (__test_list == NULL) {
- __test_list = t;
- t->next = NULL;
- t->prev = t;
- return;
- }
- if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) {
- t->next = NULL;
- t->prev = __test_list->prev;
- t->prev->next = t;
- __test_list->prev = t;
- } else {
- t->next = __test_list;
- t->next->prev = t;
- t->prev = t;
- __test_list = t;
- }
+ __LIST_APPEND(t->fixture->tests, t);
}
static inline int __bail(int for_realz, bool no_print, __u8 step)
@@ -705,12 +860,12 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext)
/* Sanity check handler execution environment. */
if (!t) {
fprintf(TH_LOG_STREAM,
- "no active test in SIGALRM handler!?\n");
+ "# no active test in SIGALRM handler!?\n");
abort();
}
if (sig != SIGALRM || sig != info->si_signo) {
fprintf(TH_LOG_STREAM,
- "%s: SIGALRM handler caught signal %d!?\n",
+ "# %s: SIGALRM handler caught signal %d!?\n",
t->name, sig != SIGALRM ? sig : info->si_signo);
abort();
}
@@ -731,7 +886,7 @@ void __wait_for_test(struct __test_metadata *t)
if (sigaction(SIGALRM, &action, &saved_action)) {
t->passed = 0;
fprintf(TH_LOG_STREAM,
- "%s: unable to install SIGALRM handler\n",
+ "# %s: unable to install SIGALRM handler\n",
t->name);
return;
}
@@ -743,7 +898,7 @@ void __wait_for_test(struct __test_metadata *t)
if (sigaction(SIGALRM, &saved_action, NULL)) {
t->passed = 0;
fprintf(TH_LOG_STREAM,
- "%s: unable to uninstall SIGALRM handler\n",
+ "# %s: unable to uninstall SIGALRM handler\n",
t->name);
return;
}
@@ -752,85 +907,152 @@ void __wait_for_test(struct __test_metadata *t)
if (t->timed_out) {
t->passed = 0;
fprintf(TH_LOG_STREAM,
- "%s: Test terminated by timeout\n", t->name);
+ "# %s: Test terminated by timeout\n", t->name);
} else if (WIFEXITED(status)) {
- t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0;
if (t->termsig != -1) {
+ t->passed = 0;
fprintf(TH_LOG_STREAM,
- "%s: Test exited normally "
- "instead of by signal (code: %d)\n",
- t->name,
- WEXITSTATUS(status));
- } else if (!t->passed) {
- fprintf(TH_LOG_STREAM,
- "%s: Test failed at step #%d\n",
+ "# %s: Test exited normally instead of by signal (code: %d)\n",
t->name,
WEXITSTATUS(status));
+ } else {
+ switch (WEXITSTATUS(status)) {
+ /* Success */
+ case 0:
+ t->passed = 1;
+ break;
+ /* SKIP */
+ case 255:
+ t->passed = 1;
+ t->skip = 1;
+ break;
+ /* Other failure, assume step report. */
+ default:
+ t->passed = 0;
+ fprintf(TH_LOG_STREAM,
+ "# %s: Test failed at step #%d\n",
+ t->name,
+ WEXITSTATUS(status));
+ }
}
} else if (WIFSIGNALED(status)) {
t->passed = 0;
if (WTERMSIG(status) == SIGABRT) {
fprintf(TH_LOG_STREAM,
- "%s: Test terminated by assertion\n",
+ "# %s: Test terminated by assertion\n",
t->name);
} else if (WTERMSIG(status) == t->termsig) {
t->passed = 1;
} else {
fprintf(TH_LOG_STREAM,
- "%s: Test terminated unexpectedly "
- "by signal %d\n",
+ "# %s: Test terminated unexpectedly by signal %d\n",
t->name,
WTERMSIG(status));
}
} else {
fprintf(TH_LOG_STREAM,
- "%s: Test ended in some other way [%u]\n",
+ "# %s: Test ended in some other way [%u]\n",
t->name,
status);
}
}
-void __run_test(struct __test_metadata *t)
+void __run_test(struct __fixture_metadata *f,
+ struct __fixture_variant_metadata *variant,
+ struct __test_metadata *t)
{
+ /* reset test struct */
t->passed = 1;
+ t->skip = 0;
t->trigger = 0;
- printf("[ RUN ] %s\n", t->name);
+ t->step = 0;
+ t->no_print = 0;
+ memset(t->results->reason, 0, sizeof(t->results->reason));
+
+ ksft_print_msg(" RUN %s%s%s.%s ...\n",
+ f->name, variant->name[0] ? "." : "", variant->name, t->name);
+
+ /* Make sure output buffers are flushed before fork */
+ fflush(stdout);
+ fflush(stderr);
+
t->pid = fork();
if (t->pid < 0) {
- printf("ERROR SPAWNING TEST CHILD\n");
+ ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
t->passed = 0;
} else if (t->pid == 0) {
- t->fn(t);
- /* return the step that failed or 0 */
- _exit(t->passed ? 0 : t->step);
+ t->fn(t, variant);
+ if (t->skip)
+ _exit(255);
+ /* Pass is exit 0 */
+ if (t->passed)
+ _exit(0);
+ /* Something else happened, report the step. */
+ _exit(t->step);
} else {
__wait_for_test(t);
}
- printf("[ %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name);
+ ksft_print_msg(" %4s %s%s%s.%s\n", t->passed ? "OK" : "FAIL",
+ f->name, variant->name[0] ? "." : "", variant->name, t->name);
+
+ if (t->skip)
+ ksft_test_result_skip("%s\n", t->results->reason[0] ?
+ t->results->reason : "unknown");
+ else
+ ksft_test_result(t->passed, "%s%s%s.%s\n",
+ f->name, variant->name[0] ? "." : "", variant->name, t->name);
}
static int test_harness_run(int __attribute__((unused)) argc,
char __attribute__((unused)) **argv)
{
+ struct __fixture_variant_metadata no_variant = { .name = "", };
+ struct __fixture_variant_metadata *v;
+ struct __fixture_metadata *f;
+ struct __test_results *results;
struct __test_metadata *t;
int ret = 0;
+ unsigned int case_count = 0, test_count = 0;
unsigned int count = 0;
unsigned int pass_count = 0;
- /* TODO(wad) add optional arguments similar to gtest. */
- printf("[==========] Running %u tests from %u test cases.\n",
- __test_count, __fixture_count + 1);
- for (t = __test_list; t; t = t->next) {
- count++;
- __run_test(t);
- if (t->passed)
- pass_count++;
- else
- ret = 1;
+ for (f = __fixture_list; f; f = f->next) {
+ for (v = f->variant ?: &no_variant; v; v = v->next) {
+ case_count++;
+ for (t = f->tests; t; t = t->next)
+ test_count++;
+ }
}
- printf("[==========] %u / %u tests passed.\n", pass_count, count);
- printf("[ %s ]\n", (ret ? "FAILED" : "PASSED"));
- return ret;
+
+ results = mmap(NULL, sizeof(*results), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+
+ ksft_print_header();
+ ksft_set_plan(test_count);
+ ksft_print_msg("Starting %u tests from %u test cases.\n",
+ test_count, case_count);
+ for (f = __fixture_list; f; f = f->next) {
+ for (v = f->variant ?: &no_variant; v; v = v->next) {
+ for (t = f->tests; t; t = t->next) {
+ count++;
+ t->results = results;
+ __run_test(f, v, t);
+ t->results = NULL;
+ if (t->passed)
+ pass_count++;
+ else
+ ret = 1;
+ }
+ }
+ }
+ munmap(results, sizeof(*results));
+
+ ksft_print_msg("%s: %u / %u tests passed.\n", ret ? "FAILED" : "PASSED",
+ pass_count, count);
+ ksft_exit(ret == 0);
+
+ /* unreachable */
+ return KSFT_FAIL;
}
static void __attribute__((constructor)) __constructor_order_first(void)
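
The harness now registers tests under their fixture, runs every test once per FIXTURE_VARIANT_ADD() entry, and reports through the kselftest TAP helpers, with SKIP() replacing the old XFAIL() path. A compact, hypothetical example of the new surface (fixture, variant names, and sizes are illustrative):

#include "kselftest_harness.h"

FIXTURE(buf) {
	char *mem;
};

/* Variant type: constant parameters handed to FIXTURE_SETUP() and TEST_F(). */
FIXTURE_VARIANT(buf) {
	size_t size;
};

FIXTURE_VARIANT_ADD(buf, small) { .size = 16, };
FIXTURE_VARIANT_ADD(buf, large) { .size = 4096, };

FIXTURE_SETUP(buf)
{
	self->mem = malloc(variant->size);
	ASSERT_NE(NULL, self->mem);
}

FIXTURE_TEARDOWN(buf)
{
	free(self->mem);
}

TEST_F(buf, fill)
{
	if (variant->size > 1024)
		SKIP(return, "large case not exercised here");	/* reported as TAP SKIP */
	memset(self->mem, 0, variant->size);
	EXPECT_EQ(0, self->mem[0]);
}

TEST_HARNESS_MAIN

Each TEST_F() body runs twice, once as buf.small.fill and once as buf.large.fill, and the SKIP() reason is carried back to the parent through the shared results mapping.
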
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index a9b2b48947ff..307ceaadbbb9 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -3,14 +3,16 @@
/s390x/resets
/s390x/sync_regs_test
/x86_64/cr4_cpuid_sync_test
+/x86_64/debug_regs
/x86_64/evmcs_test
/x86_64/hyperv_cpuid
/x86_64/mmio_warning_test
/x86_64/platform_info_test
-/x86_64/set_memory_region_test
/x86_64/set_sregs_test
/x86_64/smm_test
/x86_64/state_test
+/x86_64/user_msr_test
+/x86_64/vmx_preemption_timer_test
/x86_64/svm_vmcall_test
/x86_64/sync_regs_test
/x86_64/vmx_close_while_nested_test
@@ -22,4 +24,5 @@
/demand_paging_test
/dirty_log_test
/kvm_create_max_vcpus
+/set_memory_region_test
/steal_time
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 42f4f49f2a48..7ebe71fbca53 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -43,10 +43,10 @@ TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
-TEST_GEN_PROGS_x86_64 += x86_64/set_memory_region_test
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += x86_64/state_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
@@ -55,16 +55,20 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
+TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
+TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_x86_64 += set_memory_region_test
TEST_GEN_PROGS_x86_64 += steal_time
TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_aarch64 += set_memory_region_test
TEST_GEN_PROGS_aarch64 += steal_time
TEST_GEN_PROGS_s390x = s390x/memop
@@ -73,6 +77,7 @@ TEST_GEN_PROGS_s390x += s390x/sync_regs_test
TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
+TEST_GEN_PROGS_s390x += set_memory_region_test
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))
@@ -80,7 +85,11 @@ LIBKVM += $(LIBKVM_$(UNAME_M))
INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
+ifeq ($(ARCH),x86_64)
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include
+else
LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
+endif
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 92e184a422ee..919e161dd289 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -10,6 +10,7 @@
#include "test_util.h"
#include "asm/kvm.h"
+#include "linux/list.h"
#include "linux/kvm.h"
#include <sys/ioctl.h>
@@ -113,6 +114,7 @@ int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
uint32_t data_memslot, uint32_t pgd_memslot);
@@ -256,6 +258,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm);
unsigned int vm_get_page_size(struct kvm_vm *vm);
unsigned int vm_get_page_shift(struct kvm_vm *vm);
unsigned int vm_get_max_gfn(struct kvm_vm *vm);
+int vm_get_fd(struct kvm_vm *vm);
unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
@@ -311,13 +314,30 @@ void ucall_uninit(struct kvm_vm *vm);
void ucall(uint64_t cmd, int nargs, ...);
uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
+#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
+ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
#define GUEST_DONE() ucall(UCALL_DONE, 0)
-#define GUEST_ASSERT(_condition) do { \
- if (!(_condition)) \
- ucall(UCALL_ABORT, 2, \
- "Failed guest assert: " \
- #_condition, __LINE__); \
+#define __GUEST_ASSERT(_condition, _nargs, _args...) do { \
+ if (!(_condition)) \
+ ucall(UCALL_ABORT, 2 + _nargs, \
+ "Failed guest assert: " \
+ #_condition, __LINE__, _args); \
} while (0)
+#define GUEST_ASSERT(_condition) \
+ __GUEST_ASSERT((_condition), 0, 0)
+
+#define GUEST_ASSERT_1(_condition, arg1) \
+ __GUEST_ASSERT((_condition), 1, (arg1))
+
+#define GUEST_ASSERT_2(_condition, arg1, arg2) \
+ __GUEST_ASSERT((_condition), 2, (arg1), (arg2))
+
+#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
+ __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3))
+
+#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
+ __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
+
#endif /* SELFTEST_KVM_UTIL_H */
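
The new GUEST_ASSERT_1() through GUEST_ASSERT_4() variants forward extra values through the abort ucall so the host can print them when a guest assertion fails, and GUEST_SYNC_ARGS() does the same for sync points. A guest-side fragment (values are hypothetical; the host-side get_ucall() handling is test-specific and not shown):

/* Fragment of a guest_code() routine in a KVM selftest; kvm_util.h provides the macros. */
static void guest_code(void)
{
	uint64_t val = 42;	/* stand-in for a value read from the hardware under test */

	/* On failure, both values ride along in the UCALL_ABORT arguments. */
	GUEST_ASSERT_2(val == 42, val, 42);

	/* Sync with the host, carrying up to four payload values. */
	GUEST_SYNC_ARGS(1, val, 0, 0, 0);
	GUEST_DONE();
}
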
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 7428513a4c68..82b7fe16a824 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -79,13 +79,16 @@ static inline uint64_t get_desc64_base(const struct desc64 *desc)
static inline uint64_t rdtsc(void)
{
uint32_t eax, edx;
-
+ uint64_t tsc_val;
/*
* The lfence is to wait (on Intel CPUs) until all previous
- * instructions have been executed.
+ * instructions have been executed. If software requires RDTSC to be
+ * executed prior to execution of any subsequent instruction, it can
+ * execute LFENCE immediately after RDTSC
*/
- __asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
- return ((uint64_t)edx) << 32 | eax;
+ __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
+ tsc_val = ((uint64_t)edx) << 32 | eax;
+ return tsc_val;
}
static inline uint64_t rdtscp(uint32_t *aux)
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
index cd037917fece..b7531c83b8ae 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
@@ -33,6 +33,17 @@ struct svm_test_data {
struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
+bool nested_svm_supported(void);
void nested_svm_check_supported(void);
+static inline bool cpu_has_svm(void)
+{
+ u32 eax = 0x80000001, ecx;
+
+ asm("cpuid" :
+ "=a" (eax), "=c" (ecx) : "0" (eax) : "ebx", "edx");
+
+ return ecx & CPUID_SVM;
+}
+
#endif /* SELFTEST_KVM_SVM_UTILS_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 3d27069b9ed9..54d624dd6c10 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -48,7 +48,7 @@
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
#define SECONDARY_EXEC_DESC 0x00000004
-#define SECONDARY_EXEC_RDTSCP 0x00000008
+#define SECONDARY_EXEC_ENABLE_RDTSCP 0x00000008
#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
@@ -575,11 +575,35 @@ struct vmx_pages {
void *eptp;
};
+union vmx_basic {
+ u64 val;
+ struct {
+ u32 revision;
+ u32 size:13,
+ reserved1:3,
+ width:1,
+ dual:1,
+ type:4,
+ insouts:1,
+ ctrl:1,
+ vm_entry_exception_ctrl:1,
+ reserved2:7;
+ };
+};
+
+union vmx_ctrl_msr {
+ u64 val;
+ struct {
+ u32 set, clr;
+ };
+};
+
struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
bool prepare_for_vmx_operation(struct vmx_pages *vmx);
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
bool load_vmcs(struct vmx_pages *vmx);
+bool nested_vmx_supported(void);
void nested_vmx_check_supported(void);
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 9622431069bc..74776ee228f2 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -161,6 +161,9 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
vm = calloc(1, sizeof(*vm));
TEST_ASSERT(vm != NULL, "Insufficient Memory");
+ INIT_LIST_HEAD(&vm->vcpus);
+ INIT_LIST_HEAD(&vm->userspace_mem_regions);
+
vm->mode = mode;
vm->type = 0;
@@ -192,11 +195,18 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
case VM_MODE_PXXV48_4K:
#ifdef __x86_64__
kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
- TEST_ASSERT(vm->va_bits == 48, "Linear address width "
- "(%d bits) not supported", vm->va_bits);
+ /*
+ * Ignore KVM support for 5-level paging (vm->va_bits == 57),
+ * it doesn't take effect unless a CR4.LA57 is set, which it
+ * isn't for this VM_MODE.
+ */
+ TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
+ "Linear address width (%d bits) not supported",
+ vm->va_bits);
pr_debug("Guest physical address width detected: %d\n",
vm->pa_bits);
vm->pgtable_levels = 4;
+ vm->va_bits = 48;
#else
TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
#endif
@@ -258,8 +268,7 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
if (vmp->has_irqchip)
vm_create_irqchip(vmp);
- for (region = vmp->userspace_mem_region_head; region;
- region = region->next) {
+ list_for_each_entry(region, &vmp->userspace_mem_regions, list) {
int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
" rc: %i errno: %i\n"
@@ -319,8 +328,7 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
struct userspace_mem_region *region;
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
uint64_t existing_start = region->region.guest_phys_addr;
uint64_t existing_end = region->region.guest_phys_addr
+ region->region.memory_size - 1;
@@ -378,11 +386,11 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
*/
struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
{
- struct vcpu *vcpup;
+ struct vcpu *vcpu;
- for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) {
- if (vcpup->id == vcpuid)
- return vcpup;
+ list_for_each_entry(vcpu, &vm->vcpus, list) {
+ if (vcpu->id == vcpuid)
+ return vcpu;
}
return NULL;
@@ -392,18 +400,16 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
* VM VCPU Remove
*
* Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
+ * vcpu - VCPU to remove
*
* Output Args: None
*
* Return: None, TEST_ASSERT failures for all error conditions
*
- * Within the VM specified by vm, removes the VCPU given by vcpuid.
+ * Removes a vCPU from a VM and frees its resources.
*/
-static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
+static void vm_vcpu_rm(struct vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
ret = munmap(vcpu->state, sizeof(*vcpu->state));
@@ -413,21 +419,17 @@ static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
"errno: %i", ret, errno);
- if (vcpu->next)
- vcpu->next->prev = vcpu->prev;
- if (vcpu->prev)
- vcpu->prev->next = vcpu->next;
- else
- vm->vcpu_head = vcpu->next;
+ list_del(&vcpu->list);
free(vcpu);
}
void kvm_vm_release(struct kvm_vm *vmp)
{
+ struct vcpu *vcpu, *tmp;
int ret;
- while (vmp->vcpu_head)
- vm_vcpu_rm(vmp, vmp->vcpu_head->id);
+ list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
+ vm_vcpu_rm(vcpu);
ret = close(vmp->fd);
TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
@@ -438,35 +440,38 @@ void kvm_vm_release(struct kvm_vm *vmp)
" vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
}
+static void __vm_mem_region_delete(struct kvm_vm *vm,
+ struct userspace_mem_region *region)
+{
+ int ret;
+
+ list_del(&region->list);
+
+ region->region.memory_size = 0;
+ ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+
+ sparsebit_free(&region->unused_phy_pages);
+ ret = munmap(region->mmap_start, region->mmap_size);
+ TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
+
+ free(region);
+}
+
/*
* Destroys and frees the VM pointed to by vmp.
*/
void kvm_vm_free(struct kvm_vm *vmp)
{
- int ret;
+ struct userspace_mem_region *region, *tmp;
if (vmp == NULL)
return;
/* Free userspace_mem_regions. */
- while (vmp->userspace_mem_region_head) {
- struct userspace_mem_region *region
- = vmp->userspace_mem_region_head;
-
- region->region.memory_size = 0;
- ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION,
- &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
-
- vmp->userspace_mem_region_head = region->next;
- sparsebit_free(&region->unused_phy_pages);
- ret = munmap(region->mmap_start, region->mmap_size);
- TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i",
- ret, errno);
-
- free(region);
- }
+ list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list)
+ __vm_mem_region_delete(vmp, region);
/* Free sparsebit arrays. */
sparsebit_free(&vmp->vpages_valid);
@@ -612,12 +617,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
(uint64_t) region->region.memory_size);
/* Confirm no region with the requested slot already exists. */
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
- if (region->region.slot == slot)
- break;
- }
- if (region != NULL)
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ if (region->region.slot != slot)
+ continue;
+
TEST_FAIL("A mem region with the requested slot "
"already exists.\n"
" requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
@@ -626,6 +629,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
region->region.slot,
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size);
+ }
/* Allocate and initialize new mem region structure. */
region = calloc(1, sizeof(*region));
@@ -686,10 +690,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
guest_paddr, (uint64_t) region->region.memory_size);
/* Add to linked-list of memory regions. */
- if (vm->userspace_mem_region_head)
- vm->userspace_mem_region_head->prev = region;
- region->next = vm->userspace_mem_region_head;
- vm->userspace_mem_region_head = region;
+ list_add(&region->list, &vm->userspace_mem_regions);
}
/*
@@ -712,20 +713,17 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
struct userspace_mem_region *region;
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
if (region->region.slot == memslot)
- break;
- }
- if (region == NULL) {
- fprintf(stderr, "No mem region with the requested slot found,\n"
- " requested slot: %u\n", memslot);
- fputs("---- vm dump ----\n", stderr);
- vm_dump(stderr, vm, 2);
- TEST_FAIL("Mem region not found");
+ return region;
}
- return region;
+ fprintf(stderr, "No mem region with the requested slot found,\n"
+ " requested slot: %u\n", memslot);
+ fputs("---- vm dump ----\n", stderr);
+ vm_dump(stderr, vm, 2);
+ TEST_FAIL("Mem region not found");
+ return NULL;
}
/*
@@ -789,6 +787,24 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
}
/*
+ * VM Memory Region Delete
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * slot - Slot of the memory region to delete
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Delete a memory region.
+ */
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
+{
+ __vm_mem_region_delete(vm, memslot2region(vm, slot));
+}
+
+/*
* VCPU mmap Size
*
* Input Args: None
@@ -863,10 +879,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
"vcpu id: %u errno: %i", vcpuid, errno);
/* Add to linked-list of VCPUs. */
- if (vm->vcpu_head)
- vm->vcpu_head->prev = vcpu;
- vcpu->next = vm->vcpu_head;
- vm->vcpu_head = vcpu;
+ list_add(&vcpu->list, &vm->vcpus);
}
/*
@@ -1059,8 +1072,8 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
struct userspace_mem_region *region;
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
+
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
if ((gpa >= region->region.guest_phys_addr)
&& (gpa <= (region->region.guest_phys_addr
+ region->region.memory_size - 1)))
@@ -1092,8 +1105,8 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
struct userspace_mem_region *region;
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
+
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
if ((hva >= region->host_mem)
&& (hva <= (region->host_mem
+ region->region.memory_size - 1)))
@@ -1529,8 +1542,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
fprintf(stream, "%*sMem Regions:\n", indent, "");
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
"host_virt: %p\n", indent + 2, "",
(uint64_t) region->region.guest_phys_addr,
@@ -1549,7 +1561,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
virt_dump(stream, vm, indent + 4);
}
fprintf(stream, "%*sVCPUs:\n", indent, "");
- for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next)
+ list_for_each_entry(vcpu, &vm->vcpus, list)
vcpu_dump(stream, vm, vcpu->id, indent + 2);
}
@@ -1743,6 +1755,11 @@ unsigned int vm_get_max_gfn(struct kvm_vm *vm)
return vm->max_gfn;
}
+int vm_get_fd(struct kvm_vm *vm)
+{
+ return vm->fd;
+}
+
static unsigned int vm_calc_num_pages(unsigned int num_pages,
unsigned int page_shift,
unsigned int new_page_shift,
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index ca56a0133127..2ef446520748 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -13,7 +13,6 @@
#define KVM_DEV_PATH "/dev/kvm"
struct userspace_mem_region {
- struct userspace_mem_region *next, *prev;
struct kvm_userspace_memory_region region;
struct sparsebit *unused_phy_pages;
int fd;
@@ -21,10 +20,11 @@ struct userspace_mem_region {
void *host_mem;
void *mmap_start;
size_t mmap_size;
+ struct list_head list;
};
struct vcpu {
- struct vcpu *next, *prev;
+ struct list_head list;
uint32_t id;
int fd;
struct kvm_run *state;
@@ -41,8 +41,8 @@ struct kvm_vm {
unsigned int pa_bits;
unsigned int va_bits;
uint64_t max_gfn;
- struct vcpu *vcpu_head;
- struct userspace_mem_region *userspace_mem_region_head;
+ struct list_head vcpus;
+ struct list_head userspace_mem_regions;
struct sparsebit *vpages_valid;
struct sparsebit *vpages_mapped;
bool has_irqchip;
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index 8d94961bd046..a88c5d665725 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -233,7 +233,10 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
{
- struct vcpu *vcpu = vm->vcpu_head;
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+ if (!vcpu)
+ return;
fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index c42401068373..3a5c72ed2b79 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -148,14 +148,18 @@ void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
: "r15", "memory");
}
-void nested_svm_check_supported(void)
+bool nested_svm_supported(void)
{
struct kvm_cpuid_entry2 *entry =
kvm_get_supported_cpuid_entry(0x80000001);
- if (!(entry->ecx & CPUID_SVM)) {
+ return entry->ecx & CPUID_SVM;
+}
+
+void nested_svm_check_supported(void)
+{
+ if (!nested_svm_supported()) {
print_skip("nested SVM not enabled");
exit(KSFT_SKIP);
}
}
-
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 4ae104f6ce69..f1e00d43eea2 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -379,11 +379,16 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
init_vmcs_guest_state(guest_rip, guest_rsp);
}
-void nested_vmx_check_supported(void)
+bool nested_vmx_supported(void)
{
struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
- if (!(entry->ecx & CPUID_VMX)) {
+ return entry->ecx & CPUID_VMX;
+}
+
+void nested_vmx_check_supported(void)
+{
+ if (!nested_vmx_supported()) {
print_skip("nested VMX not enabled");
exit(KSFT_SKIP);
}
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
new file mode 100644
index 000000000000..b3ece55a2da6
--- /dev/null
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/compiler.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define VCPU_ID 0
+
+/*
+ * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a
+ * 2MB sized and aligned region so that the initial region corresponds to
+ * exactly one large page.
+ */
+#define MEM_REGION_SIZE 0x200000
+
+#ifdef __x86_64__
+/*
+ * Somewhat arbitrary location and slot, intended to not overlap anything.
+ */
+#define MEM_REGION_GPA 0xc0000000
+#define MEM_REGION_SLOT 10
+
+static const uint64_t MMIO_VAL = 0xbeefull;
+
+extern const uint64_t final_rip_start;
+extern const uint64_t final_rip_end;
+
+static sem_t vcpu_ready;
+
+static inline uint64_t guest_spin_on_val(uint64_t spin_val)
+{
+ uint64_t val;
+
+ do {
+ val = READ_ONCE(*((uint64_t *)MEM_REGION_GPA));
+ } while (val == spin_val);
+
+ GUEST_SYNC(0);
+ return val;
+}
+
+static void *vcpu_worker(void *data)
+{
+ struct kvm_vm *vm = data;
+ struct kvm_run *run;
+ struct ucall uc;
+ uint64_t cmd;
+
+ /*
+ * Loop until the guest is done. Re-enter the guest on all MMIO exits,
+ * which will occur if the guest attempts to access a memslot after it
+ * has been deleted or while it is being moved.
+ */
+ run = vcpu_state(vm, VCPU_ID);
+
+ while (1) {
+ vcpu_run(vm, VCPU_ID);
+
+ if (run->exit_reason == KVM_EXIT_IO) {
+ cmd = get_ucall(vm, VCPU_ID, &uc);
+ if (cmd != UCALL_SYNC)
+ break;
+
+ sem_post(&vcpu_ready);
+ continue;
+ }
+
+ if (run->exit_reason != KVM_EXIT_MMIO)
+ break;
+
+ TEST_ASSERT(!run->mmio.is_write, "Unexpected exit mmio write");
+ TEST_ASSERT(run->mmio.len == 8,
+ "Unexpected exit mmio size = %u", run->mmio.len);
+
+ TEST_ASSERT(run->mmio.phys_addr == MEM_REGION_GPA,
+ "Unexpected exit mmio address = 0x%llx",
+ run->mmio.phys_addr);
+ memcpy(run->mmio.data, &MMIO_VAL, 8);
+ }
+
+ if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
+ TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
+ __FILE__, uc.args[1], uc.args[2]);
+
+ return NULL;
+}
+
+static void wait_for_vcpu(void)
+{
+ struct timespec ts;
+
+ TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
+ "clock_gettime() failed: %d\n", errno);
+
+ ts.tv_sec += 2;
+ TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
+ "sem_timedwait() failed: %d\n", errno);
+
+ /* Wait for the vCPU thread to reenter the guest. */
+ usleep(100000);
+}
+
+static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
+{
+ struct kvm_vm *vm;
+ uint64_t *hva;
+ uint64_t gpa;
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
+ MEM_REGION_GPA, MEM_REGION_SLOT,
+ MEM_REGION_SIZE / getpagesize(), 0);
+
+ /*
+ * Allocate and map two pages so that the GPA accessed by guest_code()
+ * stays valid across the memslot move.
+ */
+ gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
+ TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
+
+ virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
+
+ /* Ditto for the host mapping so that both pages can be zeroed. */
+ hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+ memset(hva, 0, 2 * 4096);
+
+ pthread_create(vcpu_thread, NULL, vcpu_worker, vm);
+
+ /* Ensure the guest thread is spun up. */
+ wait_for_vcpu();
+
+ return vm;
+}
+
+
+static void guest_code_move_memory_region(void)
+{
+ uint64_t val;
+
+ GUEST_SYNC(0);
+
+ /*
+ * Spin until the memory region is moved to a misaligned address. This
+ * may or may not trigger MMIO, as the window where the memslot is
+ * invalid is quite small.
+ */
+ val = guest_spin_on_val(0);
+ GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+
+ /* Spin until the memory region is realigned. */
+ val = guest_spin_on_val(MMIO_VAL);
+ GUEST_ASSERT_1(val == 1, val);
+
+ GUEST_DONE();
+}
+
+static void test_move_memory_region(void)
+{
+ pthread_t vcpu_thread;
+ struct kvm_vm *vm;
+ uint64_t *hva;
+
+ vm = spawn_vm(&vcpu_thread, guest_code_move_memory_region);
+
+ hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+
+ /*
+ * Shift the region's base GPA. The guest should not see "2" as the
+ * hva->gpa translation is misaligned, i.e. the guest is accessing a
+ * different host pfn.
+ */
+ vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA - 4096);
+ WRITE_ONCE(*hva, 2);
+
+ /*
+ * The guest _might_ see an invalid memslot and trigger MMIO, but it's
+ * a tiny window. Spin and defer the sync until the memslot is
+ * restored and guest behavior is once again deterministic.
+ */
+ usleep(100000);
+
+ /*
+ * Note, value in memory needs to be changed *before* restoring the
+ * memslot, else the guest could race the update and see "2".
+ */
+ WRITE_ONCE(*hva, 1);
+
+ /* Restore the original base, the guest should see "1". */
+ vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA);
+ wait_for_vcpu();
+ /* Deferred sync from when the memslot was misaligned (above). */
+ wait_for_vcpu();
+
+ pthread_join(vcpu_thread, NULL);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_code_delete_memory_region(void)
+{
+ uint64_t val;
+
+ GUEST_SYNC(0);
+
+ /* Spin until the memory region is deleted. */
+ val = guest_spin_on_val(0);
+ GUEST_ASSERT_1(val == MMIO_VAL, val);
+
+ /* Spin until the memory region is recreated. */
+ val = guest_spin_on_val(MMIO_VAL);
+ GUEST_ASSERT_1(val == 0, val);
+
+ /* Spin until the memory region is deleted. */
+ val = guest_spin_on_val(0);
+ GUEST_ASSERT_1(val == MMIO_VAL, val);
+
+ asm("1:\n\t"
+ ".pushsection .rodata\n\t"
+ ".global final_rip_start\n\t"
+ "final_rip_start: .quad 1b\n\t"
+ ".popsection");
+
+ /* Spin indefinitely (until the code memslot is deleted). */
+ guest_spin_on_val(MMIO_VAL);
+
+ asm("1:\n\t"
+ ".pushsection .rodata\n\t"
+ ".global final_rip_end\n\t"
+ "final_rip_end: .quad 1b\n\t"
+ ".popsection");
+
+ GUEST_ASSERT_1(0, 0);
+}
+
+static void test_delete_memory_region(void)
+{
+ pthread_t vcpu_thread;
+ struct kvm_regs regs;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+
+ vm = spawn_vm(&vcpu_thread, guest_code_delete_memory_region);
+
+ /* Delete the memory region, the guest should not die. */
+ vm_mem_region_delete(vm, MEM_REGION_SLOT);
+ wait_for_vcpu();
+
+ /* Recreate the memory region. The guest should see "0". */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
+ MEM_REGION_GPA, MEM_REGION_SLOT,
+ MEM_REGION_SIZE / getpagesize(), 0);
+ wait_for_vcpu();
+
+ /* Delete the region again so that there's only one memslot left. */
+ vm_mem_region_delete(vm, MEM_REGION_SLOT);
+ wait_for_vcpu();
+
+ /*
+ * Delete the primary memslot. This should cause an emulation error or
+ * shutdown due to the page tables getting nuked.
+ */
+ vm_mem_region_delete(vm, 0);
+
+ pthread_join(vcpu_thread, NULL);
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN ||
+ run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+ "Unexpected exit reason = %d", run->exit_reason);
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+
+ /*
+ * On AMD, after KVM_EXIT_SHUTDOWN the VMCB has been reinitialized already,
+ * so the instruction pointer would point to the reset vector.
+ */
+ if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR)
+ TEST_ASSERT(regs.rip >= final_rip_start &&
+ regs.rip < final_rip_end,
+ "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx\n",
+ final_rip_start, final_rip_end, regs.rip);
+
+ kvm_vm_free(vm);
+}
+
+static void test_zero_memory_regions(void)
+{
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+
+ pr_info("Testing KVM_RUN with zero added memory regions\n");
+
+ vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID);
+
+ TEST_ASSERT(!ioctl(vm_get_fd(vm), KVM_SET_NR_MMU_PAGES, 64),
+ "KVM_SET_NR_MMU_PAGES failed, errno = %d\n", errno);
+ vcpu_run(vm, VCPU_ID);
+
+ run = vcpu_state(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+ "Unexpected exit_reason = %u\n", run->exit_reason);
+
+ kvm_vm_free(vm);
+}
+#endif /* __x86_64__ */
+
+/*
+ * Test that memory slots can be added up to KVM_CAP_NR_MEMSLOTS, and that
+ * any attempt to add further slots fails.
+ */
+static void test_add_max_memory_regions(void)
+{
+ int ret;
+ struct kvm_vm *vm;
+ uint32_t max_mem_slots;
+ uint32_t slot;
+ uint64_t guest_addr = 0x0;
+ uint64_t mem_reg_npages;
+ void *mem;
+
+ max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
+ TEST_ASSERT(max_mem_slots > 0,
+ "KVM_CAP_NR_MEMSLOTS should be greater than 0");
+ pr_info("Allowed number of memory slots: %i\n", max_mem_slots);
+
+ vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+
+ mem_reg_npages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, MEM_REGION_SIZE);
+
+ /* Check that memory slots can be added up to the maximum allowed */
+ pr_info("Adding slots 0..%i, each memory region with %dK size\n",
+ (max_mem_slots - 1), MEM_REGION_SIZE >> 10);
+ for (slot = 0; slot < max_mem_slots; slot++) {
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ guest_addr, slot, mem_reg_npages,
+ 0);
+ guest_addr += MEM_REGION_SIZE;
+ }
+
+ /* Check that adding a memory slot beyond the limit fails */
+ mem = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
+
+ ret = ioctl(vm_get_fd(vm), KVM_SET_USER_MEMORY_REGION,
+ &(struct kvm_userspace_memory_region) {slot, 0, guest_addr,
+ MEM_REGION_SIZE, (uint64_t) mem});
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "Adding one more memory slot should fail with EINVAL");
+
+ munmap(mem, MEM_REGION_SIZE);
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+#ifdef __x86_64__
+ int i, loops;
+#endif
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+#ifdef __x86_64__
+ /*
+ * FIXME: the zero-memslot test fails on aarch64 and s390x because
+ * KVM_RUN fails with ENOEXEC or EFAULT.
+ */
+ test_zero_memory_regions();
+#endif
+
+ test_add_max_memory_regions();
+
+#ifdef __x86_64__
+ if (argc > 1)
+ loops = atoi(argv[1]);
+ else
+ loops = 10;
+
+ pr_info("Testing MOVE of in-use region, %d loops\n", loops);
+ for (i = 0; i < loops; i++)
+ test_move_memory_region();
+
+ pr_info("Testing DELETE of in-use region, %d loops\n", loops);
+ for (i = 0; i < loops; i++)
+ test_delete_memory_region();
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
index 8162c58a1234..2fc6b3af81a1 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -40,11 +40,11 @@ static void guest_code(void)
/* Single step test, covers 2 basic instructions and 2 emulated */
asm volatile("ss_start: "
- "xor %%rax,%%rax\n\t"
+ "xor %%eax,%%eax\n\t"
"cpuid\n\t"
"movl $0x1a0,%%ecx\n\t"
"rdmsr\n\t"
- : : : "rax", "ecx");
+ : : : "eax", "ebx", "ecx", "edx");
/* DR6.BD test */
asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax");
@@ -73,7 +73,7 @@ int main(void)
int i;
/* Instruction lengths starting at ss_start */
int ss_size[4] = {
- 3, /* xor */
+ 2, /* xor */
2, /* cpuid */
5, /* mov */
2, /* rdmsr */
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index e6e62e5e75b2..757928199f19 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -94,9 +94,10 @@ int main(int argc, char *argv[])
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- if (!kvm_check_cap(KVM_CAP_NESTED_STATE) ||
+ if (!nested_vmx_supported() ||
+ !kvm_check_cap(KVM_CAP_NESTED_STATE) ||
!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
- print_skip("capabilities not available");
+ print_skip("Enlightened VMCS is unsupported");
exit(KSFT_SKIP);
}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 83323f3d7ca0..745b708c2d3b 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -26,18 +26,18 @@ static void guest_code(void)
{
}
-static int smt_possible(void)
+static bool smt_possible(void)
{
char buf[16];
FILE *f;
- bool res = 1;
+ bool res = true;
f = fopen("/sys/devices/system/cpu/smt/control", "r");
if (f) {
if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
if (!strncmp(buf, "forceoff", 8) ||
!strncmp(buf, "notsupported", 12))
- res = 0;
+ res = false;
}
fclose(f);
}
@@ -46,29 +46,31 @@ static int smt_possible(void)
}
static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
- int evmcs_enabled)
+ bool evmcs_enabled)
{
int i;
+ int nent = 9;
+ u32 test_val;
- if (!evmcs_enabled)
- TEST_ASSERT(hv_cpuid_entries->nent == 6,
- "KVM_GET_SUPPORTED_HV_CPUID should return 6 entries"
- " when Enlightened VMCS is disabled (returned %d)",
- hv_cpuid_entries->nent);
- else
- TEST_ASSERT(hv_cpuid_entries->nent == 7,
- "KVM_GET_SUPPORTED_HV_CPUID should return 7 entries"
- " when Enlightened VMCS is enabled (returned %d)",
- hv_cpuid_entries->nent);
+ if (evmcs_enabled)
+ nent += 1; /* 0x4000000A */
+
+ TEST_ASSERT(hv_cpuid_entries->nent == nent,
+ "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
+ " with evmcs=%d (returned %d)",
+ nent, evmcs_enabled, hv_cpuid_entries->nent);
for (i = 0; i < hv_cpuid_entries->nent; i++) {
struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
TEST_ASSERT((entry->function >= 0x40000000) &&
- (entry->function <= 0x4000000A),
+ (entry->function <= 0x40000082),
"function %x is our of supported range",
entry->function);
+ TEST_ASSERT(evmcs_enabled || (entry->function != 0x4000000A),
+ "0x4000000A leaf should not be reported");
+
TEST_ASSERT(entry->index == 0,
".index field should be zero");
@@ -78,12 +80,23 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
!entry->padding[2], "padding should be zero");
- if (entry->function == 0x40000004) {
- int nononarchcs = !!(entry->eax & (1UL << 18));
+ switch (entry->function) {
+ case 0x40000000:
+ test_val = 0x40000082;
- TEST_ASSERT(nononarchcs == !smt_possible(),
+ TEST_ASSERT(entry->eax == test_val,
+ "Wrong max leaf report in 0x40000000.EAX: %x"
+ " (evmcs=%d)",
+ entry->eax, evmcs_enabled
+ );
+ break;
+ case 0x40000004:
+ test_val = entry->eax & (1UL << 18);
+
+ TEST_ASSERT(!!test_val == !smt_possible(),
"NoNonArchitecturalCoreSharing bit"
" doesn't reflect SMT setting");
+ break;
}
/*
@@ -133,8 +146,9 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
- int rv;
+ int rv, stage;
struct kvm_cpuid2 *hv_cpuid_entries;
+ bool evmcs_enabled;
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
@@ -145,36 +159,32 @@ int main(int argc, char *argv[])
exit(KSFT_SKIP);
}
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- test_hv_cpuid_e2big(vm);
-
- hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
- if (!hv_cpuid_entries)
- return 1;
-
- test_hv_cpuid(hv_cpuid_entries, 0);
-
- free(hv_cpuid_entries);
+ for (stage = 0; stage < 3; stage++) {
+ evmcs_enabled = false;
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ switch (stage) {
+ case 0:
+ test_hv_cpuid_e2big(vm);
+ continue;
+ case 1:
+ break;
+ case 2:
+ if (!nested_vmx_supported() ||
+ !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+ print_skip("Enlightened VMCS is unsupported");
+ continue;
+ }
+ vcpu_enable_evmcs(vm, VCPU_ID);
+ evmcs_enabled = true;
+ break;
+ }
- if (!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
- print_skip("Enlightened VMCS is unsupported");
- goto vm_free;
+ hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
+ test_hv_cpuid(hv_cpuid_entries, evmcs_enabled);
+ free(hv_cpuid_entries);
+ kvm_vm_free(vm);
}
- vcpu_enable_evmcs(vm, VCPU_ID);
-
- hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
- if (!hv_cpuid_entries)
- return 1;
-
- test_hv_cpuid(hv_cpuid_entries, 1);
-
- free(hv_cpuid_entries);
-
-vm_free:
- kvm_vm_free(vm);
-
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c b/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c
deleted file mode 100644
index c6691cff4e19..000000000000
--- a/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c
+++ /dev/null
@@ -1,141 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <fcntl.h>
-#include <pthread.h>
-#include <sched.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include <linux/compiler.h>
-
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-
-#define VCPU_ID 0
-
-/*
- * Somewhat arbitrary location and slot, intended to not overlap anything. The
- * location and size are specifically 2mb sized/aligned so that the initial
- * region corresponds to exactly one large page.
- */
-#define MEM_REGION_GPA 0xc0000000
-#define MEM_REGION_SIZE 0x200000
-#define MEM_REGION_SLOT 10
-
-static void guest_code(void)
-{
- uint64_t val;
-
- do {
- val = READ_ONCE(*((uint64_t *)MEM_REGION_GPA));
- } while (!val);
-
- if (val != 1)
- ucall(UCALL_ABORT, 1, val);
-
- GUEST_DONE();
-}
-
-static void *vcpu_worker(void *data)
-{
- struct kvm_vm *vm = data;
- struct kvm_run *run;
- struct ucall uc;
- uint64_t cmd;
-
- /*
- * Loop until the guest is done. Re-enter the guest on all MMIO exits,
- * which will occur if the guest attempts to access a memslot while it
- * is being moved.
- */
- run = vcpu_state(vm, VCPU_ID);
- do {
- vcpu_run(vm, VCPU_ID);
- } while (run->exit_reason == KVM_EXIT_MMIO);
-
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason = %d", run->exit_reason);
-
- cmd = get_ucall(vm, VCPU_ID, &uc);
- TEST_ASSERT(cmd == UCALL_DONE, "Unexpected val in guest = %lu", uc.args[0]);
- return NULL;
-}
-
-static void test_move_memory_region(void)
-{
- pthread_t vcpu_thread;
- struct kvm_vm *vm;
- uint64_t *hva;
- uint64_t gpa;
-
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
- MEM_REGION_GPA, MEM_REGION_SLOT,
- MEM_REGION_SIZE / getpagesize(), 0);
-
- /*
- * Allocate and map two pages so that the GPA accessed by guest_code()
- * stays valid across the memslot move.
- */
- gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
- TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
-
- virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
-
- /* Ditto for the host mapping so that both pages can be zeroed. */
- hva = addr_gpa2hva(vm, MEM_REGION_GPA);
- memset(hva, 0, 2 * 4096);
-
- pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
-
- /* Ensure the guest thread is spun up. */
- usleep(100000);
-
- /*
- * Shift the region's base GPA. The guest should not see "2" as the
- * hva->gpa translation is misaligned, i.e. the guest is accessing a
- * different host pfn.
- */
- vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA - 4096);
- WRITE_ONCE(*hva, 2);
-
- usleep(100000);
-
- /*
- * Note, value in memory needs to be changed *before* restoring the
- * memslot, else the guest could race the update and see "2".
- */
- WRITE_ONCE(*hva, 1);
-
- /* Restore the original base, the guest should see "1". */
- vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA);
-
- pthread_join(vcpu_thread, NULL);
-
- kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
- int i, loops;
-
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- if (argc > 1)
- loops = atoi(argv[1]);
- else
- loops = 10;
-
- for (i = 0; i < loops; i++)
- test_move_memory_region();
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index 8230b6bc6b8f..ae39a220609f 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -17,6 +17,7 @@
#include "kvm_util.h"
#include "vmx.h"
+#include "svm_util.h"
#define VCPU_ID 1
@@ -46,10 +47,10 @@ uint8_t smi_handler[] = {
0x0f, 0xaa, /* rsm */
};
-void sync_with_host(uint64_t phase)
+static inline void sync_with_host(uint64_t phase)
{
asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
- : : "a" (phase));
+ : "+a" (phase));
}
void self_smi(void)
@@ -58,7 +59,7 @@ void self_smi(void)
APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
}
-void guest_code(struct vmx_pages *vmx_pages)
+void guest_code(void *arg)
{
uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
@@ -72,8 +73,11 @@ void guest_code(struct vmx_pages *vmx_pages)
sync_with_host(4);
- if (vmx_pages) {
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ if (arg) {
+ if (cpu_has_svm())
+ generic_svm_setup(arg, NULL, NULL);
+ else
+ GUEST_ASSERT(prepare_for_vmx_operation(arg));
sync_with_host(5);
@@ -87,7 +91,7 @@ void guest_code(struct vmx_pages *vmx_pages)
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva = 0;
+ vm_vaddr_t nested_gva = 0;
struct kvm_regs regs;
struct kvm_vm *vm;
@@ -114,13 +118,17 @@ int main(int argc, char *argv[])
vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA);
if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
- } else {
- pr_info("will skip SMM test with VMX enabled\n");
- vcpu_args_set(vm, VCPU_ID, 1, 0);
+ if (nested_svm_supported())
+ vcpu_alloc_svm(vm, &nested_gva);
+ else if (nested_vmx_supported())
+ vcpu_alloc_vmx(vm, &nested_gva);
}
+ if (!nested_gva)
+ pr_info("will skip SMM test with VMX enabled\n");
+
+ vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
+
for (stage = 1;; stage++) {
_vcpu_run(vm, VCPU_ID);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 5b1a016edf55..f6c8b9042f8a 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -18,14 +18,46 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#define VCPU_ID 5
+#define L2_GUEST_STACK_SIZE 256
-void l2_guest_code(void)
+void svm_l2_guest_code(void)
{
+ GUEST_SYNC(4);
+ /* Exit to L1 */
+ vmcall();
GUEST_SYNC(6);
+ /* Done, exit to L1 and never come back. */
+ vmcall();
+}
- /* Exit to L1 */
+static void svm_l1_guest_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ GUEST_ASSERT(svm->vmcb_gpa);
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, svm_l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(3);
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(5);
+ vmcb->save.rip += 3;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(7);
+}
+
+void vmx_l2_guest_code(void)
+{
+ GUEST_SYNC(6);
+
+ /* Exit to L1 */
vmcall();
/* L1 has now set up a shadow VMCS for us. */
@@ -42,10 +74,9 @@ void l2_guest_code(void)
vmcall();
}
-void l1_guest_code(struct vmx_pages *vmx_pages)
+static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
GUEST_ASSERT(vmx_pages->vmcs_gpa);
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
@@ -56,7 +87,7 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_SYNC(4);
GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
- prepare_vmcs(vmx_pages, l2_guest_code,
+ prepare_vmcs(vmx_pages, vmx_l2_guest_code,
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
GUEST_SYNC(5);
@@ -106,20 +137,24 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_ASSERT(vmresume());
}
-void guest_code(struct vmx_pages *vmx_pages)
+static void __attribute__((__flatten__)) guest_code(void *arg)
{
GUEST_SYNC(1);
GUEST_SYNC(2);
- if (vmx_pages)
- l1_guest_code(vmx_pages);
+ if (arg) {
+ if (cpu_has_svm())
+ svm_l1_guest_code(arg);
+ else
+ vmx_l1_guest_code(arg);
+ }
GUEST_DONE();
}
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva = 0;
+ vm_vaddr_t nested_gva = 0;
struct kvm_regs regs1, regs2;
struct kvm_vm *vm;
@@ -136,13 +171,17 @@ int main(int argc, char *argv[])
vcpu_regs_get(vm, VCPU_ID, &regs1);
if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
- } else {
- pr_info("will skip nested state checks\n");
- vcpu_args_set(vm, VCPU_ID, 1, 0);
+ if (nested_svm_supported())
+ vcpu_alloc_svm(vm, &nested_gva);
+ else if (nested_vmx_supported())
+ vcpu_alloc_vmx(vm, &nested_gva);
}
+ if (!nested_gva)
+ pr_info("will skip nested state checks\n");
+
+ vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
+
for (stage = 1;; stage++) {
_vcpu_run(vm, VCPU_ID);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
new file mode 100644
index 000000000000..f8e761149daa
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 0
+
+#define UNITY (1ull << 30)
+#define HOST_ADJUST (UNITY * 64)
+#define GUEST_STEP (UNITY * 4)
+#define ROUND(x) ((x + UNITY / 2) & -UNITY)
+#define rounded_rdmsr(x) ROUND(rdmsr(x))
+#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vm, 0, x))
+
+#define GUEST_ASSERT_EQ(a, b) do { \
+ __typeof(a) _a = (a); \
+ __typeof(b) _b = (b); \
+ if (_a != _b) \
+ ucall(UCALL_ABORT, 4, \
+ "Failed guest assert: " \
+ #a " == " #b, __LINE__, _a, _b); \
+ } while(0)
+
+static void guest_code(void)
+{
+ u64 val = 0;
+
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC affect both MSRs. */
+ val = 1ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */
+ GUEST_SYNC(2);
+ val = 2ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC_ADJUST, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Host: setting the TSC offset. */
+ GUEST_SYNC(3);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+ * host-side offset and affect both MSRs.
+ */
+ GUEST_SYNC(4);
+ val = 3ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC_ADJUST, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+ * offset is now visible in MSR_IA32_TSC_ADJUST.
+ */
+ GUEST_SYNC(5);
+ val = 4ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+ GUEST_DONE();
+}
+
+static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
+{
+ struct ucall uc;
+
+ vcpu_args_set(vm, vcpuid, 1, vcpuid);
+
+ vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL);
+
+ switch (get_ucall(vm, vcpuid, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
+ return;
+ case UCALL_DONE:
+ return;
+ case UCALL_ABORT:
+ TEST_ASSERT(false, "%s at %s:%ld\n" \
+ "\tvalues: %#lx, %#lx", (const char *)uc.args[0],
+ __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+ }
+}
+
+int main(void)
+{
+ struct kvm_vm *vm;
+ uint64_t val;
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ val = 0;
+ ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC affect both MSRs. */
+ run_vcpu(vm, VCPU_ID, 1);
+ val = 1ull * GUEST_STEP;
+ ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */
+ run_vcpu(vm, VCPU_ID, 2);
+ val = 2ull * GUEST_STEP;
+ ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Host: writes to MSR_IA32_TSC set the host-side offset
+ * and therefore do not change MSR_IA32_TSC_ADJUST.
+ */
+ vcpu_set_msr(vm, 0, MSR_IA32_TSC, HOST_ADJUST + val);
+ ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ run_vcpu(vm, VCPU_ID, 3);
+
+ /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */
+ vcpu_set_msr(vm, 0, MSR_IA32_TSC_ADJUST, UNITY * 123456);
+ ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ ASSERT_EQ(vcpu_get_msr(vm, 0, MSR_IA32_TSC_ADJUST), UNITY * 123456);
+
+ /* Restore previous value. */
+ vcpu_set_msr(vm, 0, MSR_IA32_TSC_ADJUST, val);
+ ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+ * host-side offset and affect both MSRs.
+ */
+ run_vcpu(vm, VCPU_ID, 4);
+ val = 3ull * GUEST_STEP;
+ ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+ * offset is now visible in MSR_IA32_TSC_ADJUST.
+ */
+ run_vcpu(vm, VCPU_ID, 5);
+ val = 4ull * GUEST_STEP;
+ ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/user_msr_test.c b/tools/testing/selftests/kvm/x86_64/user_msr_test.c
new file mode 100644
index 000000000000..cbe1b08890ff
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/user_msr_test.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tests for KVM_CAP_X86_USER_SPACE_MSR and KVM_X86_SET_MSR_FILTER
+ *
+ * Copyright (C) 2020, Amazon Inc.
+ *
+ * This is a functional test to verify that we can deflect MSR events
+ * into user space.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 5
+
+static u32 msr_reads, msr_writes;
+
+static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_deadbeef[1] = { 0x1 };
+
+static void deny_msr(uint8_t *bitmap, u32 msr)
+{
+ u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
+
+ bitmap[idx / 8] &= ~(1 << (idx % 8));
+}
+
+static void prepare_bitmaps(void)
+{
+ memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
+ memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
+ memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
+ memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
+ memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
+
+ deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
+ deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
+ deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
+}
+
+struct kvm_msr_filter filter = {
+ .flags = KVM_MSR_FILTER_DEFAULT_DENY,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ,
+ .base = 0x00000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_00000000,
+ }, {
+ .flags = KVM_MSR_FILTER_WRITE,
+ .base = 0x00000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_00000000_write,
+ }, {
+ .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
+ .base = 0x40000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_40000000,
+ }, {
+ .flags = KVM_MSR_FILTER_READ,
+ .base = 0xc0000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_c0000000_read,
+ }, {
+ .flags = KVM_MSR_FILTER_WRITE,
+ .base = 0xc0000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_c0000000,
+ }, {
+ .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
+ .base = 0xdeadbeef,
+ .nmsrs = 1,
+ .bitmap = bitmap_deadbeef,
+ },
+ },
+};
+
+struct kvm_msr_filter no_filter = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+};
+
+static void guest_msr_calls(bool trapped)
+{
+ /* This goes into the in-kernel emulation */
+ wrmsr(MSR_SYSCALL_MASK, 0);
+
+ if (trapped) {
+ /* This goes into user space emulation */
+ GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
+ GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
+ } else {
+ GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
+ GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
+ }
+
+ /* If trapped == true, this goes into user space emulation */
+ wrmsr(MSR_IA32_POWER_CTL, 0x1234);
+
+ /* This goes into the in-kernel emulation */
+ rdmsr(MSR_IA32_POWER_CTL);
+
+ /* Invalid MSR, should always be handled by user space exit */
+ GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
+ wrmsr(0xdeadbeef, 0x1234);
+}
+
+static void guest_code(void)
+{
+ guest_msr_calls(true);
+
+ /*
+ * Disable msr filtering, so that the kernel
+ * handles everything in the next round
+ */
+ GUEST_SYNC(0);
+
+ guest_msr_calls(false);
+
+ GUEST_DONE();
+}
+
+static int handle_ucall(struct kvm_vm *vm)
+{
+ struct ucall uc;
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("Guest assertion not met");
+ break;
+ case UCALL_SYNC:
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter);
+ break;
+ case UCALL_DONE:
+ return 1;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ return 0;
+}
+
+static void handle_rdmsr(struct kvm_run *run)
+{
+ run->msr.data = run->msr.index;
+ msr_reads++;
+
+ if (run->msr.index == MSR_SYSCALL_MASK ||
+ run->msr.index == MSR_GS_BASE) {
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+ "MSR read trap w/o access fault");
+ }
+
+ if (run->msr.index == 0xdeadbeef) {
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+ "MSR deadbeef read trap w/o inval fault");
+ }
+}
+
+static void handle_wrmsr(struct kvm_run *run)
+{
+ /* ignore */
+ msr_writes++;
+
+ if (run->msr.index == MSR_IA32_POWER_CTL) {
+ TEST_ASSERT(run->msr.data == 0x1234,
+ "MSR data for MSR_IA32_POWER_CTL incorrect");
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+ "MSR_IA32_POWER_CTL trap w/o access fault");
+ }
+
+ if (run->msr.index == 0xdeadbeef) {
+ TEST_ASSERT(run->msr.data == 0x1234,
+ "MSR data for deadbeef incorrect");
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+ "deadbeef trap w/o inval fault");
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_enable_cap cap = {
+ .cap = KVM_CAP_X86_USER_SPACE_MSR,
+ .args[0] = KVM_MSR_EXIT_REASON_INVAL |
+ KVM_MSR_EXIT_REASON_UNKNOWN |
+ KVM_MSR_EXIT_REASON_FILTER,
+ };
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ int rc;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ run = vcpu_state(vm, VCPU_ID);
+
+ rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+ TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+ vm_enable_cap(vm, &cap);
+
+ rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+ TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+ prepare_bitmaps();
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter);
+
+ while (1) {
+ rc = _vcpu_run(vm, VCPU_ID);
+
+ TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+
+ switch (run->exit_reason) {
+ case KVM_EXIT_X86_RDMSR:
+ handle_rdmsr(run);
+ break;
+ case KVM_EXIT_X86_WRMSR:
+ handle_wrmsr(run);
+ break;
+ case KVM_EXIT_IO:
+ if (handle_ucall(vm))
+ goto done;
+ break;
+ }
+
+ }
+
+done:
+ TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
+ TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
new file mode 100644
index 000000000000..a7737af1224f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX-preemption timer test
+ *
+ * Copyright (C) 2020, Google, LLC.
+ *
+ * Test to ensure the VM-Enter after migration doesn't
+ * incorrectly restart the timer with the full timer
+ * value instead of the partially decayed timer value
+ *
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define VCPU_ID 5
+#define PREEMPTION_TIMER_VALUE 100000000ull
+#define PREEMPTION_TIMER_VALUE_THRESHOLD1 80000000ull
+
+u32 vmx_pt_rate;
+bool l2_save_restore_done;
+static u64 l2_vmx_pt_start;
+volatile u64 l2_vmx_pt_finish;
+
+union vmx_basic basic;
+union vmx_ctrl_msr ctrl_pin_rev;
+union vmx_ctrl_msr ctrl_exit_rev;
+
+void l2_guest_code(void)
+{
+ u64 vmx_pt_delta;
+
+ vmcall();
+ l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+ /*
+ * Wait until the 1st threshold has passed
+ */
+ do {
+ l2_vmx_pt_finish = rdtsc();
+ vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
+ vmx_pt_rate;
+ } while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
+
+ /*
+ * Force L2 through Save and Restore cycle
+ */
+ GUEST_SYNC(1);
+
+ l2_save_restore_done = 1;
+
+ /*
+ * Now wait for the preemption timer to fire and
+ * exit to L1
+ */
+ while ((l2_vmx_pt_finish = rdtsc()))
+ ;
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ u64 l1_vmx_pt_start;
+ u64 l1_vmx_pt_finish;
+ u64 l1_tsc_deadline, l2_tsc_deadline;
+
+ GUEST_ASSERT(vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * Check for Preemption timer support
+ */
+ basic.val = rdmsr(MSR_IA32_VMX_BASIC);
+ ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
+ : MSR_IA32_VMX_PINBASED_CTLS);
+ ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
+ : MSR_IA32_VMX_EXIT_CTLS);
+
+ if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+ !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+ return;
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
+
+ /*
+ * Turn on PIN control and resume the guest
+ */
+ GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
+ vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+ PIN_BASED_VMX_PREEMPTION_TIMER));
+
+ GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
+ PREEMPTION_TIMER_VALUE));
+
+ vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
+
+ l2_save_restore_done = 0;
+
+ l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+ GUEST_ASSERT(!vmresume());
+
+ l1_vmx_pt_finish = rdtsc();
+
+ /*
+ * Ensure exit from L2 happens after L2 goes through
+ * save and restore
+ */
+ GUEST_ASSERT(l2_save_restore_done);
+
+ /*
+ * Ensure the exit from L2 is due to preemption timer expiry
+ */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
+
+ l1_tsc_deadline = l1_vmx_pt_start +
+ (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+ l2_tsc_deadline = l2_vmx_pt_start +
+ (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+ /*
+ * Sync with the host and pass the l1|l2 pt_expiry_finish times and
+ * tsc deadlines so that the host can verify they are as expected
+ */
+ GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
+ l2_vmx_pt_finish, l2_tsc_deadline);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ if (vmx_pages)
+ l1_guest_code(vmx_pages);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0;
+
+ struct kvm_regs regs1, regs2;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_x86_state *state;
+ struct ucall uc;
+ int stage;
+
+ /*
+ * AMD currently does not implement any VMX features, so for now we
+ * just early out.
+ */
+ nested_vmx_check_supported();
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ run = vcpu_state(vm, VCPU_ID);
+
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ } else {
+ pr_info("will skip vmx preemption timer checks\n");
+ goto done;
+ }
+
+ for (stage = 1;; stage++) {
+ _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Stage %d: unexpected exit reason: %u (%s),\n",
+ stage, run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ /* UCALL_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+ /*
+ * If this is stage 2 then we should verify the vmx pt expiry
+ * is as expected.
+ * From L1's perspective verify Preemption timer hasn't
+ * expired too early.
+ * From L2's perspective verify Preemption timer hasn't
+ * expired too late.
+ */
+ if (stage == 2) {
+
+ pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
+ stage, uc.args[2], uc.args[3]);
+
+ pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
+ stage, uc.args[4], uc.args[5]);
+
+ TEST_ASSERT(uc.args[2] >= uc.args[3],
+ "Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
+ stage, uc.args[2], uc.args[3]);
+
+ TEST_ASSERT(uc.args[4] < uc.args[5],
+ "Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
+ stage, uc.args[4], uc.args[5]);
+ }
+
+ state = vcpu_save_state(vm, VCPU_ID);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ kvm_vm_restart(vm, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_load_state(vm, VCPU_ID, state);
+ run = vcpu_state(vm, VCPU_ID);
+ free(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vm, VCPU_ID, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ }
+
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
index 54cdefdfb49d..d59f3eb67c8f 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
@@ -76,10 +76,8 @@ void set_default_state(struct kvm_nested_state *state)
void set_default_vmx_state(struct kvm_nested_state *state, int size)
{
memset(state, 0, size);
- state->flags = KVM_STATE_NESTED_GUEST_MODE |
- KVM_STATE_NESTED_RUN_PENDING;
if (have_evmcs)
- state->flags |= KVM_STATE_NESTED_EVMCS;
+ state->flags = KVM_STATE_NESTED_EVMCS;
state->format = 0;
state->size = size;
state->hdr.vmx.vmxon_pa = 0x1000;
@@ -148,6 +146,11 @@ void test_vmx_nested_state(struct kvm_vm *vm)
state->hdr.vmx.smm.flags = 1;
test_nested_state_expect_einval(vm, state);
+ /* Invalid flags are rejected. */
+ set_default_vmx_state(state, state_sz);
+ state->hdr.vmx.flags = ~0;
+ test_nested_state_expect_einval(vm, state);
+
/* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = -1ull;
@@ -185,20 +188,41 @@ void test_vmx_nested_state(struct kvm_vm *vm)
state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
test_nested_state_expect_einval(vm, state);
- /* Size must be large enough to fit kvm_nested_state and vmcs12. */
+ /*
+ * Size must be large enough to fit kvm_nested_state and vmcs12
+ * if VMCS12 physical address is set
+ */
set_default_vmx_state(state, state_sz);
state->size = sizeof(*state);
+ state->flags = 0;
+ test_nested_state_expect_einval(vm, state);
+
+ set_default_vmx_state(state, state_sz);
+ state->size = sizeof(*state);
+ state->flags = 0;
+ state->hdr.vmx.vmcs12_pa = -1;
test_nested_state(vm, state);
- /* vmxon_pa cannot be the same address as vmcs_pa. */
+ /*
+ * KVM_SET_NESTED_STATE succeeds with invalid VMCS
+ * contents if L2 is not running.
+ */
set_default_vmx_state(state, state_sz);
- state->hdr.vmx.vmxon_pa = 0;
- state->hdr.vmx.vmcs12_pa = 0;
+ state->flags = 0;
+ test_nested_state(vm, state);
+
+ /* Invalid flags are rejected, even if no VMCS loaded. */
+ set_default_vmx_state(state, state_sz);
+ state->size = sizeof(*state);
+ state->flags = 0;
+ state->hdr.vmx.vmcs12_pa = -1;
+ state->hdr.vmx.flags = ~0;
test_nested_state_expect_einval(vm, state);
- /* The revision id for vmcs12 must be VMCS12_REVISION. */
+ /* vmxon_pa cannot be the same address as vmcs_pa. */
set_default_vmx_state(state, state_sz);
- set_revision_id_for_vmcs12(state, 0);
+ state->hdr.vmx.vmxon_pa = 0;
+ state->hdr.vmx.vmcs12_pa = 0;
test_nested_state_expect_einval(vm, state);
/*
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index b0556c752443..30848ca36555 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -47,9 +47,9 @@ ARCH ?= $(SUBARCH)
khdr:
ifndef KSFT_KHDR_INSTALL_DONE
ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
- make --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
+ $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
else
- make --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \
+ $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \
ARCH=$(ARCH) -C $(top_srcdir) headers_install
endif
endif
@@ -59,9 +59,8 @@ else
all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
endif
-.ONESHELL:
define RUN_TESTS
- @BASE_DIR="$(selfdir)"; \
+ BASE_DIR="$(selfdir)"; \
. $(selfdir)/kselftest/runner.sh; \
if [ "X$(summary)" != "X" ]; then \
per_test_logging=1; \
@@ -71,22 +70,21 @@ endef
run_tests: all
ifdef building_out_of_srctree
- @if [ "X$(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)" != "X" ]; then
- @rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT)
+ @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \
+ rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \
fi
- @if [ "X$(TEST_PROGS)" != "X" ]; then
- $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS))
- else
- $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS))
+ @if [ "X$(TEST_PROGS)" != "X" ]; then \
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS)) ; \
+ else \
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS)); \
fi
else
- $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
+ @$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
endif
define INSTALL_SINGLE_RULE
$(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH))
- $(if $(INSTALL_LIST),@echo rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
- $(if $(INSTALL_LIST),@rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
+ $(if $(INSTALL_LIST),rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
endef
define INSTALL_RULE
@@ -109,9 +107,8 @@ endif
emit_tests:
for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
BASENAME_TEST=`basename $$TEST`; \
- echo " \\"; \
- echo -n " \"$$BASENAME_TEST\""; \
- done; \
+ echo "$(COLLECTION):$$BASENAME_TEST"; \
+ done
# define if isn't already. It is undefined in make O= case.
ifeq ($(RM),)
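The s/make/$(MAKE)/ change above is the standard fix for recursive make invocations: $(MAKE) lets the headers_install sub-make inherit MAKEFLAGS and the parallel-build jobserver, while a literal "make" starts a disconnected process that runs serially. A small illustrative run (the -j value is an example):

    # with $(MAKE) in the khdr rule, headers_install shares the
    # parent's -j8 jobserver instead of running serially:
    make -j8 -C tools/testing/selftests khdr
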
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index 14a77ea4a8da..b80ee3f6e265 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -2,3 +2,4 @@ CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
CONFIG_PRIME_NUMBERS=m
CONFIG_TEST_STRSCPY=m
+CONFIG_TEST_BITOPS=m
diff --git a/tools/testing/selftests/livepatch/README b/tools/testing/selftests/livepatch/README
index 621d325425c2..0942dd5826f8 100644
--- a/tools/testing/selftests/livepatch/README
+++ b/tools/testing/selftests/livepatch/README
@@ -6,8 +6,8 @@ This is a small set of sanity tests for the kernel livepatching.
The test suite loads and unloads several test kernel modules to verify
livepatch behavior. Debug information is logged to the kernel's message
-buffer and parsed for expected messages. (Note: the tests will clear
-the message buffer between individual tests.)
+buffer and parsed for expected messages. (Note: the tests will compare
+the message buffer for only the duration of each individual test.)
Config
@@ -35,9 +35,9 @@ Adding tests
------------
See the common functions.sh file for the existing collection of utility
-functions, most importantly setup_config() and check_result(). The
-latter function greps the kernel's ring buffer for "livepatch:" and
-"test_klp" strings, so tests be sure to include one of those strings for
-result comparison. Other utility functions include general module
-loading and livepatch loading helpers (waiting for patch transitions,
-sysfs entries, etc.)
+functions, most importantly setup_config(), start_test() and
+check_result(). The latter function greps the kernel's ring buffer for
+"livepatch:" and "test_klp" strings, so tests be sure to include one of
+those strings for result comparison. Other utility functions include
+general module loading and livepatch loading helpers (waiting for patch
+transitions, sysfs entries, etc.)
diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 2aab9791791d..846c7ed71556 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -41,6 +41,17 @@ function die() {
exit 1
}
+# save existing dmesg so we can detect new content
+function save_dmesg() {
+ SAVED_DMESG=$(mktemp --tmpdir -t klp-dmesg-XXXXXX)
+ dmesg > "$SAVED_DMESG"
+}
+
+# cleanup temporary dmesg file from save_dmesg()
+function cleanup_dmesg_file() {
+ rm -f "$SAVED_DMESG"
+}
+
function push_config() {
DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \
awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}')
@@ -64,10 +75,16 @@ function set_dynamic_debug() {
}
function set_ftrace_enabled() {
- result=$(sysctl kernel.ftrace_enabled="$1" 2>&1 | paste --serial --delimiters=' ')
+ result=$(sysctl -q kernel.ftrace_enabled="$1" 2>&1 && \
+ sysctl kernel.ftrace_enabled 2>&1)
echo "livepatch: $result" > /dev/kmsg
}
+function cleanup() {
+ pop_config
+ cleanup_dmesg_file
+}
+
# setup_config - save the current config and set a script exit trap that
# restores the original config. Setup the dynamic debug
# for verbose livepatching output and turn on
@@ -77,7 +94,7 @@ function setup_config() {
push_config
set_dynamic_debug
set_ftrace_enabled 1
- trap pop_config EXIT INT TERM HUP
+ trap cleanup EXIT INT TERM HUP
}
# loop_until(cmd) - loop a command until it is successful or $MAX_RETRIES,
@@ -243,13 +260,28 @@ function set_pre_patch_ret {
die "failed to set pre_patch_ret parameter for $mod module"
}
+function start_test {
+ local test="$1"
+
+ save_dmesg
+ echo -n "TEST: $test ... "
+ log "===== TEST: $test ====="
+}
+
# check_result() - verify dmesg output
# TODO - better filter, out of order msgs, etc?
function check_result {
local expect="$*"
local result
- result=$(dmesg | grep -v 'tainting' | grep -e 'livepatch:' -e 'test_klp' | sed 's/^\[[ 0-9.]*\] //')
+ # Note: when comparing dmesg output, the kernel log timestamps
+ # help differentiate repeated testing runs. Remove them with a
+ # post-comparison sed filter.
+
+ result=$(dmesg | comm --nocheck-order -13 "$SAVED_DMESG" - | \
+ grep -e 'livepatch:' -e 'test_klp' | \
+ grep -v '\(tainting\|taints\) kernel' | \
+ sed 's/^\[[ 0-9.]*\] //')
if [[ "$expect" == "$result" ]] ; then
echo "ok"
@@ -257,4 +289,6 @@ function check_result {
echo -e "not ok\n\n$(diff -upr --label expected --label result <(echo "$expect") <(echo "$result"))\n"
die "livepatch kselftest(s) failed"
fi
+
+ cleanup_dmesg_file
}
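The comm-based filtering that replaces the old "dmesg -C" scheme relies on comm(1) with order checking disabled: -13 suppresses lines unique to the first input and lines common to both, leaving only messages logged after the save_dmesg() snapshot. A standalone sketch of the same pipeline (file name is illustrative):

    dmesg > before.log
    # ... exercise the code under test ...
    dmesg | comm --nocheck-order -13 before.log - | \
        grep -e 'livepatch:' -e 'test_klp' | \
        sed 's/^\[[ 0-9.]*\] //'    # strip kernel log timestamps
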
diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh
index a35289b13c9c..90b26dbb2626 100755
--- a/tools/testing/selftests/livepatch/test-callbacks.sh
+++ b/tools/testing/selftests/livepatch/test-callbacks.sh
@@ -12,8 +12,6 @@ MOD_TARGET_BUSY=test_klp_callbacks_busy
setup_config
-# TEST: target module before livepatch
-#
# Test a combination of loading a kernel module and a livepatch that
# patches a function in the first module. Load the target module
# before the livepatch module. Unload them in the same order.
@@ -28,8 +26,7 @@ setup_config
# unpatching transition starts. klp_objects are reverted, post-patch
# callbacks execute and the transition completes.
-echo -n "TEST: target module before livepatch ... "
-dmesg -C
+start_test "target module before livepatch"
load_mod $MOD_TARGET
load_lp $MOD_LIVEPATCH
@@ -63,8 +60,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
$MOD_TARGET: ${MOD_TARGET}_exit"
-# TEST: module_coming notifier
-#
# This test is similar to the previous test, but (un)load the livepatch
# module before the target kernel module. This tests the livepatch
# core's module_coming handler.
@@ -78,8 +73,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit"
# - On livepatch disable, all currently loaded klp_objects' (vmlinux and
# $MOD_TARGET) pre/post-unpatch callbacks are executed.
-echo -n "TEST: module_coming notifier ... "
-dmesg -C
+start_test "module_coming notifier"
load_lp $MOD_LIVEPATCH
load_mod $MOD_TARGET
@@ -114,8 +108,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
$MOD_TARGET: ${MOD_TARGET}_exit"
-# TEST: module_going notifier
-#
# Test loading the livepatch after a targeted kernel module, then unload
# the kernel module before disabling the livepatch. This tests the
# livepatch core's module_going handler.
@@ -129,8 +121,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit"
# - When the livepatch is disabled, pre and post-unpatch callbacks are
# run for the remaining klp_object, vmlinux.
-echo -n "TEST: module_going notifier ... "
-dmesg -C
+start_test "module_going notifier"
load_mod $MOD_TARGET
load_lp $MOD_LIVEPATCH
@@ -165,8 +156,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: module_coming and module_going notifiers
-#
# This test is similar to the previous test, however the livepatch is
# loaded first. This tests the livepatch core's module_coming and
# module_going handlers.
@@ -180,8 +169,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# from the $MOD_TARGET klp_object. As such, only pre and
# post-unpatch callbacks are executed when this occurs.
-echo -n "TEST: module_coming and module_going notifiers ... "
-dmesg -C
+start_test "module_coming and module_going notifiers"
load_lp $MOD_LIVEPATCH
load_mod $MOD_TARGET
@@ -217,8 +205,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: target module not present
-#
# A simple test of loading a livepatch without one of its patch target
# klp_objects ever loaded ($MOD_TARGET).
#
@@ -227,8 +213,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# - As expected, only pre/post-(un)patch handlers are executed for
# vmlinux.
-echo -n "TEST: target module not present ... "
-dmesg -C
+start_test "target module not present"
load_lp $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH
@@ -252,8 +237,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: pre-patch callback -ENODEV
-#
# Test a scenario where a vmlinux pre-patch callback returns a non-zero
# status (ie, failure).
#
@@ -265,8 +248,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# The result is that the insmod command refuses to load the livepatch
# module.
-echo -n "TEST: pre-patch callback -ENODEV ... "
-dmesg -C
+start_test "pre-patch callback -ENODEV"
load_mod $MOD_TARGET
load_failing_mod $MOD_LIVEPATCH pre_patch_ret=-19
@@ -288,8 +270,6 @@ modprobe: ERROR: could not insert '$MOD_LIVEPATCH': No such device
$MOD_TARGET: ${MOD_TARGET}_exit"
-# TEST: module_coming + pre-patch callback -ENODEV
-#
# Similar to the previous test, setup a livepatch such that its vmlinux
# pre-patch callback returns success. However, when a targeted kernel
# module is later loaded, have the livepatch return a failing status
@@ -307,8 +287,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit"
#
# - Pre/post-unpatch callbacks are run for the vmlinux klp_object.
-echo -n "TEST: module_coming + pre-patch callback -ENODEV ... "
-dmesg -C
+start_test "module_coming + pre-patch callback -ENODEV"
load_lp $MOD_LIVEPATCH
set_pre_patch_ret $MOD_LIVEPATCH -19
@@ -341,8 +320,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: multiple target modules
-#
# Test loading multiple targeted kernel modules. This test-case is
# mainly for comparing with the next test-case.
#
@@ -353,12 +330,9 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# module. Post-patch callbacks are executed and the transition
# completes quickly.
-echo -n "TEST: multiple target modules ... "
-dmesg -C
+start_test "multiple target modules"
-load_mod $MOD_TARGET_BUSY sleep_secs=0
-# give $MOD_TARGET_BUSY::busymod_work_func() a chance to run
-sleep 5
+load_mod $MOD_TARGET_BUSY block_transition=N
load_lp $MOD_LIVEPATCH
load_mod $MOD_TARGET
unload_mod $MOD_TARGET
@@ -366,9 +340,9 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET_BUSY
-check_result "% modprobe $MOD_TARGET_BUSY sleep_secs=0
+check_result "% modprobe $MOD_TARGET_BUSY block_transition=N
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_init
-$MOD_TARGET_BUSY: busymod_work_func, sleeping 0 seconds ...
+$MOD_TARGET_BUSY: busymod_work_func enter
$MOD_TARGET_BUSY: busymod_work_func exit
% modprobe $MOD_LIVEPATCH
livepatch: enabling patch '$MOD_LIVEPATCH'
@@ -404,11 +378,8 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit"
-
-# TEST: busy target module
-#
# A similar test as the previous one, but force the "busy" kernel module
-# to do longer work.
+# to block the livepatch transition.
#
# The livepatching core will refuse to patch a task that is currently
# executing a to-be-patched function -- the consistency model stalls the
@@ -417,8 +388,7 @@ $MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit"
# function for a long time. Meanwhile, load and unload other target
# kernel modules while the livepatch transition is in progress.
#
-# - Load the "busy" kernel module, this time make it do 10 seconds worth
-# of work.
+# - Load the "busy" kernel module, this time make its work function loop
#
# - Meanwhile, the livepatch is loaded. Notice that the patch
# transition does not complete as the targeted "busy" module is
@@ -435,23 +405,25 @@ $MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit"
# klp_object's post-patch callbacks executed, the remaining
# klp_object's pre-unpatch callbacks are skipped.
-echo -n "TEST: busy target module ... "
-dmesg -C
+start_test "busy target module"
-load_mod $MOD_TARGET_BUSY sleep_secs=10
+load_mod $MOD_TARGET_BUSY block_transition=Y
load_lp_nowait $MOD_LIVEPATCH
-# Don't wait for transition, load $MOD_TARGET while the transition
-# is still stalled in $MOD_TARGET_BUSY::busymod_work_func()
-sleep 5
+
+# Wait until the livepatch reports in-transition state, i.e. that it's
+# stalled on $MOD_TARGET_BUSY::busymod_work_func()
+loop_until 'grep -q '^1$' /sys/kernel/livepatch/$MOD_LIVEPATCH/transition' ||
+ die "failed to stall transition"
+
load_mod $MOD_TARGET
unload_mod $MOD_TARGET
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET_BUSY
-check_result "% modprobe $MOD_TARGET_BUSY sleep_secs=10
+check_result "% modprobe $MOD_TARGET_BUSY block_transition=Y
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_init
-$MOD_TARGET_BUSY: busymod_work_func, sleeping 10 seconds ...
+$MOD_TARGET_BUSY: busymod_work_func enter
% modprobe $MOD_LIVEPATCH
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
@@ -479,8 +451,6 @@ $MOD_TARGET_BUSY: busymod_work_func exit
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit"
-# TEST: multiple livepatches
-#
# Test loading multiple livepatches. This test-case is mainly for comparing
# with the next test-case.
#
@@ -488,8 +458,7 @@ $MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit"
# execute as each patch progresses through its (un)patching
# transition.
-echo -n "TEST: multiple livepatches ... "
-dmesg -C
+start_test "multiple livepatches"
load_lp $MOD_LIVEPATCH
load_lp $MOD_LIVEPATCH2
@@ -532,8 +501,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: atomic replace
-#
# Load multiple livepatches, but the second as an 'atomic-replace'
# patch. When the latter loads, the original livepatch should be
# disabled and *none* of its pre/post-unpatch callbacks executed. On
@@ -548,8 +515,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# - Once the atomic replace module is loaded, only its pre and post
# unpatch callbacks are executed.
-echo -n "TEST: atomic replace ... "
-dmesg -C
+start_test "atomic replace"
load_lp $MOD_LIVEPATCH
load_lp $MOD_LIVEPATCH2 replace=1
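With the per-test banners and "dmesg -C" calls gone, every case now follows the same skeleton built from the new helpers: start_test snapshots dmesg and prints the banner, the body loads and unloads modules, and check_result compares only the messages logged in between. A hedged sketch of what a new case would look like (test name and transcript are placeholders):

    start_test "my new callback test"
    load_lp $MOD_LIVEPATCH
    disable_lp $MOD_LIVEPATCH
    unload_lp $MOD_LIVEPATCH
    check_result "% modprobe $MOD_LIVEPATCH
    livepatch: enabling patch '$MOD_LIVEPATCH'
    % rmmod $MOD_LIVEPATCH"
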
diff --git a/tools/testing/selftests/livepatch/test-ftrace.sh b/tools/testing/selftests/livepatch/test-ftrace.sh
index e2a76887f40a..552e165512f4 100755
--- a/tools/testing/selftests/livepatch/test-ftrace.sh
+++ b/tools/testing/selftests/livepatch/test-ftrace.sh
@@ -9,13 +9,11 @@ MOD_LIVEPATCH=test_klp_livepatch
setup_config
-# TEST: livepatch interaction with ftrace_enabled sysctl
# - turn ftrace_enabled OFF and verify livepatches can't load
# - turn ftrace_enabled ON and verify livepatch can load
# - verify that ftrace_enabled can't be turned OFF while a livepatch is loaded
-echo -n "TEST: livepatch interaction with ftrace_enabled sysctl ... "
-dmesg -C
+start_test "livepatch interaction with ftrace_enabled sysctl"
set_ftrace_enabled 0
load_failing_mod $MOD_LIVEPATCH
@@ -53,7 +51,7 @@ livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
livepatch: '$MOD_LIVEPATCH': patching complete
-livepatch: sysctl: setting key \"kernel.ftrace_enabled\": Device or resource busy kernel.ftrace_enabled = 0
+livepatch: sysctl: setting key \"kernel.ftrace_enabled\": Device or resource busy
% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
livepatch: '$MOD_LIVEPATCH': starting unpatching transition
diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh
index 493e3df415a1..5fe79ac34be1 100755
--- a/tools/testing/selftests/livepatch/test-livepatch.sh
+++ b/tools/testing/selftests/livepatch/test-livepatch.sh
@@ -10,13 +10,11 @@ MOD_REPLACE=test_klp_atomic_replace
setup_config
-# TEST: basic function patching
# - load a livepatch that modifies the output from /proc/cmdline and
# verify correct behavior
# - unload the livepatch and make sure the patch was removed
-echo -n "TEST: basic function patching ... "
-dmesg -C
+start_test "basic function patching"
load_lp $MOD_LIVEPATCH
@@ -47,15 +45,13 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: multiple livepatches
# - load a livepatch that modifies the output from /proc/cmdline and
# verify correct behavior
# - load another livepatch and verify that both livepatches are active
# - unload the second livepatch and verify that the first is still active
# - unload the first livepatch and verify none are active
-echo -n "TEST: multiple livepatches ... "
-dmesg -C
+start_test "multiple livepatches"
load_lp $MOD_LIVEPATCH
@@ -109,7 +105,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: atomic replace livepatch
# - load a livepatch that modifies the output from /proc/cmdline and
# verify correct behavior
# - load an atomic replace livepatch and verify that only the second is active
@@ -117,8 +112,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# is still active
# - remove the atomic replace livepatch and verify that none are active
-echo -n "TEST: atomic replace livepatch ... "
-dmesg -C
+start_test "atomic replace livepatch"
load_lp $MOD_LIVEPATCH
diff --git a/tools/testing/selftests/livepatch/test-shadow-vars.sh b/tools/testing/selftests/livepatch/test-shadow-vars.sh
index 1aae73299114..e04cb354f56b 100755
--- a/tools/testing/selftests/livepatch/test-shadow-vars.sh
+++ b/tools/testing/selftests/livepatch/test-shadow-vars.sh
@@ -9,52 +9,71 @@ MOD_TEST=test_klp_shadow_vars
setup_config
-# TEST: basic shadow variable API
# - load a module that exercises the shadow variable API
-echo -n "TEST: basic shadow variable API ... "
-dmesg -C
+start_test "basic shadow variable API"
load_mod $MOD_TEST
unload_mod $MOD_TEST
check_result "% modprobe $MOD_TEST
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR0
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1234) = PTR0
$MOD_TEST: got expected NULL result
-$MOD_TEST: shadow_ctor: PTR6 -> PTR1
-$MOD_TEST: klp_shadow_alloc(obj=PTR5, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR1 = PTR6
-$MOD_TEST: shadow_ctor: PTR8 -> PTR2
-$MOD_TEST: klp_shadow_alloc(obj=PTR9, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR2 = PTR8
-$MOD_TEST: shadow_ctor: PTR10 -> PTR3
-$MOD_TEST: klp_shadow_alloc(obj=PTR5, id=0x1235, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR3 = PTR10
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR6
-$MOD_TEST: got expected PTR6 -> PTR1 result
+$MOD_TEST: shadow_ctor: PTR3 -> PTR2
+$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR1, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR2 = PTR3
+$MOD_TEST: shadow_ctor: PTR6 -> PTR5
+$MOD_TEST: klp_shadow_alloc(obj=PTR1, id=0x1235, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR5 = PTR6
+$MOD_TEST: shadow_ctor: PTR8 -> PTR7
+$MOD_TEST: klp_shadow_alloc(obj=PTR9, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR7 = PTR8
+$MOD_TEST: shadow_ctor: PTR11 -> PTR10
+$MOD_TEST: klp_shadow_alloc(obj=PTR9, id=0x1235, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR10 = PTR11
+$MOD_TEST: shadow_ctor: PTR13 -> PTR12
+$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR14, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR12 = PTR13
+$MOD_TEST: shadow_ctor: PTR16 -> PTR15
+$MOD_TEST: klp_shadow_alloc(obj=PTR14, id=0x1235, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR15 = PTR16
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1234) = PTR3
+$MOD_TEST: got expected PTR3 -> PTR2 result
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1235) = PTR6
+$MOD_TEST: got expected PTR6 -> PTR5 result
$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1234) = PTR8
-$MOD_TEST: got expected PTR8 -> PTR2 result
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1235) = PTR10
-$MOD_TEST: got expected PTR10 -> PTR3 result
-$MOD_TEST: shadow_ctor: PTR11 -> PTR4
-$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR12, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR4 = PTR11
-$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR12, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR4 = PTR11
-$MOD_TEST: got expected PTR11 -> PTR4 result
-$MOD_TEST: shadow_dtor(obj=PTR5, shadow_data=PTR6)
-$MOD_TEST: klp_shadow_free(obj=PTR5, id=0x1234, dtor=PTR13)
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR0
+$MOD_TEST: got expected PTR8 -> PTR7 result
+$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1235) = PTR11
+$MOD_TEST: got expected PTR11 -> PTR10 result
+$MOD_TEST: klp_shadow_get(obj=PTR14, id=0x1234) = PTR13
+$MOD_TEST: got expected PTR13 -> PTR12 result
+$MOD_TEST: klp_shadow_get(obj=PTR14, id=0x1235) = PTR16
+$MOD_TEST: got expected PTR16 -> PTR15 result
+$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR1, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR2 = PTR3
+$MOD_TEST: got expected PTR3 -> PTR2 result
+$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR9, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR7 = PTR8
+$MOD_TEST: got expected PTR8 -> PTR7 result
+$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR14, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR12 = PTR13
+$MOD_TEST: got expected PTR13 -> PTR12 result
+$MOD_TEST: shadow_dtor(obj=PTR1, shadow_data=PTR3)
+$MOD_TEST: klp_shadow_free(obj=PTR1, id=0x1234, dtor=PTR17)
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1234) = PTR0
$MOD_TEST: got expected NULL result
$MOD_TEST: shadow_dtor(obj=PTR9, shadow_data=PTR8)
-$MOD_TEST: klp_shadow_free(obj=PTR9, id=0x1234, dtor=PTR13)
+$MOD_TEST: klp_shadow_free(obj=PTR9, id=0x1234, dtor=PTR17)
$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1234) = PTR0
$MOD_TEST: got expected NULL result
-$MOD_TEST: shadow_dtor(obj=PTR12, shadow_data=PTR11)
-$MOD_TEST: klp_shadow_free(obj=PTR12, id=0x1234, dtor=PTR13)
-$MOD_TEST: klp_shadow_get(obj=PTR12, id=0x1234) = PTR0
+$MOD_TEST: shadow_dtor(obj=PTR14, shadow_data=PTR13)
+$MOD_TEST: klp_shadow_free(obj=PTR14, id=0x1234, dtor=PTR17)
+$MOD_TEST: klp_shadow_get(obj=PTR14, id=0x1234) = PTR0
$MOD_TEST: got expected NULL result
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1235) = PTR10
-$MOD_TEST: got expected PTR10 -> PTR3 result
-$MOD_TEST: shadow_dtor(obj=PTR5, shadow_data=PTR10)
-$MOD_TEST: klp_shadow_free_all(id=0x1235, dtor=PTR13)
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR0
-$MOD_TEST: shadow_get() got expected NULL result
-% rmmod test_klp_shadow_vars"
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1235) = PTR6
+$MOD_TEST: got expected PTR6 -> PTR5 result
+$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1235) = PTR11
+$MOD_TEST: got expected PTR11 -> PTR10 result
+$MOD_TEST: klp_shadow_get(obj=PTR14, id=0x1235) = PTR16
+$MOD_TEST: got expected PTR16 -> PTR15 result
+$MOD_TEST: klp_shadow_free_all(id=0x1235, dtor=PTR0)
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1235) = PTR0
+$MOD_TEST: got expected NULL result
+$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1235) = PTR0
+$MOD_TEST: got expected NULL result
+$MOD_TEST: klp_shadow_get(obj=PTR14, id=0x1235) = PTR0
+$MOD_TEST: got expected NULL result
+% rmmod $MOD_TEST"
exit 0
diff --git a/tools/testing/selftests/livepatch/test-state.sh b/tools/testing/selftests/livepatch/test-state.sh
index a08212708115..38656721c958 100755
--- a/tools/testing/selftests/livepatch/test-state.sh
+++ b/tools/testing/selftests/livepatch/test-state.sh
@@ -10,10 +10,10 @@ MOD_LIVEPATCH3=test_klp_state3
setup_config
-# TEST: Loading and removing a module that modifies the system state
-echo -n "TEST: system state modification ... "
-dmesg -C
+# Load and remove a module that modifies the system state
+
+start_test "system state modification"
load_lp $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH
@@ -41,10 +41,9 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: Take over system state change by a cumulative patch
+# Take over system state change by a cumulative patch
-echo -n "TEST: taking over system state modification ... "
-dmesg -C
+start_test "taking over system state modification"
load_lp $MOD_LIVEPATCH
load_lp $MOD_LIVEPATCH2
@@ -85,10 +84,9 @@ livepatch: '$MOD_LIVEPATCH2': unpatching complete
% rmmod $MOD_LIVEPATCH2"
-# TEST: Take over system state change by a cumulative patch
+# Take over system state change by a cumulative patch
-echo -n "TEST: compatible cumulative livepatches ... "
-dmesg -C
+start_test "compatible cumulative livepatches"
load_lp $MOD_LIVEPATCH2
load_lp $MOD_LIVEPATCH3
@@ -142,10 +140,9 @@ livepatch: '$MOD_LIVEPATCH2': unpatching complete
% rmmod $MOD_LIVEPATCH3"
-# TEST: Failure caused by incompatible cumulative livepatches
+# Failure caused by incompatible cumulative livepatches
-echo -n "TEST: incompatible cumulative livepatches ... "
-dmesg -C
+start_test "incompatible cumulative livepatches"
load_lp $MOD_LIVEPATCH2
load_failing_mod $MOD_LIVEPATCH
diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh
index ee64ff8df8f4..bb7a1775307b 100755
--- a/tools/testing/selftests/lkdtm/run.sh
+++ b/tools/testing/selftests/lkdtm/run.sh
@@ -8,6 +8,7 @@
#
set -e
TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT
+CLEAR_ONCE=/sys/kernel/debug/clear_warn_once
KSELFTEST_SKIP_TEST=4
# Verify we have LKDTM available in the kernel.
@@ -67,6 +68,11 @@ cleanup() {
}
trap cleanup EXIT
+# Reset WARN_ONCE counters so we trip them each time this runs.
+if [ -w $CLEAR_ONCE ] ; then
+ echo 1 > $CLEAR_ONCE
+fi
+
# Save existing dmesg so we can detect new content below
dmesg > "$DMESG"
@@ -76,7 +82,7 @@ dmesg > "$DMESG"
($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true
# Record and dump the results
-dmesg | diff --changed-group-format='%>' --unchanged-group-format='' "$DMESG" - > "$LOG" || true
+dmesg | comm --nocheck-order -13 "$DMESG" - > "$LOG" || true
cat "$LOG"
# Check for expected output
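Writing to clear_warn_once is what makes WARN_ONCE-based LKDTM crash types report on every run rather than only on the first one after boot. The same knob is handy when re-running a single test by hand (paths as used in the script):

    echo 1 > /sys/kernel/debug/clear_warn_once            # re-arm *_ONCE sites
    echo EXCEPTION > /sys/kernel/debug/provoke-crash/DIRECT
    dmesg | tail                                          # inspect the result
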
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 92ca32143ae5..74a8d329a72c 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -9,11 +9,11 @@ EXCEPTION
#CORRUPT_STACK_STRONG Crashes entire system on success
CORRUPT_LIST_ADD list_add corruption
CORRUPT_LIST_DEL list_del corruption
-CORRUPT_USER_DS Invalid address limit on user-mode return
STACK_GUARD_PAGE_LEADING
STACK_GUARD_PAGE_TRAILING
UNSET_SMEP CR4 bits went missing
DOUBLE_FAULT
+CORRUPT_PAC
UNALIGNED_LOAD_STORE_WRITE
#OVERWRITE_ALLOCATION Corrupts memory on failure
#WRITE_AFTER_FREE Corrupts memory on failure
@@ -66,6 +66,5 @@ USERCOPY_STACK_FRAME_TO
USERCOPY_STACK_FRAME_FROM
USERCOPY_STACK_BEYOND
USERCOPY_KERNEL
-USERCOPY_KERNEL_DS
STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
CFI_FORWARD_PROTO
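Each entry in tests.txt names an LKDTM crash type, optionally followed by the dmesg text run.sh should expect; entries commented out with # are skipped, with the reason noted inline. A hypothetical new entry would look like:

    MY_NEW_TEST expected dmesg message fragment
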
diff --git a/tools/testing/selftests/mincore/.gitignore b/tools/testing/selftests/mincore/.gitignore
new file mode 100644
index 000000000000..15c4dfc2df00
--- /dev/null
+++ b/tools/testing/selftests/mincore/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0+
+mincore_selftest
diff --git a/tools/testing/selftests/mincore/Makefile b/tools/testing/selftests/mincore/Makefile
new file mode 100644
index 000000000000..38c7db1e8926
--- /dev/null
+++ b/tools/testing/selftests/mincore/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+CFLAGS += -Wall
+
+TEST_GEN_PROGS := mincore_selftest
+include ../lib.mk
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
new file mode 100644
index 000000000000..5a1e85ff5d32
--- /dev/null
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * kselftest suite for mincore().
+ *
+ * Copyright (C) 2020 Collabora, Ltd.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+/* Default test file size: 4MB */
+#define MB (1UL << 20)
+#define FILE_SIZE (4 * MB)
+
+
+/*
+ * Tests the user interface. This test triggers most of the documented
+ * error conditions in mincore().
+ */
+TEST(basic_interface)
+{
+ int retval;
+ int page_size;
+ unsigned char vec[1];
+ char *addr;
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ /* Query a 0 byte sized range */
+ retval = mincore(0, 0, vec);
+ EXPECT_EQ(0, retval);
+
+ /* Addresses in the specified range are invalid or unmapped */
+ errno = 0;
+ retval = mincore(NULL, page_size, vec);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(ENOMEM, errno);
+
+ errno = 0;
+ addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+
+ /* <addr> argument is not page-aligned */
+ errno = 0;
+ retval = mincore(addr + 1, page_size, vec);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(EINVAL, errno);
+
+ /* <length> argument is too large */
+ errno = 0;
+ retval = mincore(addr, -1, vec);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(ENOMEM, errno);
+
+ /* <vec> argument points to an illegal address */
+ errno = 0;
+ retval = mincore(addr, page_size, NULL);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(EFAULT, errno);
+ munmap(addr, page_size);
+}
+
+
+/*
+ * Test mincore() behavior on a private anonymous page mapping.
+ * Check that the page is not loaded into memory right after the mapping
+ * but after accessing it (on-demand allocation).
+ * Then free the page and check that it's not memory-resident.
+ */
+TEST(check_anonymous_locked_pages)
+{
+ unsigned char vec[1];
+ char *addr;
+ int retval;
+ int page_size;
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ /* Map one page and check it's not memory-resident */
+ errno = 0;
+ addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(0, vec[0]) {
+ TH_LOG("Page found in memory before use");
+ }
+
+ /* Touch the page and check again. It should now be in memory */
+ addr[0] = 1;
+ mlock(addr, page_size);
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(1, vec[0]) {
+ TH_LOG("Page not found in memory after use");
+ }
+
+ /*
+ * It shouldn't be memory-resident after unlocking it and
+ * marking it as unneeded.
+ */
+ munlock(addr, page_size);
+ madvise(addr, page_size, MADV_DONTNEED);
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(0, vec[0]) {
+ TH_LOG("Page in memory after being zapped");
+ }
+ munmap(addr, page_size);
+}
+
+
+/*
+ * Check mincore() behavior on huge pages.
+ * This test will be skipped if the mapping fails (i.e. if there are no
+ * huge pages available).
+ *
+ * Make sure the system has at least one free huge page, check
+ * "HugePages_Free" in /proc/meminfo.
+ * Increment /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages if
+ * needed.
+ */
+TEST(check_huge_pages)
+{
+ unsigned char vec[1];
+ char *addr;
+ int retval;
+ int page_size;
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ errno = 0;
+ addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (addr == MAP_FAILED) {
+ if (errno == ENOMEM)
+ SKIP(return, "No huge pages available.");
+ else
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(0, vec[0]) {
+ TH_LOG("Page found in memory before use");
+ }
+
+ addr[0] = 1;
+ mlock(addr, page_size);
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(1, vec[0]) {
+ TH_LOG("Page not found in memory after use");
+ }
+
+ munlock(addr, page_size);
+ munmap(addr, page_size);
+}
+
+
+/*
+ * Test mincore() behavior on a file-backed page.
+ * No pages should be loaded into memory right after the mapping. Then,
+ * accessing any address in the mapping range should load the page
+ * containing the address and a number of subsequent pages (readahead).
+ *
+ * The actual readahead settings depend on the test environment, so we
+ * can't make a lot of assumptions about that. This test covers the most
+ * general cases.
+ */
+TEST(check_file_mmap)
+{
+ unsigned char *vec;
+ int vec_size;
+ char *addr;
+ int retval;
+ int page_size;
+ int fd;
+ int i;
+ int ra_pages = 0;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ vec_size = FILE_SIZE / page_size;
+ if (FILE_SIZE % page_size)
+ vec_size++;
+
+ vec = calloc(vec_size, sizeof(unsigned char));
+ ASSERT_NE(NULL, vec) {
+ TH_LOG("Can't allocate array");
+ }
+
+ errno = 0;
+ fd = open(".", O_TMPFILE | O_RDWR, 0600);
+ ASSERT_NE(-1, fd) {
+ TH_LOG("Can't create temporary file: %s",
+ strerror(errno));
+ }
+ errno = 0;
+ retval = fallocate(fd, 0, 0, FILE_SIZE);
+ ASSERT_EQ(0, retval) {
+ TH_LOG("Error allocating space for the temporary file: %s",
+ strerror(errno));
+ }
+
+ /*
+ * Map the whole file, the pages shouldn't be fetched yet.
+ */
+ errno = 0;
+ addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ for (i = 0; i < vec_size; i++) {
+ ASSERT_EQ(0, vec[i]) {
+ TH_LOG("Unexpected page in memory");
+ }
+ }
+
+ /*
+ * Touch a page in the middle of the mapping. We expect the next
+ * few pages (the readahead window) to be populated too.
+ */
+ addr[FILE_SIZE / 2] = 1;
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) {
+ TH_LOG("Page not found in memory after use");
+ }
+
+ i = FILE_SIZE / 2 / page_size + 1;
+ while (i < vec_size && vec[i]) {
+ ra_pages++;
+ i++;
+ }
+ EXPECT_GT(ra_pages, 0) {
+ TH_LOG("No read-ahead pages found in memory");
+ }
+
+ EXPECT_LT(i, vec_size) {
+ TH_LOG("Read-ahead pages reached the end of the file");
+ }
+ /*
+ * End of the readahead window. The rest of the pages shouldn't
+ * be in memory.
+ */
+ if (i < vec_size) {
+ while (i < vec_size && !vec[i])
+ i++;
+ EXPECT_EQ(vec_size, i) {
+ TH_LOG("Unexpected page in memory beyond readahead window");
+ }
+ }
+
+ munmap(addr, FILE_SIZE);
+ close(fd);
+ free(vec);
+}
+
+
+/*
+ * Test mincore() behavior on a page backed by a tmpfs file. This test
+ * performs the same steps as the previous one. However, we don't expect
+ * any readahead in this case.
+ */
+TEST(check_tmpfs_mmap)
+{
+ unsigned char *vec;
+ int vec_size;
+ char *addr;
+ int retval;
+ int page_size;
+ int fd;
+ int i;
+ int ra_pages = 0;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ vec_size = FILE_SIZE / page_size;
+ if (FILE_SIZE % page_size)
+ vec_size++;
+
+ vec = calloc(vec_size, sizeof(unsigned char));
+ ASSERT_NE(NULL, vec) {
+ TH_LOG("Can't allocate array");
+ }
+
+ errno = 0;
+ fd = open("/dev/shm", O_TMPFILE | O_RDWR, 0600);
+ ASSERT_NE(-1, fd) {
+ TH_LOG("Can't create temporary file: %s",
+ strerror(errno));
+ }
+ errno = 0;
+ retval = fallocate(fd, 0, 0, FILE_SIZE);
+ ASSERT_EQ(0, retval) {
+ TH_LOG("Error allocating space for the temporary file: %s",
+ strerror(errno));
+ }
+
+ /*
+ * Map the whole file, the pages shouldn't be fetched yet.
+ */
+ errno = 0;
+ addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ for (i = 0; i < vec_size; i++) {
+ ASSERT_EQ(0, vec[i]) {
+ TH_LOG("Unexpected page in memory");
+ }
+ }
+
+ /*
+ * Touch a page in the middle of the mapping. We expect only
+ * that page to be fetched into memory.
+ */
+ addr[FILE_SIZE / 2] = 1;
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) {
+ TH_LOG("Page not found in memory after use");
+ }
+
+ i = FILE_SIZE / 2 / page_size + 1;
+ while (i < vec_size && vec[i]) {
+ ra_pages++;
+ i++;
+ }
+ ASSERT_EQ(ra_pages, 0) {
+ TH_LOG("Read-ahead pages found in memory");
+ }
+
+ munmap(addr, FILE_SIZE);
+ close(fd);
+ free(vec);
+}
+
+TEST_HARNESS_MAIN
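TEST_HARNESS_MAIN wires the five TEST() cases above into the kselftest harness, which takes care of per-test reporting and exit codes. A typical way to build and run the new suite (paths as added by this patch):

    # from the kernel top-level tree:
    make -C tools/testing/selftests TARGETS=mincore run_tests
    # or directly:
    cd tools/testing/selftests/mincore && make && ./mincore_selftest
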
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 742c499328b2..61ae899cfc17 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
+ipsec
msg_zerocopy
socket
psock_fanout
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3f386eb9e7d7..ef352477cac6 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -16,6 +16,11 @@ TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh
TEST_PROGS += route_localnet.sh
TEST_PROGS += reuseaddr_ports_exhausted.sh
TEST_PROGS += txtimestamp.sh
+TEST_PROGS += vrf-xfrm-tests.sh
+TEST_PROGS += rxtimestamp.sh
+TEST_PROGS += devlink_port_split.py
+TEST_PROGS += drop_monitor_tests.sh
+TEST_PROGS += vrf_route_leaking.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -26,6 +31,7 @@ TEST_GEN_FILES += tcp_fastopen_backup_key
TEST_GEN_FILES += fin_ack_lat
TEST_GEN_FILES += reuseaddr_ports_exhausted
TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
+TEST_GEN_FILES += ipsec
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 3b42c06b5985..4d5df8e1eee7 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -24,10 +24,13 @@ CONFIG_IP_NF_NAT=m
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_IPV4=y
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NFT_NAT=m
CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_NETEM=y
CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_KALLSYMS=y
+CONFIG_TRACEPOINTS=y
+CONFIG_NET_DROP_MONITOR=m
+CONFIG_NETDEVSIM=m
+CONFIG_NET_FOU=m
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
new file mode 100755
index 000000000000..58bb7e9b88ce
--- /dev/null
+++ b/tools/testing/selftests/net/devlink_port_split.py
@@ -0,0 +1,277 @@
+#!/usr/bin/python3
+# SPDX-License-Identifier: GPL-2.0
+
+from subprocess import PIPE, Popen
+import json
+import time
+import argparse
+import collections
+import sys
+
+#
+# Test port split configuration using devlink-port lanes attribute.
+# The test is skipped in case the attribute is not available.
+#
+# First, check that all the ports with 1 lane fail to split.
+# Second, check that all the ports with more than 1 lane can be split
+# to all valid configurations (e.g., split to 2, split to 4 etc.)
+#
+
+
+Port = collections.namedtuple('Port', 'bus_info name')
+
+
+def run_command(cmd, should_fail=False):
+ """
+ Run a command in subprocess.
+ Return: Tuple of (stdout, stderr).
+ """
+
+ p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
+ stdout, stderr = p.communicate()
+ stdout, stderr = stdout.decode(), stderr.decode()
+
+ if stderr != "" and not should_fail:
+ print("Error sending command: %s" % cmd)
+ print(stdout)
+ print(stderr)
+ return stdout, stderr
+
+
+class devlink_ports(object):
+ """
+ Class that holds information on the devlink ports, required for the tests;
+ if_names: A list of interfaces in the devlink ports.
+ """
+
+ def get_if_names(dev):
+ """
+ Get a list of physical devlink ports.
+ Return: Array of tuples (bus_info/port, if_name).
+ """
+
+ arr = []
+
+ cmd = "devlink -j port show"
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+ ports = json.loads(stdout)['port']
+
+ for port in ports:
+ if dev in port:
+ if ports[port]['flavour'] == 'physical':
+ arr.append(Port(bus_info=port, name=ports[port]['netdev']))
+
+ return arr
+
+ def __init__(self, dev):
+ self.if_names = devlink_ports.get_if_names(dev)
+
+
+def get_max_lanes(port):
+ """
+ Get the $port's maximum number of lanes.
+ Return: number of lanes, e.g. 1, 2, 4 and 8.
+ """
+
+ cmd = "devlink -j port show %s" % port
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+ values = list(json.loads(stdout)['port'].values())[0]
+
+ if 'lanes' in values:
+ lanes = values['lanes']
+ else:
+ lanes = 0
+ return lanes
+
+
+def get_split_ability(port):
+ """
+ Get the $port split ability.
+ Return: split ability, true or false.
+ """
+
+ cmd = "devlink -j port show %s" % port.name
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+ values = list(json.loads(stdout)['port'].values())[0]
+
+ return values['splittable']
+
+
+def split(k, port, should_fail=False):
+ """
+ Split $port into $k ports.
+ If should_fail == True, the split should fail. Otherwise, it should pass.
+ Return: Array of sub ports after splitting.
+ If the $port wasn't split, the array will be empty.
+ """
+
+ cmd = "devlink port split %s count %s" % (port.bus_info, k)
+ stdout, stderr = run_command(cmd, should_fail=should_fail)
+
+ if should_fail:
+ if not test(stderr != "", "%s is unsplittable" % port.name):
+ print("split an unsplittable port %s" % port.name)
+ return create_split_group(port, k)
+ else:
+ if stderr == "":
+ return create_split_group(port, k)
+ print("didn't split a splittable port %s" % port.name)
+
+ return []
+
+
+def unsplit(port):
+ """
+ Unsplit $port.
+ """
+
+ cmd = "devlink port unsplit %s" % port
+ stdout, stderr = run_command(cmd)
+ test(stderr == "", "Unsplit port %s" % port)
+
+
+def exists(port, dev):
+ """
+ Check if $port exists in the devlink ports.
+ Return: True if so, False otherwise.
+ """
+
+ return any(dev_port.name == port
+ for dev_port in devlink_ports.get_if_names(dev))
+
+
+def exists_and_lanes(ports, lanes, dev):
+ """
+ Check if every port in the list $ports exists in the devlink ports and has
+ $lanes number of lanes after splitting.
+ Return: True if both are True, False otherwise.
+ """
+
+ for port in ports:
+ max_lanes = get_max_lanes(port)
+ if not exists(port, dev):
+ print("port %s doesn't exist in devlink ports" % port)
+ return False
+ if max_lanes != lanes:
+ print("port %s has %d lanes, but %s were expected"
+ % (port, max_lanes, lanes))
+ return False
+ return True
+
+
+def test(cond, msg):
+ """
+ Check $cond and print a message accordingly.
+ Return: True if the check passed, False otherwise.
+ """
+
+ if cond:
+ print("TEST: %-60s [ OK ]" % msg)
+ else:
+ print("TEST: %-60s [FAIL]" % msg)
+
+ return cond
+
+
+def create_split_group(port, k):
+ """
+ Create the split group for $port.
+ Return: Array with $k elements, which are the split port group.
+ """
+
+ return list(port.name + "s" + str(i) for i in range(k))
+
+
+def split_unsplittable_port(port, k):
+ """
+ Test that splitting of unsplittable port fails.
+ """
+
+ # split to max
+ new_split_group = split(k, port, should_fail=True)
+
+ if new_split_group != []:
+ unsplit(port.bus_info)
+
+
+def split_splittable_port(port, k, lanes, dev):
+ """
+ Test that splitting of splittable port passes correctly.
+ """
+
+ new_split_group = split(k, port)
+
+ # Once the split command ends, it takes some time for the sub
+ # interfaces to get their names. Use udevadm to continue only when
+ # all current udev events are handled.
+ cmd = "udevadm settle"
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+
+ if new_split_group != []:
+ test(exists_and_lanes(new_split_group, lanes/k, dev),
+ "split port %s into %s" % (port.name, k))
+
+ unsplit(port.bus_info)
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='A test for port splitting.')
+ parser.add_argument('--dev',
+ help='The devlink handle of the device under test. ' +
+ 'The default is the first registered devlink ' +
+ 'handle.')
+
+ return parser
+
+
+def main(cmdline=None):
+ parser = make_parser()
+ args = parser.parse_args(cmdline)
+
+ dev = args.dev
+ if not dev:
+ cmd = "devlink -j dev show"
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+
+ devs = json.loads(stdout)['dev']
+ dev = list(devs.keys())[0]
+
+ cmd = "devlink dev show %s" % dev
+ stdout, stderr = run_command(cmd)
+ if stderr != "":
+ print("devlink device %s can not be found" % dev)
+ sys.exit(1)
+
+ ports = devlink_ports(dev)
+
+ for port in ports.if_names:
+ max_lanes = get_max_lanes(port.name)
+
+ # If max lanes is 0, do not test port splitting at all
+ if max_lanes == 0:
+ continue
+
+ # If 1 lane, shouldn't be able to split
+ elif max_lanes == 1:
+ test(not get_split_ability(port),
+ "%s should not be able to split" % port.name)
+ split_unsplittable_port(port, max_lanes)
+
+ # Else, splitting should pass and all the split ports should exist.
+ else:
+ lane = max_lanes
+ test(get_split_ability(port),
+ "%s should be able to split" % port.name)
+ while lane > 1:
+ split_splittable_port(port, lane, max_lanes, dev)
+
+ lane //= 2
+
+
+if __name__ == "__main__":
+ main()
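The script needs root privileges and only the devlink binary; --dev is optional and falls back to the first registered devlink handle. Illustrative invocations (the pci handle is a placeholder):

    ./devlink_port_split.py                        # first devlink device
    ./devlink_port_split.py --dev pci/0000:03:00.0
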
diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh
new file mode 100755
index 000000000000..b7650e30d18b
--- /dev/null
+++ b/tools/testing/selftests/net/drop_monitor_tests.sh
@@ -0,0 +1,215 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking drop monitor functionality.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+TESTS="
+ sw_drops
+ hw_drops
+"
+
+IP="ip -netns ns1"
+TC="tc -netns ns1"
+DEVLINK="devlink -N ns1"
+NS_EXEC="ip netns exec ns1"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
+ fi
+}
+
+setup()
+{
+ modprobe netdevsim &> /dev/null
+
+ set -e
+ ip netns add ns1
+ $IP link add dummy10 up type dummy
+
+ $NS_EXEC echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+ udevadm settle
+ local netdev=$($NS_EXEC ls ${NETDEVSIM_PATH}/devices/${DEV}/net/)
+ $IP link set dev $netdev up
+
+ set +e
+}
+
+cleanup()
+{
+ $NS_EXEC echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+ ip netns del ns1
+}
+
+sw_drops_test()
+{
+ echo
+ echo "Software drops test"
+
+ setup
+
+ local dir=$(mktemp -d)
+
+ $TC qdisc add dev dummy10 clsact
+ $TC filter add dev dummy10 egress pref 1 handle 101 proto ip \
+ flower dst_ip 192.0.2.10 action drop
+
+ $NS_EXEC mausezahn dummy10 -a 00:11:22:33:44:55 -b 00:aa:bb:cc:dd:ee \
+ -A 192.0.2.1 -B 192.0.2.10 -t udp sp=12345,dp=54321 -c 0 -q \
+ -d 100msec &
+ timeout 5 dwdump -o sw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) != 0))
+ log_test $? 0 "Capturing active software drops"
+
+ rm ${dir}/packets.pcap
+
+ { kill %% && wait %%; } 2>/dev/null
+ timeout 5 dwdump -o sw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) == 0))
+ log_test $? 0 "Capturing inactive software drops"
+
+ rm -r $dir
+
+ cleanup
+}
+
+hw_drops_test()
+{
+ echo
+ echo "Hardware drops test"
+
+ setup
+
+ local dir=$(mktemp -d)
+
+ $DEVLINK trap set $DEVLINK_DEV trap blackhole_route action trap
+ timeout 5 dwdump -o hw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \
+ | wc -l) != 0))
+ log_test $? 0 "Capturing active hardware drops"
+
+ rm ${dir}/packets.pcap
+
+ $DEVLINK trap set $DEVLINK_DEV trap blackhole_route action drop
+ timeout 5 dwdump -o hw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \
+ | wc -l) == 0))
+ log_test $? 0 "Capturing inactive hardware drops"
+
+ rm -r $dir
+
+ cleanup
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v devlink)" ]; then
+ echo "SKIP: Could not run test without devlink tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tshark)" ]; then
+ echo "SKIP: Could not run test without tshark tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v dwdump)" ]; then
+ echo "SKIP: Could not run test without dwdump tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v udevadm)" ]; then
+ echo "SKIP: Could not run test without udevadm tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v timeout)" ]; then
+ echo "SKIP: Could not run test without timeout tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
+tshark -G fields 2> /dev/null | grep -q net_dm
+if [ $? -ne 0 ]; then
+ echo "SKIP: tshark too old, missing net_dm dissector"
+ exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+ case $t in
+ sw_drops|sw) sw_drops_test;;
+ hw_drops|hw) hw_drops_test;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
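As with the other networking selftests, -t selects a subset of the tests; sw and hw are accepted as aliases for the two functions above. For example:

    ./drop_monitor_tests.sh              # run software and hardware drop tests
    ./drop_monitor_tests.sh -t sw_drops  # software drops only
    ./drop_monitor_tests.sh -t hw        # alias for hw_drops
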
diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
index 9dc35a16e415..51df5e305855 100755
--- a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
+++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
@@ -144,7 +144,7 @@ setup()
cleanup()
{
- for n in h1 r1 h2 h3 h4
+ for n in h0 r1 h1 h2 h3
do
ip netns del ${n} 2>/dev/null
done
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 6560ed796ac4..eb693a3b7b4a 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -19,8 +19,8 @@ ret=0
ksft_skip=4
# all tests in this script. Can be overridden with -t option
-IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime"
-IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime"
+IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture"
+IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture"
ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
TESTS="${ALL_TESTS}"
@@ -146,35 +146,36 @@ setup()
create_ns remote
IP="ip -netns me"
+ BRIDGE="bridge -netns me"
set -e
$IP li add veth1 type veth peer name veth2
$IP li set veth1 up
$IP addr add 172.16.1.1/24 dev veth1
- $IP -6 addr add 2001:db8:91::1/64 dev veth1
+ $IP -6 addr add 2001:db8:91::1/64 dev veth1 nodad
$IP li add veth3 type veth peer name veth4
$IP li set veth3 up
$IP addr add 172.16.2.1/24 dev veth3
- $IP -6 addr add 2001:db8:92::1/64 dev veth3
+ $IP -6 addr add 2001:db8:92::1/64 dev veth3 nodad
$IP li set veth2 netns peer up
ip -netns peer addr add 172.16.1.2/24 dev veth2
- ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2
+ ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad
$IP li set veth4 netns peer up
ip -netns peer addr add 172.16.2.2/24 dev veth4
- ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4
+ ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad
ip -netns remote li add veth5 type veth peer name veth6
ip -netns remote li set veth5 up
ip -netns remote addr add dev veth5 172.16.101.1/24
- ip -netns remote addr add dev veth5 2001:db8:101::1/64
+ ip -netns remote -6 addr add dev veth5 2001:db8:101::1/64 nodad
ip -netns remote ro add 172.16.0.0/22 via 172.16.101.2
ip -netns remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2
ip -netns remote li set veth6 netns peer up
ip -netns peer addr add dev veth6 172.16.101.2/24
- ip -netns peer addr add dev veth6 2001:db8:101::2/64
+ ip -netns peer -6 addr add dev veth6 2001:db8:101::2/64 nodad
set +e
}
@@ -248,11 +249,261 @@ check_route6()
local expected="$2"
local out
- out=$($IP -6 route ls match ${pfx} 2>/dev/null)
+ out=$($IP -6 route ls match ${pfx} 2>/dev/null | sed -e 's/pref medium//')
check_output "${out}" "${expected}"
}
+check_large_grp()
+{
+ local ipv=$1
+ local ecmp=$2
+ local grpnum=100
+ local nhidstart=100
+ local grpidstart=1000
+ local iter=0
+ local nhidstr=""
+ local grpidstr=""
+ local grpstr=""
+ local ipstr=""
+
+ if [ $ipv -eq 4 ]; then
+ ipstr="172.16.1."
+ else
+ ipstr="2001:db8:91::"
+ fi
+
+ #
+ # Create $grpnum groups with specified $ecmp and dump them
+ #
+
+ # create nexthops with different gateways
+ iter=2
+ while [ $iter -le $(($ecmp + 1)) ]
+ do
+ nhidstr="$(($nhidstart + $iter))"
+ run_cmd "$IP nexthop add id $nhidstr via $ipstr$iter dev veth1"
+ check_nexthop "id $nhidstr" "id $nhidstr via $ipstr$iter dev veth1 scope link"
+
+ if [ $iter -le $ecmp ]; then
+ grpstr+="$nhidstr/"
+ else
+ grpstr+="$nhidstr"
+ fi
+ ((iter++))
+ done
+
+ # create duplicate large ecmp groups
+ iter=0
+ while [ $iter -le $grpnum ]
+ do
+ grpidstr="$(($grpidstart + $iter))"
+ run_cmd "$IP nexthop add id $grpidstr group $grpstr"
+ check_nexthop "id $grpidstr" "id $grpidstr group $grpstr"
+ ((iter++))
+ done
+
+ # dump large groups
+ run_cmd "$IP nexthop list"
+ log_test $? 0 "Dump large (x$ecmp) ecmp groups"
+}
+
+start_ip_monitor()
+{
+ local mtype=$1
+
+ # start the monitor in the background
+ tmpfile=`mktemp /var/run/nexthoptestXXX`
+ mpid=`($IP monitor $mtype > $tmpfile & echo $!) 2>/dev/null`
+ sleep 0.2
+ echo "$mpid $tmpfile"
+}
+
+stop_ip_monitor()
+{
+ local mpid=$1
+ local tmpfile=$2
+ local el=$3
+
+ # check the monitor results
+ kill $mpid
+ lines=`wc -l $tmpfile | cut "-d " -f1`
+ test $lines -eq $el
+ rc=$?
+ rm -rf $tmpfile
+
+ return $rc
+}
+
+check_nexthop_fdb_support()
+{
+ $IP nexthop help 2>&1 | grep -q fdb
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing fdb nexthop support"
+ return $ksft_skip
+ fi
+}
+
+ipv6_fdb_grp_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv6 fdb groups functional"
+ echo "--------------------------"
+
+ check_nexthop_fdb_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ # create group with multiple nexthops
+ run_cmd "$IP nexthop add id 61 via 2001:db8:91::2 fdb"
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::3 fdb"
+ run_cmd "$IP nexthop add id 102 group 61/62 fdb"
+ check_nexthop "id 102" "id 102 group 61/62 fdb"
+ log_test $? 0 "Fdb Nexthop group with multiple nexthops"
+
+ ## get nexthop group
+ run_cmd "$IP nexthop get id 102"
+ check_nexthop "id 102" "id 102 group 61/62 fdb"
+ log_test $? 0 "Get Fdb nexthop group by id"
+
+ # fdb nexthop group can only contain fdb nexthops
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::4"
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::5"
+ run_cmd "$IP nexthop add id 103 group 63/64 fdb"
+ log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
+
+ # Non fdb nexthop group can not contain fdb nexthops
+ run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 fdb"
+ run_cmd "$IP nexthop add id 66 via 2001:db8:91::6 fdb"
+ run_cmd "$IP nexthop add id 104 group 65/66"
+ log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops"
+
+ # fdb nexthop cannot have blackhole
+ run_cmd "$IP nexthop add id 67 blackhole fdb"
+ log_test $? 2 "Fdb Nexthop with blackhole"
+
+ # fdb nexthop with oif
+ run_cmd "$IP nexthop add id 68 via 2001:db8:91::7 dev veth1 fdb"
+ log_test $? 2 "Fdb Nexthop with oif"
+
+ # fdb nexthop with onlink
+ run_cmd "$IP nexthop add id 68 via 2001:db8:91::7 onlink fdb"
+ log_test $? 2 "Fdb Nexthop with onlink"
+
+ # fdb nexthop with encap
+ run_cmd "$IP nexthop add id 69 encap mpls 101 via 2001:db8:91::8 dev veth1 fdb"
+ log_test $? 2 "Fdb Nexthop with encap"
+
+ run_cmd "$IP link add name vx10 type vxlan id 1010 local 2001:db8:91::9 remote 2001:db8:91::10 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
+ run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
+ log_test $? 0 "Fdb mac add with nexthop group"
+
+ # fdb nexthops can only reference nexthop groups and not nexthops
+ run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 61 self"
+ log_test $? 255 "Fdb mac add with nexthop"
+
+ run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 66"
+ log_test $? 2 "Route add with fdb nexthop"
+
+ run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 103"
+ log_test $? 2 "Route add with fdb nexthop group"
+
+ run_cmd "$IP nexthop del id 61"
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 0 "Fdb entry after deleting a single nexthop"
+
+ run_cmd "$IP nexthop del id 102"
+ log_test $? 0 "Fdb nexthop delete"
+
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 254 "Fdb entry after deleting a nexthop group"
+
+ $IP link del dev vx10
+}
+
+ipv4_fdb_grp_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv4 fdb groups functional"
+ echo "--------------------------"
+
+ check_nexthop_fdb_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ # create group with multiple nexthops
+ run_cmd "$IP nexthop add id 12 via 172.16.1.2 fdb"
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 fdb"
+ run_cmd "$IP nexthop add id 102 group 12/13 fdb"
+ check_nexthop "id 102" "id 102 group 12/13 fdb"
+ log_test $? 0 "Fdb Nexthop group with multiple nexthops"
+
+ # get nexthop group
+ run_cmd "$IP nexthop get id 102"
+ check_nexthop "id 102" "id 102 group 12/13 fdb"
+ log_test $? 0 "Get Fdb nexthop group by id"
+
+ # fdb nexthop group can only contain fdb nexthops
+ run_cmd "$IP nexthop add id 14 via 172.16.1.2"
+ run_cmd "$IP nexthop add id 15 via 172.16.1.3"
+ run_cmd "$IP nexthop add id 103 group 14/15 fdb"
+ log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
+
+ # non-fdb nexthop groups cannot contain fdb nexthops
+ run_cmd "$IP nexthop add id 16 via 172.16.1.2 fdb"
+ run_cmd "$IP nexthop add id 17 via 172.16.1.3 fdb"
+ run_cmd "$IP nexthop add id 104 group 14/15"
+ log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops"
+
+ # fdb nexthop cannot have blackhole
+ run_cmd "$IP nexthop add id 18 blackhole fdb"
+ log_test $? 2 "Fdb Nexthop with blackhole"
+
+ # fdb nexthop with oif
+ run_cmd "$IP nexthop add id 16 via 172.16.1.2 dev veth1 fdb"
+ log_test $? 2 "Fdb Nexthop with oif"
+
+ # fdb nexthop with onlink
+ run_cmd "$IP nexthop add id 16 via 172.16.1.2 onlink fdb"
+ log_test $? 2 "Fdb Nexthop with onlink"
+
+ # fdb nexthop with encap
+ run_cmd "$IP nexthop add id 17 encap mpls 101 via 172.16.1.2 dev veth1 fdb"
+ log_test $? 2 "Fdb Nexthop with encap"
+
+ run_cmd "$IP link add name vx10 type vxlan id 1010 local 10.0.0.1 remote 10.0.0.2 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
+ run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
+ log_test $? 0 "Fdb mac add with nexthop group"
+
+ # fdb nexthops can only reference nexthop groups and not nexthops
+ run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 12 self"
+ log_test $? 255 "Fdb mac add with nexthop"
+
+ run_cmd "$IP ro add 172.16.0.0/22 nhid 15"
+ log_test $? 2 "Route add with fdb nexthop"
+
+ run_cmd "$IP ro add 172.16.0.0/22 nhid 103"
+ log_test $? 2 "Route add with fdb nexthop group"
+
+ run_cmd "$IP nexthop del id 12"
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 0 "Fdb entry after deleting a single nexthop"
+
+ run_cmd "$IP nexthop del id 102"
+ log_test $? 0 "Fdb nexthop delete"
+
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 254 "Fdb entry after deleting a nexthop group"
+
+ $IP link del dev vx10
+}
+
################################################################################
# basic operations (add, delete, replace) on nexthops and nexthop groups
#
@@ -423,8 +674,6 @@ ipv6_fcnal_runtime()
echo "IPv6 functional runtime"
echo "-----------------------"
- sleep 5
-
#
# IPv6 - the basics
#
@@ -481,12 +730,12 @@ ipv6_fcnal_runtime()
run_cmd "$IP -6 nexthop add id 85 dev veth1"
run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 85"
log_test $? 0 "IPv6 route with device only nexthop"
- check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1 metric 1024 pref medium"
+ check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1 metric 1024"
run_cmd "$IP nexthop add id 123 group 81/85"
run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 123"
log_test $? 0 "IPv6 multipath route with nexthop mix - dev only + gw"
- check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 123 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop dev veth1 weight 1 pref medium"
+ check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 123 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop dev veth1 weight 1"
#
# IPv6 route with v4 nexthop - not allowed
@@ -504,6 +753,36 @@ ipv6_fcnal_runtime()
run_cmd "$IP nexthop replace id 81 via 172.16.1.1 dev veth1"
log_test $? 2 "Nexthop replace of group entry - v6 route, v4 nexthop"
+ run_cmd "$IP nexthop add id 86 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop add id 124 group 86/87/88"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop del id 88"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop del id 87"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 0 "IPv6 route using a group after removing v4 gateways"
+
+ run_cmd "$IP ro delete 2001:db8:101::1/128"
+ run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop replace id 124 group 86/87/88"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop replace id 88 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop replace id 87 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 0 "IPv6 route using a group after replacing v4 gateways"
+
$IP nexthop flush >/dev/null 2>&1
#
@@ -512,6 +791,19 @@ ipv6_fcnal_runtime()
run_cmd "$IP nexthop add id 86 via 2001:db8:91::2 dev veth1"
run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
+ # rpfilter and default route
+ $IP nexthop flush >/dev/null 2>&1
+ run_cmd "ip netns exec me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP"
+ run_cmd "$IP nexthop add id 91 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 92 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 93 group 91/92"
+ run_cmd "$IP -6 ro add default nhid 91"
+ run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ log_test $? 0 "Nexthop with default route and rpfilter"
+ run_cmd "$IP -6 ro replace default nhid 93"
+ run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ log_test $? 0 "Nexthop with multipath default route and rpfilter"
+
# TO-DO:
# existing route with old nexthop; append route with new nexthop
# existing route with old nexthop; replace route with new
@@ -519,6 +811,75 @@ ipv6_fcnal_runtime()
# route with src address and using nexthop - not allowed
}
+ipv6_large_grp()
+{
+ local ecmp=32
+
+ echo
+ echo "IPv6 large groups (x$ecmp)"
+ echo "---------------------"
+
+ check_large_grp 6 $ecmp
+
+ $IP nexthop flush >/dev/null 2>&1
+}
+
+ipv6_del_add_loop1()
+{
+ while :; do
+ $IP nexthop del id 100
+ $IP nexthop add id 100 via 2001:db8:91::2 dev veth1
+ done >/dev/null 2>&1
+}
+
+ipv6_grp_replace_loop()
+{
+ while :; do
+ $IP nexthop replace id 102 group 100/101
+ done >/dev/null 2>&1
+}
+
+ipv6_torture()
+{
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+ local pid5
+
+ echo
+ echo "IPv6 runtime torture"
+ echo "--------------------"
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 102 group 100/101"
+ run_cmd "$IP route add 2001:db8:101::1 nhid 102"
+ run_cmd "$IP route add 2001:db8:101::2 nhid 102"
+
+ ipv6_del_add_loop1 &
+ pid1=$!
+ ipv6_grp_replace_loop &
+ pid2=$!
+ ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+ pid3=$!
+ ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+ pid4=$!
+ ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ pid5=$!
+
+ sleep 300
+ kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+
+ # if we did not crash, success
+ log_test 0 0 "IPv6 torture test"
+}
+
+
ipv4_fcnal()
{
local rc
@@ -866,6 +1227,11 @@ ipv4_fcnal_runtime()
$IP neigh sh | grep 'dev veth1'
fi
+ run_cmd "$IP ro del 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
+ run_cmd "$IP -4 ro add default via inet6 ${lladdr} dev veth1"
+ run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ log_test $? 0 "IPv4 default route with IPv6 gateway"
+
#
# MPLS as an example of LWT encap
#
@@ -880,6 +1246,241 @@ ipv4_fcnal_runtime()
log_test $? 0 "IPv4 route with MPLS encap, v6 gw - check"
}
+ipv4_large_grp()
+{
+ local ecmp=32
+
+ echo
+ echo "IPv4 large groups (x$ecmp)"
+ echo "---------------------"
+
+ check_large_grp 4 $ecmp
+
+ $IP nexthop flush >/dev/null 2>&1
+}
+
+sysctl_nexthop_compat_mode_check()
+{
+ local sysctlname="net.ipv4.nexthop_compat_mode"
+ local lprefix=$1
+
+ IPE="ip netns exec me"
+
+ $IPE sysctl -q $sysctlname >/dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ echo "SKIP: kernel lacks nexthop compat mode sysctl control"
+ return $ksft_skip
+ fi
+
+ out=$($IPE sysctl $sysctlname 2>/dev/null)
+ log_test $? 0 "$lprefix default nexthop compat mode check"
+ check_output "${out}" "$sysctlname = 1"
+}
+
+sysctl_nexthop_compat_mode_set()
+{
+ local sysctlname="net.ipv4.nexthop_compat_mode"
+ local mode=$1
+ local lprefix=$2
+
+ IPE="ip netns exec me"
+
+ out=$($IPE sysctl -w $sysctlname=$mode)
+ log_test $? 0 "$lprefix set compat mode - $mode"
+ check_output "${out}" "net.ipv4.nexthop_compat_mode = $mode"
+}
+
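The two sysctl helpers are meant to bracket a test body, as the compat-mode tests below do; a condensed sketch:

	sysctl_nexthop_compat_mode_check "IPv4"
	if [ $? -eq $ksft_skip ]; then
		return $ksft_skip
	fi

	sysctl_nexthop_compat_mode_set 0 "IPv4"    # legacy-style notifications off
	# ... exercise route and nexthop group notifications ...
	sysctl_nexthop_compat_mode_set 1 "IPv4"    # restore the default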
+ipv6_compat_mode()
+{
+ local rc
+
+ echo
+ echo "IPv6 nexthop api compat mode test"
+ echo "--------------------------------"
+
+ sysctl_nexthop_compat_mode_check "IPv6"
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 122 group 62/63"
+ ipmout=$(start_ip_monitor route)
+
+ run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 122"
+ # route add notification should contain expanded nexthops
+ stop_ip_monitor $ipmout 3
+ log_test $? 0 "IPv6 compat mode on - route add notification"
+
+ # route dump should contain expanded nexthops
+ check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop via 2001:db8:91::3 dev veth1 weight 1"
+ log_test $? 0 "IPv6 compat mode on - route dump"
+
+ # change in nexthop group should generate route notification
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+ ipmout=$(start_ip_monitor route)
+ run_cmd "$IP nexthop replace id 122 group 62/64"
+ stop_ip_monitor $ipmout 3
+
+ log_test $? 0 "IPv6 compat mode on - nexthop change"
+
+ # set compat mode off
+ sysctl_nexthop_compat_mode_set 0 "IPv6"
+
+ run_cmd "$IP -6 ro del 2001:db8:101::1/128 nhid 122"
+
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 122 group 62/63"
+ ipmout=$(start_ip_monitor route)
+
+ run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 122"
+ # route add notification should not contain expanded nexthops
+ stop_ip_monitor $ipmout 1
+ log_test $? 0 "IPv6 compat mode off - route add notification"
+
+ # route dump should not contain expanded nexthops
+ check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024"
+ log_test $? 0 "IPv6 compat mode off - route dump"
+
+ # change in nexthop group should not generate route notification
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+ ipmout=$(start_ip_monitor route)
+ run_cmd "$IP nexthop replace id 122 group 62/64"
+ stop_ip_monitor $ipmout 0
+ log_test $? 0 "IPv6 compat mode off - nexthop change"
+
+ # nexthop delete should not generate route notification
+ ipmout=$(start_ip_monitor route)
+ run_cmd "$IP nexthop del id 122"
+ stop_ip_monitor $ipmout 0
+ log_test $? 0 "IPv6 compat mode off - nexthop delete"
+
+ # set compat mode back on
+ sysctl_nexthop_compat_mode_set 1 "IPv6"
+}
+
+ipv4_compat_mode()
+{
+ local rc
+
+ echo
+ echo "IPv4 nexthop api compat mode"
+ echo "----------------------------"
+
+ sysctl_nexthop_compat_mode_check "IPv4"
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ run_cmd "$IP nexthop add id 21 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 22 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 122 group 21/22"
+ ipmout=$(start_ip_monitor route)
+
+ run_cmd "$IP ro add 172.16.101.1/32 nhid 122"
+ stop_ip_monitor $ipmout 3
+
+ # route add notification should contain expanded nexthops
+ log_test $? 0 "IPv4 compat mode on - route add notification"
+
+ # route dump should contain expanded nexthops
+ check_route "172.16.101.1" "172.16.101.1 nhid 122 nexthop via 172.16.1.2 dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
+ log_test $? 0 "IPv4 compat mode on - route dump"
+
+ # change in nexthop group should generate route notification
+ run_cmd "$IP nexthop add id 23 via 172.16.1.3 dev veth1"
+ ipmout=$(start_ip_monitor route)
+ run_cmd "$IP nexthop replace id 122 group 21/23"
+ stop_ip_monitor $ipmout 3
+ log_test $? 0 "IPv4 compat mode on - nexthop change"
+
+ sysctl_nexthop_compat_mode_set 0 "IPv4"
+
+ # cleanup
+ run_cmd "$IP ro del 172.16.101.1/32 nhid 122"
+
+ ipmout=$(start_ip_monitor route)
+ run_cmd "$IP ro add 172.16.101.1/32 nhid 122"
+ stop_ip_monitor $ipmout 1
+ # route add notification should not contain expanded nexthops
+ log_test $? 0 "IPv4 compat mode off - route add notification"
+
+ # route dump should not contain expanded nexthops
+ check_route "172.16.101.1" "172.16.101.1 nhid 122"
+ log_test $? 0 "IPv4 compat mode off - route dump"
+
+ # change in nexthop group should not generate route notification
+ ipmout=$(start_ip_monitor route)
+ run_cmd "$IP nexthop replace id 122 group 21/22"
+ stop_ip_monitor $ipmout 0
+ log_test $? 0 "IPv4 compat mode off - nexthop change"
+
+ # nexthop delete should not generate route notification
+ ipmout=$(start_ip_monitor route)
+ run_cmd "$IP nexthop del id 122"
+ stop_ip_monitor $ipmout 0
+ log_test $? 0 "IPv4 compat mode off - nexthop delete"
+
+ sysctl_nexthop_compat_mode_set 1 "IPv4"
+}
+
+ipv4_del_add_loop1()
+{
+ while :; do
+ $IP nexthop del id 100
+ $IP nexthop add id 100 via 172.16.1.2 dev veth1
+ done >/dev/null 2>&1
+}
+
+ipv4_grp_replace_loop()
+{
+ while :; do
+ $IP nexthop replace id 102 group 100/101
+ done >/dev/null 2>&1
+}
+
+ipv4_torture()
+{
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+ local pid5
+
+ echo
+ echo "IPv4 runtime torture"
+ echo "--------------------"
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3"
+ run_cmd "$IP nexthop add id 102 group 100/101"
+ run_cmd "$IP route add 172.16.101.1 nhid 102"
+ run_cmd "$IP route add 172.16.101.2 nhid 102"
+
+ ipv4_del_add_loop1 &
+ pid1=$!
+ ipv4_grp_replace_loop &
+ pid2=$!
+ ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 &
+ pid3=$!
+ ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 &
+ pid4=$!
+ ip netns exec me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ pid5=$!
+
+ sleep 300
+ kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+
+ # if we did not crash, success
+ log_test 0 0 "IPv4 torture test"
+}
+
basic()
{
echo
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 155d48bd4d9e..9c12c4fd3afc 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -5,7 +5,7 @@
# Defines
if [[ ! -v DEVLINK_DEV ]]; then
- DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \
+ DEVLINK_DEV=$(devlink port show "${NETIFS[p1]:-$NETIF_NO_CABLE}" -j \
| jq -r '.port | keys[]' | cut -d/ -f-2)
if [ -z "$DEVLINK_DEV" ]; then
echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
@@ -98,6 +98,11 @@ devlink_resource_size_set()
check_err $? "Failed setting path $path to size $size"
}
+devlink_resource_occ_get()
+{
+ devlink_resource_get "$@" | jq '.["occ"]'
+}
+
devlink_reload()
{
local still_pending
@@ -112,6 +117,12 @@ devlink_reload()
declare -A DEVLINK_ORIG
+# Changing pool type from static to dynamic causes reinterpretation of threshold
+# values. They therefore need to be saved before the pool type is changed, and
+# the new values set up only after the change. Rather than saving the current
+# state implicitly in the _set call, provide separate functions for all three
+# primitives: save, set, and restore.
+
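A condensed sketch of the intended save/set/restore pattern; the pool number, size, and thresholds below are placeholders rather than values from this patch:

	devlink_pool_size_thtype_save 0
	devlink_port_pool_th_save $swp1 0

	devlink_pool_size_thtype_set 0 dynamic 10000000
	devlink_port_pool_th_set $swp1 0 10

	# ... run the traffic test ...

	devlink_port_pool_th_restore $swp1 0
	devlink_pool_size_thtype_restore 0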
devlink_port_pool_threshold()
{
local port=$1; shift
@@ -121,14 +132,21 @@ devlink_port_pool_threshold()
| jq '.port_pool."'"$port"'"[].threshold'
}
-devlink_port_pool_th_set()
+devlink_port_pool_th_save()
{
local port=$1; shift
local pool=$1; shift
- local th=$1; shift
local key="port_pool($port,$pool).threshold"
DEVLINK_ORIG[$key]=$(devlink_port_pool_threshold $port $pool)
+}
+
+devlink_port_pool_th_set()
+{
+ local port=$1; shift
+ local pool=$1; shift
+ local th=$1; shift
+
devlink sb port pool set $port pool $pool th $th
}
@@ -137,8 +155,13 @@ devlink_port_pool_th_restore()
local port=$1; shift
local pool=$1; shift
local key="port_pool($port,$pool).threshold"
+ local -a orig=(${DEVLINK_ORIG[$key]})
- devlink sb port pool set $port pool $pool th ${DEVLINK_ORIG[$key]}
+ if [[ -z $orig ]]; then
+ echo "WARNING: Mismatched devlink_port_pool_th_restore"
+ else
+ devlink sb port pool set $port pool $pool th $orig
+ fi
}
devlink_pool_size_thtype()
@@ -149,14 +172,20 @@ devlink_pool_size_thtype()
| jq -r '.pool[][] | (.size, .thtype)'
}
+devlink_pool_size_thtype_save()
+{
+ local pool=$1; shift
+ local key="pool($pool).size_thtype"
+
+ DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool)
+}
+
devlink_pool_size_thtype_set()
{
local pool=$1; shift
local thtype=$1; shift
local size=$1; shift
- local key="pool($pool).size_thtype"
- DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool)
devlink sb pool set "$DEVLINK_DEV" pool $pool size $size thtype $thtype
}
@@ -166,8 +195,12 @@ devlink_pool_size_thtype_restore()
local key="pool($pool).size_thtype"
local -a orig=(${DEVLINK_ORIG[$key]})
- devlink sb pool set "$DEVLINK_DEV" pool $pool \
- size ${orig[0]} thtype ${orig[1]}
+ if [[ -z ${orig[0]} ]]; then
+ echo "WARNING: Mismatched devlink_pool_size_thtype_restore"
+ else
+ devlink sb pool set "$DEVLINK_DEV" pool $pool \
+ size ${orig[0]} thtype ${orig[1]}
+ fi
}
devlink_tc_bind_pool_th()
@@ -180,6 +213,16 @@ devlink_tc_bind_pool_th()
| jq -r '.tc_bind[][] | (.pool, .threshold)'
}
+devlink_tc_bind_pool_th_save()
+{
+ local port=$1; shift
+ local tc=$1; shift
+ local dir=$1; shift
+ local key="tc_bind($port,$dir,$tc).pool_th"
+
+ DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir)
+}
+
devlink_tc_bind_pool_th_set()
{
local port=$1; shift
@@ -187,9 +230,7 @@ devlink_tc_bind_pool_th_set()
local dir=$1; shift
local pool=$1; shift
local th=$1; shift
- local key="tc_bind($port,$dir,$tc).pool_th"
- DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir)
devlink sb tc bind set $port tc $tc type $dir pool $pool th $th
}
@@ -201,8 +242,12 @@ devlink_tc_bind_pool_th_restore()
local key="tc_bind($port,$dir,$tc).pool_th"
local -a orig=(${DEVLINK_ORIG[$key]})
- devlink sb tc bind set $port tc $tc type $dir \
- pool ${orig[0]} th ${orig[1]}
+ if [[ -z ${orig[0]} ]]; then
+ echo "WARNING: Mismatched devlink_tc_bind_pool_th_restore"
+ else
+ devlink sb tc bind set $port tc $tc type $dir \
+ pool ${orig[0]} th ${orig[1]}
+ fi
}
devlink_traps_num_get()
@@ -365,7 +410,9 @@ devlink_trap_group_stats_idle_test()
devlink_trap_exception_test()
{
local trap_name=$1; shift
- local group_name=$1; shift
+ local group_name
+
+ group_name=$(devlink_trap_group_get $trap_name)
devlink_trap_stats_idle_test $trap_name
check_fail $? "Trap stats idle when packets should have been trapped"
@@ -377,9 +424,11 @@ devlink_trap_exception_test()
devlink_trap_drop_test()
{
local trap_name=$1; shift
- local group_name=$1; shift
local dev=$1; shift
local handle=$1; shift
+ local group_name
+
+ group_name=$(devlink_trap_group_get $trap_name)
# This is the common part of all the tests. It checks that stats are
# initially idle, then non-idle after changing the trap action and
@@ -390,7 +439,6 @@ devlink_trap_drop_test()
devlink_trap_group_stats_idle_test $group_name
check_err $? "Trap group stats not idle with initial drop action"
-
devlink_trap_action_set $trap_name "trap"
devlink_trap_stats_idle_test $trap_name
check_fail $? "Trap stats idle after setting action to trap"
@@ -420,6 +468,29 @@ devlink_trap_drop_cleanup()
tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower
}
+devlink_trap_stats_test()
+{
+ local test_name=$1; shift
+ local trap_name=$1; shift
+ local send_one="$@"
+ local t0_packets
+ local t1_packets
+
+ RET=0
+
+ t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+ $send_one && sleep 1
+
+ t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+ if [[ $t1_packets -eq $t0_packets ]]; then
+ check_err 1 "Trap stats did not increase"
+ fi
+
+ log_test "$test_name"
+}
+
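Callers pass a test name, a trap name, and the command that emits one matching packet; an illustrative invocation (the trap name and packet generator shown here are examples, not taken from this patch):

	devlink_trap_stats_test "Source MAC is multicast" source_mac_is_multicast \
		$MZ $h1 -c 1 -a 01:02:03:04:05:06 -b $(mac_get $rp1) \
		-A 192.0.2.1 -B 192.0.2.2 -q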
devlink_trap_policers_num_get()
{
devlink -j -p trap policer show | jq '.[]["'$DEVLINK_DEV'"] | length'
@@ -478,3 +549,9 @@ devlink_cpu_port_get()
echo "$DEVLINK_DEV/$cpu_dl_port_num"
}
+
+devlink_cell_size_get()
+{
+ devlink sb pool show "$DEVLINK_DEV" pool 0 -j \
+ | jq '.pool[][].cell_size'
+}
diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh
index eb8e2a23bbb4..dbb9fcf759e0 100755
--- a/tools/testing/selftests/net/forwarding/ethtool.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool.sh
@@ -50,23 +50,6 @@ cleanup()
h1_destroy
}
-different_speeds_get()
-{
- local dev1=$1; shift
- local dev2=$1; shift
- local with_mode=$1; shift
- local adver=$1; shift
-
- local -a speeds_arr
-
- speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver))
- if [[ ${#speeds_arr[@]} < 2 ]]; then
- check_err 1 "cannot check different speeds. There are not enough speeds"
- fi
-
- echo ${speeds_arr[0]} ${speeds_arr[1]}
-}
-
same_speeds_autoneg_off()
{
# Check that when each of the reported speeds is forced, the links come
@@ -252,8 +235,6 @@ check_highest_speed_is_chosen()
fi
local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1))
- # Remove the first speed, h1 does not advertise this speed.
- unset speeds_arr[0]
max_speed=${speeds_arr[0]}
for current in ${speeds_arr[@]}; do
diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
new file mode 100755
index 000000000000..4b42dfd4efd1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ autoneg
+ autoneg_force_mode
+ no_cable
+"
+
+NUM_NETIFS=2
+source lib.sh
+source ethtool_lib.sh
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+ swp3=$NETIF_NO_CABLE
+}
+
+ethtool_extended_state_check()
+{
+ local dev=$1; shift
+ local expected_ext_state=$1; shift
+ local expected_ext_substate=${1:-""}; shift
+
+ local ext_state=$(ethtool $dev | grep "Link detected" \
+ | cut -d "(" -f2 | cut -d ")" -f1)
+ local ext_substate=$(echo $ext_state | cut -sd "," -f2 \
+ | sed -e 's/^[[:space:]]*//')
+ ext_state=$(echo $ext_state | cut -d "," -f1)
+
+ [[ $ext_state == $expected_ext_state ]]
+ check_err $? "Expected \"$expected_ext_state\", got \"$ext_state\""
+
+ [[ $ext_substate == $expected_ext_substate ]]
+ check_err $? "Expected \"$expected_ext_substate\", got \"$ext_substate\""
+}
+
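The helper greps the "Link detected" line of ethtool output and splits the parenthesised extended state on the comma, so output such as "Link detected: no (Autoneg, No partner detected)" is checked with:

	ethtool_extended_state_check $swp1 "Autoneg" "No partner detected"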
+autoneg()
+{
+ RET=0
+
+ ip link set dev $swp1 up
+
+ sleep 4
+ ethtool_extended_state_check $swp1 "Autoneg" "No partner detected"
+
+ log_test "Autoneg, No partner detected"
+
+ ip link set dev $swp1 down
+}
+
+autoneg_force_mode()
+{
+ RET=0
+
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+
+ local -a speeds_arr=($(different_speeds_get $swp1 $swp2 0 0))
+ local speed1=${speeds_arr[0]}
+ local speed2=${speeds_arr[1]}
+
+ ethtool_set $swp1 speed $speed1 autoneg off
+ ethtool_set $swp2 speed $speed2 autoneg off
+
+ sleep 4
+ ethtool_extended_state_check $swp1 "Autoneg" \
+ "No partner detected during force mode"
+
+ ethtool_extended_state_check $swp2 "Autoneg" \
+ "No partner detected during force mode"
+
+ log_test "Autoneg, No partner detected during force mode"
+
+ ethtool -s $swp2 autoneg on
+ ethtool -s $swp1 autoneg on
+
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+}
+
+no_cable()
+{
+ RET=0
+
+ ip link set dev $swp3 up
+
+ sleep 1
+ ethtool_extended_state_check $swp3 "No cable"
+
+ log_test "No cable"
+
+ ip link set dev $swp3 down
+}
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
index 925d229a59d8..9188e624dec0 100644
--- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
@@ -67,3 +67,20 @@ common_speeds_get()
<(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \
<(printf '%s\n' "${dev2_speeds[@]}" | sort -u)
}
+
+different_speeds_get()
+{
+ local dev1=$1; shift
+ local dev2=$1; shift
+ local with_mode=$1; shift
+ local adver=$1; shift
+
+ local -a speeds_arr
+
+ speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver))
+ if [[ ${#speeds_arr[@]} -lt 2 ]]; then
+ check_err 1 "cannot check different speeds. There are not enough speeds"
+ fi
+
+ echo ${speeds_arr[0]} ${speeds_arr[1]}
+}
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index e2adb533c8fc..b802c14d2950 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -14,6 +14,9 @@ NETIFS[p6]=veth5
NETIFS[p7]=veth6
NETIFS[p8]=veth7
+# Port that does not have a cable connected.
+NETIF_NO_CABLE=eth8
+
##############################################################################
# Defines
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 977fc2b326a2..927f9ba49e08 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1227,3 +1227,46 @@ stop_traffic()
# Suppress noise from killing mausezahn.
{ kill %% && wait %%; } 2>/dev/null
}
+
+tcpdump_start()
+{
+ local if_name=$1; shift
+ local ns=$1; shift
+
+ capfile=$(mktemp)
+ capout=$(mktemp)
+
+ if [ -z "$ns" ]; then
+ ns_cmd=""
+ else
+ ns_cmd="ip netns exec ${ns}"
+ fi
+
+ if [ -z "$SUDO_USER" ]; then
+ capuser=""
+ else
+ capuser="-Z $SUDO_USER"
+ fi
+
+ $ns_cmd tcpdump -e -n -Q in -i $if_name \
+ -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
+ cappid=$!
+
+ sleep 1
+}
+
+tcpdump_stop()
+{
+ $ns_cmd kill $cappid
+ sleep 1
+}
+
+tcpdump_cleanup()
+{
+ rm $capfile $capout
+}
+
+tcpdump_show()
+{
+ tcpdump -e -n -r $capfile 2>&1
+}
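A sketch of the expected call sequence for these capture helpers (the interface and grep pattern are illustrative):

	tcpdump_start $h1              # or: tcpdump_start $if_name $netns
	# ... send the traffic under test ...
	tcpdump_stop
	tcpdump_show | grep -q "ICMP echo request"
	check_err $? "expected packet not captured"
	tcpdump_cleanup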
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index 00797597fcf5..13db1cb50e57 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -29,11 +29,9 @@ mirror_test()
local pref=$1; shift
local expect=$1; shift
- local ping_timeout=$((PING_TIMEOUT * 5))
local t0=$(tc_rule_stats_get $dev $pref)
- ip vrf exec $vrf_name \
- ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.5 -w $ping_timeout \
- &> /dev/null
+ $MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
+ -c 10 -d 100msec -t icmp type=8
sleep 0.5
local t1=$(tc_rule_stats_get $dev $pref)
local delta=$((t1 - t0))
diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
index b50081855913..55eeacf59241 100755
--- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -20,10 +20,14 @@
ALL_TESTS="
ping_ipv4
+ ping_ipv6
test_ip_dsfield
test_ip_dscp
test_ip_ecn
test_ip_dscp_ecn
+ test_ip6_dsfield
+ test_ip6_dscp
+ test_ip6_ecn
"
NUM_NETIFS=4
@@ -107,6 +111,11 @@ ping_ipv4()
ping_test $h1 192.0.2.2
}
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2
+}
+
do_test_pedit_dsfield_common()
{
local pedit_locus=$1; shift
@@ -123,7 +132,12 @@ do_test_pedit_dsfield_common()
local pkts
pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \
tc_rule_handle_stats_get "dev $h2 ingress" 101)
- check_err $? "Expected to get 10 packets, but got $pkts."
+ check_err $? "Expected to get 10 packets on test probe, but got $pkts."
+
+ pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101)
+ ((pkts >= 10))
+ check_err $? "Expected to get 10 packets on pedit rule, but got $pkts."
+
log_test "$pedit_locus pedit $pedit_action"
}
@@ -228,6 +242,63 @@ test_ip_dscp_ecn()
do_test_ip_dscp_ecn "dev $swp2 egress"
}
+do_test_ip6_dsfield()
+{
+ local locus=$1; shift
+ local dsfield
+
+ for dsfield in 0 1 2 3 128 252 253 254 255; do
+ do_test_pedit_dsfield "$locus" \
+ "ip6 traffic_class set $dsfield" \
+ ipv6 "ip_tos $dsfield" \
+ "-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+ done
+}
+
+test_ip6_dsfield()
+{
+ do_test_ip6_dsfield "dev $swp1 ingress"
+ do_test_ip6_dsfield "dev $swp2 egress"
+}
+
+do_test_ip6_dscp()
+{
+ local locus=$1; shift
+ local dscp
+
+ for dscp in 0 1 2 3 32 61 62 63; do
+ do_test_pedit_dsfield "$locus" \
+ "ip6 traffic_class set $((dscp << 2)) retain 0xfc" \
+ ipv6 "ip_tos $(((dscp << 2) | 1))" \
+ "-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+ done
+}
+
+test_ip6_dscp()
+{
+ do_test_ip6_dscp "dev $swp1 ingress"
+ do_test_ip6_dscp "dev $swp2 egress"
+}
+
+do_test_ip6_ecn()
+{
+ local locus=$1; shift
+ local ecn
+
+ for ecn in 0 1 2 3; do
+ do_test_pedit_dsfield "$locus" \
+ "ip6 traffic_class set $ecn retain 0x3" \
+ ipv6 "ip_tos $((124 | $ecn))" \
+ "-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+ done
+}
+
+test_ip6_ecn()
+{
+ do_test_ip6_ecn "dev $swp1 ingress"
+ do_test_ip6_ecn "dev $swp2 egress"
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
new file mode 100755
index 000000000000..5f20d289ee43
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
@@ -0,0 +1,198 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on egress of $swp2, the
+# traffic is acted upon by a pedit action. An ingress filter installed on $h2 verifies that the
+# packet looks like expected.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +----|-----------------+ +----------------|-----+
+# | |
+# +----|----------------------------------------------------------------|-----+
+# | SW | | |
+# | +-|----------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | +--------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_udp_sport
+ test_udp_dport
+ test_tcp_sport
+ test_tcp_dport
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+: ${HIT_TIMEOUT:=2000} # ms
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add name br1 up type bridge vlan_filtering 1
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2
+}
+
+do_test_pedit_l4port_one()
+{
+ local pedit_locus=$1; shift
+ local pedit_prot=$1; shift
+ local pedit_action=$1; shift
+ local match_prot=$1; shift
+ local match_flower=$1; shift
+ local mz_flags=$1; shift
+ local saddr=$1; shift
+ local daddr=$1; shift
+
+ tc filter add $pedit_locus handle 101 pref 1 \
+ flower action pedit ex munge $pedit_action
+ tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \
+ flower skip_hw $match_flower action pass
+
+ RET=0
+
+ $MZ $mz_flags $h1 -c 10 -d 20msec -p 100 \
+ -a own -b $h2mac -q -t $pedit_prot sp=54321,dp=12345
+
+ local pkts
+ pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \
+ tc_rule_handle_stats_get "dev $h2 ingress" 101)
+ check_err $? "Expected to get 10 packets, but got $pkts."
+
+ pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101)
+ ((pkts >= 10))
+ check_err $? "Expected to get 10 packets on pedit rule, but got $pkts."
+
+ log_test "$pedit_locus pedit $pedit_action"
+
+ tc filter del dev $h2 ingress pref 1
+ tc filter del $pedit_locus pref 1
+}
+
+do_test_pedit_l4port()
+{
+ local locus=$1; shift
+ local prot=$1; shift
+ local pedit_port=$1; shift
+ local flower_port=$1; shift
+ local port
+
+ for port in 1 11111 65535; do
+ do_test_pedit_l4port_one "$locus" "$prot" \
+ "$prot $pedit_port set $port" \
+ ip "ip_proto $prot $flower_port $port" \
+ "-A 192.0.2.1 -B 192.0.2.2"
+ done
+}
+
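For one iteration of the loop above (UDP source port, port 11111, pedit on $swp1 ingress), the helper call expands to:

	do_test_pedit_l4port_one "dev $swp1 ingress" udp \
		"udp sport set 11111" \
		ip "ip_proto udp src_port 11111" \
		"-A 192.0.2.1 -B 192.0.2.2"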
+test_udp_sport()
+{
+ do_test_pedit_l4port "dev $swp1 ingress" udp sport src_port
+ do_test_pedit_l4port "dev $swp2 egress" udp sport src_port
+}
+
+test_udp_dport()
+{
+ do_test_pedit_l4port "dev $swp1 ingress" udp dport dst_port
+ do_test_pedit_l4port "dev $swp2 egress" udp dport dst_port
+}
+
+test_tcp_sport()
+{
+ do_test_pedit_l4port "dev $swp1 ingress" tcp sport src_port
+ do_test_pedit_l4port "dev $swp2 egress" tcp sport src_port
+}
+
+test_tcp_dport()
+{
+ do_test_pedit_l4port "dev $swp1 ingress" tcp dport dst_port
+ do_test_pedit_l4port "dev $swp2 egress" tcp dport dst_port
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh
new file mode 100755
index 000000000000..e714bae473fb
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_red.sh
@@ -0,0 +1,492 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends one stream of traffic from H1 through a TBF shaper, to a RED
+# within TBF shaper on $swp3. The two shapers have the same configuration, and
+# thus the resulting stream should fill all available bandwidth on the latter
+# shaper. A second stream is sent from H2 also via $swp3, and used to inject
+# additional traffic. Since all available bandwidth is taken, this traffic has
+# to go to backlog.
+#
+# +--------------------------+ +--------------------------+
+# | H1 | | H2 |
+# | + $h1 | | + $h2 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | TBF 10Mbps | | | |
+# +-----|--------------------+ +-----|--------------------+
+# | |
+# +-----|------------------------------------------------|--------------------+
+# | SW | | |
+# | +--|------------------------------------------------|----------------+ |
+# | | + $swp1 + $swp2 | |
+# | | BR | |
+# | | | |
+# | | + $swp3 | |
+# | | | TBF 10Mbps / RED | |
+# | +--------------------------------|-----------------------------------+ |
+# | | |
+# +-----------------------------------|---------------------------------------+
+# |
+# +-----|--------------------+
+# | H3 | |
+# | + $h3 |
+# | 192.0.2.3/28 |
+# | |
+# +--------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ecn_test
+ ecn_nodrop_test
+ red_test
+ red_qevent_test
+ ecn_qevent_test
+"
+
+NUM_NETIFS=6
+CHECK_TC="yes"
+source lib.sh
+
+BACKLOG=30000
+PKTSZ=1400
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+ mtu_set $h1 10000
+ tc qdisc replace dev $h1 root handle 1: tbf \
+ rate 10Mbit burst 10K limit 1M
+}
+
+h1_destroy()
+{
+ tc qdisc del dev $h1 root
+ mtu_restore $h1
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+ mtu_set $h2 10000
+}
+
+h2_destroy()
+{
+ mtu_restore $h2
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+h3_create()
+{
+ simple_if_init $h3 192.0.2.3/28
+ mtu_set $h3 10000
+}
+
+h3_destroy()
+{
+ mtu_restore $h3
+ simple_if_fini $h3 192.0.2.3/28
+}
+
+switch_create()
+{
+ ip link add dev br up type bridge
+ ip link set dev $swp1 up master br
+ ip link set dev $swp2 up master br
+ ip link set dev $swp3 up master br
+
+ mtu_set $swp1 10000
+ mtu_set $swp2 10000
+ mtu_set $swp3 10000
+
+ tc qdisc replace dev $swp3 root handle 1: tbf \
+ rate 10Mbit burst 10K limit 1M
+ ip link add name _drop_test up type dummy
+}
+
+switch_destroy()
+{
+ ip link del dev _drop_test
+ tc qdisc del dev $swp3 root
+
+ mtu_restore $swp3
+ mtu_restore $swp2
+ mtu_restore $swp1
+
+ ip link set dev $swp3 down nomaster
+ ip link set dev $swp2 down nomaster
+ ip link set dev $swp1 down nomaster
+ ip link del dev br
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ h2=${NETIFS[p3]}
+ swp2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ h3_mac=$(mac_get $h3)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.3 " from host 1"
+ ping_test $h2 192.0.2.3 " from host 2"
+}
+
+get_qdisc_backlog()
+{
+ qdisc_stats_get $swp3 11: .backlog
+}
+
+get_nmarked()
+{
+ qdisc_stats_get $swp3 11: .marked
+}
+
+get_qdisc_npackets()
+{
+ qdisc_stats_get $swp3 11: .packets
+}
+
+get_nmirrored()
+{
+ link_stats_get _drop_test tx packets
+}
+
+send_packets()
+{
+ local proto=$1; shift
+ local pkts=$1; shift
+
+ $MZ $h2 -p $PKTSZ -a own -b $h3_mac -A 192.0.2.2 -B 192.0.2.3 -t $proto -q -c $pkts "$@"
+}
+
+# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
+# success. After 10 failed attempts it bails out and returns 1. It dumps the
+# backlog size to stdout.
+build_backlog()
+{
+ local size=$1; shift
+ local proto=$1; shift
+
+ local i=0
+
+ while :; do
+ local cur=$(get_qdisc_backlog)
+ local diff=$((size - cur))
+ local pkts=$(((diff + PKTSZ - 1) / PKTSZ))
+
+ if ((cur >= size)); then
+ echo $cur
+ return 0
+ elif ((i++ > 10)); then
+ echo $cur
+ return 1
+ fi
+
+ send_packets $proto $pkts "$@"
+ sleep 1
+ done
+}
+
+check_marking()
+{
+ local cond=$1; shift
+
+ local npackets_0=$(get_qdisc_npackets)
+ local nmarked_0=$(get_nmarked)
+ sleep 5
+ local npackets_1=$(get_qdisc_npackets)
+ local nmarked_1=$(get_nmarked)
+
+ local nmarked_d=$((nmarked_1 - nmarked_0))
+ local npackets_d=$((npackets_1 - npackets_0))
+ local pct=$((100 * nmarked_d / npackets_d))
+
+ echo $pct
+ ((pct $cond))
+}
+
+check_mirroring()
+{
+ local cond=$1; shift
+
+ local npackets_0=$(get_qdisc_npackets)
+ local nmirrored_0=$(get_nmirrored)
+ sleep 5
+ local npackets_1=$(get_qdisc_npackets)
+ local nmirrored_1=$(get_nmirrored)
+
+ local nmirrored_d=$((nmirrored_1 - nmirrored_0))
+ local npackets_d=$((npackets_1 - npackets_0))
+ local pct=$((100 * nmirrored_d / npackets_d))
+
+ echo $pct
+ ((pct $cond))
+}
+
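Both helpers print the observed percentage and return the result of evaluating it against the condition string, so callers capture stdout and test the return code, as the tests below do:

	pct=$(check_marking ">= 95")
	check_err $? "Got $pct% marked packets, expected >= 95."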
+ecn_test_common()
+{
+ local name=$1; shift
+ local limit=$1; shift
+ local backlog
+ local pct
+
+ # Build the below-the-limit backlog using UDP. We could use TCP just
+ # as well, but this way we also verify that UDP is accepted while the
+ # queue length is below the limit. The main stream uses TCP, and if
+ # the limit is misconfigured, we would see that traffic being ECN marked.
+ RET=0
+ backlog=$(build_backlog $((2 * limit / 3)) udp)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "$name backlog < limit"
+
+ # Now push TCP, because non-TCP traffic would be early-dropped after the
+ # backlog crosses the limit, and we want to make sure that the backlog
+ # is above the limit.
+ RET=0
+ backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking ">= 95")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
+ log_test "$name backlog > limit"
+}
+
+do_ecn_test()
+{
+ local limit=$1; shift
+ local name=ECN
+
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t tcp -q tos=0x01 &
+ sleep 1
+
+ ecn_test_common "$name" $limit
+
+ # Above we saw that UDP is accepted while the backlog is below the
+ # limit. Now that it is above, it should all get early-dropped, and
+ # building the backlog should fail.
+ RET=0
+ build_backlog $((2 * limit)) udp >/dev/null
+ check_fail $? "UDP traffic went into backlog instead of being early-dropped"
+ log_test "$name backlog > limit: UDP early-dropped"
+
+ stop_traffic
+ sleep 1
+}
+
+do_ecn_nodrop_test()
+{
+ local limit=$1; shift
+ local name="ECN nodrop"
+
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t tcp -q tos=0x01 &
+ sleep 1
+
+ ecn_test_common "$name" $limit
+
+ # Above we saw that UDP is accepted while the backlog is below the
+ # limit. Now that it is above, in nodrop mode, make sure it still
+ # goes to the backlog.
+ RET=0
+ build_backlog $((2 * limit)) udp >/dev/null
+ check_err $? "UDP traffic was early-dropped instead of getting into backlog"
+ log_test "$name backlog > limit: UDP not dropped"
+
+ stop_traffic
+ sleep 1
+}
+
+do_red_test()
+{
+ local limit=$1; shift
+ local backlog
+ local pct
+
+ # Use ECN-capable TCP to verify there's no marking even though the queue
+ # is above limit.
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t tcp -q tos=0x01 &
+
+ # Pushing below the queue limit should work.
+ RET=0
+ backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "RED backlog < limit"
+
+ # Pushing above should not.
+ RET=0
+ backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
+ check_fail $? "Traffic went into backlog instead of being early-dropped"
+ pct=$(check_marking "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "RED backlog > limit"
+
+ stop_traffic
+ sleep 1
+}
+
+do_red_qevent_test()
+{
+ local limit=$1; shift
+ local backlog
+ local base
+ local now
+ local pct
+
+ RET=0
+
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t udp -q &
+ sleep 1
+
+ tc filter add block 10 pref 1234 handle 102 matchall skip_hw \
+ action mirred egress mirror dev _drop_test
+
+ # Push to the queue until it's at the limit. The configured limit is
+ # rounded by the qdisc, so this is the best we can do to get to the real
+ # limit.
+ build_backlog $((3 * limit / 2)) udp >/dev/null
+
+ base=$(get_nmirrored)
+ send_packets udp 100
+ sleep 1
+ now=$(get_nmirrored)
+ ((now >= base + 100))
+ check_err $? "Dropped packets not observed: 100 expected, $((now - base)) seen"
+
+ tc filter del block 10 pref 1234 handle 102 matchall
+
+ base=$(get_nmirrored)
+ send_packets udp 100
+ sleep 1
+ now=$(get_nmirrored)
+ ((now == base))
+ check_err $? "Dropped packets still observed: 0 expected, $((now - base)) seen"
+
+ log_test "RED early_dropped packets mirrored"
+
+ stop_traffic
+ sleep 1
+}
+
+do_ecn_qevent_test()
+{
+ local limit=$1; shift
+ local name=ECN
+
+ RET=0
+
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t tcp -q tos=0x01 &
+ sleep 1
+
+ tc filter add block 10 pref 1234 handle 102 matchall skip_hw \
+ action mirred egress mirror dev _drop_test
+
+ backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_mirroring "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected == 0."
+
+ backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_mirroring ">= 95")
+ check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected >= 95."
+
+ tc filter del block 10 pref 1234 handle 102 matchall
+
+ log_test "ECN marked packets mirrored"
+
+ stop_traffic
+ sleep 1
+}
+
+install_qdisc()
+{
+ local -a args=("$@")
+
+ tc qdisc replace dev $swp3 parent 1:1 handle 11: red \
+ limit 1M avpkt $PKTSZ probability 1 \
+ min $BACKLOG max $((BACKLOG + 1)) burst 38 "${args[@]}"
+ sleep 1
+}
+
+uninstall_qdisc()
+{
+ tc qdisc del dev $swp3 parent 1:1
+}
+
+ecn_test()
+{
+ install_qdisc ecn
+ do_ecn_test $BACKLOG
+ uninstall_qdisc
+}
+
+ecn_nodrop_test()
+{
+ install_qdisc ecn nodrop
+ do_ecn_nodrop_test $BACKLOG
+ uninstall_qdisc
+}
+
+red_test()
+{
+ install_qdisc
+ do_red_test $BACKLOG
+ uninstall_qdisc
+}
+
+red_qevent_test()
+{
+ install_qdisc qevent early_drop block 10
+ do_red_qevent_test $BACKLOG
+ uninstall_qdisc
+}
+
+ecn_qevent_test()
+{
+ install_qdisc ecn qevent mark block 10
+ do_ecn_qevent_test $BACKLOG
+ uninstall_qdisc
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 813d02d1939d..d9eca227136b 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -2,7 +2,8 @@
# SPDX-License-Identifier: GPL-2.0
ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
- mirred_egress_mirror_test gact_trap_test"
+ mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
+ gact_trap_test"
NUM_NETIFS=4
source tc_common.sh
source lib.sh
@@ -50,6 +51,9 @@ switch_destroy()
mirred_egress_test()
{
local action=$1
+ local protocol=$2
+ local classifier=$3
+ local classifier_args=$4
RET=0
@@ -62,9 +66,9 @@ mirred_egress_test()
tc_check_packets "dev $h2 ingress" 101 1
check_fail $? "Matched without redirect rule inserted"
- tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
- $tcflags dst_ip 192.0.2.2 action mirred egress $action \
- dev $swp2
+ tc filter add dev $swp1 ingress protocol $protocol pref 1 handle 101 \
+ $classifier $tcflags $classifier_args \
+ action mirred egress $action dev $swp2
$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
-t ip -q
@@ -72,10 +76,11 @@ mirred_egress_test()
tc_check_packets "dev $h2 ingress" 101 1
check_err $? "Did not match incoming $action packet"
- tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $swp1 ingress protocol $protocol pref 1 handle 101 \
+ $classifier
tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
- log_test "mirred egress $action ($tcflags)"
+ log_test "mirred egress $classifier $action ($tcflags)"
}
gact_drop_and_ok_test()
@@ -187,12 +192,17 @@ cleanup()
mirred_egress_redirect_test()
{
- mirred_egress_test "redirect"
+ mirred_egress_test "redirect" "ip" "flower" "dst_ip 192.0.2.2"
}
mirred_egress_mirror_test()
{
- mirred_egress_test "mirror"
+ mirred_egress_test "mirror" "ip" "flower" "dst_ip 192.0.2.2"
+}
+
+matchall_mirred_egress_mirror_test()
+{
+ mirred_egress_test "mirror" "all" "matchall" ""
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh
new file mode 100755
index 000000000000..160f9cccdfb7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_police.sh
@@ -0,0 +1,333 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test tc-police action.
+#
+# +---------------------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 192.0.2.1/24 |
+# | | |
+# | | default via 192.0.2.2 |
+# +----|----------------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | + $rp1 |
+# | 192.0.2.2/24 |
+# | |
+# | 198.51.100.2/24 203.0.113.2/24 |
+# | + $rp2 + $rp3 |
+# | | | |
+# +----|-----------------------------------------|----------------------------+
+# | |
+# +----|----------------------------+ +----|----------------------------+
+# | | default via 198.51.100.2 | | | default via 203.0.113.2 |
+# | | | | | |
+# | | 198.51.100.1/24 | | | 203.0.113.1/24 |
+# | + $h2 | | + $h3 |
+# | H2 (vrf) | | H3 (vrf) |
+# +---------------------------------+ +---------------------------------+
+
+ALL_TESTS="
+ police_rx_test
+ police_tx_test
+ police_shared_test
+ police_rx_mirror_test
+ police_tx_mirror_test
+"
+NUM_NETIFS=6
+source tc_common.sh
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+
+ ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 198.51.100.1/24
+
+ ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+
+ ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+ simple_if_fini $h2 198.51.100.1/24
+}
+
+h3_create()
+{
+ simple_if_init $h3 203.0.113.1/24
+
+ ip -4 route add default vrf v$h3 nexthop via 203.0.113.2
+
+ tc qdisc add dev $h3 clsact
+}
+
+h3_destroy()
+{
+ tc qdisc del dev $h3 clsact
+
+ ip -4 route del default vrf v$h3 nexthop via 203.0.113.2
+
+ simple_if_fini $h3 203.0.113.1/24
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+ ip link set dev $rp3 up
+
+ __addr_add_del $rp1 add 192.0.2.2/24
+ __addr_add_del $rp2 add 198.51.100.2/24
+ __addr_add_del $rp3 add 203.0.113.2/24
+
+ tc qdisc add dev $rp1 clsact
+ tc qdisc add dev $rp2 clsact
+}
+
+router_destroy()
+{
+ tc qdisc del dev $rp2 clsact
+ tc qdisc del dev $rp1 clsact
+
+ __addr_add_del $rp3 del 203.0.113.2/24
+ __addr_add_del $rp2 del 198.51.100.2/24
+ __addr_add_del $rp1 del 192.0.2.2/24
+
+ ip link set dev $rp3 down
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+police_common_test()
+{
+ local test_name=$1; shift
+
+ RET=0
+
+ # Rule to measure bandwidth on ingress of $h2
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action drop
+
+ $MZ $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+ -t udp sp=12345,dp=54321 -p 1000 -c 0 -q &
+
+ local t0=$(tc_rule_stats_get $h2 1 ingress .bytes)
+ sleep 10
+ local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
+
+ local er=$((80 * 1000 * 1000))
+ local nr=$(rate $t0 $t1 10)
+ local nr_pct=$((100 * (nr - er) / er))
+ ((-10 <= nr_pct && nr_pct <= 10))
+ check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+ log_test "$test_name"
+
+ { kill %% && wait %%; } 2>/dev/null
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
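The tolerance check above is plain integer arithmetic: with an expected rate er of 80 Mbps, a hypothetical measured rate of 76 Mbps gives nr_pct = 100 * (76000000 - 80000000) / 80000000 = -5, which is inside the +-10% window:

	er=$((80 * 1000 * 1000))
	nr=76000000                       # hypothetical measured rate
	nr_pct=$((100 * (nr - er) / er))  # -5
	((-10 <= nr_pct && nr_pct <= 10)) && echo "within tolerance"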
+police_rx_test()
+{
+ # Rule to police traffic destined to $h2 on ingress of $rp1
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police rate 80mbit burst 16k conform-exceed drop/ok
+
+ police_common_test "police on rx"
+
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_tx_test()
+{
+ # Rule to police traffic destined to $h2 on egress of $rp2
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police rate 80mbit burst 16k conform-exceed drop/ok
+
+ police_common_test "police on tx"
+
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+}
+
+police_shared_common_test()
+{
+ local dport=$1; shift
+ local test_name=$1; shift
+
+ RET=0
+
+ $MZ $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+ -t udp sp=12345,dp=$dport -p 1000 -c 0 -q &
+
+ local t0=$(tc_rule_stats_get $h2 1 ingress .bytes)
+ sleep 10
+ local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
+
+ local er=$((80 * 1000 * 1000))
+ local nr=$(rate $t0 $t1 10)
+ local nr_pct=$((100 * (nr - er) / er))
+ ((-10 <= nr_pct && nr_pct <= 10))
+ check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+ log_test "$test_name"
+
+ { kill %% && wait %%; } 2>/dev/null
+}
+
+police_shared_test()
+{
+ # Rule to measure bandwidth on ingress of $h2
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp src_port 12345 \
+ action drop
+
+ # Rule to police traffic destined to $h2 on ingress of $rp1
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police rate 80mbit burst 16k conform-exceed drop/ok \
+ index 10
+
+ # Rule to police a different flow destined to $h2 on egress of $rp2
+ # using same policer
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 22222 \
+ action police index 10
+
+ police_shared_common_test 54321 "police with shared policer - rx"
+
+ police_shared_common_test 22222 "police with shared policer - tx"
+
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_mirror_common_test()
+{
+ local pol_if=$1; shift
+ local dir=$1; shift
+ local test_name=$1; shift
+
+ RET=0
+
+ # Rule to measure bandwidth on ingress of $h2
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action drop
+
+ # Rule to measure bandwidth of mirrored traffic on ingress of $h3
+ tc filter add dev $h3 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action drop
+
+ # Rule to police traffic destined to $h2 and mirror to $h3
+ tc filter add dev $pol_if $dir protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police rate 80mbit burst 16k conform-exceed drop/pipe \
+ action mirred egress mirror dev $rp3
+
+ $MZ $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+ -t udp sp=12345,dp=54321 -p 1000 -c 0 -q &
+
+ local t0=$(tc_rule_stats_get $h2 1 ingress .bytes)
+ sleep 10
+ local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
+
+ local er=$((80 * 1000 * 1000))
+ local nr=$(rate $t0 $t1 10)
+ local nr_pct=$((100 * (nr - er) / er))
+ ((-10 <= nr_pct && nr_pct <= 10))
+ check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+ local t0=$(tc_rule_stats_get $h3 1 ingress .bytes)
+ sleep 10
+ local t1=$(tc_rule_stats_get $h3 1 ingress .bytes)
+
+ local er=$((80 * 1000 * 1000))
+ local nr=$(rate $t0 $t1 10)
+ local nr_pct=$((100 * (nr - er) / er))
+ ((-10 <= nr_pct && nr_pct <= 10))
+ check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+ log_test "$test_name"
+
+ { kill %% && wait %%; } 2>/dev/null
+ tc filter del dev $pol_if $dir protocol ip pref 1 handle 101 flower
+ tc filter del dev $h3 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_rx_mirror_test()
+{
+ police_mirror_common_test $rp1 ingress "police rx and mirror"
+}
+
+police_tx_mirror_test()
+{
+ police_mirror_common_test $rp2 egress "police tx and mirror"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ h3_create
+ router_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ router_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh
index a0b5f57d6bd3..0727e2012b68 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh
@@ -215,10 +215,16 @@ switch_create()
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
switch_destroy()
{
+ sysctl_restore net.ipv4.conf.all.rp_filter
+
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10
@@ -359,6 +365,10 @@ ns_switch_create()
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
export -f ns_switch_create
diff --git a/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh
index 1209031bc794..5d97fa347d75 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh
@@ -237,10 +237,16 @@ switch_create()
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
switch_destroy()
{
+ sysctl_restore net.ipv4.conf.all.rp_filter
+
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10
@@ -402,6 +408,10 @@ ns_switch_create()
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
export -f ns_switch_create
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index 18c5de53558a..bf361f30d6ef 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -180,6 +180,8 @@ setup()
;;
r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
+ ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0
ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
diff --git a/tools/testing/selftests/net/ip_defrag.c b/tools/testing/selftests/net/ip_defrag.c
index c0c9ecb891e1..f9ed749fd8c7 100644
--- a/tools/testing/selftests/net/ip_defrag.c
+++ b/tools/testing/selftests/net/ip_defrag.c
@@ -192,9 +192,9 @@ static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen,
}
res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
- if (res < 0)
+ if (res < 0 && errno != EPERM)
error(1, errno, "send_fragment");
- if (res != frag_len)
+ if (res >= 0 && res != frag_len)
error(1, 0, "send_fragment: %d vs %d", res, frag_len);
frag_counter++;
@@ -313,9 +313,9 @@ static void send_udp_frags(int fd_raw, struct sockaddr *addr,
iphdr->ip_len = htons(frag_len);
}
res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
- if (res < 0)
+ if (res < 0 && errno != EPERM)
error(1, errno, "sendto overlap: %d", frag_len);
- if (res != frag_len)
+ if (res >= 0 && res != frag_len)
error(1, 0, "sendto overlap: %d vs %d", (int)res, frag_len);
frag_counter++;
}
diff --git a/tools/testing/selftests/net/ip_defrag.sh b/tools/testing/selftests/net/ip_defrag.sh
index 15d3489ecd9c..ceb7ad4dbd94 100755
--- a/tools/testing/selftests/net/ip_defrag.sh
+++ b/tools/testing/selftests/net/ip_defrag.sh
@@ -6,6 +6,8 @@
set +x
set -e
+modprobe -q nf_defrag_ipv6
+
readonly NETNS="ns-$(mktemp -u XXXXXX)"
setup() {
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
new file mode 100644
index 000000000000..17ced7d6ce25
--- /dev/null
+++ b/tools/testing/selftests/net/ipsec.c
@@ -0,0 +1,2195 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ipsec.c - Check xfrm on veth inside a net-ns.
+ * Copyright (c) 2018 Dmitry Safonov
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/limits.h>
+#include <linux/netlink.h>
+#include <linux/random.h>
+#include <linux/rtnetlink.h>
+#include <linux/veth.h>
+#include <linux/xfrm.h>
+#include <netinet/in.h>
+#include <net/if.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+#define printk(fmt, ...) \
+ ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__)
+
+#define pr_err(fmt, ...) printk(fmt ": %m", ##__VA_ARGS__)
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+
+#define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */
+#define MAX_PAYLOAD 2048
+#define XFRM_ALGO_KEY_BUF_SIZE 512
+#define MAX_PROCESSES (1 << 14) /* /16 mask divided by /30 subnets */
+#define INADDR_A ((in_addr_t) 0x0a000000) /* 10.0.0.0 */
+#define INADDR_B ((in_addr_t) 0xc0a80000) /* 192.168.0.0 */
+
+/* /30 mask for one veth connection */
+#define PREFIX_LEN 30
+#define child_ip(nr)	(4*(nr) + 1)
+#define grchild_ip(nr)	(4*(nr) + 2)
+
+#define VETH_FMT "ktst-%d"
+#define VETH_LEN 12
+
+static int nsfd_parent = -1;
+static int nsfd_childa = -1;
+static int nsfd_childb = -1;
+static long page_size;
+
+/*
+ * ksft_cnt is static in kselftest, so it isn't shared with children.
+ * We have to send each test result back to the parent and count there.
+ * results_fd is a pipe with test feedback from children.
+ */
+static int results_fd[2];
+
+const unsigned int ping_delay_nsec = 50 * 1000 * 1000;
+const unsigned int ping_timeout = 300;
+const unsigned int ping_count = 100;
+const unsigned int ping_success = 80;
+
+static void randomize_buffer(void *buf, size_t buflen)
+{
+ int *p = (int *)buf;
+ size_t words = buflen / sizeof(int);
+ size_t leftover = buflen % sizeof(int);
+
+ if (!buflen)
+ return;
+
+ while (words--)
+ *p++ = rand();
+
+ if (leftover) {
+ int tmp = rand();
+
+ memcpy(buf + buflen - leftover, &tmp, leftover);
+ }
+
+ return;
+}
+
+static int unshare_open(void)
+{
+ const char *netns_path = "/proc/self/ns/net";
+ int fd;
+
+ if (unshare(CLONE_NEWNET) != 0) {
+ pr_err("unshare()");
+ return -1;
+ }
+
+ fd = open(netns_path, O_RDONLY);
+ if (fd <= 0) {
+ pr_err("open(%s)", netns_path);
+ return -1;
+ }
+
+ return fd;
+}
+
+static int switch_ns(int fd)
+{
+ if (setns(fd, CLONE_NEWNET)) {
+ pr_err("setns()");
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Run the test inside a new parent net namespace so that less cleanup
+ * is needed on the error path.
+ */
+static int init_namespaces(void)
+{
+ nsfd_parent = unshare_open();
+ if (nsfd_parent <= 0)
+ return -1;
+
+ nsfd_childa = unshare_open();
+ if (nsfd_childa <= 0)
+ return -1;
+
+ if (switch_ns(nsfd_parent))
+ return -1;
+
+ nsfd_childb = unshare_open();
+ if (nsfd_childb <= 0)
+ return -1;
+
+ if (switch_ns(nsfd_parent))
+ return -1;
+ return 0;
+}
+
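+/*
+ * Open a SOCK_RAW netlink socket for @proto and seed the sequence counter
+ * with a random value. If the socket is already open, just reuse it.
+ */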
+static int netlink_sock(int *sock, uint32_t *seq_nr, int proto)
+{
+ if (*sock > 0) {
+		(*seq_nr)++;
+ return 0;
+ }
+
+ *sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto);
+ if (*sock <= 0) {
+ pr_err("socket(AF_NETLINK)");
+ return -1;
+ }
+
+ randomize_buffer(seq_nr, sizeof(*seq_nr));
+
+ return 0;
+}
+
+static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh)
+{
+ return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len));
+}
+
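+/*
+ * Append a single rtattr of @rta_type carrying @payload to the request in
+ * @nh, growing nlmsg_len and checking that it still fits in @req_sz bytes.
+ */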
+static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type, const void *payload, size_t size)
+{
+ /* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */
+ struct rtattr *attr = rtattr_hdr(nh);
+ size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size);
+
+ if (req_sz < nl_size) {
+ printk("req buf is too small: %zu < %zu", req_sz, nl_size);
+ return -1;
+ }
+ nh->nlmsg_len = nl_size;
+
+ attr->rta_len = RTA_LENGTH(size);
+ attr->rta_type = rta_type;
+ memcpy(RTA_DATA(attr), payload, size);
+
+ return 0;
+}
+
+static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type, const void *payload, size_t size)
+{
+ struct rtattr *ret = rtattr_hdr(nh);
+
+ if (rtattr_pack(nh, req_sz, rta_type, payload, size))
+ return 0;
+
+ return ret;
+}
+
+static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type)
+{
+ return _rtattr_begin(nh, req_sz, rta_type, 0, 0);
+}
+
+static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
+{
+ char *nlmsg_end = (char *)nh + nh->nlmsg_len;
+
+ attr->rta_len = nlmsg_end - (char *)attr;
+}
+
+static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz,
+ const char *peer, int ns)
+{
+ struct ifinfomsg pi;
+ struct rtattr *peer_attr;
+
+ memset(&pi, 0, sizeof(pi));
+ pi.ifi_family = AF_UNSPEC;
+ pi.ifi_change = 0xFFFFFFFF;
+
+ peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi));
+ if (!peer_attr)
+ return -1;
+
+ if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer)))
+ return -1;
+
+ if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns)))
+ return -1;
+
+ rtattr_end(nh, peer_attr);
+
+ return 0;
+}
+
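+/*
+ * Read the kernel's NLMSG_ERROR acknowledgement for the previous request
+ * and return its error code (0 on success).
+ */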
+static int netlink_check_answer(int sock)
+{
+ struct nlmsgerror {
+ struct nlmsghdr hdr;
+ int error;
+ struct nlmsghdr orig_msg;
+ } answer;
+
+ if (recv(sock, &answer, sizeof(answer), 0) < 0) {
+ pr_err("recv()");
+ return -1;
+ } else if (answer.hdr.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)answer.hdr.nlmsg_type);
+ return -1;
+ } else if (answer.error) {
+ printk("NLMSG_ERROR: %d: %s",
+ answer.error, strerror(-answer.error));
+ return answer.error;
+ }
+
+ return 0;
+}
+
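+/*
+ * Create a veth pair with a single RTM_NEWLINK request, moving @peera into
+ * namespace @ns_a and @peerb into @ns_b via the nested VETH_INFO_PEER data.
+ */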
+static int veth_add(int sock, uint32_t seq, const char *peera, int ns_a,
+ const char *peerb, int ns_b)
+{
+ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ const char veth_type[] = "veth";
+ struct rtattr *link_info, *info_data;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.nh.nlmsg_flags = flags;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifi_family = AF_UNSPEC;
+ req.info.ifi_change = 0xFFFFFFFF;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, peera, strlen(peera)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a)))
+ return -1;
+
+ link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO);
+ if (!link_info)
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type)))
+ return -1;
+
+ info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA);
+ if (!info_data)
+ return -1;
+
+ if (veth_pack_peerb(&req.nh, sizeof(req), peerb, ns_b))
+ return -1;
+
+ rtattr_end(&req.nh, info_data);
+ rtattr_end(&req.nh, link_info);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock);
+}
+
+static int ip4_addr_set(int sock, uint32_t seq, const char *intf,
+ struct in_addr addr, uint8_t prefix)
+{
+ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ struct {
+ struct nlmsghdr nh;
+ struct ifaddrmsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWADDR;
+ req.nh.nlmsg_flags = flags;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifa_family = AF_INET;
+ req.info.ifa_prefixlen = prefix;
+ req.info.ifa_index = if_nametoindex(intf);
+
+#ifdef DEBUG
+ {
+ char addr_str[IPV4_STR_SZ] = {};
+
+ strncpy(addr_str, inet_ntoa(addr), IPV4_STR_SZ - 1);
+
+ printk("ip addr set %s", addr_str);
+ }
+#endif
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, sizeof(addr)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFA_ADDRESS, &addr, sizeof(addr)))
+ return -1;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock);
+}
+
+static int link_set_up(int sock, uint32_t seq, const char *intf)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifi_family = AF_UNSPEC;
+ req.info.ifi_change = 0xFFFFFFFF;
+ req.info.ifi_index = if_nametoindex(intf);
+ req.info.ifi_flags = IFF_UP;
+ req.info.ifi_change = IFF_UP;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock);
+}
+
+static int ip4_route_set(int sock, uint32_t seq, const char *intf,
+ struct in_addr src, struct in_addr dst)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct rtmsg rt;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ unsigned int index = if_nametoindex(intf);
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt));
+ req.nh.nlmsg_type = RTM_NEWROUTE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
+ req.nh.nlmsg_seq = seq;
+ req.rt.rtm_family = AF_INET;
+ req.rt.rtm_dst_len = 32;
+ req.rt.rtm_table = RT_TABLE_MAIN;
+ req.rt.rtm_protocol = RTPROT_BOOT;
+ req.rt.rtm_scope = RT_SCOPE_LINK;
+ req.rt.rtm_type = RTN_UNICAST;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, sizeof(dst)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, sizeof(src)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index)))
+ return -1;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(sock);
+}
+
+static int tunnel_set_route(int route_sock, uint32_t *route_seq, char *veth,
+ struct in_addr tunsrc, struct in_addr tundst)
+{
+ if (ip4_addr_set(route_sock, (*route_seq)++, "lo",
+ tunsrc, PREFIX_LEN)) {
+ printk("Failed to set ipv4 addr");
+ return -1;
+ }
+
+ if (ip4_route_set(route_sock, (*route_seq)++, veth, tunsrc, tundst)) {
+ printk("Failed to set ipv4 route");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int init_child(int nsfd, char *veth, unsigned int src, unsigned int dst)
+{
+ struct in_addr intsrc = inet_makeaddr(INADDR_B, src);
+ struct in_addr tunsrc = inet_makeaddr(INADDR_A, src);
+ struct in_addr tundst = inet_makeaddr(INADDR_A, dst);
+ int route_sock = -1, ret = -1;
+ uint32_t route_seq;
+
+ if (switch_ns(nsfd))
+ return -1;
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) {
+ printk("Failed to open netlink route socket in child");
+ return -1;
+ }
+
+ if (ip4_addr_set(route_sock, route_seq++, veth, intsrc, PREFIX_LEN)) {
+ printk("Failed to set ipv4 addr");
+ goto err;
+ }
+
+ if (link_set_up(route_sock, route_seq++, veth)) {
+ printk("Failed to bring up %s", veth);
+ goto err;
+ }
+
+ if (tunnel_set_route(route_sock, &route_seq, veth, tunsrc, tundst)) {
+ printk("Failed to add tunnel route on %s", veth);
+ goto err;
+ }
+ ret = 0;
+
+err:
+ close(route_sock);
+ return ret;
+}
+
+#define ALGO_LEN 64
+enum desc_type {
+ CREATE_TUNNEL = 0,
+ ALLOCATE_SPI,
+ MONITOR_ACQUIRE,
+ EXPIRE_STATE,
+ EXPIRE_POLICY,
+};
+const char *desc_name[] = {
+ "create tunnel",
+ "alloc spi",
+ "monitor acquire",
+ "expire state",
+ "expire policy"
+};
+struct xfrm_desc {
+ enum desc_type type;
+ uint8_t proto;
+ char a_algo[ALGO_LEN];
+ char e_algo[ALGO_LEN];
+ char c_algo[ALGO_LEN];
+ char ae_algo[ALGO_LEN];
+ unsigned int icv_len;
+ /* unsigned key_len; */
+};
+
+enum msg_type {
+ MSG_ACK = 0,
+ MSG_EXIT,
+ MSG_PING,
+ MSG_XFRM_PREPARE,
+ MSG_XFRM_ADD,
+ MSG_XFRM_DEL,
+ MSG_XFRM_CLEANUP,
+};
+
+struct test_desc {
+ enum msg_type type;
+ union {
+ struct {
+ in_addr_t reply_ip;
+ unsigned int port;
+ } ping;
+ struct xfrm_desc xfrm_desc;
+ } body;
+};
+
+struct test_result {
+ struct xfrm_desc desc;
+ unsigned int res;
+};
+
+static void write_test_result(unsigned int res, struct xfrm_desc *d)
+{
+ struct test_result tr = {};
+ ssize_t ret;
+
+ tr.desc = *d;
+ tr.res = res;
+
+ ret = write(results_fd[1], &tr, sizeof(tr));
+ if (ret != sizeof(tr))
+ pr_err("Failed to write the result in pipe %zd", ret);
+}
+
+static void write_msg(int fd, struct test_desc *msg, bool exit_of_fail)
+{
+ ssize_t bytes = write(fd, msg, sizeof(*msg));
+
+ /* Make sure that write/read is atomic to a pipe */
+ BUILD_BUG_ON(sizeof(struct test_desc) > PIPE_BUF);
+
+ if (bytes < 0) {
+ pr_err("write()");
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+ if (bytes != sizeof(*msg)) {
+ pr_err("sent part of the message %zd/%zu", bytes, sizeof(*msg));
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+}
+
+static void read_msg(int fd, struct test_desc *msg, bool exit_of_fail)
+{
+ ssize_t bytes = read(fd, msg, sizeof(*msg));
+
+ if (bytes < 0) {
+ pr_err("read()");
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+ if (bytes != sizeof(*msg)) {
+ pr_err("got incomplete message %zd/%zu", bytes, sizeof(*msg));
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+}
+
+static int udp_ping_init(struct in_addr listen_ip, unsigned int u_timeout,
+ unsigned int *server_port, int sock[2])
+{
+ struct sockaddr_in server;
+ struct timeval t = { .tv_sec = 0, .tv_usec = u_timeout };
+ socklen_t s_len = sizeof(server);
+
+ sock[0] = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock[0] < 0) {
+ pr_err("socket()");
+ return -1;
+ }
+
+ server.sin_family = AF_INET;
+ server.sin_port = 0;
+ memcpy(&server.sin_addr.s_addr, &listen_ip, sizeof(struct in_addr));
+
+ if (bind(sock[0], (struct sockaddr *)&server, s_len)) {
+ pr_err("bind()");
+ goto err_close_server;
+ }
+
+ if (getsockname(sock[0], (struct sockaddr *)&server, &s_len)) {
+ pr_err("getsockname()");
+ goto err_close_server;
+ }
+
+ *server_port = ntohs(server.sin_port);
+
+ if (setsockopt(sock[0], SOL_SOCKET, SO_RCVTIMEO, (const char *)&t, sizeof t)) {
+ pr_err("setsockopt()");
+ goto err_close_server;
+ }
+
+ sock[1] = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock[1] < 0) {
+ pr_err("socket()");
+ goto err_close_server;
+ }
+
+ return 0;
+
+err_close_server:
+ close(sock[0]);
+ return -1;
+}
+
+static int udp_ping_send(int sock[2], in_addr_t dest_ip, unsigned int port,
+ char *buf, size_t buf_len)
+{
+ struct sockaddr_in server;
+ const struct sockaddr *dest_addr = (struct sockaddr *)&server;
+	char sock_buf[buf_len];
+ ssize_t r_bytes, s_bytes;
+
+ server.sin_family = AF_INET;
+ server.sin_port = htons(port);
+ server.sin_addr.s_addr = dest_ip;
+
+ s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server));
+ if (s_bytes < 0) {
+ pr_err("sendto()");
+ return -1;
+ } else if (s_bytes != buf_len) {
+		printk("sent only part of the message: %zd/%zu", s_bytes, buf_len);
+ return -1;
+ }
+
+ r_bytes = recv(sock[0], sock_buf, buf_len, 0);
+ if (r_bytes < 0) {
+ if (errno != EAGAIN)
+ pr_err("recv()");
+ return -1;
+ } else if (r_bytes == 0) { /* EOF */
+ printk("EOF on reply to ping");
+ return -1;
+ } else if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) {
+ printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int udp_ping_reply(int sock[2], in_addr_t dest_ip, unsigned int port,
+ char *buf, size_t buf_len)
+{
+ struct sockaddr_in server;
+ const struct sockaddr *dest_addr = (struct sockaddr *)&server;
+	char sock_buf[buf_len];
+ ssize_t r_bytes, s_bytes;
+
+ server.sin_family = AF_INET;
+ server.sin_port = htons(port);
+ server.sin_addr.s_addr = dest_ip;
+
+ r_bytes = recv(sock[0], sock_buf, buf_len, 0);
+ if (r_bytes < 0) {
+ if (errno != EAGAIN)
+ pr_err("recv()");
+ return -1;
+ }
+ if (r_bytes == 0) { /* EOF */
+ printk("EOF on reply to ping");
+ return -1;
+ }
+ if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) {
+ printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len);
+ return -1;
+ }
+
+ s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server));
+ if (s_bytes < 0) {
+ pr_err("sendto()");
+ return -1;
+ } else if (s_bytes != buf_len) {
+		printk("sent only part of the message: %zd/%zu", s_bytes, buf_len);
+ return -1;
+ }
+
+ return 0;
+}
+
+typedef int (*ping_f)(int sock[2], in_addr_t dest_ip, unsigned int port,
+ char *buf, size_t buf_len);
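+/*
+ * Exchange the ping address/port with the peer over @cmd_fd, then run
+ * ping_count iterations of @func and fail unless at least ping_success
+ * of them succeed.
+ */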
+static int do_ping(int cmd_fd, char *buf, size_t buf_len, struct in_addr from,
+ bool init_side, int d_port, in_addr_t to, ping_f func)
+{
+ struct test_desc msg;
+ unsigned int s_port, i, ping_succeeded = 0;
+ int ping_sock[2];
+ char to_str[IPV4_STR_SZ] = {}, from_str[IPV4_STR_SZ] = {};
+
+ if (udp_ping_init(from, ping_timeout, &s_port, ping_sock)) {
+ printk("Failed to init ping");
+ return -1;
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_PING;
+ msg.body.ping.port = s_port;
+ memcpy(&msg.body.ping.reply_ip, &from, sizeof(from));
+
+ write_msg(cmd_fd, &msg, 0);
+ if (init_side) {
+		/* The other end sends the IP address to ping */
+ read_msg(cmd_fd, &msg, 0);
+ if (msg.type != MSG_PING)
+ return -1;
+ to = msg.body.ping.reply_ip;
+ d_port = msg.body.ping.port;
+ }
+
+ for (i = 0; i < ping_count ; i++) {
+ struct timespec sleep_time = {
+ .tv_sec = 0,
+ .tv_nsec = ping_delay_nsec,
+ };
+
+ ping_succeeded += !func(ping_sock, to, d_port, buf, page_size);
+ nanosleep(&sleep_time, 0);
+ }
+
+ close(ping_sock[0]);
+ close(ping_sock[1]);
+
+ strncpy(to_str, inet_ntoa(*(struct in_addr *)&to), IPV4_STR_SZ - 1);
+ strncpy(from_str, inet_ntoa(from), IPV4_STR_SZ - 1);
+
+ if (ping_succeeded < ping_success) {
+ printk("ping (%s) %s->%s failed %u/%u times",
+ init_side ? "send" : "reply", from_str, to_str,
+ ping_count - ping_succeeded, ping_count);
+ return -1;
+ }
+
+#ifdef DEBUG
+ printk("ping (%s) %s->%s succeeded %u/%u times",
+ init_side ? "send" : "reply", from_str, to_str,
+ ping_succeeded, ping_count);
+#endif
+
+ return 0;
+}
+
+static int xfrm_fill_key(char *name, char *buf,
+ size_t buf_len, unsigned int *key_len)
+{
+ /* TODO: use set/map instead */
+ if (strncmp(name, "digest_null", ALGO_LEN) == 0)
+ *key_len = 0;
+ else if (strncmp(name, "ecb(cipher_null)", ALGO_LEN) == 0)
+ *key_len = 0;
+ else if (strncmp(name, "cbc(des)", ALGO_LEN) == 0)
+ *key_len = 64;
+ else if (strncmp(name, "hmac(md5)", ALGO_LEN) == 0)
+ *key_len = 128;
+ else if (strncmp(name, "cmac(aes)", ALGO_LEN) == 0)
+ *key_len = 128;
+ else if (strncmp(name, "xcbc(aes)", ALGO_LEN) == 0)
+ *key_len = 128;
+ else if (strncmp(name, "cbc(cast5)", ALGO_LEN) == 0)
+ *key_len = 128;
+ else if (strncmp(name, "cbc(serpent)", ALGO_LEN) == 0)
+ *key_len = 128;
+ else if (strncmp(name, "hmac(sha1)", ALGO_LEN) == 0)
+ *key_len = 160;
+ else if (strncmp(name, "hmac(rmd160)", ALGO_LEN) == 0)
+ *key_len = 160;
+ else if (strncmp(name, "cbc(des3_ede)", ALGO_LEN) == 0)
+ *key_len = 192;
+ else if (strncmp(name, "hmac(sha256)", ALGO_LEN) == 0)
+ *key_len = 256;
+ else if (strncmp(name, "cbc(aes)", ALGO_LEN) == 0)
+ *key_len = 256;
+ else if (strncmp(name, "cbc(camellia)", ALGO_LEN) == 0)
+ *key_len = 256;
+ else if (strncmp(name, "cbc(twofish)", ALGO_LEN) == 0)
+ *key_len = 256;
+ else if (strncmp(name, "rfc3686(ctr(aes))", ALGO_LEN) == 0)
+ *key_len = 288;
+ else if (strncmp(name, "hmac(sha384)", ALGO_LEN) == 0)
+ *key_len = 384;
+ else if (strncmp(name, "cbc(blowfish)", ALGO_LEN) == 0)
+ *key_len = 448;
+ else if (strncmp(name, "hmac(sha512)", ALGO_LEN) == 0)
+ *key_len = 512;
+ else if (strncmp(name, "rfc4106(gcm(aes))-128", ALGO_LEN) == 0)
+ *key_len = 160;
+ else if (strncmp(name, "rfc4543(gcm(aes))-128", ALGO_LEN) == 0)
+ *key_len = 160;
+ else if (strncmp(name, "rfc4309(ccm(aes))-128", ALGO_LEN) == 0)
+ *key_len = 152;
+ else if (strncmp(name, "rfc4106(gcm(aes))-192", ALGO_LEN) == 0)
+ *key_len = 224;
+ else if (strncmp(name, "rfc4543(gcm(aes))-192", ALGO_LEN) == 0)
+ *key_len = 224;
+ else if (strncmp(name, "rfc4309(ccm(aes))-192", ALGO_LEN) == 0)
+ *key_len = 216;
+ else if (strncmp(name, "rfc4106(gcm(aes))-256", ALGO_LEN) == 0)
+ *key_len = 288;
+ else if (strncmp(name, "rfc4543(gcm(aes))-256", ALGO_LEN) == 0)
+ *key_len = 288;
+ else if (strncmp(name, "rfc4309(ccm(aes))-256", ALGO_LEN) == 0)
+ *key_len = 280;
+ else if (strncmp(name, "rfc7539(chacha20,poly1305)-128", ALGO_LEN) == 0)
+ *key_len = 0;
+
+ if (*key_len > buf_len) {
+ printk("Can't pack a key - too big for buffer");
+ return -1;
+ }
+
+ randomize_buffer(buf, *key_len);
+
+ return 0;
+}
+
+static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz,
+ struct xfrm_desc *desc)
+{
+ struct {
+ union {
+ struct xfrm_algo alg;
+ struct xfrm_algo_aead aead;
+ struct xfrm_algo_auth auth;
+ } u;
+ char buf[XFRM_ALGO_KEY_BUF_SIZE];
+ } alg = {};
+ size_t alen, elen, clen, aelen;
+ unsigned short type;
+
+ alen = strlen(desc->a_algo);
+ elen = strlen(desc->e_algo);
+ clen = strlen(desc->c_algo);
+ aelen = strlen(desc->ae_algo);
+
+ /* Verify desc */
+ switch (desc->proto) {
+ case IPPROTO_AH:
+ if (!alen || elen || clen || aelen) {
+ printk("BUG: buggy ah desc");
+ return -1;
+ }
+ strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN - 1);
+ if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ type = XFRMA_ALG_AUTH;
+ break;
+ case IPPROTO_COMP:
+ if (!clen || elen || alen || aelen) {
+ printk("BUG: buggy comp desc");
+ return -1;
+ }
+ strncpy(alg.u.alg.alg_name, desc->c_algo, ALGO_LEN - 1);
+ if (xfrm_fill_key(desc->c_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ type = XFRMA_ALG_COMP;
+ break;
+ case IPPROTO_ESP:
+ if (!((alen && elen) ^ aelen) || clen) {
+ printk("BUG: buggy esp desc");
+ return -1;
+ }
+ if (aelen) {
+ alg.u.aead.alg_icv_len = desc->icv_len;
+ strncpy(alg.u.aead.alg_name, desc->ae_algo, ALGO_LEN - 1);
+ if (xfrm_fill_key(desc->ae_algo, alg.u.aead.alg_key,
+ sizeof(alg.buf), &alg.u.aead.alg_key_len))
+ return -1;
+ type = XFRMA_ALG_AEAD;
+ } else {
+
+ strncpy(alg.u.alg.alg_name, desc->e_algo, ALGO_LEN - 1);
+ type = XFRMA_ALG_CRYPT;
+ if (xfrm_fill_key(desc->e_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg)))
+ return -1;
+
+			strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN - 1);
+ type = XFRMA_ALG_AUTH;
+ if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ }
+ break;
+ default:
+ printk("BUG: unknown proto in desc");
+ return -1;
+ }
+
+ if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg)))
+ return -1;
+
+ return 0;
+}
+
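+/* Derive the SPI from the host part of the source address. */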
+static inline uint32_t gen_spi(struct in_addr src)
+{
+ return htonl(inet_lnaof(src));
+}
+
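+/*
+ * Install a tunnel-mode SA (XFRM_MSG_NEWSA) for @src -> @dst with the
+ * algorithms described by @desc and unlimited lifetimes.
+ */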
+static int xfrm_state_add(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst,
+ struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_usersa_info info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = XFRM_MSG_NEWSA;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ /* Fill selector. */
+ memcpy(&req.info.sel.daddr, &dst, sizeof(dst));
+ memcpy(&req.info.sel.saddr, &src, sizeof(src));
+ req.info.sel.family = AF_INET;
+ req.info.sel.prefixlen_d = PREFIX_LEN;
+ req.info.sel.prefixlen_s = PREFIX_LEN;
+
+ /* Fill id */
+ memcpy(&req.info.id.daddr, &dst, sizeof(dst));
+ /* Note: zero-spi cannot be deleted */
+ req.info.id.spi = spi;
+ req.info.id.proto = desc->proto;
+
+ memcpy(&req.info.saddr, &src, sizeof(src));
+
+	/* Fill lifetime_cfg */
+ req.info.lft.soft_byte_limit = XFRM_INF;
+ req.info.lft.hard_byte_limit = XFRM_INF;
+ req.info.lft.soft_packet_limit = XFRM_INF;
+ req.info.lft.hard_packet_limit = XFRM_INF;
+
+ req.info.family = AF_INET;
+ req.info.mode = XFRM_MODE_TUNNEL;
+
+ if (xfrm_state_pack_algo(&req.nh, sizeof(req), desc))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static bool xfrm_usersa_found(struct xfrm_usersa_info *info, uint32_t spi,
+ struct in_addr src, struct in_addr dst,
+ struct xfrm_desc *desc)
+{
+ if (memcmp(&info->sel.daddr, &dst, sizeof(dst)))
+ return false;
+
+ if (memcmp(&info->sel.saddr, &src, sizeof(src)))
+ return false;
+
+ if (info->sel.family != AF_INET ||
+ info->sel.prefixlen_d != PREFIX_LEN ||
+ info->sel.prefixlen_s != PREFIX_LEN)
+ return false;
+
+ if (info->id.spi != spi || info->id.proto != desc->proto)
+ return false;
+
+ if (memcmp(&info->id.daddr, &dst, sizeof(dst)))
+ return false;
+
+ if (memcmp(&info->saddr, &src, sizeof(src)))
+ return false;
+
+ if (info->lft.soft_byte_limit != XFRM_INF ||
+ info->lft.hard_byte_limit != XFRM_INF ||
+ info->lft.soft_packet_limit != XFRM_INF ||
+ info->lft.hard_packet_limit != XFRM_INF)
+ return false;
+
+ if (info->family != AF_INET || info->mode != XFRM_MODE_TUNNEL)
+ return false;
+
+ /* XXX: check xfrm algo, see xfrm_state_pack_algo(). */
+
+ return true;
+}
+
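+/*
+ * Dump SAs with XFRM_MSG_GETSA, filtered by source address, and verify
+ * that a state matching @spi/@src/@dst/@desc is present in the dump.
+ */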
+static int xfrm_state_check(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst,
+ struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_usersa_info info;
+ int error;
+ };
+ char attrbuf[MAX_PAYLOAD];
+ } answer;
+ struct xfrm_address_filter filter = {};
+ bool found = false;
+
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(0);
+ req.nh.nlmsg_type = XFRM_MSG_GETSA;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ req.nh.nlmsg_seq = seq;
+
+ /*
+ * Add dump filter by source address as there may be other tunnels
+ * in this netns (if tests run in parallel).
+ */
+ filter.family = AF_INET;
+ filter.splen = 0x1f; /* 0xffffffff mask see addr_match() */
+ memcpy(&filter.saddr, &src, sizeof(src));
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_ADDRESS_FILTER,
+ &filter, sizeof(filter)))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ while (1) {
+ if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) {
+ pr_err("recv()");
+ return -1;
+ }
+ if (answer.nh.nlmsg_type == NLMSG_ERROR) {
+ printk("NLMSG_ERROR: %d: %s",
+ answer.error, strerror(-answer.error));
+ return -1;
+ } else if (answer.nh.nlmsg_type == NLMSG_DONE) {
+ if (found)
+ return 0;
+ printk("didn't find allocated xfrm state in dump");
+ return -1;
+ } else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) {
+ if (xfrm_usersa_found(&answer.info, spi, src, dst, desc))
+ found = true;
+ }
+ }
+}
+
+static int xfrm_set(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst,
+ struct xfrm_desc *desc)
+{
+ int err;
+
+ err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc);
+ if (err) {
+ printk("Failed to add xfrm state");
+ return -1;
+ }
+
+ err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc);
+ if (err) {
+ printk("Failed to add xfrm state");
+ return -1;
+ }
+
+ /* Check dumps for XFRM_MSG_GETSA */
+ err = xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc);
+ err |= xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc);
+ if (err) {
+ printk("Failed to check xfrm state");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_policy_add(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst, uint8_t dir,
+ struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_userpolicy_info info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ struct xfrm_user_tmpl tmpl;
+
+ memset(&req, 0, sizeof(req));
+ memset(&tmpl, 0, sizeof(tmpl));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = XFRM_MSG_NEWPOLICY;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ /* Fill selector. */
+ memcpy(&req.info.sel.daddr, &dst, sizeof(tundst));
+ memcpy(&req.info.sel.saddr, &src, sizeof(tunsrc));
+ req.info.sel.family = AF_INET;
+ req.info.sel.prefixlen_d = PREFIX_LEN;
+ req.info.sel.prefixlen_s = PREFIX_LEN;
+
+	/* Fill lifetime_cfg */
+ req.info.lft.soft_byte_limit = XFRM_INF;
+ req.info.lft.hard_byte_limit = XFRM_INF;
+ req.info.lft.soft_packet_limit = XFRM_INF;
+ req.info.lft.hard_packet_limit = XFRM_INF;
+
+ req.info.dir = dir;
+
+ /* Fill tmpl */
+ memcpy(&tmpl.id.daddr, &dst, sizeof(dst));
+ /* Note: zero-spi cannot be deleted */
+ tmpl.id.spi = spi;
+ tmpl.id.proto = proto;
+ tmpl.family = AF_INET;
+ memcpy(&tmpl.saddr, &src, sizeof(src));
+ tmpl.mode = XFRM_MODE_TUNNEL;
+ tmpl.aalgos = (~(uint32_t)0);
+ tmpl.ealgos = (~(uint32_t)0);
+ tmpl.calgos = (~(uint32_t)0);
+
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &tmpl, sizeof(tmpl)))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static int xfrm_prepare(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
+{
+ if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst,
+ XFRM_POLICY_OUT, tunsrc, tundst, proto)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src,
+ XFRM_POLICY_IN, tunsrc, tundst, proto)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_policy_del(int xfrm_sock, uint32_t seq,
+ struct in_addr src, struct in_addr dst, uint8_t dir,
+ struct in_addr tunsrc, struct in_addr tundst)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_userpolicy_id id;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id));
+ req.nh.nlmsg_type = XFRM_MSG_DELPOLICY;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ /* Fill id */
+ memcpy(&req.id.sel.daddr, &dst, sizeof(tundst));
+ memcpy(&req.id.sel.saddr, &src, sizeof(tunsrc));
+ req.id.sel.family = AF_INET;
+ req.id.sel.prefixlen_d = PREFIX_LEN;
+ req.id.sel.prefixlen_s = PREFIX_LEN;
+ req.id.dir = dir;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static int xfrm_cleanup(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst)
+{
+ if (xfrm_policy_del(xfrm_sock, (*seq)++, src, dst,
+ XFRM_POLICY_OUT, tunsrc, tundst)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ if (xfrm_policy_del(xfrm_sock, (*seq)++, dst, src,
+ XFRM_POLICY_IN, tunsrc, tundst)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_state_del(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst, uint8_t proto)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_usersa_id id;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ xfrm_address_t saddr = {};
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id));
+ req.nh.nlmsg_type = XFRM_MSG_DELSA;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ memcpy(&req.id.daddr, &dst, sizeof(dst));
+ req.id.family = AF_INET;
+ req.id.proto = proto;
+ /* Note: zero-spi cannot be deleted */
+ req.id.spi = spi;
+
+ memcpy(&saddr, &src, sizeof(src));
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SRCADDR, &saddr, sizeof(saddr)))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static int xfrm_delete(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
+{
+ if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), src, dst, proto)) {
+ printk("Failed to remove xfrm state");
+ return -1;
+ }
+
+ if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), dst, src, proto)) {
+ printk("Failed to remove xfrm state");
+ return -1;
+ }
+
+ return 0;
+}
+
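+/*
+ * Request an SPI with XFRM_MSG_ALLOCSPI and check that the SA returned in
+ * the answer carries the requested value.
+ */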
+static int xfrm_state_allocspi(int xfrm_sock, uint32_t *seq,
+ uint32_t spi, uint8_t proto)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_userspi_info spi;
+ } req;
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_usersa_info info;
+ int error;
+ };
+ } answer;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.spi));
+ req.nh.nlmsg_type = XFRM_MSG_ALLOCSPI;
+ req.nh.nlmsg_flags = NLM_F_REQUEST;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ req.spi.info.family = AF_INET;
+ req.spi.min = spi;
+ req.spi.max = spi;
+ req.spi.info.id.proto = proto;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return KSFT_FAIL;
+ }
+
+ if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) {
+ pr_err("recv()");
+ return KSFT_FAIL;
+ } else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) {
+ uint32_t new_spi = htonl(answer.info.id.spi);
+
+ if (new_spi != spi) {
+ printk("allocated spi is different from requested: %#x != %#x",
+ new_spi, spi);
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+ } else if (answer.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)answer.nh.nlmsg_type);
+ return KSFT_FAIL;
+ }
+
+ printk("NLMSG_ERROR: %d: %s", answer.error, strerror(-answer.error));
+ return (answer.error) ? KSFT_FAIL : KSFT_PASS;
+}
+
+static int netlink_sock_bind(int *sock, uint32_t *seq, int proto, uint32_t groups)
+{
+ struct sockaddr_nl snl = {};
+ socklen_t addr_len;
+ int ret = -1;
+
+ snl.nl_family = AF_NETLINK;
+ snl.nl_groups = groups;
+
+ if (netlink_sock(sock, seq, proto)) {
+ printk("Failed to open xfrm netlink socket");
+ return -1;
+ }
+
+ if (bind(*sock, (struct sockaddr *)&snl, sizeof(snl)) < 0) {
+ pr_err("bind()");
+ goto out_close;
+ }
+
+ addr_len = sizeof(snl);
+ if (getsockname(*sock, (struct sockaddr *)&snl, &addr_len) < 0) {
+ pr_err("getsockname()");
+ goto out_close;
+ }
+ if (addr_len != sizeof(snl)) {
+ printk("Wrong address length %d", addr_len);
+ goto out_close;
+ }
+ if (snl.nl_family != AF_NETLINK) {
+ printk("Wrong address family %d", snl.nl_family);
+ goto out_close;
+ }
+ return 0;
+
+out_close:
+ close(*sock);
+ return ret;
+}
+
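+/*
+ * Send an XFRM_MSG_ACQUIRE request and verify that the algorithm bitmasks
+ * arrive unchanged on the XFRMNLGRP_ACQUIRE multicast group.
+ */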
+static int xfrm_monitor_acquire(int xfrm_sock, uint32_t *seq, unsigned int nr)
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_user_acquire acq;
+ int error;
+ };
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ struct xfrm_user_tmpl xfrm_tmpl = {};
+ int xfrm_listen = -1, ret = KSFT_FAIL;
+ uint32_t seq_listen;
+
+ if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_ACQUIRE))
+ return KSFT_FAIL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.acq));
+ req.nh.nlmsg_type = XFRM_MSG_ACQUIRE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ req.acq.policy.sel.family = AF_INET;
+ req.acq.aalgos = 0xfeed;
+ req.acq.ealgos = 0xbaad;
+ req.acq.calgos = 0xbabe;
+
+ xfrm_tmpl.family = AF_INET;
+ xfrm_tmpl.id.proto = IPPROTO_ESP;
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &xfrm_tmpl, sizeof(xfrm_tmpl)))
+ goto out_close;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ goto out_close;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ goto out_close;
+ }
+
+ if (req.error) {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ ret = req.error;
+ goto out_close;
+ }
+
+ if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ }
+
+ if (req.acq.aalgos != 0xfeed || req.acq.ealgos != 0xbaad
+ || req.acq.calgos != 0xbabe) {
+ printk("xfrm_user_acquire has changed %x %x %x",
+ req.acq.aalgos, req.acq.ealgos, req.acq.calgos);
+ goto out_close;
+ }
+
+ ret = KSFT_PASS;
+out_close:
+ close(xfrm_listen);
+ return ret;
+}
+
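+/*
+ * Add an SA, send XFRM_MSG_EXPIRE for it and check that the notification
+ * delivered on XFRMNLGRP_EXPIRE has the hard-expire flag set.
+ */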
+static int xfrm_expire_state(int xfrm_sock, uint32_t *seq,
+ unsigned int nr, struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_user_expire expire;
+ int error;
+ };
+ } req;
+ struct in_addr src, dst;
+ int xfrm_listen = -1, ret = KSFT_FAIL;
+ uint32_t seq_listen;
+
+ src = inet_makeaddr(INADDR_B, child_ip(nr));
+ dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
+
+ if (xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc)) {
+ printk("Failed to add xfrm state");
+ return KSFT_FAIL;
+ }
+
+ if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE))
+ return KSFT_FAIL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire));
+ req.nh.nlmsg_type = XFRM_MSG_EXPIRE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ memcpy(&req.expire.state.id.daddr, &dst, sizeof(dst));
+ req.expire.state.id.spi = gen_spi(src);
+ req.expire.state.id.proto = desc->proto;
+ req.expire.state.family = AF_INET;
+ req.expire.hard = 0xff;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ goto out_close;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ goto out_close;
+ }
+
+ if (req.error) {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ ret = req.error;
+ goto out_close;
+ }
+
+ if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ }
+
+ if (req.expire.hard != 0x1) {
+ printk("expire.hard is not set: %x", req.expire.hard);
+ goto out_close;
+ }
+
+ ret = KSFT_PASS;
+out_close:
+ close(xfrm_listen);
+ return ret;
+}
+
+static int xfrm_expire_policy(int xfrm_sock, uint32_t *seq,
+ unsigned int nr, struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_user_polexpire expire;
+ int error;
+ };
+ } req;
+ struct in_addr src, dst, tunsrc, tundst;
+ int xfrm_listen = -1, ret = KSFT_FAIL;
+ uint32_t seq_listen;
+
+ src = inet_makeaddr(INADDR_B, child_ip(nr));
+ dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
+ tunsrc = inet_makeaddr(INADDR_A, child_ip(nr));
+ tundst = inet_makeaddr(INADDR_A, grchild_ip(nr));
+
+ if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst,
+ XFRM_POLICY_OUT, tunsrc, tundst, desc->proto)) {
+ printk("Failed to add xfrm policy");
+ return KSFT_FAIL;
+ }
+
+ if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE))
+ return KSFT_FAIL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire));
+ req.nh.nlmsg_type = XFRM_MSG_POLEXPIRE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ /* Fill selector. */
+ memcpy(&req.expire.pol.sel.daddr, &dst, sizeof(tundst));
+ memcpy(&req.expire.pol.sel.saddr, &src, sizeof(tunsrc));
+ req.expire.pol.sel.family = AF_INET;
+ req.expire.pol.sel.prefixlen_d = PREFIX_LEN;
+ req.expire.pol.sel.prefixlen_s = PREFIX_LEN;
+ req.expire.pol.dir = XFRM_POLICY_OUT;
+ req.expire.hard = 0xff;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ goto out_close;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ goto out_close;
+ }
+
+ if (req.error) {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ ret = req.error;
+ goto out_close;
+ }
+
+ if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ }
+
+ if (req.expire.hard != 0x1) {
+ printk("expire.hard is not set: %x", req.expire.hard);
+ goto out_close;
+ }
+
+ ret = KSFT_PASS;
+out_close:
+ close(xfrm_listen);
+ return ret;
+}
+
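+/*
+ * Run one CREATE_TUNNEL case: ping without xfrm, set up policies and
+ * states on both ends, ping through the tunnel, then tear it all down.
+ */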
+static int child_serv(int xfrm_sock, uint32_t *seq,
+ unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc)
+{
+ struct in_addr src, dst, tunsrc, tundst;
+ struct test_desc msg;
+ int ret = KSFT_FAIL;
+
+ src = inet_makeaddr(INADDR_B, child_ip(nr));
+ dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
+ tunsrc = inet_makeaddr(INADDR_A, child_ip(nr));
+ tundst = inet_makeaddr(INADDR_A, grchild_ip(nr));
+
+ /* UDP pinging without xfrm */
+ if (do_ping(cmd_fd, buf, page_size, src, true, 0, 0, udp_ping_send)) {
+ printk("ping failed before setting xfrm");
+ return KSFT_FAIL;
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_PREPARE;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+
+ if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) {
+ printk("failed to prepare xfrm");
+ goto cleanup;
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_ADD;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+ if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) {
+ printk("failed to set xfrm");
+ goto delete;
+ }
+
+ /* UDP pinging with xfrm tunnel */
+ if (do_ping(cmd_fd, buf, page_size, tunsrc,
+ true, 0, 0, udp_ping_send)) {
+ printk("ping failed for xfrm");
+ goto delete;
+ }
+
+ ret = KSFT_PASS;
+delete:
+ /* xfrm delete */
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_DEL;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+
+ if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) {
+		printk("failed to remove xfrm");
+ ret = KSFT_FAIL;
+ }
+
+cleanup:
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_CLEANUP;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+ if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) {
+		printk("failed to clean up xfrm");
+ ret = KSFT_FAIL;
+ }
+ return ret;
+}
+
+static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf)
+{
+ struct xfrm_desc desc;
+ struct test_desc msg;
+ int xfrm_sock = -1;
+ uint32_t seq;
+
+ if (switch_ns(nsfd_childa))
+ exit(KSFT_FAIL);
+
+ if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) {
+ printk("Failed to open xfrm netlink socket");
+ exit(KSFT_FAIL);
+ }
+
+ /* Check that seq sock is ready, just for sure. */
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_ACK;
+ write_msg(cmd_fd, &msg, 1);
+ read_msg(cmd_fd, &msg, 1);
+ if (msg.type != MSG_ACK) {
+ printk("Ack failed");
+ exit(KSFT_FAIL);
+ }
+
+ for (;;) {
+ ssize_t received = read(test_desc_fd, &desc, sizeof(desc));
+ int ret;
+
+ if (received == 0) /* EOF */
+ break;
+
+ if (received != sizeof(desc)) {
+ pr_err("read() returned %zd", received);
+ exit(KSFT_FAIL);
+ }
+
+ switch (desc.type) {
+ case CREATE_TUNNEL:
+ ret = child_serv(xfrm_sock, &seq, nr,
+ cmd_fd, buf, &desc);
+ break;
+ case ALLOCATE_SPI:
+ ret = xfrm_state_allocspi(xfrm_sock, &seq,
+ -1, desc.proto);
+ break;
+ case MONITOR_ACQUIRE:
+ ret = xfrm_monitor_acquire(xfrm_sock, &seq, nr);
+ break;
+ case EXPIRE_STATE:
+ ret = xfrm_expire_state(xfrm_sock, &seq, nr, &desc);
+ break;
+ case EXPIRE_POLICY:
+ ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc);
+ break;
+ default:
+ printk("Unknown desc type %d", desc.type);
+ exit(KSFT_FAIL);
+ }
+ write_test_result(ret, &desc);
+ }
+
+ close(xfrm_sock);
+
+ msg.type = MSG_EXIT;
+ write_msg(cmd_fd, &msg, 1);
+ exit(KSFT_PASS);
+}
+
+static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf,
+ struct test_desc *msg, int xfrm_sock, uint32_t *seq)
+{
+ struct in_addr src, dst, tunsrc, tundst;
+ bool tun_reply;
+ struct xfrm_desc *desc = &msg->body.xfrm_desc;
+
+ src = inet_makeaddr(INADDR_B, grchild_ip(nr));
+ dst = inet_makeaddr(INADDR_B, child_ip(nr));
+ tunsrc = inet_makeaddr(INADDR_A, grchild_ip(nr));
+ tundst = inet_makeaddr(INADDR_A, child_ip(nr));
+
+ switch (msg->type) {
+ case MSG_EXIT:
+ exit(KSFT_PASS);
+ case MSG_ACK:
+ write_msg(cmd_fd, msg, 1);
+ break;
+ case MSG_PING:
+ tun_reply = memcmp(&dst, &msg->body.ping.reply_ip, sizeof(in_addr_t));
+ /* UDP pinging without xfrm */
+ if (do_ping(cmd_fd, buf, page_size, tun_reply ? tunsrc : src,
+ false, msg->body.ping.port,
+ msg->body.ping.reply_ip, udp_ping_reply)) {
+ printk("ping failed before setting xfrm");
+ }
+ break;
+ case MSG_XFRM_PREPARE:
+ if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst,
+ desc->proto)) {
+ xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
+ printk("failed to prepare xfrm");
+ }
+ break;
+ case MSG_XFRM_ADD:
+ if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) {
+ xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
+ printk("failed to set xfrm");
+ }
+ break;
+ case MSG_XFRM_DEL:
+ if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst,
+ desc->proto)) {
+ xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
+ printk("failed to remove xfrm");
+ }
+ break;
+ case MSG_XFRM_CLEANUP:
+ if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) {
+ printk("failed to cleanup xfrm");
+ }
+ break;
+ default:
+ printk("got unknown msg type %d", msg->type);
+ };
+}
+
+static int grand_child_f(unsigned int nr, int cmd_fd, void *buf)
+{
+ struct test_desc msg;
+ int xfrm_sock = -1;
+ uint32_t seq;
+
+ if (switch_ns(nsfd_childb))
+ exit(KSFT_FAIL);
+
+ if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) {
+ printk("Failed to open xfrm netlink socket");
+ exit(KSFT_FAIL);
+ }
+
+ do {
+ read_msg(cmd_fd, &msg, 1);
+ grand_child_serv(nr, cmd_fd, buf, &msg, xfrm_sock, &seq);
+ } while (1);
+
+ close(xfrm_sock);
+ exit(KSFT_FAIL);
+}
+
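+/*
+ * Configure both ends of one veth pair, then fork the child (test driver)
+ * and grandchild (responder), which talk over a SOCK_SEQPACKET socketpair.
+ */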
+static int start_child(unsigned int nr, char *veth, int test_desc_fd[2])
+{
+ int cmd_sock[2];
+ void *data_map;
+ pid_t child;
+
+ if (init_child(nsfd_childa, veth, child_ip(nr), grchild_ip(nr)))
+ return -1;
+
+ if (init_child(nsfd_childb, veth, grchild_ip(nr), child_ip(nr)))
+ return -1;
+
+ child = fork();
+ if (child < 0) {
+ pr_err("fork()");
+ return -1;
+ } else if (child) {
+ /* in parent - selftest */
+ return switch_ns(nsfd_parent);
+ }
+
+ if (close(test_desc_fd[1])) {
+ pr_err("close()");
+ return -1;
+ }
+
+ /* child */
+ data_map = mmap(0, page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (data_map == MAP_FAILED) {
+ pr_err("mmap()");
+ return -1;
+ }
+
+ randomize_buffer(data_map, page_size);
+
+ if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, cmd_sock)) {
+ pr_err("socketpair()");
+ return -1;
+ }
+
+ child = fork();
+ if (child < 0) {
+ pr_err("fork()");
+ return -1;
+ } else if (child) {
+ if (close(cmd_sock[0])) {
+ pr_err("close()");
+ return -1;
+ }
+ return child_f(nr, test_desc_fd[0], cmd_sock[1], data_map);
+ }
+ if (close(cmd_sock[1])) {
+ pr_err("close()");
+ return -1;
+ }
+ return grand_child_f(nr, cmd_sock[0], data_map);
+}
+
+static void exit_usage(char **argv)
+{
+ printk("Usage: %s [nr_process]", argv[0]);
+ exit(KSFT_FAIL);
+}
+
+static int __write_desc(int test_desc_fd, struct xfrm_desc *desc)
+{
+ ssize_t ret;
+
+ ret = write(test_desc_fd, desc, sizeof(*desc));
+
+ if (ret == sizeof(*desc))
+ return 0;
+
+	pr_err("Writing test's desc failed %zd", ret);
+
+ return -1;
+}
+
+static int write_desc(int proto, int test_desc_fd,
+ char *a, char *e, char *c, char *ae)
+{
+ struct xfrm_desc desc = {};
+
+ desc.type = CREATE_TUNNEL;
+ desc.proto = proto;
+
+ if (a)
+ strncpy(desc.a_algo, a, ALGO_LEN - 1);
+ if (e)
+ strncpy(desc.e_algo, e, ALGO_LEN - 1);
+ if (c)
+ strncpy(desc.c_algo, c, ALGO_LEN - 1);
+ if (ae)
+ strncpy(desc.ae_algo, ae, ALGO_LEN - 1);
+
+ return __write_desc(test_desc_fd, &desc);
+}
+
+int proto_list[] = { IPPROTO_AH, IPPROTO_COMP, IPPROTO_ESP };
+char *ah_list[] = {
+ "digest_null", "hmac(md5)", "hmac(sha1)", "hmac(sha256)",
+ "hmac(sha384)", "hmac(sha512)", "hmac(rmd160)",
+ "xcbc(aes)", "cmac(aes)"
+};
+char *comp_list[] = {
+ "deflate",
+#if 0
+	/* No compression backend implementation */
+ "lzs", "lzjh"
+#endif
+};
+char *e_list[] = {
+ "ecb(cipher_null)", "cbc(des)", "cbc(des3_ede)", "cbc(cast5)",
+ "cbc(blowfish)", "cbc(aes)", "cbc(serpent)", "cbc(camellia)",
+ "cbc(twofish)", "rfc3686(ctr(aes))"
+};
+char *ae_list[] = {
+#if 0
+ /* not implemented */
+ "rfc4106(gcm(aes))", "rfc4309(ccm(aes))", "rfc4543(gcm(aes))",
+ "rfc7539esp(chacha20,poly1305)"
+#endif
+};
+
+const unsigned int proto_plan = ARRAY_SIZE(ah_list) + ARRAY_SIZE(comp_list) \
+ + (ARRAY_SIZE(ah_list) * ARRAY_SIZE(e_list)) \
+ + ARRAY_SIZE(ae_list);
+
+static int write_proto_plan(int fd, int proto)
+{
+ unsigned int i;
+
+ switch (proto) {
+ case IPPROTO_AH:
+ for (i = 0; i < ARRAY_SIZE(ah_list); i++) {
+ if (write_desc(proto, fd, ah_list[i], 0, 0, 0))
+ return -1;
+ }
+ break;
+ case IPPROTO_COMP:
+ for (i = 0; i < ARRAY_SIZE(comp_list); i++) {
+ if (write_desc(proto, fd, 0, 0, comp_list[i], 0))
+ return -1;
+ }
+ break;
+ case IPPROTO_ESP:
+ for (i = 0; i < ARRAY_SIZE(ah_list); i++) {
+ int j;
+
+ for (j = 0; j < ARRAY_SIZE(e_list); j++) {
+ if (write_desc(proto, fd, ah_list[i],
+ e_list[j], 0, 0))
+ return -1;
+ }
+ }
+ for (i = 0; i < ARRAY_SIZE(ae_list); i++) {
+ if (write_desc(proto, fd, 0, 0, 0, ae_list[i]))
+ return -1;
+ }
+ break;
+ default:
+ printk("BUG: Specified unknown proto %d", proto);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Some structures in xfrm uapi header differ in size between
+ * 64-bit and 32-bit ABI:
+ *
+ * 32-bit UABI | 64-bit UABI
+ * -------------------------------------|-------------------------------------
+ * sizeof(xfrm_usersa_info) = 220 | sizeof(xfrm_usersa_info) = 224
+ * sizeof(xfrm_userpolicy_info) = 164 | sizeof(xfrm_userpolicy_info) = 168
+ * sizeof(xfrm_userspi_info) = 228 | sizeof(xfrm_userspi_info) = 232
+ * sizeof(xfrm_user_acquire) = 276 | sizeof(xfrm_user_acquire) = 280
+ * sizeof(xfrm_user_expire) = 224 | sizeof(xfrm_user_expire) = 232
+ * sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176
+ *
+ * Check the structures affected by the UABI difference.
+ */
+const unsigned int compat_plan = 4;
+static int write_compat_struct_tests(int test_desc_fd)
+{
+ struct xfrm_desc desc = {};
+
+ desc.type = ALLOCATE_SPI;
+ desc.proto = IPPROTO_AH;
+ strncpy(desc.a_algo, ah_list[0], ALGO_LEN - 1);
+
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ desc.type = MONITOR_ACQUIRE;
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ desc.type = EXPIRE_STATE;
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ desc.type = EXPIRE_POLICY;
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ return 0;
+}
+
+static int write_test_plan(int test_desc_fd)
+{
+ unsigned int i;
+ pid_t child;
+
+ child = fork();
+ if (child < 0) {
+ pr_err("fork()");
+ return -1;
+ }
+ if (child) {
+ if (close(test_desc_fd))
+ printk("close(): %m");
+ return 0;
+ }
+
+ if (write_compat_struct_tests(test_desc_fd))
+ exit(KSFT_FAIL);
+
+ for (i = 0; i < ARRAY_SIZE(proto_list); i++) {
+ if (write_proto_plan(test_desc_fd, proto_list[i]))
+ exit(KSFT_FAIL);
+ }
+
+ exit(KSFT_PASS);
+}
+
+static int children_cleanup(void)
+{
+ unsigned ret = KSFT_PASS;
+
+ while (1) {
+ int status;
+ pid_t p = wait(&status);
+
+ if ((p < 0) && errno == ECHILD)
+ break;
+
+ if (p < 0) {
+ pr_err("wait()");
+ return KSFT_FAIL;
+ }
+
+ if (!WIFEXITED(status)) {
+ ret = KSFT_FAIL;
+ continue;
+ }
+
+ if (WEXITSTATUS(status) == KSFT_FAIL)
+ ret = KSFT_FAIL;
+ }
+
+ return ret;
+}
+
+typedef void (*print_res)(const char *, ...);
+
+static int check_results(void)
+{
+ struct test_result tr = {};
+ struct xfrm_desc *d = &tr.desc;
+ int ret = KSFT_PASS;
+
+ while (1) {
+ ssize_t received = read(results_fd[0], &tr, sizeof(tr));
+ print_res result;
+
+ if (received == 0) /* EOF */
+ break;
+
+ if (received != sizeof(tr)) {
+ pr_err("read() returned %zd", received);
+ return KSFT_FAIL;
+ }
+
+ switch (tr.res) {
+ case KSFT_PASS:
+ result = ksft_test_result_pass;
+ break;
+ case KSFT_FAIL:
+ default:
+ result = ksft_test_result_fail;
+ ret = KSFT_FAIL;
+ }
+
+ result(" %s: [%u, '%s', '%s', '%s', '%s', %u]\n",
+ desc_name[d->type], (unsigned int)d->proto, d->a_algo,
+ d->e_algo, d->c_algo, d->ae_algo, d->icv_len);
+ }
+
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ unsigned int nr_process = 1;
+ int route_sock = -1, ret = KSFT_SKIP;
+ int test_desc_fd[2];
+ uint32_t route_seq;
+ unsigned int i;
+
+ if (argc > 2)
+ exit_usage(argv);
+
+ if (argc > 1) {
+ char *endptr;
+
+ errno = 0;
+ nr_process = strtol(argv[1], &endptr, 10);
+ if ((errno == ERANGE && (nr_process == LONG_MAX || nr_process == LONG_MIN))
+ || (errno != 0 && nr_process == 0)
+ || (endptr == argv[1]) || (*endptr != '\0')) {
+ printk("Failed to parse [nr_process]");
+ exit_usage(argv);
+ }
+
+ if (nr_process > MAX_PROCESSES || !nr_process) {
+ printk("nr_process should be between [1; %u]",
+ MAX_PROCESSES);
+ exit_usage(argv);
+ }
+ }
+
+ srand(time(NULL));
+ page_size = sysconf(_SC_PAGESIZE);
+ if (page_size < 1)
+ ksft_exit_skip("sysconf(): %m\n");
+
+ if (pipe2(test_desc_fd, O_DIRECT) < 0)
+ ksft_exit_skip("pipe(): %m\n");
+
+ if (pipe2(results_fd, O_DIRECT) < 0)
+ ksft_exit_skip("pipe(): %m\n");
+
+ if (init_namespaces())
+ ksft_exit_skip("Failed to create namespaces\n");
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+ ksft_exit_skip("Failed to open netlink route socket\n");
+
+ for (i = 0; i < nr_process; i++) {
+ char veth[VETH_LEN];
+
+ snprintf(veth, VETH_LEN, VETH_FMT, i);
+
+ if (veth_add(route_sock, route_seq++, veth, nsfd_childa, veth, nsfd_childb)) {
+ close(route_sock);
+ ksft_exit_fail_msg("Failed to create veth device");
+ }
+
+ if (start_child(i, veth, test_desc_fd)) {
+ close(route_sock);
+ ksft_exit_fail_msg("Child %u failed to start", i);
+ }
+ }
+
+ if (close(route_sock) || close(test_desc_fd[0]) || close(results_fd[1]))
+ ksft_exit_fail_msg("close(): %m");
+
+ ksft_set_plan(proto_plan + compat_plan);
+
+ if (write_test_plan(test_desc_fd[1]))
+ ksft_exit_fail_msg("Failed to write test plan to pipe");
+
+ ret = check_results();
+
+ if (children_cleanup() == KSFT_FAIL)
+ exit(KSFT_FAIL);
+
+ exit(ret);
+}
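Usage note (not part of the patch): with the argument handling in main() above, the resulting test binary runs one worker pair when invoked without arguments; an optional single argument — a hypothetical invocation would be "<binary> 4" — selects the number of pairs and is rejected unless it parses as a number in [1; MAX_PROCESSES].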
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index f50976ee7d44..00bb158b4a5d 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -5,7 +5,8 @@ KSFT_KHDR_INSTALL := 1
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include
-TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh
+TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
+ simult_flows.sh
TEST_GEN_FILES = mptcp_connect pm_nl_ctl
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 2499824d9e1c..741a1c4f4ae8 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -1,4 +1,7 @@
CONFIG_MPTCP=y
+CONFIG_IPV6=y
CONFIG_MPTCP_IPV6=y
+CONFIG_INET_DIAG=m
+CONFIG_INET_MPTCP_DIAG=m
CONFIG_VETH=y
CONFIG_NET_SCH_NETEM=m
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
new file mode 100755
index 000000000000..39edce4f541c
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ns="ns1-$rndh"
+ksft_skip=4
+test_cnt=1
+ret=0
+pids=()
+
+flush_pids()
+{
+ # mptcp_connect in join mode will sleep a bit before completing,
+ # give it some time
+ sleep 1.1
+
+ for pid in ${pids[@]}; do
+ [ -d /proc/$pid ] && kill -SIGUSR1 $pid >/dev/null 2>&1
+ done
+ pids=()
+}
+
+cleanup()
+{
+ ip netns del $ns
+ for pid in ${pids[@]}; do
+ [ -d /proc/$pid ] && kill -9 $pid >/dev/null 2>&1
+ done
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+ss -h | grep -q MPTCP
+if [ $? -ne 0 ];then
+ echo "SKIP: ss tool does not support MPTCP"
+ exit $ksft_skip
+fi
+
+__chk_nr()
+{
+ local condition="$1"
+ local expected=$2
+ local msg nr
+
+ shift 2
+ msg=$*
+ nr=$(ss -inmHMN $ns | $condition)
+
+ printf "%-50s" "$msg"
+ if [ $nr != $expected ]; then
+ echo "[ fail ] expected $expected found $nr"
+ ret=$test_cnt
+ else
+ echo "[ ok ]"
+ fi
+ test_cnt=$((test_cnt+1))
+}
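For context (annotation, not part of the patch): in __chk_nr() the "ss -inmHMN $ns" invocation asks ss for extended info (-i), numeric output (-n), socket memory (-m), no header line (-H) and MPTCP sockets only (-M), all evaluated inside netns $ns (-N). A call such as chk_msk_nr 2 "after MPC handshake" therefore reduces to comparing $(ss -inmHMN $ns | grep -c token:) against 2.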
+
+chk_msk_nr()
+{
+ __chk_nr "grep -c token:" $*
+}
+
+chk_msk_fallback_nr()
+{
+ __chk_nr "grep -c fallback" $*
+}
+
+chk_msk_remote_key_nr()
+{
+ __chk_nr "grep -c remote_key" $*
+}
+
+
+trap cleanup EXIT
+ip netns add $ns
+ip -n $ns link set dev lo up
+
+echo "a" | ip netns exec $ns ./mptcp_connect -p 10000 -l 0.0.0.0 -t 100 >/dev/null &
+sleep 0.1
+pids[0]=$!
+chk_msk_nr 0 "no msk on netns creation"
+
+echo "b" | ip netns exec $ns ./mptcp_connect -p 10000 127.0.0.1 -j -t 100 >/dev/null &
+sleep 0.1
+pids[1]=$!
+chk_msk_nr 2 "after MPC handshake "
+chk_msk_remote_key_nr 2 "....chk remote_key"
+chk_msk_fallback_nr 0 "....chk no fallback"
+flush_pids
+
+
+echo "a" | ip netns exec $ns ./mptcp_connect -p 10001 -s TCP -l 0.0.0.0 -t 100 >/dev/null &
+pids[0]=$!
+sleep 0.1
+echo "b" | ip netns exec $ns ./mptcp_connect -p 10001 127.0.0.1 -j -t 100 >/dev/null &
+pids[1]=$!
+sleep 0.1
+chk_msk_fallback_nr 1 "check fallback"
+flush_pids
+
+NR_CLIENTS=100
+for I in `seq 1 $NR_CLIENTS`; do
+ echo "a" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) -l 0.0.0.0 -t 100 -w 10 >/dev/null &
+ pids[$((I*2))]=$!
+done
+sleep 0.1
+
+for I in `seq 1 $NR_CLIENTS`; do
+ echo "b" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) 127.0.0.1 -t 100 -w 10 >/dev/null &
+ pids[$((I*2 + 1))]=$!
+done
+sleep 1.5
+
+chk_msk_nr $((NR_CLIENTS*2)) "many msk sockets present"
+flush_pids
+
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index cedee5b952ba..77bb62feb872 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -11,6 +11,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
+#include <signal.h>
#include <unistd.h>
#include <sys/poll.h>
@@ -36,6 +37,7 @@ extern int optind;
static int poll_timeout = 10 * 1000;
static bool listen_mode;
+static bool quit;
enum cfg_mode {
CFG_MODE_POLL,
@@ -52,22 +54,30 @@ static int pf = AF_INET;
static int cfg_sndbuf;
static int cfg_rcvbuf;
static bool cfg_join;
+static bool cfg_remove;
+static int cfg_wait;
static void die_usage(void)
{
fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
- "[-l] connect_address\n");
+ "[-l] [-w sec] connect_address\n");
fprintf(stderr, "\t-6 use ipv6\n");
fprintf(stderr, "\t-t num -- set poll timeout to num\n");
fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
fprintf(stderr, "\t-p num -- use port num\n");
- fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n");
- fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n");
+ fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
+ fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
fprintf(stderr, "\t-u -- check mptcp ulp\n");
+ fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
exit(1);
}
+static void handle_signal(int nr)
+{
+ quit = true;
+}
+
static const char *getxinfo_strerr(int err)
{
if (err == EAI_SYSTEM)
@@ -262,6 +272,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
if (cfg_join && first && do_w > 100)
do_w = 100;
+ if (cfg_remove && do_w > 50)
+ do_w = 50;
+
bw = write(fd, buf, do_w);
if (bw < 0)
perror("write");
@@ -272,6 +285,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
first = false;
}
+ if (cfg_remove)
+ usleep(200000);
+
return bw;
}
@@ -397,10 +413,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
/* ... but we still receive.
* Close our write side, ev. give some time
- * for address notification
+ * for address notification and/or checking
+ * the current status
*/
- if (cfg_join)
- usleep(400000);
+ if (cfg_wait)
+ usleep(cfg_wait);
shutdown(peerfd, SHUT_WR);
} else {
if (errno == EINTR)
@@ -418,8 +435,8 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
}
/* leave some time for late join/announce */
- if (cfg_join)
- usleep(400000);
+ if (cfg_join || cfg_remove)
+ usleep(cfg_wait);
close(peerfd);
return 0;
@@ -676,7 +693,7 @@ static void maybe_close(int fd)
{
unsigned int r = rand();
- if (!cfg_join && (r & 1))
+ if (!(cfg_join || cfg_remove) && (r & 1))
close(fd);
}
@@ -812,11 +829,17 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:")) != -1) {
+ while ((c = getopt(argc, argv, "6jrlp:s:hut:m:S:R:w:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
cfg_mode = CFG_MODE_POLL;
+ cfg_wait = 400000;
+ break;
+ case 'r':
+ cfg_remove = true;
+ cfg_mode = CFG_MODE_POLL;
+ cfg_wait = 400000;
break;
case 'l':
listen_mode = true;
@@ -850,6 +873,9 @@ static void parse_opts(int argc, char **argv)
case 'R':
cfg_rcvbuf = parse_int(optarg);
break;
+ case 'w':
+ cfg_wait = atoi(optarg)*1000000;
+ break;
}
}
@@ -865,6 +891,7 @@ int main(int argc, char *argv[])
{
init_rng();
+ signal(SIGUSR1, handle_signal);
parse_opts(argc, argv);
if (tcpulp_audit)
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index acf02e156d20..2cfd87d94db8 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -3,7 +3,7 @@
time_start=$(date +%s)
-optstring="S:R:d:e:l:r:h4cm:"
+optstring="S:R:d:e:l:r:h4cm:f:t"
ret=0
sin=""
sout=""
@@ -14,13 +14,14 @@ capture=false
timeout=30
ipv6=true
ethtool_random_on=true
-tc_delay="$((RANDOM%400))"
+tc_delay="$((RANDOM%50))"
tc_loss=$((RANDOM%101))
-tc_reorder=""
testmode=""
sndbuf=0
rcvbuf=0
options_log=true
+do_tcp=0
+filesize=0
if [ $tc_loss -eq 100 ];then
tc_loss=1%
@@ -40,9 +41,11 @@ usage() {
echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)"
echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
+ echo -e "\t-f: size of file to transfer in bytes (default random)"
echo -e "\t-S: set sndbuf value (default: use kernel default)"
echo -e "\t-R: set rcvbuf value (default: use kernel default)"
echo -e "\t-m: test mode (poll, sendfile; default: poll)"
+ echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)"
}
while getopts "$optstring" option;do
@@ -94,6 +97,12 @@ while getopts "$optstring" option;do
"m")
testmode="$OPTARG"
;;
+ "f")
+ filesize="$OPTARG"
+ ;;
+ "t")
+ do_tcp=$((do_tcp+1))
+ ;;
"?")
usage $0
exit 1
@@ -186,6 +195,9 @@ ip -net "$ns4" link set ns4eth3 up
ip -net "$ns4" route add default via 10.0.3.2
ip -net "$ns4" route add default via dead:beef:3::2
+# use TCP syn cookies, even if no flooding was detected.
+ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+
set_ethtool_flags() {
local ns="$1"
local dev="$2"
@@ -385,14 +397,23 @@ do_transfer()
capuser="-Z $SUDO_USER"
fi
- local capfile="${listener_ns}-${connector_ns}-${cl_proto}-${srv_proto}-${connect_addr}.pcap"
+ local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}"
+ local capopt="-i any -s 65535 -B 32768 ${capuser}"
+
+ ip netns exec ${listener_ns} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
+ local cappid_listener=$!
- ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
- local cappid=$!
+ ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+ local cappid_connector=$!
sleep 1
fi
+ local stat_synrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do echo $count;done)
+ local stat_ackrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do echo $count;done)
+ local stat_cookietx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do echo $count;done)
+ local stat_cookierx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do echo $count;done)
+
ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" &
local spid=$!
@@ -413,7 +434,8 @@ do_transfer()
if $capture; then
sleep 1
- kill $cappid
+ kill ${cappid_listener}
+ kill ${cappid_connector}
fi
local duration
@@ -421,9 +443,9 @@ do_transfer()
duration=$(printf "(duration %05sms)" $duration)
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2
- echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2
+ echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
- echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2
+ echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
cat "$capout"
@@ -435,6 +457,45 @@ do_transfer()
check_transfer $cin $sout "file received by server"
rets=$?
+ local stat_synrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do echo $count;done)
+ local stat_ackrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do echo $count;done)
+
+ local stat_cookietx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do echo $count;done)
+ local stat_cookierx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do echo $count;done)
+
+ expect_synrx=$((stat_synrx_last_l))
+ expect_ackrx=$((stat_ackrx_last_l))
+
+ cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies)
+ cookies=${cookies##*=}
+
+ if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
+ expect_synrx=$((stat_synrx_last_l+1))
+ expect_ackrx=$((stat_ackrx_last_l+1))
+ fi
+ if [ $cookies -eq 2 ];then
+ if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
+ echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: did not advance"
+ fi
+ if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then
+ echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: did not advance"
+ fi
+ else
+ if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then
+ echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: changed"
+ fi
+ if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then
+ echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: changed"
+ fi
+ fi
+
+ if [ $expect_synrx -ne $stat_synrx_now_l ] ;then
+ echo "${listener_ns} SYNRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}"
+ fi
+ if [ $expect_ackrx -ne $stat_ackrx_now_l ] ;then
+ echo "${listener_ns} ACKRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}"
+ fi
+
if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
echo "$duration [ OK ]"
cat "$capout"
@@ -449,20 +510,25 @@ make_file()
{
local name=$1
local who=$2
+ local SIZE=$filesize
+ local ksize
+ local rem
+
+ if [ $SIZE -eq 0 ]; then
+ local MAXSIZE=$((1024 * 1024 * 8))
+ local MINSIZE=$((1024 * 256))
- local SIZE TSIZE
- SIZE=$((RANDOM % (1024 * 8)))
- TSIZE=$((SIZE * 1024))
+ SIZE=$(((RANDOM * RANDOM + MINSIZE) % MAXSIZE))
+ fi
- dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+ ksize=$((SIZE / 1024))
+ rem=$((SIZE - (ksize * 1024)))
- SIZE=$((RANDOM % 1024))
- SIZE=$((SIZE + 128))
- TSIZE=$((TSIZE + SIZE))
- dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
+ dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null
+ dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$rem 2> /dev/null
echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
- echo "Created $name (size $TSIZE) containing data sent by $who"
+ echo "Created $name (size $(du -b "$name")) containing data sent by $who"
}
run_tests_lo()
@@ -497,9 +563,11 @@ run_tests_lo()
return 1
fi
- # don't bother testing fallback tcp except for loopback case.
- if [ ${listener_ns} != ${connector_ns} ]; then
- return 0
+ if [ $do_tcp -eq 0 ]; then
+ # don't bother testing fallback tcp except for loopback case.
+ if [ ${listener_ns} != ${connector_ns} ]; then
+ return 0
+ fi
fi
do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr}
@@ -516,6 +584,15 @@ run_tests_lo()
return 1
fi
+ if [ $do_tcp -gt 1 ] ;then
+ do_transfer ${listener_ns} ${connector_ns} TCP TCP ${connect_addr} ${local_addr}
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ return 1
+ fi
+ fi
+
return 0
}
@@ -550,30 +627,32 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
do_ping "$ns4" $sender dead:beef:3::1
done
-[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss
+[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
echo -n "INFO: Using loss of $tc_loss "
test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
+reorder_delay=$(($tc_delay / 4))
+
if [ -z "${tc_reorder}" ]; then
reorder1=$((RANDOM%10))
reorder1=$((100 - reorder1))
reorder2=$((RANDOM%100))
- if [ $tc_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
+ if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
tc_reorder="reorder ${reorder1}% ${reorder2}%"
- echo -n "$tc_reorder "
+ echo -n "$tc_reorder with delay ${reorder_delay}ms "
fi
elif [ "$tc_reorder" = "0" ];then
tc_reorder=""
-elif [ "$tc_delay" -gt 0 ];then
+elif [ "$reorder_delay" -gt 0 ];then
# reordering requires some delay
tc_reorder="reorder $tc_reorder"
- echo -n "$tc_reorder "
+ echo -n "$tc_reorder with delay ${reorder_delay}ms "
fi
echo "on ns3eth4"
-tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${tc_delay}ms $tc_reorder
+tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
for sender in $ns1 $ns2 $ns3 $ns4;do
run_tests_lo "$ns1" "$sender" 10.0.1.1 1
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index dd42c2f692d0..08f53d86dedc 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -8,6 +8,7 @@ cin=""
cout=""
ksft_skip=4
timeout=30
+mptcp_connect=""
capture=0
TEST_COUNT=0
@@ -72,6 +73,15 @@ reset()
init
}
+reset_with_cookies()
+{
+ reset
+
+ for netns in "$ns1" "$ns2";do
+ ip netns exec $netns sysctl -q net.ipv4.tcp_syncookies=2
+ done
+}
+
for arg in "$@"; do
if [ "$arg" = "-c" ]; then
capture=1
@@ -123,6 +133,8 @@ do_transfer()
cl_proto="$3"
srv_proto="$4"
connect_addr="$5"
+ rm_nr_ns1="$6"
+ rm_nr_ns2="$7"
port=$((10000+$TEST_COUNT))
TEST_COUNT=$((TEST_COUNT+1))
@@ -138,7 +150,7 @@ do_transfer()
capuser="-Z $SUDO_USER"
fi
- capfile="mp_join-${listener_ns}.pcap"
+ capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}")
echo "Capturing traffic for test $TEST_COUNT into $capfile"
ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
@@ -147,14 +159,44 @@ do_transfer()
sleep 1
fi
- ip netns exec ${listener_ns} ./mptcp_connect -j -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
+ if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then
+ mptcp_connect="./mptcp_connect -j"
+ else
+ mptcp_connect="./mptcp_connect -r"
+ fi
+
+ ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
spid=$!
sleep 1
- ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
+ ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
cpid=$!
+ if [ $rm_nr_ns1 -gt 0 ]; then
+ counter=1
+ sleep 1
+
+ while [ $counter -le $rm_nr_ns1 ]
+ do
+ ip netns exec ${listener_ns} ./pm_nl_ctl del $counter
+ sleep 1
+ let counter+=1
+ done
+ fi
+
+ if [ $rm_nr_ns2 -gt 0 ]; then
+ counter=1
+ sleep 1
+
+ while [ $counter -le $rm_nr_ns2 ]
+ do
+ ip netns exec ${connector_ns} ./pm_nl_ctl del $counter
+ sleep 1
+ let counter+=1
+ done
+ fi
+
wait $cpid
retc=$?
wait $spid
@@ -167,9 +209,9 @@ do_transfer()
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
echo " client exit code $retc, server $rets" 1>&2
- echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2
+ echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
- echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2
+ echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
cat "$capout"
@@ -210,7 +252,24 @@ run_tests()
connect_addr="$3"
lret=0
- do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ return
+ fi
+}
+
+run_remove_tests()
+{
+ listener_ns="$1"
+ connector_ns="$2"
+ connect_addr="$3"
+ rm_nr_ns1="$4"
+ rm_nr_ns2="$5"
+ lret=0
+
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2}
lret=$?
if [ $lret -ne 0 ]; then
ret=$lret
@@ -227,7 +286,7 @@ chk_join_nr()
local count
local dump_stats
- printf "%-36s %s" "$msg" "syn"
+ printf "%02u %-36s %s" "$TEST_COUNT" "$msg" "syn"
count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'`
[ -z "$count" ] && count=0
if [ "$count" != "$syn_nr" ]; then
@@ -267,6 +326,80 @@ chk_join_nr()
fi
}
+chk_add_nr()
+{
+ local add_nr=$1
+ local echo_nr=$2
+ local count
+ local dump_stats
+
+ printf "%-39s %s" " " "add"
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtAddAddr | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$add_nr" ]; then
+ echo "[fail] got $count ADD_ADDR[s] expected $add_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - echo "
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$echo_nr" ]; then
+ echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
+ fi
+}
+
+chk_rm_nr()
+{
+ local rm_addr_nr=$1
+ local rm_subflow_nr=$2
+ local count
+ local dump_stats
+
+ printf "%-39s %s" " " "rm "
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$rm_addr_nr" ]; then
+ echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - sf "
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$rm_subflow_nr" ]; then
+ echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
+ fi
+}
+
sin=$(mktemp)
sout=$(mktemp)
cin=$(mktemp)
@@ -323,6 +456,7 @@ reset
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "unused signal address" 0 0 0
+chk_add_nr 1 1
# accept and use add_addr
reset
@@ -331,6 +465,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal address" 1 1 1
+chk_add_nr 1 1
# accept and use add_addr with an additional subflow
# note: signal address in server ns and local addresses in client ns must
@@ -343,6 +478,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflow and signal" 2 2 2
+chk_add_nr 1 1
# accept and use add_addr with additional subflows
reset
@@ -353,5 +489,114 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows and signal" 3 3 3
+chk_add_nr 1 1
+
+# single subflow, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 0 1
+chk_join_nr "remove single subflow" 1 1 1
+chk_rm_nr 1 1
+
+# multiple subflows, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 0 2
+chk_join_nr "remove multiple subflows" 2 2 2
+chk_rm_nr 2 2
+
+# single address, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+run_remove_tests $ns1 $ns2 10.0.1.1 1 0
+chk_join_nr "remove single address" 1 1 1
+chk_add_nr 1 1
+chk_rm_nr 0 0
+
+# subflow and signal, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 1 1
+chk_join_nr "remove subflow and signal" 2 2 2
+chk_add_nr 1 1
+chk_rm_nr 1 1
+
+# subflows and signal, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 1 2
+chk_join_nr "remove subflows and signal" 3 3 3
+chk_add_nr 1 1
+chk_rm_nr 2 2
+
+# single subflow, syncookies
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "single subflow with syn cookies" 1 1 1
+
+# multiple subflows with syn cookies
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "multiple subflows with syn cookies" 2 2 2
+
+# multiple subflows limited by server
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "subflows limited by server w cookies" 2 2 1
+
+# test signal address with cookies
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "signal address with syn cookies" 1 1 1
+chk_add_nr 1 1
+
+# test cookie with subflow and signal
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "subflow and signal w cookies" 2 2 2
+chk_add_nr 1 1
+
+# accept and use add_addr with additional subflows
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "subflows and signal w. cookies" 3 3 3
+chk_add_nr 1 1
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
new file mode 100755
index 000000000000..2f649b431456
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -0,0 +1,293 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ns1="ns1-$rndh"
+ns2="ns2-$rndh"
+ns3="ns3-$rndh"
+capture=false
+ksft_skip=4
+timeout=30
+test_cnt=1
+ret=0
+bail=0
+
+usage() {
+ echo "Usage: $0 [ -b ] [ -c ] [ -d ]"
+ echo -e "\t-b: bail out after first error, otherwise runs al testcases"
+ echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
+ echo -e "\t-d: debug this script"
+}
+
+cleanup()
+{
+ rm -f "$cin" "$cout"
+ rm -f "$sin" "$sout"
+ rm -f "$capout"
+
+ local netns
+ for netns in "$ns1" "$ns2" "$ns3";do
+ ip netns del $netns
+ done
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+# "$ns1" ns2 ns3
+# ns1eth1 ns2eth1 ns2eth3 ns3eth1
+# netem
+# ns1eth2 ns2eth2
+# netem
+
+setup()
+{
+ large=$(mktemp)
+ small=$(mktemp)
+ sout=$(mktemp)
+ cout=$(mktemp)
+ capout=$(mktemp)
+ size=$((2048 * 4096))
+ dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1
+ dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1
+
+ trap cleanup EXIT
+
+ for i in "$ns1" "$ns2" "$ns3";do
+ ip netns add $i || exit $ksft_skip
+ ip -net $i link set lo up
+ done
+
+ ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+ ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2"
+ ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth1 netns "$ns3"
+
+ ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth1
+ ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth1 nodad
+ ip -net "$ns1" link set ns1eth1 up mtu 1500
+ ip -net "$ns1" route add default via 10.0.1.2
+ ip -net "$ns1" route add default via dead:beef:1::2
+
+ ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
+ ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
+ ip -net "$ns1" link set ns1eth2 up mtu 1500
+ ip -net "$ns1" route add default via 10.0.2.2 metric 101
+ ip -net "$ns1" route add default via dead:beef:2::2 metric 101
+
+ ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
+ ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
+ ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0
+
+ ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
+ ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
+ ip -net "$ns2" link set ns2eth1 up mtu 1500
+
+ ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth2
+ ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth2 nodad
+ ip -net "$ns2" link set ns2eth2 up mtu 1500
+
+ ip -net "$ns2" addr add 10.0.3.2/24 dev ns2eth3
+ ip -net "$ns2" addr add dead:beef:3::2/64 dev ns2eth3 nodad
+ ip -net "$ns2" link set ns2eth3 up mtu 1500
+ ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1
+ ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1
+
+ ip -net "$ns3" addr add 10.0.3.3/24 dev ns3eth1
+ ip -net "$ns3" addr add dead:beef:3::3/64 dev ns3eth1 nodad
+ ip -net "$ns3" link set ns3eth1 up mtu 1500
+ ip -net "$ns3" route add default via 10.0.3.2
+ ip -net "$ns3" route add default via dead:beef:3::2
+
+ ip netns exec "$ns3" ./pm_nl_ctl limits 1 1
+}
+
+# $1: ns, $2: port
+wait_local_port_listen()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+
+ local port_hex i
+
+ port_hex="$(printf "%04X" "${port}")"
+ for i in $(seq 10); do
+ ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
+ awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
+ break
+ sleep 0.1
+ done
+}
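Annotation (not part of the patch): in /proc/net/tcp* the second field is local_address:port in hex and the fourth field is the socket state, 0A being TCP_LISTEN, so the awk filter above simply waits for the port to enter the LISTEN state. For port 10001 (0x2711) a matching, purely illustrative, entry would look roughly like:

   0: 00000000:2711 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 12345 1 0000000000000000 100 0 0 10 0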
+
+do_transfer()
+{
+ local cin=$1
+ local sin=$2
+ local max_time=$3
+ local port
+ port=$((10000+$test_cnt))
+ test_cnt=$((test_cnt+1))
+
+ :> "$cout"
+ :> "$sout"
+ :> "$capout"
+
+ local addr_port
+ addr_port=$(printf "%s:%d" ${connect_addr} ${port})
+
+ if $capture; then
+ local capuser
+ if [ -z $SUDO_USER ] ; then
+ capuser=""
+ else
+ capuser="-Z $SUDO_USER"
+ fi
+
+ local capfile="${rndh}-${port}"
+ local capopt="-i any -s 65535 -B 32768 ${capuser}"
+
+ ip netns exec ${ns3} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
+ local cappid_listener=$!
+
+ ip netns exec ${ns1} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+ local cappid_connector=$!
+
+ sleep 1
+ fi
+
+ ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" &
+ local spid=$!
+
+ wait_local_port_listen "${ns3}" "${port}"
+
+ local start
+ start=$(date +%s%3N)
+ ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" &
+ local cpid=$!
+
+ wait $cpid
+ local retc=$?
+ wait $spid
+ local rets=$?
+
+ local stop
+ stop=$(date +%s%3N)
+
+ if $capture; then
+ sleep 1
+ kill ${cappid_listener}
+ kill ${cappid_connector}
+ fi
+
+ local duration
+ duration=$((stop-start))
+
+ cmp $sin $cout > /dev/null 2>&1
+ local cmps=$?
+ cmp $cin $sout > /dev/null 2>&1
+ local cmpc=$?
+
+ printf "%16s" "$duration max $max_time "
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
+ [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && \
+ [ $duration -lt $max_time ]; then
+ echo "[ OK ]"
+ cat "$capout"
+ return 0
+ fi
+
+ echo " [ fail ]"
+ echo "client exit code $retc, server $rets" 1>&2
+ echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
+ ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port"
+ echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2
+ ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port"
+ ls -l $sin $cout
+ ls -l $cin $sout
+
+ cat "$capout"
+ return 1
+}
+
+run_test()
+{
+ local rate1=$1
+ local rate2=$2
+ local delay1=$3
+ local delay2=$4
+ local lret
+ local dev
+ shift 4
+ local msg=$*
+
+ [ $delay1 -gt 0 ] && delay1="delay $delay1" || delay1=""
+ [ $delay2 -gt 0 ] && delay2="delay $delay2" || delay2=""
+
+ for dev in ns1eth1 ns1eth2; do
+ tc -n $ns1 qdisc del dev $dev root >/dev/null 2>&1
+ done
+ for dev in ns2eth1 ns2eth2; do
+ tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1
+ done
+ tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1
+ tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2
+ tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
+ tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
+
+ # time is measured in ms
+ local time=$((size * 8 * 1000 / (( $rate1 + $rate2) * 1024 *1024) ))
+
+ # mptcp_connect will do some sleeps to allow the mp_join handshake
+ # to complete
+ time=$((time + 1350))
+
+ printf "%-50s" "$msg"
+ do_transfer $small $large $((time * 11 / 10))
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ [ $bail -eq 0 ] || exit $ret
+ fi
+
+ printf "%-50s" "$msg - reverse direction"
+ do_transfer $large $small $((time * 11 / 10))
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ [ $bail -eq 0 ] || exit $ret
+ fi
+}
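To put numbers on the budget computed in run_test() above: with size = 2048 * 4096 = 8388608 bytes and the "run_test 10 10 0 0" invocation below, time = 8388608 * 8 * 1000 / (20 * 1024 * 1024) = 3200 ms; the 1350 ms join allowance brings that to 4550 ms, so each do_transfer call is given 4550 * 11 / 10 = 5005 ms before the transfer is flagged as too slow.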
+
+while getopts "bcdh" option;do
+ case "$option" in
+ "h")
+ usage $0
+ exit 0
+ ;;
+ "b")
+ bail=1
+ ;;
+ "c")
+ capture=true
+ ;;
+ "d")
+ set -x
+ ;;
+ "?")
+ usage $0
+ exit 1
+ ;;
+ esac
+done
+
+setup
+run_test 10 10 0 0 "balanced bwidth"
+run_test 10 10 1 50 "balanced bwidth with unbalanced delay"
+
+# we still need some additional infrastructure to pass the following test-cases
+# run_test 30 10 0 0 "unbalanced bwidth"
+# run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
+# run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
+exit $ret
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 4b02933cab8a..bdc03a2097e8 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -125,9 +125,8 @@ static int do_setcpu(int cpu)
CPU_ZERO(&mask);
CPU_SET(cpu, &mask);
if (sched_setaffinity(0, sizeof(mask), &mask))
- error(1, 0, "setaffinity %d", cpu);
-
- if (cfg_verbose)
+ fprintf(stderr, "cpu: unable to pin, may increase variance.\n");
+ else if (cfg_verbose)
fprintf(stderr, "cpu: %u\n", cpu);
return 0;
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index 93208caacbe6..f75c53ce0a2d 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -1667,6 +1667,8 @@ int main(int argc, char *argv[])
case 'R':
args.type = SOCK_RAW;
args.port = 0;
+ if (!args.protocol)
+ args.protocol = IPPROTO_RAW;
break;
case 'P':
pe = getprotobyname(optarg);
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 71a62e7e35b1..6bbf69a28e12 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -59,6 +59,45 @@
# Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of
# VXLAN
#
+# - pmtu_ipv{4,6}_br_vxlan{4,6}_exception
+# Set up three namespaces, A, B, and C, with routing between A and B over
+# R1. R2 is unused in these tests. A has a veth connection to C, and is
+# connected to B via a VXLAN endpoint, which is directly bridged to C.
+# MTU on the B-R1 link is lower than other MTUs.
+#
+# Check that both C and A are able to communicate with B over the VXLAN
+# tunnel, and that PMTU exceptions with the correct values are created.
+#
+# segment a_r1 segment b_r1 b_r1: 4000
+# .--------------R1--------------. everything
+# C---veth A B else: 5000
+# ' bridge |
+# '---- - - - - - VXLAN - - - - - - - '
+#
+# - pmtu_ipv{4,6}_br_geneve{4,6}_exception
+# Same as pmtu_ipv{4,6}_br_vxlan{4,6}_exception, with a GENEVE tunnel
+# instead.
+#
+# - pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception
+# Set up two namespaces, B, and C, with routing between the init namespace
+# and B over R1. A and R2 are unused in these tests. The init namespace
+# has a veth connection to C, and is connected to B via a VXLAN endpoint,
+# which is handled by Open vSwitch and bridged to C. MTU on the B-R1 link
+# is lower than other MTUs.
+#
+# Check that C is able to communicate with B over the VXLAN tunnel, and
+# that PMTU exceptions with the correct values are created.
+#
+# segment a_r1 segment b_r1 b_r1: 4000
+# .--------------R1--------------. everything
+# C---veth init B else: 5000
+# '- ovs |
+# '---- - - - - - VXLAN - - - - - - - '
+#
+# - pmtu_ipv{4,6}_ovs_geneve{4,6}_exception
+# Same as pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception, with a GENEVE tunnel
+# instead.
+#
# - pmtu_ipv{4,6}_fou{4,6}_exception
# Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation
# (FoU) over IPv4/IPv6, instead of VXLAN
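As an aside on the bridged and Open vSwitch cases described above: the "PMTU exceptions with the correct values" can also be checked by hand from namespace C after an oversized ping. Illustrative only, reusing the tunnel4_b_addr variable defined further down in this script: running "ip netns exec ns-C ip route get ${tunnel4_b_addr}" is expected to report a cached route exception carrying "mtu 3950" when the link-layer MTU is 4000, i.e. 4000 - 20 (IPv4) - 8 (UDP) - 8 (VXLAN/GENEVE) - 14 (Ethernet).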
@@ -67,6 +106,10 @@
# Same as pmtu_ipv4_vxlan4, but using a generic UDP IPv4/IPv6
# encapsulation (GUE) over IPv4/IPv6, instead of VXLAN
#
+# - pmtu_ipv{4,6}_ipv{4,6}_exception
+# Same as pmtu_ipv4_vxlan4, but using a IPv4/IPv6 tunnel over IPv4/IPv6,
+# instead of VXLAN
+#
# - pmtu_vti4_exception
# Set up vti tunnel on top of veth, with xfrm states and policies, in two
# namespaces with matching endpoints. Check that route exception is not
@@ -143,6 +186,22 @@ tests="
pmtu_ipv6_geneve4_exception IPv6 over geneve4: PMTU exceptions 1
pmtu_ipv4_geneve6_exception IPv4 over geneve6: PMTU exceptions 1
pmtu_ipv6_geneve6_exception IPv6 over geneve6: PMTU exceptions 1
+ pmtu_ipv4_br_vxlan4_exception IPv4, bridged vxlan4: PMTU exceptions 1
+ pmtu_ipv6_br_vxlan4_exception IPv6, bridged vxlan4: PMTU exceptions 1
+ pmtu_ipv4_br_vxlan6_exception IPv4, bridged vxlan6: PMTU exceptions 1
+ pmtu_ipv6_br_vxlan6_exception IPv6, bridged vxlan6: PMTU exceptions 1
+ pmtu_ipv4_br_geneve4_exception IPv4, bridged geneve4: PMTU exceptions 1
+ pmtu_ipv6_br_geneve4_exception IPv6, bridged geneve4: PMTU exceptions 1
+ pmtu_ipv4_br_geneve6_exception IPv4, bridged geneve6: PMTU exceptions 1
+ pmtu_ipv6_br_geneve6_exception IPv6, bridged geneve6: PMTU exceptions 1
+ pmtu_ipv4_ovs_vxlan4_exception IPv4, OVS vxlan4: PMTU exceptions 1
+ pmtu_ipv6_ovs_vxlan4_exception IPv6, OVS vxlan4: PMTU exceptions 1
+ pmtu_ipv4_ovs_vxlan6_exception IPv4, OVS vxlan6: PMTU exceptions 1
+ pmtu_ipv6_ovs_vxlan6_exception IPv6, OVS vxlan6: PMTU exceptions 1
+ pmtu_ipv4_ovs_geneve4_exception IPv4, OVS geneve4: PMTU exceptions 1
+ pmtu_ipv6_ovs_geneve4_exception IPv6, OVS geneve4: PMTU exceptions 1
+ pmtu_ipv4_ovs_geneve6_exception IPv4, OVS geneve6: PMTU exceptions 1
+ pmtu_ipv6_ovs_geneve6_exception IPv6, OVS geneve6: PMTU exceptions 1
pmtu_ipv4_fou4_exception IPv4 over fou4: PMTU exceptions 1
pmtu_ipv6_fou4_exception IPv6 over fou4: PMTU exceptions 1
pmtu_ipv4_fou6_exception IPv4 over fou6: PMTU exceptions 1
@@ -151,6 +210,10 @@ tests="
pmtu_ipv6_gue4_exception IPv6 over gue4: PMTU exceptions 1
pmtu_ipv4_gue6_exception IPv4 over gue6: PMTU exceptions 1
pmtu_ipv6_gue6_exception IPv6 over gue6: PMTU exceptions 1
+ pmtu_ipv4_ipv4_exception IPv4 over IPv4: PMTU exceptions 1
+ pmtu_ipv6_ipv4_exception IPv6 over IPv4: PMTU exceptions 1
+ pmtu_ipv4_ipv6_exception IPv4 over IPv6: PMTU exceptions 1
+ pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1
pmtu_vti6_exception vti6: PMTU exceptions 0
pmtu_vti4_exception vti4: PMTU exceptions 0
pmtu_vti4_default_mtu vti4: default MTU assignment 0
@@ -165,10 +228,12 @@ tests="
NS_A="ns-A"
NS_B="ns-B"
+NS_C="ns-C"
NS_R1="ns-R1"
NS_R2="ns-R2"
ns_a="ip netns exec ${NS_A}"
ns_b="ip netns exec ${NS_B}"
+ns_c="ip netns exec ${NS_C}"
ns_r1="ip netns exec ${NS_R1}"
ns_r2="ip netns exec ${NS_R2}"
@@ -231,9 +296,11 @@ routes_nh="
veth4_a_addr="192.168.1.1"
veth4_b_addr="192.168.1.2"
+veth4_c_addr="192.168.2.10"
veth4_mask="24"
veth6_a_addr="fd00:1::a"
veth6_b_addr="fd00:1::b"
+veth6_c_addr="fd00:2::c"
veth6_mask="64"
tunnel4_a_addr="192.168.2.1"
@@ -363,8 +430,64 @@ setup_gue66() {
setup_fou_or_gue 6 6 gue
}
+setup_ipvX_over_ipvY() {
+ inner=${1}
+ outer=${2}
+
+ if [ "${outer}" -eq 4 ]; then
+ a_addr="${prefix4}.${a_r1}.1"
+ b_addr="${prefix4}.${b_r1}.1"
+ if [ "${inner}" -eq 4 ]; then
+ type="ipip"
+ mode="ipip"
+ else
+ type="sit"
+ mode="ip6ip"
+ fi
+ else
+ a_addr="${prefix6}:${a_r1}::1"
+ b_addr="${prefix6}:${b_r1}::1"
+ type="ip6tnl"
+ if [ "${inner}" -eq 4 ]; then
+ mode="ipip6"
+ else
+ mode="ip6ip6"
+ fi
+ fi
+
+ run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2
+ run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode}
+
+ run_cmd ${ns_a} ip link set ip_a up
+ run_cmd ${ns_b} ip link set ip_b up
+
+ if [ "${inner}" = "4" ]; then
+ run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ip_a
+ run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ip_b
+ else
+ run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ip_a
+ run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ip_b
+ fi
+}
+
+setup_ip4ip4() {
+ setup_ipvX_over_ipvY 4 4
+}
+
+setup_ip6ip4() {
+ setup_ipvX_over_ipvY 6 4
+}
+
+setup_ip4ip6() {
+ setup_ipvX_over_ipvY 4 6
+}
+
+setup_ip6ip6() {
+ setup_ipvX_over_ipvY 6 6
+}
+
setup_namespaces() {
- for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+ for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
ip netns add ${n} || return 1
# Disable DAD, so that we don't have to wait to use the
@@ -420,6 +543,7 @@ setup_vxlan_or_geneve() {
a_addr="${2}"
b_addr="${3}"
opts="${4}"
+ br_if_a="${5}"
if [ "${type}" = "vxlan" ]; then
opts="${opts} ttl 64 dstport 4789"
@@ -433,10 +557,16 @@ setup_vxlan_or_geneve() {
run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1
run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}
- run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
- run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
+ if [ -n "${br_if_a}" ]; then
+ run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${br_if_a}
+ run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${br_if_a}
+ run_cmd ${ns_a} ip link set ${type}_a master ${br_if_a}
+ else
+ run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
+ run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
+ fi
- run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
+ run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
run_cmd ${ns_a} ip link set ${type}_a up
@@ -452,11 +582,27 @@ setup_vxlan4() {
}
setup_geneve6() {
- setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+ setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ""
}
setup_vxlan6() {
- setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+ setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ""
+}
+
+setup_bridged_geneve4() {
+ setup_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" "br0"
+}
+
+setup_bridged_vxlan4() {
+ setup_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" "br0"
+}
+
+setup_bridged_geneve6() {
+ setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0"
+}
+
+setup_bridged_vxlan6() {
+ setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0"
}
setup_xfrm() {
@@ -566,6 +712,80 @@ setup_routing() {
return 0
}
+setup_bridge() {
+ run_cmd ${ns_a} ip link add br0 type bridge || return 2
+ run_cmd ${ns_a} ip link set br0 up
+
+ run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
+ run_cmd ${ns_c} ip link set veth_A-C netns ns-A
+
+ run_cmd ${ns_a} ip link set veth_A-C up
+ run_cmd ${ns_c} ip link set veth_C-A up
+ run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
+ run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
+ run_cmd ${ns_a} ip link set veth_A-C master br0
+}
+
+setup_ovs_vxlan_or_geneve() {
+ type="${1}"
+ a_addr="${2}"
+ b_addr="${3}"
+
+ if [ "${type}" = "vxlan" ]; then
+ opts="${opts} ttl 64 dstport 4789"
+ opts_b="local ${b_addr}"
+ fi
+
+ run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \
+ set interface ${type}_a type=${type} \
+ options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1
+
+ run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} || return 1
+
+ run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
+ run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
+
+ run_cmd ${ns_b} ip link set ${type}_b up
+}
+
+setup_ovs_geneve4() {
+ setup_ovs_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1
+}
+
+setup_ovs_vxlan4() {
+ setup_ovs_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1
+}
+
+setup_ovs_geneve6() {
+ setup_ovs_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+}
+
+setup_ovs_vxlan6() {
+ setup_ovs_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+}
+
+setup_ovs_bridge() {
+ run_cmd ovs-vsctl add-br ovs_br0 || return 2
+ run_cmd ip link set ovs_br0 up
+
+ run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
+ run_cmd ${ns_c} ip link set veth_A-C netns 1
+
+ run_cmd ip link set veth_A-C up
+ run_cmd ${ns_c} ip link set veth_C-A up
+ run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
+ run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
+ run_cmd ovs-vsctl add-port ovs_br0 veth_A-C
+
+ # Move veth_A-R1 to init
+ run_cmd ${ns_a} ip link set veth_A-R1 netns 1
+ run_cmd ip addr add ${prefix4}.${a_r1}.1/${veth4_mask} dev veth_A-R1
+ run_cmd ip addr add ${prefix6}:${a_r1}::1/${veth6_mask} dev veth_A-R1
+ run_cmd ip link set veth_A-R1 up
+ run_cmd ip route add ${prefix4}.${b_r1}.1 via ${prefix4}.${a_r1}.2
+ run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2
+}
+
setup() {
[ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip
@@ -593,9 +813,14 @@ cleanup() {
done
tcpdump_pids=
- for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+ for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
ip netns del ${n} 2> /dev/null
done
+
+ ip link del veth_A-C 2>/dev/null
+ ip link del veth_A-R1 2>/dev/null
+ ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null
+ ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null
}
mtu() {
@@ -828,6 +1053,177 @@ test_pmtu_ipv6_geneve6_exception() {
test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6
}
+test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
+ type=${1}
+ family=${2}
+ outer_family=${3}
+ ll_mtu=4000
+
+ if [ ${outer_family} -eq 4 ]; then
+ setup namespaces routing bridge bridged_${type}4 || return 2
+ # IPv4 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
+ else
+ setup namespaces routing bridge bridged_${type}6 || return 2
+ # IPv6 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
+ fi
+
+ trace "${ns_a}" ${type}_a "${ns_b}" ${type}_b \
+ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \
+ "${ns_a}" br0 "${ns_a}" veth-A-C \
+ "${ns_c}" veth_C-A
+
+ if [ ${family} -eq 4 ]; then
+ ping=ping
+ dst=${tunnel4_b_addr}
+ else
+ ping=${ping6}
+ dst=${tunnel6_b_addr}
+ fi
+
+ # Create route exception by exceeding link layer MTU
+ mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000))
+ mtu "${ns_a}" br0 $((${ll_mtu} + 1000))
+ mtu "${ns_a}" veth_A-C $((${ll_mtu} + 1000))
+ mtu "${ns_c}" veth_C-A $((${ll_mtu} + 1000))
+ mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+ mtu "${ns_b}" veth_B-R1 ${ll_mtu}
+ mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+ mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
+ mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
+
+ run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 10 -s $((${ll_mtu} + 500)) ${dst} || return 1
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} || return 1
+
+ # Check that exceptions were created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface"
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface"
+}
+
+test_pmtu_ipv4_br_vxlan4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 4
+}
+
+test_pmtu_ipv6_br_vxlan4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 4
+}
+
+test_pmtu_ipv4_br_geneve4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 4
+}
+
+test_pmtu_ipv6_br_geneve4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 4
+}
+
+test_pmtu_ipv4_br_vxlan6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 6
+}
+
+test_pmtu_ipv6_br_vxlan6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 6
+}
+
+test_pmtu_ipv4_br_geneve6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 6
+}
+
+test_pmtu_ipv6_br_geneve6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 6
+}
+
+test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() {
+ type=${1}
+ family=${2}
+ outer_family=${3}
+ ll_mtu=4000
+
+ if [ ${outer_family} -eq 4 ]; then
+ setup namespaces routing ovs_bridge ovs_${type}4 || return 2
+ # IPv4 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
+ else
+ setup namespaces routing ovs_bridge ovs_${type}6 || return 2
+ # IPv6 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
+ fi
+
+ if [ "${type}" = "vxlan" ]; then
+ tun_a="vxlan_sys_4789"
+ elif [ "${type}" = "geneve" ]; then
+ tun_a="genev_sys_6081"
+ fi
+
+ trace "" "${tun_a}" "${ns_b}" ${type}_b \
+ "" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \
+ "" ovs_br0 "" veth-A-C \
+ "${ns_c}" veth_C-A
+
+ if [ ${family} -eq 4 ]; then
+ ping=ping
+ dst=${tunnel4_b_addr}
+ else
+ ping=${ping6}
+ dst=${tunnel6_b_addr}
+ fi
+
+ # Create route exception by exceeding link layer MTU
+ mtu "" veth_A-R1 $((${ll_mtu} + 1000))
+ mtu "" ovs_br0 $((${ll_mtu} + 1000))
+ mtu "" veth_A-C $((${ll_mtu} + 1000))
+ mtu "${ns_c}" veth_C-A $((${ll_mtu} + 1000))
+ mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+ mtu "${ns_b}" veth_B-R1 ${ll_mtu}
+ mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+ mtu "" ${tun_a} $((${ll_mtu} + 1000))
+ mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
+
+ run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 20 -s $((${ll_mtu} + 500)) ${dst} || return 1
+
+ # Check that exceptions were created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on Open vSwitch ${type} interface"
+}
+
+test_pmtu_ipv4_ovs_vxlan4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 4
+}
+
+test_pmtu_ipv6_ovs_vxlan4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 4
+}
+
+test_pmtu_ipv4_ovs_geneve4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 4
+}
+
+test_pmtu_ipv6_ovs_geneve4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 4
+}
+
+test_pmtu_ipv4_ovs_vxlan6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 6
+}
+
+test_pmtu_ipv6_ovs_vxlan6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 6
+}
+
+test_pmtu_ipv4_ovs_geneve6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 6
+}
+
+test_pmtu_ipv6_ovs_geneve6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 6
+}
+
test_pmtu_ipvX_over_fouY_or_gueY() {
inner_family=${1}
outer_family=${2}
@@ -908,6 +1304,64 @@ test_pmtu_ipv6_gue6_exception() {
test_pmtu_ipvX_over_fouY_or_gueY 6 6 gue
}
+test_pmtu_ipvX_over_ipvY_exception() {
+ inner=${1}
+ outer=${2}
+ ll_mtu=4000
+
+ setup namespaces routing ip${inner}ip${outer} || return 2
+
+ trace "${ns_a}" ip_a "${ns_b}" ip_b \
+ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B
+
+ if [ ${inner} -eq 4 ]; then
+ ping=ping
+ dst=${tunnel4_b_addr}
+ else
+ ping=${ping6}
+ dst=${tunnel6_b_addr}
+ fi
+
+ if [ ${outer} -eq 4 ]; then
+ # IPv4 header
+ exp_mtu=$((${ll_mtu} - 20))
+ else
+ # IPv6 header Option 4
+ exp_mtu=$((${ll_mtu} - 40 - 8))
+ fi
+
+ # Create route exception by exceeding link layer MTU
+ mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000))
+ mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+ mtu "${ns_b}" veth_B-R1 ${ll_mtu}
+ mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+ mtu "${ns_a}" ip_a $((${ll_mtu} + 1000)) || return
+ mtu "${ns_b}" ip_b $((${ll_mtu} + 1000)) || return
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
+
+ # Check that exception was created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ip${inner}ip${outer} interface"
+}
+
+test_pmtu_ipv4_ipv4_exception() {
+ test_pmtu_ipvX_over_ipvY_exception 4 4
+}
+
+test_pmtu_ipv6_ipv4_exception() {
+ test_pmtu_ipvX_over_ipvY_exception 6 4
+}
+
+test_pmtu_ipv4_ipv6_exception() {
+ test_pmtu_ipvX_over_ipvY_exception 4 6
+}
+
+test_pmtu_ipv6_ipv6_exception() {
+ test_pmtu_ipvX_over_ipvY_exception 6 6
+}
+
test_pmtu_vti4_exception() {
setup namespaces veth vti4 xfrm4 || return 2
trace "${ns_a}" veth_a "${ns_b}" veth_b \
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 8c8c7d79c38d..2c522f7a0aec 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -350,7 +350,8 @@ static int test_datapath(uint16_t typeflags, int port_off,
int fds[2], fds_udp[2][2], ret;
fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n",
- typeflags, PORT_BASE, PORT_BASE + port_off);
+ typeflags, (uint16_t)PORT_BASE,
+ (uint16_t)(PORT_BASE + port_off));
fds[0] = sock_fanout_open(typeflags, 0);
fds[1] = sock_fanout_open(typeflags, 0);
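
The casts added above are not cosmetic: in a variadic call a uint16_t expression undergoes the default integer promotions and reaches fprintf() as an int, so a %hu conversion without an explicit narrowing cast can trip -Wformat warnings. A standalone sketch of the issue; the PORT_BASE value here is a stand-in, not the test's definition:

/* Standalone sketch of the integer-promotion issue fixed above. */
#include <stdint.h>
#include <stdio.h>

#define PORT_BASE 8000	/* hypothetical value for illustration */

int main(void)
{
	int port_off = 2;

	/* PORT_BASE + port_off has type int after promotion; the cast
	 * makes the value explicitly match the %hu conversion.
	 */
	printf("%hu\n", (uint16_t)(PORT_BASE + port_off));
	return 0;
}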
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
index 6331d91b86a6..170be65e0816 100755
--- a/tools/testing/selftests/net/psock_snd.sh
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -45,7 +45,7 @@ echo "raw vnet hdr"
echo "raw csum_off"
./in_netns.sh ./psock_snd -v -c
-echo "raw csum_off with bad offset (fails)"
+echo "raw csum_off with bad offset (expected to fail)"
(! ./in_netns.sh ./psock_snd -v -c -C)
@@ -57,7 +57,7 @@ echo "raw min size"
echo "raw mtu size"
./in_netns.sh ./psock_snd -l "${mss}"
-echo "raw mtu size + 1 (fails)"
+echo "raw mtu size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -l "${mss_exceeds}")
# fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed
@@ -65,19 +65,19 @@ echo "raw mtu size + 1 (fails)"
# echo "raw vlan mtu size"
# ./in_netns.sh ./psock_snd -V -l "${mss}"
-echo "raw vlan mtu size + 1 (fails)"
+echo "raw vlan mtu size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}")
echo "dgram mtu size"
./in_netns.sh ./psock_snd -d -l "${mss}"
-echo "dgram mtu size + 1 (fails)"
+echo "dgram mtu size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}")
-echo "raw truncate hlen (fails: does not arrive)"
+echo "raw truncate hlen (expected to fail: does not arrive)"
(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))")
-echo "raw truncate hlen - 1 (fails: EINVAL)"
+echo "raw truncate hlen - 1 (expected to fail: EINVAL)"
(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))")
@@ -86,13 +86,13 @@ echo "raw truncate hlen - 1 (fails: EINVAL)"
echo "raw gso min size"
./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
-echo "raw gso min size - 1 (fails)"
+echo "raw gso min size - 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
echo "raw gso max size"
./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
-echo "raw gso max size + 1 (fails)"
+echo "raw gso max size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}")
echo "OK. All tests passed"
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index bdbf4b3125b6..c9ce3dfa42ee 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -5,7 +5,6 @@
# set -e
devdummy="test-dummy0"
-ret=0
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
@@ -66,7 +65,7 @@ kci_test_bridge()
devbr="test-br0"
vlandev="testbr-vlan1"
- ret=0
+ local ret=0
ip link add name "$devbr" type bridge
check_err $?
@@ -113,7 +112,7 @@ kci_test_gre()
rem=10.42.42.1
loc=10.0.0.1
- ret=0
+ local ret=0
ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
check_err $?
ip link set $gredev up
@@ -149,7 +148,7 @@ kci_test_gre()
kci_test_tc()
{
dev=lo
- ret=0
+ local ret=0
tc qdisc add dev "$dev" root handle 1: htb
check_err $?
@@ -184,7 +183,7 @@ kci_test_tc()
kci_test_polrouting()
{
- ret=0
+ local ret=0
ip rule add fwmark 1 lookup 100
check_err $?
ip route add local 0.0.0.0/0 dev lo table 100
@@ -207,7 +206,7 @@ kci_test_route_get()
{
local hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
- ret=0
+ local ret=0
ip route get 127.0.0.1 > /dev/null
check_err $?
@@ -290,7 +289,7 @@ kci_test_promote_secondaries()
kci_test_addrlabel()
{
- ret=0
+ local ret=0
ip addrlabel add prefix dead::/64 dev lo label 1
check_err $?
@@ -330,7 +329,7 @@ kci_test_addrlabel()
kci_test_ifalias()
{
- ret=0
+ local ret=0
namewant=$(uuidgen)
syspathname="/sys/class/net/$devdummy/ifalias"
@@ -385,7 +384,7 @@ kci_test_ifalias()
kci_test_vrf()
{
vrfname="test-vrf"
- ret=0
+ local ret=0
ip link show type vrf 2>/dev/null
if [ $? -ne 0 ]; then
@@ -425,7 +424,7 @@ kci_test_vrf()
kci_test_encap_vxlan()
{
- ret=0
+ local ret=0
vxlan="test-vxlan0"
vlan="test-vlan0"
testns="$1"
@@ -511,7 +510,7 @@ kci_test_encap_vxlan()
kci_test_encap_fou()
{
- ret=0
+ local ret=0
name="test-fou"
testns="$1"
@@ -521,6 +520,11 @@ kci_test_encap_fou()
return $ksft_skip
fi
+ if ! /sbin/modprobe -q -n fou; then
+ echo "SKIP: module fou is not found"
+ return $ksft_skip
+ fi
+ /sbin/modprobe -q fou
ip -netns "$testns" fou add port 7777 ipproto 47 2>/dev/null
if [ $? -ne 0 ];then
echo "FAIL: can't add fou port 7777, skipping test"
@@ -548,7 +552,7 @@ kci_test_encap_fou()
kci_test_encap()
{
testns="testns"
- ret=0
+ local ret=0
ip netns add "$testns"
if [ $? -ne 0 ]; then
@@ -565,15 +569,18 @@ kci_test_encap()
check_err $?
kci_test_encap_vxlan "$testns"
+ check_err $?
kci_test_encap_fou "$testns"
+ check_err $?
ip netns del "$testns"
+ return $ret
}
kci_test_macsec()
{
msname="test_macsec0"
- ret=0
+ local ret=0
ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
if [ $? -ne 0 ]; then
@@ -631,7 +638,7 @@ kci_test_macsec()
#-------------------------------------------------------------------
kci_test_ipsec()
{
- ret=0
+ local ret=0
algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
srcip=192.168.123.1
dstip=192.168.123.2
@@ -731,7 +738,7 @@ kci_test_ipsec()
#-------------------------------------------------------------------
kci_test_ipsec_offload()
{
- ret=0
+ local ret=0
algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
srcip=192.168.123.3
dstip=192.168.123.4
@@ -841,7 +848,7 @@ kci_test_gretap()
{
testns="testns"
DEV_NS=gretap00
- ret=0
+ local ret=0
ip netns add "$testns"
if [ $? -ne 0 ]; then
@@ -891,7 +898,7 @@ kci_test_ip6gretap()
{
testns="testns"
DEV_NS=ip6gretap00
- ret=0
+ local ret=0
ip netns add "$testns"
if [ $? -ne 0 ]; then
@@ -941,7 +948,7 @@ kci_test_erspan()
{
testns="testns"
DEV_NS=erspan00
- ret=0
+ local ret=0
ip link help erspan 2>&1 | grep -q "^Usage:"
if [ $? -ne 0 ];then
@@ -1006,7 +1013,7 @@ kci_test_ip6erspan()
{
testns="testns"
DEV_NS=ip6erspan00
- ret=0
+ local ret=0
ip link help ip6erspan 2>&1 | grep -q "^Usage:"
if [ $? -ne 0 ];then
@@ -1077,7 +1084,7 @@ kci_test_fdb_get()
test_mac=de:ad:be:ef:13:37
localip="10.0.2.2"
dstip="10.0.2.3"
- ret=0
+ local ret=0
bridge fdb help 2>&1 |grep -q 'bridge fdb get'
if [ $? -ne 0 ];then
@@ -1125,7 +1132,7 @@ kci_test_neigh_get()
dstmac=de:ad:be:ef:13:37
dstip=10.0.2.4
dstip6=dead::2
- ret=0
+ local ret=0
ip neigh help 2>&1 |grep -q 'ip neigh get'
if [ $? -ne 0 ];then
@@ -1173,8 +1180,54 @@ kci_test_neigh_get()
echo "PASS: neigh get"
}
+kci_test_bridge_parent_id()
+{
+ local ret=0
+ sysfsnet=/sys/bus/netdevsim/devices/netdevsim
+ probed=false
+
+ if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+ modprobe -q netdevsim
+ check_err $?
+ if [ $ret -ne 0 ]; then
+ echo "SKIP: bridge_parent_id can't load netdevsim"
+ return $ksft_skip
+ fi
+ probed=true
+ fi
+
+ echo "10 1" > /sys/bus/netdevsim/new_device
+ while [ ! -d ${sysfsnet}10 ] ; do :; done
+ echo "20 1" > /sys/bus/netdevsim/new_device
+ while [ ! -d ${sysfsnet}20 ] ; do :; done
+ udevadm settle
+ dev10=`ls ${sysfsnet}10/net/`
+ dev20=`ls ${sysfsnet}20/net/`
+
+ ip link add name test-bond0 type bond mode 802.3ad
+ ip link set dev $dev10 master test-bond0
+ ip link set dev $dev20 master test-bond0
+ ip link add name test-br0 type bridge
+ ip link set dev test-bond0 master test-br0
+ check_err $?
+
+ # clean up any leftovers
+ ip link del dev test-br0
+ ip link del dev test-bond0
+ echo 20 > /sys/bus/netdevsim/del_device
+ echo 10 > /sys/bus/netdevsim/del_device
+ $probed && rmmod netdevsim
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: bridge_parent_id"
+ return 1
+ fi
+ echo "PASS: bridge_parent_id"
+}
+
kci_test_rtnl()
{
+ local ret=0
kci_add_dummy
if [ $ret -ne 0 ];then
echo "FAIL: cannot add dummy interface"
@@ -1182,27 +1235,50 @@ kci_test_rtnl()
fi
kci_test_polrouting
+ check_err $?
kci_test_route_get
+ check_err $?
kci_test_addrlft
+ check_err $?
kci_test_promote_secondaries
+ check_err $?
kci_test_tc
+ check_err $?
kci_test_gre
+ check_err $?
kci_test_gretap
+ check_err $?
kci_test_ip6gretap
+ check_err $?
kci_test_erspan
+ check_err $?
kci_test_ip6erspan
+ check_err $?
kci_test_bridge
+ check_err $?
kci_test_addrlabel
+ check_err $?
kci_test_ifalias
+ check_err $?
kci_test_vrf
+ check_err $?
kci_test_encap
+ check_err $?
kci_test_macsec
+ check_err $?
kci_test_ipsec
+ check_err $?
kci_test_ipsec_offload
+ check_err $?
kci_test_fdb_get
+ check_err $?
kci_test_neigh_get
+ check_err $?
+ kci_test_bridge_parent_id
+ check_err $?
kci_del_dummy
+ return $ret
}
#check for needed privileges
@@ -1221,4 +1297,4 @@ done
kci_test_rtnl
-exit $ret
+exit $?
diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index 6dee9e636a95..e4613ce4ed69 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -44,6 +44,7 @@ struct test_case {
struct options sockopt;
struct tstamps expected;
bool enabled;
+ bool warn_on_fail;
};
struct sof_flag {
@@ -67,44 +68,44 @@ static struct socket_type socket_types[] = {
static struct test_case test_cases[] = {
{ {}, {} },
{
- { so_timestamp: 1 },
- { tstamp: true }
+ { .so_timestamp = 1 },
+ { .tstamp = true }
},
{
- { so_timestampns: 1 },
- { tstampns: true }
+ { .so_timestampns = 1 },
+ { .tstampns = true }
},
{
- { so_timestamp: 1, so_timestampns: 1 },
- { tstampns: true }
+ { .so_timestamp = 1, .so_timestampns = 1 },
+ { .tstampns = true }
},
{
- { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE },
+ { .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE },
{}
},
{
/* Loopback device does not support hw timestamps. */
- { so_timestamping: SOF_TIMESTAMPING_RX_HARDWARE },
+ { .so_timestamping = SOF_TIMESTAMPING_RX_HARDWARE },
{}
},
{
- { so_timestamping: SOF_TIMESTAMPING_SOFTWARE },
- {}
+ { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE },
+ .warn_on_fail = true
},
{
- { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE
+ { .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE
| SOF_TIMESTAMPING_RX_HARDWARE },
{}
},
{
- { so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+ { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
| SOF_TIMESTAMPING_RX_SOFTWARE },
- { swtstamp: true }
+ { .swtstamp = true }
},
{
- { so_timestamp: 1, so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+ { .so_timestamp = 1, .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
| SOF_TIMESTAMPING_RX_SOFTWARE },
- { tstamp: true, swtstamp: true }
+ { .tstamp = true, .swtstamp = true }
},
};

The conversions above replace the old GNU-only "field: value" initializer spelling with standard C99 designated initializers; both set the same members, but only the latter compiles cleanly outside GCC's extensions. A compile-checkable sketch, with an illustrative struct rather than the test's own:

/* Sketch of the two initializer styles; the struct is a stand-in. */
#include <stdbool.h>

struct tstamps_demo {
	bool tstamp;
	bool tstampns;
};

/* GNU extension, as removed above:  { tstamp: true }    */
/* Standard C99 form, as added:      { .tstamp = true }  */
static struct tstamps_demo demo = { .tstamp = true };

int main(void)
{
	return demo.tstamp ? 0 : 1;
}
@@ -115,6 +116,10 @@ static struct option long_options[] = {
{ "tcp", no_argument, 0, 't' },
{ "udp", no_argument, 0, 'u' },
{ "ip", no_argument, 0, 'i' },
+ { "strict", no_argument, 0, 'S' },
+ { "ipv4", no_argument, 0, '4' },
+ { "ipv6", no_argument, 0, '6' },
+ { NULL, 0, NULL, 0 },
};
static int next_port = 19999;
@@ -269,37 +274,55 @@ void config_so_flags(int rcv, struct options o)
error(1, errno, "Failed to set SO_TIMESTAMPING");
}
-bool run_test_case(struct socket_type s, struct test_case t)
+bool run_test_case(struct socket_type *s, int test_num, char ip_version,
+ bool strict)
{
- int port = (s.type == SOCK_RAW) ? 0 : next_port++;
+ union {
+ struct sockaddr_in6 addr6;
+ struct sockaddr_in addr4;
+ struct sockaddr addr_un;
+ } addr;
int read_size = op_size;
- struct sockaddr_in addr;
+ int src, dst, rcv, port;
+ socklen_t addr_size;
bool failed = false;
- int src, dst, rcv;
- src = socket(AF_INET, s.type, s.protocol);
+ port = (s->type == SOCK_RAW) ? 0 : next_port++;
+ memset(&addr, 0, sizeof(addr));
+ if (ip_version == '4') {
+ addr.addr4.sin_family = AF_INET;
+ addr.addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ addr.addr4.sin_port = htons(port);
+ addr_size = sizeof(addr.addr4);
+ if (s->type == SOCK_RAW)
+ read_size += 20; /* for IPv4 header */
+ } else {
+ addr.addr6.sin6_family = AF_INET6;
+ addr.addr6.sin6_addr = in6addr_loopback;
+ addr.addr6.sin6_port = htons(port);
+ addr_size = sizeof(addr.addr6);
+ }
+ printf("Starting testcase %d over ipv%c...\n", test_num, ip_version);
+ src = socket(addr.addr_un.sa_family, s->type,
+ s->protocol);
if (src < 0)
error(1, errno, "Failed to open src socket");
- dst = socket(AF_INET, s.type, s.protocol);
+ dst = socket(addr.addr_un.sa_family, s->type,
+ s->protocol);
if (dst < 0)
error(1, errno, "Failed to open dst socket");
- memset(&addr, 0, sizeof(addr));
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- addr.sin_port = htons(port);
-
- if (bind(dst, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ if (bind(dst, &addr.addr_un, addr_size) < 0)
error(1, errno, "Failed to bind to port %d", port);
- if (s.type == SOCK_STREAM && (listen(dst, 1) < 0))
+ if (s->type == SOCK_STREAM && (listen(dst, 1) < 0))
error(1, errno, "Failed to listen");
- if (connect(src, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ if (connect(src, &addr.addr_un, addr_size) < 0)
error(1, errno, "Failed to connect");
- if (s.type == SOCK_STREAM) {
+ if (s->type == SOCK_STREAM) {
rcv = accept(dst, NULL, NULL);
if (rcv < 0)
error(1, errno, "Failed to accept");
@@ -308,17 +331,22 @@ bool run_test_case(struct socket_type s, struct test_case t)
rcv = dst;
}
- config_so_flags(rcv, t.sockopt);
+ config_so_flags(rcv, test_cases[test_num].sockopt);
usleep(20000); /* setsockopt for SO_TIMESTAMPING is asynchronous */
do_send(src);
- if (s.type == SOCK_RAW)
- read_size += 20; /* for IP header */
- failed = do_recv(rcv, read_size, t.expected);
+ failed = do_recv(rcv, read_size, test_cases[test_num].expected);
close(rcv);
close(src);
+ if (failed) {
+ printf("FAILURE in testcase %d over ipv%c ", test_num,
+ ip_version);
+ print_test_case(&test_cases[test_num]);
+ if (!strict && test_cases[test_num].warn_on_fail)
+ failed = false;
+ }
return failed;
}
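
The rewrite above keys everything off a union of sockaddr types: one storage object is filled per address family, and its generic struct sockaddr view is what bind() and connect() receive, together with the matching socklen_t. A minimal sketch of that pattern under the same assumptions; the union and helper names are invented:

/* Minimal sketch of the sockaddr union pattern used above. */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>

union sockaddr_any {
	struct sockaddr sa;
	struct sockaddr_in in4;
	struct sockaddr_in6 in6;
};

/* Fill a loopback address; returns the length for bind()/connect(). */
static socklen_t fill_loopback(union sockaddr_any *u, int family, uint16_t port)
{
	memset(u, 0, sizeof(*u));
	if (family == AF_INET) {
		u->in4.sin_family = AF_INET;
		u->in4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		u->in4.sin_port = htons(port);
		return sizeof(u->in4);
	}
	u->in6.sin6_family = AF_INET6;
	u->in6.sin6_addr = in6addr_loopback;
	u->in6.sin6_port = htons(port);
	return sizeof(u->in6);
}

int main(void)
{
	union sockaddr_any a;
	socklen_t len = fill_loopback(&a, AF_INET6, 8080);

	return len == sizeof(a.in6) ? 0 : 1;
}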
@@ -326,10 +354,12 @@ int main(int argc, char **argv)
{
bool all_protocols = true;
bool all_tests = true;
+ bool cfg_ipv4 = false;
+ bool cfg_ipv6 = false;
+ bool strict = false;
int arg_index = 0;
int failures = 0;
- int s, t;
- char opt;
+ int s, t, opt;
while ((opt = getopt_long(argc, argv, "", long_options,
&arg_index)) != -1) {
@@ -362,6 +392,15 @@ int main(int argc, char **argv)
all_protocols = false;
socket_types[0].enabled = true;
break;
+ case 'S':
+ strict = true;
+ break;
+ case '4':
+ cfg_ipv4 = true;
+ break;
+ case '6':
+ cfg_ipv6 = true;
+ break;
default:
error(1, 0, "Failed to parse parameters.");
}
@@ -375,13 +414,14 @@ int main(int argc, char **argv)
for (t = 0; t < ARRAY_SIZE(test_cases); t++) {
if (!all_tests && !test_cases[t].enabled)
continue;
-
- printf("Starting testcase %d...\n", t);
- if (run_test_case(socket_types[s], test_cases[t])) {
- failures++;
- printf("FAILURE in test case ");
- print_test_case(&test_cases[t]);
- }
+ if (cfg_ipv4 || !cfg_ipv6)
+ if (run_test_case(&socket_types[s], t, '4',
+ strict))
+ failures++;
+ if (cfg_ipv6 || !cfg_ipv4)
+ if (run_test_case(&socket_types[s], t, '6',
+ strict))
+ failures++;
}
}
if (!failures)
diff --git a/tools/testing/selftests/net/rxtimestamp.sh b/tools/testing/selftests/net/rxtimestamp.sh
new file mode 100755
index 000000000000..91631e88bf46
--- /dev/null
+++ b/tools/testing/selftests/net/rxtimestamp.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+./in_netns.sh ./rxtimestamp "$@"
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 383bac05ac32..3155fbbf644b 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -15,8 +15,9 @@
#include <inttypes.h>
#include <linux/net_tstamp.h>
#include <linux/errqueue.h>
+#include <linux/if_ether.h>
#include <linux/ipv6.h>
-#include <linux/tcp.h>
+#include <linux/udp.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
@@ -120,7 +121,7 @@ static bool do_recv_one(int fdr, struct timed_send *ts)
if (rbuf[0] != ts->data)
error(1, 0, "payload mismatch. expected %c", ts->data);
- if (labs(tstop - texpect) > cfg_variance_us)
+ if (llabs(tstop - texpect) > cfg_variance_us)
error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
return false;
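
Switching labs() to llabs() matters on 32-bit userland, where long is 32 bits: tstop - texpect is an int64_t, and labs() would truncate it before taking the absolute value. A small demonstration with an illustrative delta:

/* Why llabs(): labs() takes long, which is 32 bits on 32-bit ABIs. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int64_t delta = -5000000000LL;	/* does not fit in a 32-bit long */

	printf("llabs: %lld\n", (long long)llabs(delta));	/* 5000000000 */
	return 0;
}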
@@ -140,8 +141,8 @@ static void do_recv_errqueue_timeout(int fdt)
{
char control[CMSG_SPACE(sizeof(struct sock_extended_err)) +
CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0};
- char data[sizeof(struct ipv6hdr) +
- sizeof(struct tcphdr) + 1];
+ char data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr) +
+ sizeof(struct udphdr) + 1];
struct sock_extended_err *err;
struct msghdr msg = {0};
struct iovec iov = {0};
@@ -159,6 +160,8 @@ static void do_recv_errqueue_timeout(int fdt)
msg.msg_controllen = sizeof(control);
while (1) {
+ const char *reason;
+
ret = recvmsg(fdt, &msg, MSG_ERRQUEUE);
if (ret == -1 && errno == EAGAIN)
break;
@@ -176,14 +179,30 @@ static void do_recv_errqueue_timeout(int fdt)
err = (struct sock_extended_err *)CMSG_DATA(cm);
if (err->ee_origin != SO_EE_ORIGIN_TXTIME)
error(1, 0, "errqueue: origin 0x%x\n", err->ee_origin);
- if (err->ee_code != ECANCELED)
- error(1, 0, "errqueue: code 0x%x\n", err->ee_code);
+
+ switch (err->ee_errno) {
+ case ECANCELED:
+ if (err->ee_code != SO_EE_CODE_TXTIME_MISSED)
+ error(1, 0, "errqueue: unknown ECANCELED %u\n",
+ err->ee_code);
+ reason = "missed txtime";
+ break;
+ case EINVAL:
+ if (err->ee_code != SO_EE_CODE_TXTIME_INVALID_PARAM)
+ error(1, 0, "errqueue: unknown EINVAL %u\n",
+ err->ee_code);
+ reason = "invalid txtime";
+ break;
+ default:
+ error(1, 0, "errqueue: errno %u code %u\n",
+ err->ee_errno, err->ee_code);
+	}
tstamp = ((int64_t) err->ee_data) << 32 | err->ee_info;
tstamp -= (int64_t) glob_tstart;
tstamp /= 1000 * 1000;
- fprintf(stderr, "send: pkt %c at %" PRId64 "ms dropped\n",
- data[ret - 1], tstamp);
+ fprintf(stderr, "send: pkt %c at %" PRId64 "ms dropped: %s\n",
+ data[ret - 1], tstamp, reason);
msg.msg_flags = 0;
msg.msg_controllen = sizeof(control);
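
The 64-bit transmit timestamp comes back from the error queue split across two 32-bit fields, with ee_data holding the high word and ee_info the low word; the shift-and-or above recombines them before converting to milliseconds. A standalone sketch of that recombination, with hypothetical field values:

/* Recombining the split 64-bit timestamp, as done above. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ee_data = 0x00000012;	/* high 32 bits (hypothetical) */
	uint32_t ee_info = 0x3456789a;	/* low 32 bits (hypothetical) */
	int64_t tstamp = ((int64_t)ee_data << 32) | ee_info;

	printf("0x%" PRIx64 "\n", tstamp);	/* prints 0x123456789a */
	return 0;
}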
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
index 4555f88252ba..00f837c9bc6c 100644
--- a/tools/testing/selftests/net/tcp_mmap.c
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -123,6 +123,28 @@ void hash_zone(void *zone, unsigned int length)
#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+
+static void *mmap_large_buffer(size_t need, size_t *allocated)
+{
+ void *buffer;
+ size_t sz;
+
+ /* Attempt to use huge pages if possible. */
+ sz = ALIGN_UP(need, map_align);
+ buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+
+ if (buffer == (void *)-1) {
+ sz = need;
+ buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (buffer != (void *)-1)
+ fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n");
+ }
+ *allocated = sz;
+ return buffer;
+}
+
void *child_thread(void *arg)
{
unsigned long total_mmap = 0, total = 0;
@@ -135,6 +157,7 @@ void *child_thread(void *arg)
void *addr = NULL;
double throughput;
struct rusage ru;
+ size_t buffer_sz;
int lu, fd;
fd = (int)(unsigned long)arg;
@@ -142,9 +165,9 @@ void *child_thread(void *arg)
gettimeofday(&t0, NULL);
fcntl(fd, F_SETFL, O_NDELAY);
- buffer = malloc(chunk_size);
- if (!buffer) {
- perror("malloc");
+ buffer = mmap_large_buffer(chunk_size, &buffer_sz);
+ if (buffer == (void *)-1) {
+ perror("mmap");
goto error;
}
if (zflg) {
@@ -179,6 +202,10 @@ void *child_thread(void *arg)
total_mmap += zc.length;
if (xflg)
hash_zone(addr, zc.length);
+ /* It is more efficient to unmap the pages right now,
+ * instead of doing it in the next TCP_ZEROCOPY_RECEIVE.
+ */
+ madvise(addr, zc.length, MADV_DONTNEED);
total += zc.length;
}
if (zc.recv_skip_hint) {
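
The new madvise() call drops the just-hashed zerocopy pages eagerly, so the following TCP_ZEROCOPY_RECEIVE does not have to tear the old mappings down itself. A minimal sketch of the idiom; the helper name is invented:

/* Illustrative helper: release consumed pages back to the kernel. */
#include <stddef.h>
#include <sys/mman.h>

static void drop_consumed(void *addr, size_t len)
{
	/* Page table entries are cleared now; a later fault on the same
	 * range sees fresh pages rather than the old mapped data.
	 */
	madvise(addr, len, MADV_DONTNEED);
}

int main(void)
{
	size_t len = 1 << 20;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p != MAP_FAILED)
		drop_consumed(p, len);
	return 0;
}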
@@ -230,7 +257,7 @@ end:
ru.ru_nvcsw);
}
error:
- free(buffer);
+ munmap(buffer, buffer_sz);
close(fd);
if (zflg)
munmap(raddr, chunk_size + map_align);
@@ -344,9 +371,10 @@ int main(int argc, char *argv[])
{
struct sockaddr_storage listenaddr, addr;
unsigned int max_pacing_rate = 0;
- size_t total = 0;
+ uint64_t total = 0;
char *host = NULL;
int fd, c, on = 1;
+ size_t buffer_sz;
char *buffer;
int sflg = 0;
int mss = 0;
@@ -437,8 +465,8 @@ int main(int argc, char *argv[])
}
do_accept(fdlisten);
}
- buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ buffer = mmap_large_buffer(chunk_size, &buffer_sz);
if (buffer == (char *)-1) {
perror("mmap");
exit(1);
@@ -473,17 +501,17 @@ int main(int argc, char *argv[])
zflg = 0;
}
while (total < FILE_SZ) {
- ssize_t wr = FILE_SZ - total;
+ int64_t wr = FILE_SZ - total;
if (wr > chunk_size)
wr = chunk_size;
/* Note : we just want to fill the pipe with 0 bytes */
- wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0);
+ wr = send(fd, buffer, (size_t)wr, zflg ? MSG_ZEROCOPY : 0);
if (wr <= 0)
break;
total += wr;
}
close(fd);
- munmap(buffer, chunk_size);
+ munmap(buffer, buffer_sz);
return 0;
}
diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c
index aca3491174a1..f4bb4fef0f39 100644
--- a/tools/testing/selftests/net/timestamping.c
+++ b/tools/testing/selftests/net/timestamping.c
@@ -313,10 +313,16 @@ int main(int argc, char **argv)
int val;
socklen_t len;
struct timeval next;
+ size_t if_len;
if (argc < 2)
usage(0);
interface = argv[1];
+ if_len = strlen(interface);
+ if (if_len >= IFNAMSIZ) {
+ printf("interface name exceeds IFNAMSIZ\n");
+ exit(1);
+ }
for (i = 2; i < argc; i++) {
if (!strcasecmp(argv[i], "SO_TIMESTAMP"))
@@ -350,12 +356,12 @@ int main(int argc, char **argv)
bail("socket");
memset(&device, 0, sizeof(device));
- strncpy(device.ifr_name, interface, sizeof(device.ifr_name));
+ memcpy(device.ifr_name, interface, if_len + 1);
if (ioctl(sock, SIOCGIFADDR, &device) < 0)
bail("getting interface IP address");
memset(&hwtstamp, 0, sizeof(hwtstamp));
- strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name));
+ memcpy(hwtstamp.ifr_name, interface, if_len + 1);
hwtstamp.ifr_data = (void *)&hwconfig;
memset(&hwconfig, 0, sizeof(hwconfig));
hwconfig.tx_type =
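
The memcpy() conversion above works because the interface name is length-checked against IFNAMSIZ up front, so copying strlen+1 bytes moves the terminating NUL too; strncpy() would silently leave ifr_name unterminated when the name exactly fills the buffer. A sketch of the same pattern, with an invented helper:

/* Validate once, then copy the NUL along with the name. */
#include <net/if.h>
#include <string.h>

static int set_ifr_name(struct ifreq *ifr, const char *name)
{
	size_t len = strlen(name);

	if (len >= IFNAMSIZ)
		return -1;	/* name (plus NUL) would not fit */
	memcpy(ifr->ifr_name, name, len + 1);
	return 0;
}

int main(void)
{
	struct ifreq ifr;

	return set_ifr_name(&ifr, "eth0");
}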
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 0ea44d975b6c..b599f1fa99b5 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -101,6 +101,21 @@ FIXTURE(tls)
bool notls;
};
+FIXTURE_VARIANT(tls)
+{
+ unsigned int tls_version;
+};
+
+FIXTURE_VARIANT_ADD(tls, 12)
+{
+ .tls_version = TLS_1_2_VERSION,
+};
+
+FIXTURE_VARIANT_ADD(tls, 13)
+{
+ .tls_version = TLS_1_3_VERSION,
+};
+
FIXTURE_SETUP(tls)
{
struct tls12_crypto_info_aes_gcm_128 tls12;
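
FIXTURE_VARIANT turns every TEST_F in the file into a parameterized test: each FIXTURE_VARIANT_ADD instance re-runs the whole fixture with variant pointing at that parameter block, which is how the hand-rolled tls12 test removed further down becomes redundant. A minimal harness sketch under the same assumptions; the demo names are invented:

/* Hypothetical standalone example using the kselftest harness. */
#include "../kselftest_harness.h"

FIXTURE(demo)
{
	int unused;
};

FIXTURE_VARIANT(demo)
{
	int value;
};

FIXTURE_VARIANT_ADD(demo, one) { .value = 1, };
FIXTURE_VARIANT_ADD(demo, two) { .value = 2, };

FIXTURE_SETUP(demo) {}
FIXTURE_TEARDOWN(demo) {}

/* Runs twice, once per variant, with 'variant' bound accordingly. */
TEST_F(demo, reads_variant)
{
	EXPECT_GT(variant->value, 0);
}

TEST_HARNESS_MAIN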
@@ -112,7 +127,7 @@ FIXTURE_SETUP(tls)
len = sizeof(addr);
memset(&tls12, 0, sizeof(tls12));
- tls12.info.version = TLS_1_3_VERSION;
+ tls12.info.version = variant->tls_version;
tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
addr.sin_family = AF_INET;
@@ -198,6 +213,64 @@ TEST_F(tls, send_then_sendfile)
EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size);
}
+static void chunked_sendfile(struct __test_metadata *_metadata,
+ struct _test_data_tls *self,
+ uint16_t chunk_size,
+ uint16_t extra_payload_size)
+{
+ char buf[TLS_PAYLOAD_MAX_LEN];
+ uint16_t test_payload_size;
+ int size = 0;
+ int ret;
+ char filename[] = "/tmp/mytemp.XXXXXX";
+ int fd = mkstemp(filename);
+ off_t offset = 0;
+
+ unlink(filename);
+ ASSERT_GE(fd, 0);
+ EXPECT_GE(chunk_size, 1);
+ test_payload_size = chunk_size + extra_payload_size;
+ ASSERT_GE(TLS_PAYLOAD_MAX_LEN, test_payload_size);
+ memset(buf, 1, test_payload_size);
+ size = write(fd, buf, test_payload_size);
+ EXPECT_EQ(size, test_payload_size);
+ fsync(fd);
+
+ while (size > 0) {
+ ret = sendfile(self->fd, fd, &offset, chunk_size);
+ EXPECT_GE(ret, 0);
+ size -= ret;
+ }
+
+ EXPECT_EQ(recv(self->cfd, buf, test_payload_size, MSG_WAITALL),
+ test_payload_size);
+
+ close(fd);
+}
+
+TEST_F(tls, multi_chunk_sendfile)
+{
+ chunked_sendfile(_metadata, self, 4096, 4096);
+ chunked_sendfile(_metadata, self, 4096, 0);
+ chunked_sendfile(_metadata, self, 4096, 1);
+ chunked_sendfile(_metadata, self, 4096, 2048);
+ chunked_sendfile(_metadata, self, 8192, 2048);
+ chunked_sendfile(_metadata, self, 4096, 8192);
+ chunked_sendfile(_metadata, self, 8192, 4096);
+ chunked_sendfile(_metadata, self, 12288, 1024);
+ chunked_sendfile(_metadata, self, 12288, 2000);
+ chunked_sendfile(_metadata, self, 15360, 100);
+ chunked_sendfile(_metadata, self, 15360, 300);
+ chunked_sendfile(_metadata, self, 1, 4096);
+ chunked_sendfile(_metadata, self, 2048, 4096);
+ chunked_sendfile(_metadata, self, 2048, 8192);
+ chunked_sendfile(_metadata, self, 4096, 8192);
+ chunked_sendfile(_metadata, self, 1024, 12288);
+ chunked_sendfile(_metadata, self, 2000, 12288);
+ chunked_sendfile(_metadata, self, 100, 15360);
+ chunked_sendfile(_metadata, self, 300, 15360);
+}
+
TEST_F(tls, recv_max)
{
unsigned int send_len = TLS_PAYLOAD_MAX_LEN;
@@ -733,7 +806,7 @@ TEST_F(tls, bidir)
struct tls12_crypto_info_aes_gcm_128 tls12;
memset(&tls12, 0, sizeof(tls12));
- tls12.info.version = TLS_1_3_VERSION;
+ tls12.info.version = variant->tls_version;
tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12,
@@ -1258,78 +1331,4 @@ TEST(keysizes) {
close(cfd);
}
-TEST(tls12) {
- int fd, cfd;
- bool notls;
-
- struct tls12_crypto_info_aes_gcm_128 tls12;
- struct sockaddr_in addr;
- socklen_t len;
- int sfd, ret;
-
- notls = false;
- len = sizeof(addr);
-
- memset(&tls12, 0, sizeof(tls12));
- tls12.info.version = TLS_1_2_VERSION;
- tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
-
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(INADDR_ANY);
- addr.sin_port = 0;
-
- fd = socket(AF_INET, SOCK_STREAM, 0);
- sfd = socket(AF_INET, SOCK_STREAM, 0);
-
- ret = bind(sfd, &addr, sizeof(addr));
- ASSERT_EQ(ret, 0);
- ret = listen(sfd, 10);
- ASSERT_EQ(ret, 0);
-
- ret = getsockname(sfd, &addr, &len);
- ASSERT_EQ(ret, 0);
-
- ret = connect(fd, &addr, sizeof(addr));
- ASSERT_EQ(ret, 0);
-
- ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
- if (ret != 0) {
- notls = true;
- printf("Failure setting TCP_ULP, testing without tls\n");
- }
-
- if (!notls) {
- ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12,
- sizeof(tls12));
- ASSERT_EQ(ret, 0);
- }
-
- cfd = accept(sfd, &addr, &len);
- ASSERT_GE(cfd, 0);
-
- if (!notls) {
- ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls",
- sizeof("tls"));
- ASSERT_EQ(ret, 0);
-
- ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12,
- sizeof(tls12));
- ASSERT_EQ(ret, 0);
- }
-
- close(sfd);
-
- char const *test_str = "test_read";
- int send_len = 10;
- char buf[10];
-
- send_len = strlen(test_str) + 1;
- EXPECT_EQ(send(fd, test_str, send_len, 0), send_len);
- EXPECT_NE(recv(cfd, buf, send_len, 0), -1);
- EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
-
- close(fd);
- close(cfd);
-}
-
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index 011b0da6b033..490a8cca708a 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -64,6 +64,7 @@ static int cfg_payload_len = 10;
static int cfg_poll_timeout = 100;
static int cfg_delay_snd;
static int cfg_delay_ack;
+static int cfg_delay_tolerance_usec = 500;
static bool cfg_show_payload;
static bool cfg_do_pktinfo;
static bool cfg_busy_poll;
@@ -152,11 +153,12 @@ static void validate_key(int tskey, int tstype)
static void validate_timestamp(struct timespec *cur, int min_delay)
{
- int max_delay = min_delay + 500 /* processing time upper bound */;
int64_t cur64, start64;
+ int max_delay;
cur64 = timespec_to_us64(cur);
start64 = timespec_to_us64(&ts_usr);
+ max_delay = min_delay + cfg_delay_tolerance_usec;
if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) {
fprintf(stderr, "ERROR: %lu us expected between %d and %d\n",
@@ -683,6 +685,7 @@ static void __attribute__((noreturn)) usage(const char *filepath)
" -r: use raw\n"
" -R: use raw (IP_HDRINCL)\n"
" -S N: usec to sleep before reading error queue\n"
+ " -t N: tolerance (usec) for timestamp validation\n"
" -u: use udp\n"
" -v: validate SND delay (usec)\n"
" -V: validate ACK delay (usec)\n"
@@ -697,7 +700,7 @@ static void parse_opt(int argc, char **argv)
int c;
while ((c = getopt(argc, argv,
- "46bc:CeEFhIl:LnNp:PrRS:uv:V:x")) != -1) {
+ "46bc:CeEFhIl:LnNp:PrRS:t:uv:V:x")) != -1) {
switch (c) {
case '4':
do_ipv6 = 0;
@@ -760,6 +763,9 @@ static void parse_opt(int argc, char **argv)
case 'S':
cfg_sleep_usec = strtoul(optarg, NULL, 10);
break;
+ case 't':
+ cfg_delay_tolerance_usec = strtoul(optarg, NULL, 10);
+ break;
case 'u':
proto_count++;
cfg_proto = SOCK_DGRAM;
diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh
index eea6f5193693..31637769f59f 100755
--- a/tools/testing/selftests/net/txtimestamp.sh
+++ b/tools/testing/selftests/net/txtimestamp.sh
@@ -75,7 +75,7 @@ main() {
fi
}
-if [[ "$(ip netns identify)" == "root" ]]; then
+if [[ -z "$(ip netns identify)" ]]; then
./in_netns.sh $0 $@
else
main $@
diff --git a/tools/testing/selftests/net/vrf-xfrm-tests.sh b/tools/testing/selftests/net/vrf-xfrm-tests.sh
new file mode 100755
index 000000000000..184da81f554f
--- /dev/null
+++ b/tools/testing/selftests/net/vrf-xfrm-tests.sh
@@ -0,0 +1,436 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Various combinations of VRF with xfrms and qdisc.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+PAUSE_ON_FAIL=no
+VERBOSE=0
+ret=0
+
+HOST1_4=192.168.1.1
+HOST2_4=192.168.1.2
+HOST1_6=2001:db8:1::1
+HOST2_6=2001:db8:1::2
+
+XFRM1_4=10.0.1.1
+XFRM2_4=10.0.1.2
+XFRM1_6=fc00:1000::1
+XFRM2_6=fc00:1000::2
+IF_ID=123
+
+VRF=red
+TABLE=300
+
+AUTH_1=0xd94fcfea65fddf21dc6e0d24a0253508
+AUTH_2=0xdc6e0d24a0253508d94fcfea65fddf21
+ENC_1=0xfc46c20f8048be9725930ff3fb07ac2a91f0347dffeacf62
+ENC_2=0x3fb07ac2a91f0347dffeacf62fc46c20f8048be9725930ff
+SPI_1=0x02122b77
+SPI_2=0x2b770212
+
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+################################################################################
+#
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+run_cmd_host1()
+{
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf " COMMAND: $cmd\n"
+ fi
+
+ out=$(eval ip netns exec host1 $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" ]; then
+ if [ -n "$out" ]; then
+ echo
+ echo " $out"
+ fi
+ echo
+ fi
+
+ return $rc
+}
+
+################################################################################
+# create namespaces for hosts and switches
+
+create_vrf()
+{
+ local ns=$1
+ local vrf=$2
+ local table=$3
+
+ if [ -n "${ns}" ]; then
+ ns="-netns ${ns}"
+ fi
+
+ ip ${ns} link add ${vrf} type vrf table ${table}
+ ip ${ns} link set ${vrf} up
+ ip ${ns} route add vrf ${vrf} unreachable default metric 8192
+ ip ${ns} -6 route add vrf ${vrf} unreachable default metric 8192
+
+ ip ${ns} addr add 127.0.0.1/8 dev ${vrf}
+ ip ${ns} -6 addr add ::1 dev ${vrf} nodad
+
+ ip ${ns} ru del pref 0
+ ip ${ns} ru add pref 32765 from all lookup local
+ ip ${ns} -6 ru del pref 0
+ ip ${ns} -6 ru add pref 32765 from all lookup local
+}
+
+create_ns()
+{
+ local ns=$1
+ local addr=$2
+ local addr6=$3
+
+ [ -z "${addr}" ] && addr="-"
+ [ -z "${addr6}" ] && addr6="-"
+
+ ip netns add ${ns}
+
+ ip -netns ${ns} link set lo up
+ if [ "${addr}" != "-" ]; then
+ ip -netns ${ns} addr add dev lo ${addr}
+ fi
+ if [ "${addr6}" != "-" ]; then
+ ip -netns ${ns} -6 addr add dev lo ${addr6}
+ fi
+
+ ip -netns ${ns} ro add unreachable default metric 8192
+ ip -netns ${ns} -6 ro add unreachable default metric 8192
+
+ ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0
+}
+
+# create veth pair to connect namespaces and apply addresses.
+connect_ns()
+{
+ local ns1=$1
+ local ns1_dev=$2
+ local ns1_addr=$3
+ local ns1_addr6=$4
+ local ns2=$5
+ local ns2_dev=$6
+ local ns2_addr=$7
+ local ns2_addr6=$8
+ local ns1arg
+ local ns2arg
+
+ if [ -n "${ns1}" ]; then
+ ns1arg="-netns ${ns1}"
+ fi
+ if [ -n "${ns2}" ]; then
+ ns2arg="-netns ${ns2}"
+ fi
+
+ ip ${ns1arg} li add ${ns1_dev} type veth peer name tmp
+ ip ${ns1arg} li set ${ns1_dev} up
+ ip ${ns1arg} li set tmp netns ${ns2} name ${ns2_dev}
+ ip ${ns2arg} li set ${ns2_dev} up
+
+ if [ "${ns1_addr}" != "-" ]; then
+ ip ${ns1arg} addr add dev ${ns1_dev} ${ns1_addr}
+ ip ${ns2arg} addr add dev ${ns2_dev} ${ns2_addr}
+ fi
+
+ if [ "${ns1_addr6}" != "-" ]; then
+ ip ${ns1arg} addr add dev ${ns1_dev} ${ns1_addr6} nodad
+ ip ${ns2arg} addr add dev ${ns2_dev} ${ns2_addr6} nodad
+ fi
+}
+
+################################################################################
+
+cleanup()
+{
+ ip netns del host1
+ ip netns del host2
+}
+
+setup()
+{
+ create_ns "host1"
+ create_ns "host2"
+
+ connect_ns "host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \
+ "host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64
+
+ create_vrf "host1" ${VRF} ${TABLE}
+ ip -netns host1 link set dev eth0 master ${VRF}
+}
+
+cleanup_xfrm()
+{
+ for ns in host1 host2
+ do
+ for x in state policy
+ do
+ ip -netns ${ns} xfrm ${x} flush
+ ip -6 -netns ${ns} xfrm ${x} flush
+ done
+ done
+}
+
+setup_xfrm()
+{
+ local h1_4=$1
+ local h2_4=$2
+ local h1_6=$3
+ local h2_6=$4
+ local devarg="$5"
+
+ #
+ # policy
+ #
+
+ # host1 - IPv4 out
+ ip -netns host1 xfrm policy add \
+ src ${h1_4} dst ${h2_4} ${devarg} dir out \
+ tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel
+
+ # host2 - IPv4 in
+ ip -netns host2 xfrm policy add \
+ src ${h1_4} dst ${h2_4} dir in \
+ tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel
+
+ # host1 - IPv4 in
+ ip -netns host1 xfrm policy add \
+ src ${h2_4} dst ${h1_4} ${devarg} dir in \
+ tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel
+
+ # host2 - IPv4 out
+ ip -netns host2 xfrm policy add \
+ src ${h2_4} dst ${h1_4} dir out \
+ tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel
+
+
+ # host1 - IPv6 out
+ ip -6 -netns host1 xfrm policy add \
+ src ${h1_6} dst ${h2_6} ${devarg} dir out \
+ tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel
+
+ # host2 - IPv6 in
+ ip -6 -netns host2 xfrm policy add \
+ src ${h1_6} dst ${h2_6} dir in \
+ tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel
+
+ # host1 - IPv6 in
+ ip -6 -netns host1 xfrm policy add \
+ src ${h2_6} dst ${h1_6} ${devarg} dir in \
+ tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel
+
+ # host2 - IPv6 out
+ ip -6 -netns host2 xfrm policy add \
+ src ${h2_6} dst ${h1_6} dir out \
+ tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel
+
+ #
+ # state
+ #
+ ip -netns host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
+ proto esp spi ${SPI_1} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+ enc 'cbc(des3_ede)' ${ENC_1} \
+ sel src ${h1_4} dst ${h2_4} ${devarg}
+
+ ip -netns host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
+ proto esp spi ${SPI_1} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+ enc 'cbc(des3_ede)' ${ENC_1} \
+ sel src ${h1_4} dst ${h2_4}
+
+
+ ip -netns host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
+ proto esp spi ${SPI_2} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+ enc 'cbc(des3_ede)' ${ENC_2} \
+ sel src ${h2_4} dst ${h1_4} ${devarg}
+
+ ip -netns host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
+ proto esp spi ${SPI_2} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+ enc 'cbc(des3_ede)' ${ENC_2} \
+ sel src ${h2_4} dst ${h1_4}
+
+
+ ip -6 -netns host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
+ proto esp spi ${SPI_1} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+ enc 'cbc(des3_ede)' ${ENC_1} \
+ sel src ${h1_6} dst ${h2_6} ${devarg}
+
+ ip -6 -netns host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
+ proto esp spi ${SPI_1} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+ enc 'cbc(des3_ede)' ${ENC_1} \
+ sel src ${h1_6} dst ${h2_6}
+
+
+ ip -6 -netns host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
+ proto esp spi ${SPI_2} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+ enc 'cbc(des3_ede)' ${ENC_2} \
+ sel src ${h2_6} dst ${h1_6} ${devarg}
+
+ ip -6 -netns host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
+ proto esp spi ${SPI_2} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+ enc 'cbc(des3_ede)' ${ENC_2} \
+ sel src ${h2_6} dst ${h1_6}
+}
+
+cleanup_xfrm_dev()
+{
+ ip -netns host1 li del xfrm0
+ ip -netns host2 addr del ${XFRM2_4}/24 dev eth0
+ ip -netns host2 addr del ${XFRM2_6}/64 dev eth0
+}
+
+setup_xfrm_dev()
+{
+ local vrfarg="vrf ${VRF}"
+
+ ip -netns host1 li add type xfrm dev eth0 if_id ${IF_ID}
+ ip -netns host1 li set xfrm0 ${vrfarg} up
+ ip -netns host1 addr add ${XFRM1_4}/24 dev xfrm0
+ ip -netns host1 addr add ${XFRM1_6}/64 dev xfrm0
+
+ ip -netns host2 addr add ${XFRM2_4}/24 dev eth0
+ ip -netns host2 addr add ${XFRM2_6}/64 dev eth0
+
+ setup_xfrm ${XFRM1_4} ${XFRM2_4} ${XFRM1_6} ${XFRM2_6} "if_id ${IF_ID}"
+}
+
+run_tests()
+{
+ cleanup_xfrm
+
+ # no IPsec
+ run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+ log_test $? 0 "IPv4 no xfrm policy"
+ run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+ log_test $? 0 "IPv6 no xfrm policy"
+
+ # xfrm without VRF in sel
+ setup_xfrm ${HOST1_4} ${HOST2_4} ${HOST1_6} ${HOST2_6}
+ run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+ log_test $? 0 "IPv4 xfrm policy based on address"
+ run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+ log_test $? 0 "IPv6 xfrm policy based on address"
+ cleanup_xfrm
+
+ # xfrm with VRF in sel
+ # Known failure: ipv4 resets the flow oif after the lookup. Fix is
+ # not straightforward.
+ # setup_xfrm ${HOST1_4} ${HOST2_4} ${HOST1_6} ${HOST2_6} "dev ${VRF}"
+ # run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+ # log_test $? 0 "IPv4 xfrm policy with VRF in selector"
+ run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+ log_test $? 0 "IPv6 xfrm policy with VRF in selector"
+ cleanup_xfrm
+
+ # xfrm with enslaved device in sel
+ # Known failures: combined with the above, __xfrm{4,6}_selector_match
+ # needs to consider both l3mdev and enslaved device index.
+ # setup_xfrm ${HOST1_4} ${HOST2_4} ${HOST1_6} ${HOST2_6} "dev eth0"
+ # run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+ # log_test $? 0 "IPv4 xfrm policy with enslaved device in selector"
+ # run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+ # log_test $? 0 "IPv6 xfrm policy with enslaved device in selector"
+ # cleanup_xfrm
+
+ # xfrm device
+ setup_xfrm_dev
+ run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${XFRM2_4}
+ log_test $? 0 "IPv4 xfrm policy with xfrm device"
+ run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${XFRM2_6}
+ log_test $? 0 "IPv6 xfrm policy with xfrm device"
+ cleanup_xfrm_dev
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -p Pause on fail
+ -v verbose mode (show commands and output)
+
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :pv o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+cleanup 2>/dev/null
+setup
+
+echo
+echo "No qdisc on VRF device"
+run_tests
+
+run_cmd_host1 tc qdisc add dev ${VRF} root netem delay 100ms
+echo
+echo "netem qdisc on VRF device"
+run_tests
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n" ${nfail}
+
+exit $ret
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
new file mode 100755
index 000000000000..23cf924754a5
--- /dev/null
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -0,0 +1,626 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2019 David Ahern <dsahern@gmail.com>. All rights reserved.
+# Copyright (c) 2020 Michael Jeanson <mjeanson@efficios.com>. All rights reserved.
+#
+# Requires CONFIG_NET_VRF, CONFIG_VETH, CONFIG_BRIDGE and CONFIG_NET_NS.
+#
+#
+# Symmetric routing topology
+#
+# blue red
+# +----+ .253 +----+ .253 +----+
+# | h1 |-------------------| r1 |-------------------| h2 |
+# +----+ .1 +----+ .2 +----+
+# 172.16.1/24 172.16.2/24
+# 2001:db8:16:1/64 2001:db8:16:2/64
+#
+#
+# Route from h1 to h2 and back goes through r1, incoming vrf blue has a route
+# to the outgoing vrf red for the n2 network and red has a route back to n1.
+# The red VRF interface has a MTU of 1400.
+#
+# The first test sends a ping with a TTL of 1 from h1 to h2 and parses the
+# output of the command to check that a TTL-expired error is received.
+#
+# The second test runs traceroute from h1 to h2 and parses the output to check
+# for a hop on r1.
+#
+# The third test sends a ping with a packet size of 1450 from h1 to h2 and
+# parses the output of the command to check that a fragmentation error is
+# received.
+#
+#
+# Asymmetric routing topology
+#
+# This topology represents a customer setup where the issue with icmp errors
+# and VRF route leaking was initially reported. The MTU test isn't done here
+# because of the lack of a return route in the red VRF.
+#
+# blue red
+# .253 +----+ .253
+# +----| r1 |----+
+# | +----+ |
+# +----+ | | +----+
+# | h1 |--------------+ +--------------| h2 |
+# +----+ .1 | | .2 +----+
+# 172.16.1/24 | +----+ | 172.16.2/24
+# 2001:db8:16:1/64 +----| r2 |----+ 2001:db8:16:2/64
+# .254 +----+ .254
+#
+#
+# Route from h1 to h2 goes through r1, incoming vrf blue has a route to the
+# outgoing vrf red for the n2 network but red doesn't have a route back to n1.
+# Route from h2 to h1 goes through r2.
+#
+# The objective is to check that the incoming vrf routing table is selected
+# to send an ICMP error back to the source when the TTL of a packet reaches 1
+# while it is forwarded between different vrfs.
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+DEFAULT_TTYPE=sym
+
+H1_N1=172.16.1.0/24
+H1_N1_6=2001:db8:16:1::/64
+
+H1_N1_IP=172.16.1.1
+R1_N1_IP=172.16.1.253
+R2_N1_IP=172.16.1.254
+
+H1_N1_IP6=2001:db8:16:1::1
+R1_N1_IP6=2001:db8:16:1::253
+R2_N1_IP6=2001:db8:16:1::254
+
+H2_N2=172.16.2.0/24
+H2_N2_6=2001:db8:16:2::/64
+
+H2_N2_IP=172.16.2.2
+R1_N2_IP=172.16.2.253
+R2_N2_IP=172.16.2.254
+
+H2_N2_IP6=2001:db8:16:2::2
+R1_N2_IP6=2001:db8:16:2::253
+R2_N2_IP6=2001:db8:16:2::254
+
+################################################################################
+# helpers
+
+log_section()
+{
+ echo
+ echo "###########################################################################"
+ echo "$*"
+ echo "###########################################################################"
+ echo
+}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ "${rc}" -eq "${expected}" ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read -r a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+run_cmd()
+{
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ # shellcheck disable=SC2086
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
+ echo "$out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+run_cmd_grep()
+{
+ local grep_pattern="$1"
+ shift
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ # shellcheck disable=SC2086
+ out=$(eval $cmd 2>&1)
+ if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
+ echo "$out"
+ fi
+
+ echo "$out" | grep -q "$grep_pattern"
+ rc=$?
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+################################################################################
+# setup and teardown
+
+cleanup()
+{
+ local ns
+
+ for ns in h1 h2 r1 r2; do
+ ip netns del $ns 2>/dev/null
+ done
+}
+
+setup_vrf()
+{
+ local ns=$1
+
+ ip -netns "${ns}" rule del pref 0
+ ip -netns "${ns}" rule add pref 32765 from all lookup local
+ ip -netns "${ns}" -6 rule del pref 0
+ ip -netns "${ns}" -6 rule add pref 32765 from all lookup local
+}
+
+create_vrf()
+{
+ local ns=$1
+ local vrf=$2
+ local table=$3
+
+ ip -netns "${ns}" link add "${vrf}" type vrf table "${table}"
+ ip -netns "${ns}" link set "${vrf}" up
+ ip -netns "${ns}" route add vrf "${vrf}" unreachable default metric 8192
+ ip -netns "${ns}" -6 route add vrf "${vrf}" unreachable default metric 8192
+
+ ip -netns "${ns}" addr add 127.0.0.1/8 dev "${vrf}"
+ ip -netns "${ns}" -6 addr add ::1 dev "${vrf}" nodad
+}
+
+setup_sym()
+{
+ local ns
+
+ # make sure we are starting with a clean slate
+ cleanup
+
+ #
+ # create nodes as namespaces
+ #
+ for ns in h1 h2 r1; do
+ ip netns add $ns
+ ip -netns $ns link set lo up
+
+ case "${ns}" in
+ h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ ;;
+ r1) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ esac
+ done
+
+ #
+ # create interconnects
+ #
+ ip -netns h1 link add eth0 type veth peer name r1h1
+ ip -netns h1 link set r1h1 netns r1 name eth0 up
+
+ ip -netns h2 link add eth0 type veth peer name r1h2
+ ip -netns h2 link set r1h2 netns r1 name eth1 up
+
+ #
+ # h1
+ #
+ ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24
+ ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad
+ ip -netns h1 link set eth0 up
+
+ # h1 to h2 via r1
+ ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0
+ ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0
+
+ #
+ # h2
+ #
+ ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
+ ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+ ip -netns h2 link set eth0 up
+
+ # h2 to h1 via r1
+ ip -netns h2 route add default via ${R1_N2_IP} dev eth0
+ ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0
+
+ #
+ # r1
+ #
+ setup_vrf r1
+ create_vrf r1 blue 1101
+ create_vrf r1 red 1102
+ ip -netns r1 link set mtu 1400 dev eth1
+ ip -netns r1 link set eth0 vrf blue up
+ ip -netns r1 link set eth1 vrf red up
+ ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
+ ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+
+ # Route leak from blue to red
+ ip -netns r1 route add vrf blue ${H2_N2} dev red
+ ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+
+ # Route leak from red to blue
+ ip -netns r1 route add vrf red ${H1_N1} dev blue
+ ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue
+
+
+ # Wait for ip config to settle
+ sleep 2
+}
+
+setup_asym()
+{
+ local ns
+
+ # make sure we are starting with a clean slate
+ cleanup
+
+ #
+ # create nodes as namespaces
+ #
+ for ns in h1 h2 r1 r2; do
+ ip netns add $ns
+ ip -netns $ns link set lo up
+
+ case "${ns}" in
+ h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ ;;
+ r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ esac
+ done
+
+ #
+ # create interconnects
+ #
+ ip -netns h1 link add eth0 type veth peer name r1h1
+ ip -netns h1 link set r1h1 netns r1 name eth0 up
+
+ ip -netns h1 link add eth1 type veth peer name r2h1
+ ip -netns h1 link set r2h1 netns r2 name eth0 up
+
+ ip -netns h2 link add eth0 type veth peer name r1h2
+ ip -netns h2 link set r1h2 netns r1 name eth1 up
+
+ ip -netns h2 link add eth1 type veth peer name r2h2
+ ip -netns h2 link set r2h2 netns r2 name eth1 up
+
+ #
+ # h1
+ #
+ ip -netns h1 link add br0 type bridge
+ ip -netns h1 link set br0 up
+ ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
+ ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+ ip -netns h1 link set eth0 master br0 up
+ ip -netns h1 link set eth1 master br0 up
+
+ # h1 to h2 via r1
+ ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev br0
+ ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0
+
+ #
+ # h2
+ #
+ ip -netns h2 link add br0 type bridge
+ ip -netns h2 link set br0 up
+ ip -netns h2 addr add dev br0 ${H2_N2_IP}/24
+ ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad
+ ip -netns h2 link set eth0 master br0 up
+ ip -netns h2 link set eth1 master br0 up
+
+ # h2 to h1 via r2
+ ip -netns h2 route add default via ${R2_N2_IP} dev br0
+ ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0
+
+ #
+ # r1
+ #
+ setup_vrf r1
+ create_vrf r1 blue 1101
+ create_vrf r1 red 1102
+ ip -netns r1 link set mtu 1400 dev eth1
+ ip -netns r1 link set eth0 vrf blue up
+ ip -netns r1 link set eth1 vrf red up
+ ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
+ ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+
+ # Route leak from blue to red
+ ip -netns r1 route add vrf blue ${H2_N2} dev red
+ ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+
+ # No route leak from red to blue
+
+ #
+ # r2
+ #
+ ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
+ ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+ ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24
+ ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
+
+ # Wait for ip config to settle
+ sleep 2
+}
+
+check_connectivity()
+{
+ ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1
+ log_test $? 0 "Basic IPv4 connectivity"
+ return $?
+}
+
+check_connectivity6()
+{
+ ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
+ log_test $? 0 "Basic IPv6 connectivity"
+ return $?
+}
+
+check_traceroute()
+{
+ if [ ! -x "$(command -v traceroute)" ]; then
+ echo "SKIP: Could not run IPV4 test without traceroute"
+ return 1
+ fi
+}
+
+check_traceroute6()
+{
+ if [ ! -x "$(command -v traceroute6)" ]; then
+ echo "SKIP: Could not run IPV6 test without traceroute6"
+ return 1
+ fi
+}
+
+ipv4_traceroute()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP error route lookup traceroute"
+
+ check_traceroute || return
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP}
+ log_test $? 0 "Traceroute reports a hop on r1"
+}
+
+ipv4_traceroute_asym()
+{
+ ipv4_traceroute asym
+}
+
+ipv6_traceroute()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP error route lookup traceroute"
+
+ check_traceroute6 || return
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6}
+ log_test $? 0 "Traceroute6 reports a hop on r1"
+}
+
+ipv6_traceroute_asym()
+{
+ ipv6_traceroute asym
+}
+
+ipv4_ping_ttl()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP ttl error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP}
+ log_test $? 0 "Ping received ICMP ttl exceeded"
+}
+
+ipv4_ping_ttl_asym()
+{
+ ipv4_ping_ttl asym
+}
+
+ipv4_ping_frag()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP fragmentation error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP}
+ log_test $? 0 "Ping received ICMP Frag needed"
+}
+
+ipv4_ping_frag_asym()
+{
+ ipv4_ping_frag asym
+}
+
+ipv6_ping_ttl()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP ttl error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6}
+ log_test $? 0 "Ping received ICMP Hop limit"
+}
+
+ipv6_ping_ttl_asym()
+{
+ ipv6_ping_ttl asym
+}
+
+ipv6_ping_frag()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP fragmentation error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6}
+ log_test $? 0 "Ping received ICMP Packet too big"
+}
+
+ipv6_ping_frag_asym()
+{
+ ipv6_ping_frag asym
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -4 Run IPv4 tests only
+ -6 Run IPv6 tests only
+ -t TEST Run only TEST
+ -p Pause on fail
+ -v verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+# Some systems don't have a ping6 binary anymore
+command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping)
+
+TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym"
+TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_frag ipv6_ping_ttl_asym ipv6_traceroute_asym"
+
+ret=0
+nsuccess=0
+nfail=0
+
+while getopts :46t:pvh o
+do
+ case $o in
+ 4) TESTS=ipv4;;
+ 6) TESTS=ipv6;;
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=1;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+#
+# show user test config
+#
+if [ -z "$TESTS" ]; then
+ TESTS="$TESTS_IPV4 $TESTS_IPV6"
+elif [ "$TESTS" = "ipv4" ]; then
+ TESTS="$TESTS_IPV4"
+elif [ "$TESTS" = "ipv6" ]; then
+ TESTS="$TESTS_IPV6"
+fi
+
+for t in $TESTS
+do
+ case $t in
+ ipv4_ping_ttl|ping) ipv4_ping_ttl;;&
+ ipv4_ping_ttl_asym|ping) ipv4_ping_ttl_asym;;&
+ ipv4_traceroute|traceroute) ipv4_traceroute;;&
+ ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;&
+ ipv4_ping_frag|ping) ipv4_ping_frag;;&
+
+ ipv6_ping_ttl|ping) ipv6_ping_ttl;;&
+ ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;&
+ ipv6_traceroute|traceroute) ipv6_traceroute;;&
+ ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;&
+ ipv6_ping_frag|ping) ipv6_ping_frag;;&
+
+ # setup namespaces and config, but do not run any tests
+ setup_sym|setup) setup_sym; exit 0;;
+ setup_asym) setup_asym; exit 0;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+cleanup
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n" ${nfail}
+
+exit $ret
diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
new file mode 100755
index 000000000000..18b982d611de
--- /dev/null
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -0,0 +1,396 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test exercises the new VRF strict_mode functionality.
+
+ret=0
+
+# Identifies the "init" network namespace, which is often called the root
+# network namespace.
+INIT_NETNS_NAME="init"
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+ip_expand_args()
+{
+ local nsname=$1
+ local nsarg=""
+
+ if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+ nsarg="-netns ${nsname}"
+ fi
+
+ echo "${nsarg}"
+}
+
+vrf_count()
+{
+ local nsname=$1
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} -o link show type vrf | wc -l
+}
+
+count_vrf_by_table_id()
+{
+ local nsname=$1
+ local tableid=$2
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} -d -o link show type vrf | grep "table ${tableid}" | wc -l
+}
+
+add_vrf()
+{
+ local nsname=$1
+ local vrfname=$2
+ local vrftable=$3
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} link add ${vrfname} type vrf table ${vrftable} &>/dev/null
+}
+
+add_vrf_and_check()
+{
+ local nsname=$1
+ local vrfname=$2
+ local vrftable=$3
+ local cnt
+ local rc
+
+ add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$?
+
+ cnt=$(count_vrf_by_table_id ${nsname} ${vrftable})
+
+ log_test ${rc} 0 "${nsname}: add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}"
+}
+
+add_vrf_and_check_fail()
+{
+ local nsname=$1
+ local vrfname=$2
+ local vrftable=$3
+ local cnt
+ local rc
+
+ add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$?
+
+ cnt=$(count_vrf_by_table_id ${nsname} ${vrftable})
+
+ log_test ${rc} 2 "${nsname}: CANNOT add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}"
+}
+
+del_vrf_and_check()
+{
+ local nsname=$1
+ local vrfname=$2
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} link del ${vrfname}
+ log_test $? 0 "${nsname}: remove vrf ${vrfname}"
+}
+
+config_vrf_and_check()
+{
+ local nsname=$1
+ local addr=$2
+ local vrfname=$3
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} link set dev ${vrfname} up && \
+ ip ${nsarg} addr add ${addr} dev ${vrfname}
+ log_test $? 0 "${nsname}: vrf ${vrfname} up, addr ${addr}"
+}
+
+read_strict_mode()
+{
+ local nsname=$1
+ local rval
+ local rc=0
+ local nsexec=""
+
+ if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+ # a custom network namespace is provided
+ nsexec="ip netns exec ${nsname}"
+ fi
+
+ rval="$(${nsexec} bash -c "cat /proc/sys/net/vrf/strict_mode" | \
+ grep -E "^[0-1]$")" &> /dev/null
+ if [ $? -ne 0 ]; then
+ # set errors
+ rval=255
+ rc=1
+ fi
+
+ # on success, rval can be only 0 or 1; on error, rval is equal to 255
+ echo ${rval}
+ return ${rc}
+}
+
+read_strict_mode_compare_and_check()
+{
+ local nsname=$1
+ local expected=$2
+ local res
+
+ res="$(read_strict_mode ${nsname})"
+ log_test ${res} ${expected} "${nsname}: check strict_mode=${res}"
+}
+
+set_strict_mode()
+{
+ local nsname=$1
+ local val=$2
+ local nsexec=""
+
+ if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+ # a custom network namespace is provided
+ nsexec="ip netns exec ${nsname}"
+ fi
+
+ ${nsexec} bash -c "echo ${val} >/proc/sys/net/vrf/strict_mode" &>/dev/null
+}
+
+enable_strict_mode()
+{
+ local nsname=$1
+
+ set_strict_mode ${nsname} 1
+}
+
+disable_strict_mode()
+{
+ local nsname=$1
+
+ set_strict_mode ${nsname} 0
+}
+
+disable_strict_mode_and_check()
+{
+ local nsname=$1
+
+ disable_strict_mode ${nsname}
+ log_test $? 0 "${nsname}: disable strict_mode (=0)"
+}
+
+enable_strict_mode_and_check()
+{
+ local nsname=$1
+
+ enable_strict_mode ${nsname}
+ log_test $? 0 "${nsname}: enable strict_mode (=1)"
+}
+
+enable_strict_mode_and_check_fail()
+{
+ local nsname=$1
+
+ enable_strict_mode ${nsname}
+ log_test $? 1 "${nsname}: CANNOT enable strict_mode"
+}
+
+strict_mode_check_default()
+{
+ local nsname=$1
+ local strictmode
+ local vrfcnt
+
+ vrfcnt=$(vrf_count ${nsname})
+ strictmode=$(read_strict_mode ${nsname})
+ log_test ${strictmode} 0 "${nsname}: strict_mode=0 by default, ${vrfcnt} vrfs"
+}
+
+setup()
+{
+ modprobe vrf
+
+ ip netns add testns
+ ip netns exec testns ip link set lo up
+}
+
+cleanup()
+{
+ ip netns del testns 2>/dev/null
+
+ ip link del vrf100 2>/dev/null
+ ip link del vrf101 2>/dev/null
+ ip link del vrf102 2>/dev/null
+
+ echo 0 >/proc/sys/net/vrf/strict_mode 2>/dev/null
+}
+
+vrf_strict_mode_tests_init()
+{
+ vrf_strict_mode_check_support init
+
+ strict_mode_check_default init
+
+ add_vrf_and_check init vrf100 100
+ config_vrf_and_check init 172.16.100.1/24 vrf100
+
+ enable_strict_mode_and_check init
+
+ add_vrf_and_check_fail init vrf101 100
+
+ disable_strict_mode_and_check init
+
+ add_vrf_and_check init vrf101 100
+ config_vrf_and_check init 172.16.101.1/24 vrf101
+
+ enable_strict_mode_and_check_fail init
+
+ del_vrf_and_check init vrf101
+
+ enable_strict_mode_and_check init
+
+ add_vrf_and_check init vrf102 102
+ config_vrf_and_check init 172.16.102.1/24 vrf102
+
+ # the strict_mode is enabled in the init netns
+}
+
+vrf_strict_mode_tests_testns()
+{
+ vrf_strict_mode_check_support testns
+
+ strict_mode_check_default testns
+
+ enable_strict_mode_and_check testns
+
+ add_vrf_and_check testns vrf100 100
+ config_vrf_and_check testns 10.0.100.1/24 vrf100
+
+ add_vrf_and_check_fail testns vrf101 100
+
+ add_vrf_and_check_fail testns vrf102 100
+
+ add_vrf_and_check testns vrf200 200
+
+ disable_strict_mode_and_check testns
+
+ add_vrf_and_check testns vrf101 100
+
+ add_vrf_and_check testns vrf102 100
+
+ # the strict_mode is disabled in the testns
+}
+
+vrf_strict_mode_tests_mix()
+{
+ read_strict_mode_compare_and_check init 1
+
+ read_strict_mode_compare_and_check testns 0
+
+ del_vrf_and_check testns vrf101
+
+ del_vrf_and_check testns vrf102
+
+ disable_strict_mode_and_check init
+
+ enable_strict_mode_and_check testns
+
+ enable_strict_mode_and_check init
+ enable_strict_mode_and_check init
+
+ disable_strict_mode_and_check testns
+ disable_strict_mode_and_check testns
+
+ read_strict_mode_compare_and_check init 1
+
+ read_strict_mode_compare_and_check testns 0
+}
+
+vrf_strict_mode_tests()
+{
+ log_section "VRF strict_mode test on init network namespace"
+ vrf_strict_mode_tests_init
+
+ log_section "VRF strict_mode test on testns network namespace"
+ vrf_strict_mode_tests_testns
+
+ log_section "VRF strict_mode test mixing init and testns network namespaces"
+ vrf_strict_mode_tests_mix
+}
+
+vrf_strict_mode_check_support()
+{
+ local nsname=$1
+ local output
+ local rc
+
+ output="$(lsmod | grep '^vrf' | awk '{print $1}')"
+ if [ -z "${output}" ]; then
+ modinfo vrf || return $?
+ fi
+
+ # we do not care about the value of the strict_mode; we only check if
+ # the strict_mode parameter is available or not.
+ read_strict_mode ${nsname} &>/dev/null; rc=$?
+ log_test ${rc} 0 "${nsname}: net.vrf.strict_mode is available"
+
+ return ${rc}
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit 0
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit 0
+fi
+
+cleanup &> /dev/null
+
+setup
+vrf_strict_mode_tests
+cleanup
+
+print_log_test_results
+
+exit $ret
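The helpers above (add_vrf_and_check, add_vrf_and_check_fail, enable_strict_mode_and_check_fail) automate the core strict_mode contract: once net.vrf.strict_mode is 1 in a namespace, no two VRF devices there may share a routing table id, and the mode cannot be turned on while such duplicates already exist. A minimal manual sketch of the same behaviour, assuming the vrf module is loaded; the namespace and device names below are only examples:

# scratch namespace; strict_mode is a per-network-namespace setting
ip netns add vrf-demo
ip netns exec vrf-demo sysctl -q -w net.vrf.strict_mode=1

ip -netns vrf-demo link add vrf-a type vrf table 100   # first user of table 100: accepted
ip -netns vrf-demo link add vrf-b type vrf table 100   # same table id: rejected under strict_mode
ip -netns vrf-demo link add vrf-b type vrf table 200   # distinct table id: accepted

ip netns del vrf-demo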
diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore
new file mode 100644
index 000000000000..8448f74adfec
--- /dev/null
+++ b/tools/testing/selftests/netfilter/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+nf-queue
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 9c0f758310fe..a374e10ef506 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -3,8 +3,8 @@
TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
- nft_concat_range.sh \
- nft_queue.sh
+ nft_concat_range.sh nft_conntrack_helper.sh \
+ nft_queue.sh nft_meta.sh
LDLIBS = -lmnl
TEST_GEN_FILES = nf-queue
diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/netfilter/nf-queue.c
index 29c73bce38fa..9e56b9d47037 100644
--- a/tools/testing/selftests/netfilter/nf-queue.c
+++ b/tools/testing/selftests/netfilter/nf-queue.c
@@ -17,9 +17,12 @@
struct options {
bool count_packets;
+ bool gso_enabled;
int verbose;
unsigned int queue_num;
unsigned int timeout;
+ uint32_t verdict;
+ uint32_t delay_ms;
};
static unsigned int queue_stats[5];
@@ -27,7 +30,7 @@ static struct options opts;
static void help(const char *p)
{
- printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p);
+ printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
}
static int parse_attr_cb(const struct nlattr *attr, void *data)
@@ -162,7 +165,7 @@ nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
}
static struct nlmsghdr *
-nfq_build_verdict(char *buf, int id, int queue_num, int verd)
+nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd)
{
struct nfqnl_msg_verdict_hdr vh = {
.verdict = htonl(verd),
@@ -189,9 +192,6 @@ static void print_stats(void)
unsigned int last, total;
int i;
- if (!opts.count_packets)
- return;
-
total = 0;
last = queue_stats[0];
@@ -234,7 +234,8 @@ struct mnl_socket *open_queue(void)
nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
- flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID;
+ flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
+ flags |= NFQA_CFG_F_UID_GID;
mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
@@ -255,6 +256,17 @@ struct mnl_socket *open_queue(void)
return nl;
}
+static void sleep_ms(uint32_t delay)
+{
+ struct timespec ts = { .tv_sec = delay / 1000 };
+
+ delay %= 1000;
+
+ ts.tv_nsec = delay * 1000llu * 1000llu;
+
+ nanosleep(&ts, NULL);
+}
+
static int mainloop(void)
{
unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
@@ -278,7 +290,7 @@ static int mainloop(void)
ret = mnl_socket_recvfrom(nl, buf, buflen);
if (ret == -1) {
- if (errno == ENOBUFS)
+ if (errno == ENOBUFS || errno == EINTR)
continue;
if (errno == EAGAIN) {
@@ -298,7 +310,10 @@ static int mainloop(void)
}
id = ret - MNL_CB_OK;
- nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT);
+ if (opts.delay_ms)
+ sleep_ms(opts.delay_ms);
+
+ nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict);
if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
perror("mnl_socket_sendto");
exit(EXIT_FAILURE);
@@ -314,7 +329,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "chvt:q:")) != -1) {
+ while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
switch (c) {
case 'c':
opts.count_packets = true;
@@ -328,20 +343,48 @@ static void parse_opts(int argc, char **argv)
if (opts.queue_num > 0xffff)
opts.queue_num = 0;
break;
+ case 'Q':
+ opts.verdict = atoi(optarg);
+ if (opts.verdict > 0xffff) {
+ fprintf(stderr, "Expected destination queue number\n");
+ exit(1);
+ }
+
+ opts.verdict <<= 16;
+ opts.verdict |= NF_QUEUE;
+ break;
+ case 'd':
+ opts.delay_ms = atoi(optarg);
+ if (opts.delay_ms == 0) {
+ fprintf(stderr, "Expected nonzero delay (in milliseconds)\n");
+ exit(1);
+ }
+ break;
case 't':
opts.timeout = atoi(optarg);
break;
+ case 'G':
+ opts.gso_enabled = false;
+ break;
case 'v':
opts.verbose++;
break;
}
}
+
+ if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) {
+ fprintf(stderr, "Cannot use same destination and source queue\n");
+ exit(1);
+ }
}
int main(int argc, char *argv[])
{
int ret;
+ opts.verdict = NF_ACCEPT;
+ opts.gso_enabled = true;
+
parse_opts(argc, argv);
ret = mainloop();
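The new nf-queue options map directly onto the nfqueue netlink interface: -Q places the destination queue number in the upper 16 bits of the verdict and ORs in NF_QUEUE, so the kernel re-queues the packet instead of accepting it; -d sleeps the given number of milliseconds before issuing each verdict; -G drops NFQA_CFG_F_GSO from the config flags, so GSO'd super-packets are no longer delivered to userspace as-is. A hedged usage sketch chaining two instances; the queue numbers, timeout and log file names are only examples, and an nft rule queueing traffic to queue num 0 is assumed to exist:

# instance B: terminal hop, accepts whatever lands on queue 1
./nf-queue -c -q 1 -t 4 > final.log &
# instance A: reads queue 0, waits 150 ms per packet, then re-queues to queue 1
./nf-queue -c -q 0 -Q 1 -d 150 -G -t 4 > requeue.log &
wait
# both instances should have seen the same per-queue packet counts
diff -u requeue.log final.log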
diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
new file mode 100755
index 000000000000..edf0a48da6bf
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+#
+# This tests connection tracking helper assignment:
+# 1. can attach ftp helper to a connection from nft ruleset.
+# 2. auto-assign still works.
+#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+testipv6=1
+
+cleanup()
+{
+ ip netns del ${ns1}
+ ip netns del ${ns2}
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+conntrack -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without conntrack tool"
+ exit $ksft_skip
+fi
+
+which nc >/dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without netcat tool"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+ip netns add ${ns1}
+ip netns add ${ns2}
+
+ip link add veth0 netns ${ns1} type veth peer name veth0 netns ${ns2} > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+
+ip -net ${ns1} link set lo up
+ip -net ${ns1} link set veth0 up
+
+ip -net ${ns2} link set lo up
+ip -net ${ns2} link set veth0 up
+
+ip -net ${ns1} addr add 10.0.1.1/24 dev veth0
+ip -net ${ns1} addr add dead:1::1/64 dev veth0
+
+ip -net ${ns2} addr add 10.0.1.2/24 dev veth0
+ip -net ${ns2} addr add dead:1::2/64 dev veth0
+
+load_ruleset_family() {
+ local family=$1
+ local ns=$2
+
+ip netns exec ${ns} nft -f - <<EOF
+table $family raw {
+ ct helper ftp {
+ type "ftp" protocol tcp
+ }
+ chain pre {
+ type filter hook prerouting priority 0; policy accept;
+ tcp dport 2121 ct helper set "ftp"
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ tcp dport 2121 ct helper set "ftp"
+ }
+}
+EOF
+ return $?
+}
+
+check_for_helper()
+{
+ local netns=$1
+ local message=$2
+ local port=$3
+
+ ip netns exec ${netns} conntrack -L -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp'
+ if [ $? -ne 0 ] ; then
+ echo "FAIL: ${netns} did not show attached helper $message" 1>&2
+ ret=1
+ return 1
+ fi
+
+ echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
+ return 0
+}
+
+test_helper()
+{
+ local port=$1
+ local msg=$2
+
+ sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null &
+
+ sleep 1
+ sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null &
+
+ check_for_helper "$ns1" "ip $msg" $port
+ check_for_helper "$ns2" "ip $msg" $port
+
+ wait
+
+ if [ $testipv6 -eq 0 ] ;then
+ return 0
+ fi
+
+ ip netns exec ${ns1} conntrack -F 2> /dev/null
+ ip netns exec ${ns2} conntrack -F 2> /dev/null
+
+ sleep 3 | ip netns exec ${ns2} nc -w 2 -6 -l -p $port > /dev/null &
+
+ sleep 1
+ sleep 1 | ip netns exec ${ns1} nc -w 2 -6 dead:1::2 $port > /dev/null &
+
+ check_for_helper "$ns1" "ipv6 $msg" $port
+ check_for_helper "$ns2" "ipv6 $msg" $port
+
+ wait
+}
+
+load_ruleset_family ip ${ns1}
+if [ $? -ne 0 ];then
+ echo "FAIL: ${ns1} cannot load ip ruleset" 1>&2
+ exit 1
+fi
+
+load_ruleset_family ip6 ${ns1}
+if [ $? -ne 0 ];then
+ echo "SKIP: ${ns1} cannot load ip6 ruleset" 1>&2
+ testipv6=0
+fi
+
+load_ruleset_family inet ${ns2}
+if [ $? -ne 0 ];then
+ echo "SKIP: ${ns1} cannot load inet ruleset" 1>&2
+ load_ruleset_family ip ${ns2}
+ if [ $? -ne 0 ];then
+ echo "FAIL: ${ns2} cannot load ip ruleset" 1>&2
+ exit 1
+ fi
+
+ if [ $testipv6 -eq 1 ] ;then
+ load_ruleset_family ip6 ${ns2}
+ if [ $? -ne 0 ];then
+ echo "FAIL: ${ns2} cannot load ip6 ruleset" 1>&2
+ exit 1
+ fi
+ fi
+fi
+
+test_helper 2121 "set via ruleset"
+ip netns exec ${ns1} sysctl -q 'net.netfilter.nf_conntrack_helper=1'
+ip netns exec ${ns2} sysctl -q 'net.netfilter.nf_conntrack_helper=1'
+test_helper 21 "auto-assign"
+
+exit $ret
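Two assignment paths are covered above: the explicit "ct helper set" rule on port 2121, and the legacy sysctl-driven auto-assignment on port 21. A minimal sketch of the sysctl path, assuming "$ns" names an existing namespace and that a TCP connection to port 21 is established after the toggle:

# legacy auto-assignment; only connections created after the toggle get a helper
ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_helper=1
# ... establish a TCP connection to port 21 inside $ns here ...
ip netns exec "$ns" conntrack -L -p tcp --dport 21 2>/dev/null | grep -q 'helper=ftp' &&
        echo "ftp helper attached" || echo "no helper attached"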
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index d3e0809ab368..431296c0f91c 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -2,13 +2,18 @@
# SPDX-License-Identifier: GPL-2.0
#
# This tests basic flowtable functionality.
-# Creates following topology:
+# Creates following default topology:
#
# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
# Router1 is the one doing flow offloading, Router2 has no special
# purpose other than having a link that is smaller than either Originator
# and responder, i.e. TCPMSS announced values are too large and will still
# result in fragmentation and/or PMTU discovery.
+#
+# You can check with different Originator/Link/Responder MTUs, e.g.:
+# nft_flowtable.sh -o8000 -l1500 -r2000
+#
+
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
@@ -21,29 +26,17 @@ ns2out=""
log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-which nc > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nc (netcat)"
- exit $ksft_skip
-fi
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
-ip netns add nsr1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace"
- exit $ksft_skip
-fi
+checktool "nft --version" "run test without nft tool"
+checktool "ip -Version" "run test without ip tool"
+checktool "which nc" "run test without nc (netcat)"
+checktool "ip netns add nsr1" "create net namespace"
ip netns add ns1
ip netns add ns2
@@ -89,11 +82,41 @@ ip -net nsr2 addr add dead:2::1/64 dev veth1
# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
# is NOT the lowest link mtu.
-ip -net nsr1 link set veth0 mtu 9000
-ip -net ns1 link set eth0 mtu 9000
+omtu=9000
+lmtu=1500
+rmtu=2000
+
+usage(){
+ echo "nft_flowtable.sh [OPTIONS]"
+ echo
+ echo "MTU options"
+ echo " -o originator"
+ echo " -l link"
+ echo " -r responder"
+ exit 1
+}
+
+while getopts "o:l:r:" o
+do
+ case $o in
+ o) omtu=$OPTARG;;
+ l) lmtu=$OPTARG;;
+ r) rmtu=$OPTARG;;
+ *) usage;;
+ esac
+done
+
+if ! ip -net nsr1 link set veth0 mtu $omtu; then
+ exit 1
+fi
+
+ip -net ns1 link set eth0 mtu $omtu
+
+if ! ip -net nsr2 link set veth1 mtu $rmtu; then
+ exit 1
+fi
-ip -net nsr2 link set veth1 mtu 2000
-ip -net ns2 link set eth0 mtu 2000
+ip -net ns2 link set eth0 mtu $rmtu
# transfer-net between nsr1 and nsr2.
# these addresses are not used for connections.
@@ -113,7 +136,10 @@ for i in 1 2; do
ip -net ns$i route add default via 10.0.$i.1
ip -net ns$i addr add dead:$i::99/64 dev eth0
ip -net ns$i route add default via dead:$i::1
- ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null
+ if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
+ echo "ERROR: Check Originator/Responder values (problem during address addition)"
+ exit 1
+ fi
# don't set ip DF bit for first two tests
ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
@@ -147,7 +173,7 @@ table inet filter {
# as PMTUd is off.
# This rule is deleted for the last test, when we expect PMTUd
# to kick in and ensure all packets meet mtu requirements.
- meta length gt 1500 accept comment something-to-grep-for
+ meta length gt $lmtu accept comment something-to-grep-for
# next line blocks connection w.o. working offload.
# we only do this for reverse dir, because we expect packets to
@@ -171,15 +197,13 @@ if [ $? -ne 0 ]; then
fi
# test basic connectivity
-ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null
-if [ $? -ne 0 ];then
+if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
echo "ERROR: ns1 cannot reach ns2" 1>&2
bash
exit 1
fi
-ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null
-if [ $? -ne 0 ];then
+if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
echo "ERROR: ns2 cannot reach ns1" 1>&2
exit 1
fi
@@ -196,7 +220,6 @@ ns2out=$(mktemp)
make_file()
{
name=$1
- who=$2
SIZE=$((RANDOM % (1024 * 8)))
TSIZE=$((SIZE * 1024))
@@ -215,8 +238,7 @@ check_transfer()
out=$2
what=$3
- cmp "$in" "$out" > /dev/null 2>&1
- if [ $? -ne 0 ] ;then
+ if ! cmp "$in" "$out" > /dev/null 2>&1; then
echo "FAIL: file mismatch for $what" 1>&2
ls -l "$in"
ls -l "$out"
@@ -243,17 +265,21 @@ test_tcp_forwarding_ip()
sleep 3
- kill $lpid
- kill $cpid
+ if ps -p $lpid > /dev/null;then
+ kill $lpid
+ fi
+
+ if ps -p $cpid > /dev/null;then
+ kill $cpid
+ fi
+
wait
- check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"
- if [ $? -ne 0 ];then
+ if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then
lret=1
fi
- check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"
- if [ $? -ne 0 ];then
+ if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then
lret=1
fi
@@ -282,13 +308,12 @@ test_tcp_forwarding_nat()
return $lret
}
-make_file "$ns1in" "ns1"
-make_file "$ns2in" "ns2"
+make_file "$ns1in"
+make_file "$ns2in"
# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
-test_tcp_forwarding ns1 ns2
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding ns1 ns2; then
echo "PASS: flow offloaded for ns1/ns2"
else
echo "FAIL: flow offload for ns1/ns2:" 1>&2
@@ -319,9 +344,7 @@ table ip nat {
}
EOF
-test_tcp_forwarding_nat ns1 ns2
-
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding_nat ns1 ns2; then
echo "PASS: flow offloaded for ns1/ns2 with NAT"
else
echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
@@ -333,8 +356,7 @@ fi
# Same as second test, but with PMTU discovery enabled.
handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
-ip netns exec nsr1 nft delete rule inet filter forward $handle
-if [ $? -ne 0 ] ;then
+if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
echo "FAIL: Could not delete large-packet accept rule"
exit 1
fi
@@ -342,8 +364,7 @@ fi
ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
-test_tcp_forwarding_nat ns1 ns2
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding_nat ns1 ns2; then
echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
else
echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
@@ -389,8 +410,7 @@ ip -net ns2 route del 192.168.10.1 via 10.0.2.1
ip -net ns2 route add default via 10.0.2.1
ip -net ns2 route add default via dead:2::1
-test_tcp_forwarding ns1 ns2
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding ns1 ns2; then
echo "PASS: ipsec tunnel mode for ns1/ns2"
else
echo "FAIL: ipsec tunnel mode for ns1/ns2"
diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh
new file mode 100755
index 000000000000..087f0e6e71ce
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_meta.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+# check iif/iifname/iifgroup/iiftype matches and their oif counterparts.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+
+if ! nft --version > /dev/null 2>&1; then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+cleanup()
+{
+ ip netns del "$ns0"
+}
+
+ip netns add "$ns0"
+ip -net "$ns0" link set lo up
+ip -net "$ns0" addr add 127.0.0.1 dev lo
+
+trap cleanup EXIT
+
+currentyear=$(date +%G)
+lastyear=$((currentyear-1))
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table inet filter {
+ counter iifcount {}
+ counter iifnamecount {}
+ counter iifgroupcount {}
+ counter iiftypecount {}
+ counter infproto4count {}
+ counter il4protocounter {}
+ counter imarkcounter {}
+ counter icpu0counter {}
+ counter ilastyearcounter {}
+ counter icurrentyearcounter {}
+
+ counter oifcount {}
+ counter oifnamecount {}
+ counter oifgroupcount {}
+ counter oiftypecount {}
+ counter onfproto4count {}
+ counter ol4protocounter {}
+ counter oskuidcounter {}
+ counter oskgidcounter {}
+ counter omarkcounter {}
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+
+ meta iif lo counter name "iifcount"
+ meta iifname "lo" counter name "iifnamecount"
+ meta iifgroup "default" counter name "iifgroupcount"
+ meta iiftype "loopback" counter name "iiftypecount"
+ meta nfproto ipv4 counter name "infproto4count"
+ meta l4proto icmp counter name "il4protocounter"
+ meta mark 42 counter name "imarkcounter"
+ meta cpu 0 counter name "icpu0counter"
+ meta time "$lastyear-01-01" - "$lastyear-12-31" counter name ilastyearcounter
+ meta time "$currentyear-01-01" - "$currentyear-12-31" counter name icurrentyearcounter
+ }
+
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta oif lo counter name "oifcount" counter
+ meta oifname "lo" counter name "oifnamecount"
+ meta oifgroup "default" counter name "oifgroupcount"
+ meta oiftype "loopback" counter name "oiftypecount"
+ meta nfproto ipv4 counter name "onfproto4count"
+ meta l4proto icmp counter name "ol4protocounter"
+ meta skuid 0 counter name "oskuidcounter"
+ meta skgid 0 counter name "oskgidcounter"
+ meta mark 42 counter name "omarkcounter"
+ }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add test ruleset"
+ exit $ksft_skip
+fi
+
+ret=0
+
+check_one_counter()
+{
+ local cname="$1"
+ local want="packets $2"
+ local verbose="$3"
+
+ if ! ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want"; then
+ echo "FAIL: $cname, want \"$want\", got"
+ ret=1
+ ip netns exec "$ns0" nft list counter inet filter $cname
+ fi
+}
+
+check_lo_counters()
+{
+ local want="$1"
+ local verbose="$2"
+ local counter
+
+ for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \
+ oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \
+ il4protocounter icurrentyearcounter ol4protocounter \
+ ; do
+ check_one_counter "$counter" "$want" "$verbose"
+ done
+}
+
+check_lo_counters "0" false
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 -m 42 > /dev/null
+
+check_lo_counters "2" true
+
+check_one_counter oskuidcounter "1" true
+check_one_counter oskgidcounter "1" true
+check_one_counter imarkcounter "1" true
+check_one_counter omarkcounter "1" true
+check_one_counter ilastyearcounter "0" true
+
+if [ $ret -eq 0 ];then
+ echo "OK: nftables meta iif/oif counters at expected values"
+else
+ exit $ret
+fi
+
+# pin this shell to the first CPU and check the cpu meta counter
+taskset -p 01 $$ > /dev/null
+ip netns exec "$ns0" nft reset counters > /dev/null
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null
+check_one_counter icpu0counter "2" true
+
+if [ $ret -eq 0 ];then
+ echo "OK: nftables meta cpu counter at expected values"
+fi
+
+exit $ret
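check_one_counter above greps the "packets N" field from nft's counter listing; the same check can be done by hand. A minimal sketch reusing the table and counter names from the ruleset above (the namespace name is only an example):

# reset all counters, generate one ICMP round trip over loopback,
# then read back a single named counter
ip netns exec demo-ns nft reset counters >/dev/null
ip netns exec demo-ns ping -q -c 1 127.0.0.1 >/dev/null
ip netns exec demo-ns nft list counter inet filter iifcount
# loopback traffic should show up as something like: packets 2 bytes 168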
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
index 6898448b4266..3d202b90b33d 100755
--- a/tools/testing/selftests/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/netfilter/nft_queue.sh
@@ -12,6 +12,7 @@ sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx"
ns2="ns2-$sfx"
nsrouter="nsrouter-$sfx"
+timeout=4
cleanup()
{
@@ -20,6 +21,7 @@ cleanup()
ip netns del ${nsrouter}
rm -f "$TMPFILE0"
rm -f "$TMPFILE1"
+ rm -f "$TMPFILE2" "$TMPFILE3"
}
nft --version > /dev/null 2>&1
@@ -42,6 +44,8 @@ fi
TMPFILE0=$(mktemp)
TMPFILE1=$(mktemp)
+TMPFILE2=$(mktemp)
+TMPFILE3=$(mktemp)
trap cleanup EXIT
ip netns add ${ns1}
@@ -83,7 +87,7 @@ load_ruleset() {
local name=$1
local prio=$2
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet $name {
chain nfq {
ip protocol icmp queue bypass
@@ -118,7 +122,7 @@ EOF
load_counter_ruleset() {
local prio=$1
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet countrules {
chain pre {
type filter hook prerouting priority $prio; policy accept;
@@ -175,7 +179,7 @@ test_ping_router() {
test_queue_blackhole() {
local proto=$1
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table $proto blackh {
chain forward {
type filter hook forward priority 0; policy accept;
@@ -184,10 +188,10 @@ table $proto blackh {
}
EOF
if [ $proto = "ip" ] ;then
- ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
+ ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
lret=$?
elif [ $proto = "ip6" ]; then
- ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
+ ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null
lret=$?
else
lret=111
@@ -214,8 +218,8 @@ test_queue()
local last=""
# spawn nf-queue listeners
- ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" &
- ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" &
+ ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" &
+ ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" &
sleep 1
test_ping
ret=$?
@@ -250,11 +254,11 @@ test_queue()
test_tcp_forward()
{
- ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 &
+ ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout &
local nfqpid=$!
tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
@@ -270,15 +274,13 @@ test_tcp_forward()
test_tcp_localhost()
{
- tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1%
-
tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
- ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 &
+ ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
local nfqpid=$!
sleep 1
@@ -287,6 +289,47 @@ test_tcp_localhost()
wait $rpid
[ $? -eq 0 ] && echo "PASS: tcp via loopback"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_requeue()
+{
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+}
+EOF
+ tmpfile=$(mktemp) || exit 1
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
+ ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
+ local rpid=$!
+
+ ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" &
+
+ # This nf-queue instance is hit first via the output hook (queue 0).
+ # It re-queues every packet to the instance listening on queue 1.
+ ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" &
+
+ sleep 1
+ ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
+ rm -f "$tmpfile"
+
+ wait
+
+ if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
+ echo "FAIL: lost packets during requeue?!" 1>&2
+ return
+ fi
+
+ echo "PASS: tcp via loopback and re-queueing"
}
ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
@@ -328,5 +371,6 @@ test_queue 20
test_tcp_forward
test_tcp_localhost
+test_tcp_localhost_requeue
exit $ret
diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index 9c60337317c6..020137b61407 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -241,7 +241,7 @@ function get_files_count()
split_remote $LOC
if [[ "$REMOTE" == "" ]]; then
- echo $(ls -1 "$LOC"/${NAME}* 2>/dev/null | wc -l)
+ echo $(ls -1 "$VPATH"/${NAME}* 2>/dev/null | wc -l)
else
echo $(ssh "$REMOTE" "ls -1 \"$VPATH\"/${NAME}* | \
wc -l" 2> /dev/null)
diff --git a/tools/testing/selftests/pid_namespace/regression_enomem.c b/tools/testing/selftests/pid_namespace/regression_enomem.c
index 73d532556d17..7d84097ad45c 100644
--- a/tools/testing/selftests/pid_namespace/regression_enomem.c
+++ b/tools/testing/selftests/pid_namespace/regression_enomem.c
@@ -11,7 +11,6 @@
#include <syscall.h>
#include <sys/wait.h>
-#include "../kselftest.h"
#include "../kselftest_harness.h"
#include "../pidfd/pidfd.h"
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index 2d4db5afb142..973198a3ec3d 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -5,3 +5,4 @@ pidfd_test
pidfd_wait
pidfd_fdinfo_test
pidfd_getfd_test
+pidfd_setns_test
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
index 75a545861375..f4a2f28f926b 100644
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
CFLAGS += -g -I../../../../usr/include/ -pthread
-TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test pidfd_poll_test pidfd_wait pidfd_getfd_test
+TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
+ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test
include ../lib.mk
diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config
new file mode 100644
index 000000000000..bb11de90c0c9
--- /dev/null
+++ b/tools/testing/selftests/pidfd/config
@@ -0,0 +1,6 @@
+CONFIG_UTS_NS=y
+CONFIG_IPC_NS=y
+CONFIG_USER_NS=y
+CONFIG_PID_NS=y
+CONFIG_NET_NS=y
+CONFIG_CGROUPS=y
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index c1921a53dbed..01f8d3c0cf2c 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -22,6 +22,10 @@
#define P_PIDFD 3
#endif
+#ifndef CLONE_NEWTIME
+#define CLONE_NEWTIME 0x00000080
+#endif
+
#ifndef CLONE_PIDFD
#define CLONE_PIDFD 0x00001000
#endif
@@ -42,6 +46,10 @@
#define __NR_pidfd_getfd -1
#endif
+#ifndef PIDFD_NONBLOCK
+#define PIDFD_NONBLOCK O_NONBLOCK
+#endif
+
/*
* The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c
* That means, when it wraps around any pid < 300 will be skipped.
@@ -95,4 +103,9 @@ static inline int sys_pidfd_getfd(int pidfd, int fd, int flags)
return syscall(__NR_pidfd_getfd, pidfd, fd, flags);
}
+static inline int sys_memfd_create(const char *name, unsigned int flags)
+{
+ return syscall(__NR_memfd_create, name, flags);
+}
+
#endif /* __PIDFD_H */
diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
index 401a7c1d0312..7758c98be015 100644
--- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
@@ -18,7 +18,6 @@
#include <linux/kcmp.h>
#include "pidfd.h"
-#include "../kselftest.h"
#include "../kselftest_harness.h"
/*
@@ -34,11 +33,6 @@ static int sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1,
return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2);
}
-static int sys_memfd_create(const char *name, unsigned int flags)
-{
- return syscall(__NR_memfd_create, name, flags);
-}
-
static int __child(int sk, int memfd)
{
int ret;
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
new file mode 100644
index 000000000000..1f085b922c6e
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -0,0 +1,560 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <linux/kcmp.h>
+
+#include "pidfd.h"
+#include "../clone3/clone3_selftests.h"
+#include "../kselftest_harness.h"
+
+enum {
+ PIDFD_NS_USER,
+ PIDFD_NS_MNT,
+ PIDFD_NS_PID,
+ PIDFD_NS_UTS,
+ PIDFD_NS_IPC,
+ PIDFD_NS_NET,
+ PIDFD_NS_CGROUP,
+ PIDFD_NS_PIDCLD,
+ PIDFD_NS_TIME,
+ PIDFD_NS_MAX
+};
+
+const struct ns_info {
+ const char *name;
+ int flag;
+} ns_info[] = {
+ [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, },
+ [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, },
+ [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, },
+ [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, },
+ [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, },
+ [PIDFD_NS_NET] = { "net", CLONE_NEWNET, },
+ [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, },
+ [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, },
+ [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, },
+};
+
+FIXTURE(current_nsset)
+{
+ pid_t pid;
+ int pidfd;
+ int nsfds[PIDFD_NS_MAX];
+
+ pid_t child_pid_exited;
+ int child_pidfd_exited;
+
+ pid_t child_pid1;
+ int child_pidfd1;
+ int child_nsfds1[PIDFD_NS_MAX];
+
+ pid_t child_pid2;
+ int child_pidfd2;
+ int child_nsfds2[PIDFD_NS_MAX];
+};
+
+static int sys_waitid(int which, pid_t pid, int options)
+{
+ return syscall(__NR_waitid, which, pid, NULL, options, NULL);
+}
+
+pid_t create_child(int *pidfd, unsigned flags)
+{
+ struct __clone_args args = {
+ .flags = CLONE_PIDFD | flags,
+ .exit_signal = SIGCHLD,
+ .pidfd = ptr_to_u64(pidfd),
+ };
+
+ return sys_clone3(&args, sizeof(struct clone_args));
+}
+
+static bool switch_timens(void)
+{
+ int fd, ret;
+
+ if (unshare(CLONE_NEWTIME))
+ return false;
+
+ fd = open("/proc/self/ns/time_for_children", O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return false;
+
+ ret = setns(fd, CLONE_NEWTIME);
+ close(fd);
+ return ret == 0;
+}
+
+static ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = read(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+FIXTURE_SETUP(current_nsset)
+{
+ int i, proc_fd, ret;
+ int ipc_sockets[2];
+ char c;
+
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ self->nsfds[i] = -EBADF;
+ self->child_nsfds1[i] = -EBADF;
+ self->child_nsfds2[i] = -EBADF;
+ }
+
+ proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
+ ASSERT_GE(proc_fd, 0) {
+ TH_LOG("%m - Failed to open /proc/self/ns");
+ }
+
+ self->pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
+ if (self->nsfds[i] < 0) {
+ EXPECT_EQ(errno, ENOENT) {
+ TH_LOG("%m - Failed to open %s namespace for process %d",
+ info->name, self->pid);
+ }
+ }
+ }
+
+ self->pidfd = sys_pidfd_open(self->pid, 0);
+ EXPECT_GT(self->pidfd, 0) {
+ TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
+ }
+
+ /* Create task that exits right away. */
+ self->child_pid_exited = create_child(&self->child_pidfd_exited,
+ CLONE_NEWUSER | CLONE_NEWNET);
+ EXPECT_GT(self->child_pid_exited, 0);
+
+ if (self->child_pid_exited == 0)
+ _exit(EXIT_SUCCESS);
+
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
+
+ self->pidfd = sys_pidfd_open(self->pid, 0);
+ EXPECT_GE(self->pidfd, 0) {
+ TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
+ }
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ /* Create tasks that will be stopped. */
+ self->child_pid1 = create_child(&self->child_pidfd1,
+ CLONE_NEWUSER | CLONE_NEWNS |
+ CLONE_NEWCGROUP | CLONE_NEWIPC |
+ CLONE_NEWUTS | CLONE_NEWPID |
+ CLONE_NEWNET);
+ EXPECT_GE(self->child_pid1, 0);
+
+ if (self->child_pid1 == 0) {
+ close(ipc_sockets[0]);
+
+ if (!switch_timens())
+ _exit(EXIT_FAILURE);
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ _exit(EXIT_FAILURE);
+
+ close(ipc_sockets[1]);
+
+ pause();
+ _exit(EXIT_SUCCESS);
+ }
+
+ close(ipc_sockets[1]);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ close(ipc_sockets[0]);
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ self->child_pid2 = create_child(&self->child_pidfd2,
+ CLONE_NEWUSER | CLONE_NEWNS |
+ CLONE_NEWCGROUP | CLONE_NEWIPC |
+ CLONE_NEWUTS | CLONE_NEWPID |
+ CLONE_NEWNET);
+ EXPECT_GE(self->child_pid2, 0);
+
+ if (self->child_pid2 == 0) {
+ close(ipc_sockets[0]);
+
+ if (!switch_timens())
+ _exit(EXIT_FAILURE);
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ _exit(EXIT_FAILURE);
+
+ close(ipc_sockets[1]);
+
+ pause();
+ _exit(EXIT_SUCCESS);
+ }
+
+ close(ipc_sockets[1]);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ close(ipc_sockets[0]);
+
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ char p[100];
+
+ const struct ns_info *info = &ns_info[i];
+
+ self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
+ if (self->nsfds[i] < 0) {
+ EXPECT_EQ(errno, ENOENT) {
+ TH_LOG("%m - Failed to open %s namespace for process %d",
+ info->name, self->pid);
+ }
+ }
+
+ ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
+ self->child_pid1, info->name);
+ EXPECT_GT(ret, 0);
+ EXPECT_LT(ret, sizeof(p));
+
+ self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC);
+ if (self->child_nsfds1[i] < 0) {
+ EXPECT_EQ(errno, ENOENT) {
+ TH_LOG("%m - Failed to open %s namespace for process %d",
+ info->name, self->child_pid1);
+ }
+ }
+
+ ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
+ self->child_pid2, info->name);
+ EXPECT_GT(ret, 0);
+ EXPECT_LT(ret, sizeof(p));
+
+ self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC);
+ if (self->child_nsfds2[i] < 0) {
+ EXPECT_EQ(errno, ENOENT) {
+ TH_LOG("%m - Failed to open %s namespace for process %d",
+ info->name, self->child_pid2);
+ }
+ }
+ }
+
+ close(proc_fd);
+}
+
+FIXTURE_TEARDOWN(current_nsset)
+{
+ int i;
+
+ ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1,
+ SIGKILL, NULL, 0), 0);
+ ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2,
+ SIGKILL, NULL, 0), 0);
+
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ if (self->nsfds[i] >= 0)
+ close(self->nsfds[i]);
+ if (self->child_nsfds1[i] >= 0)
+ close(self->child_nsfds1[i]);
+ if (self->child_nsfds2[i] >= 0)
+ close(self->child_nsfds2[i]);
+ }
+
+ if (self->child_pidfd1 >= 0)
+ EXPECT_EQ(0, close(self->child_pidfd1));
+ if (self->child_pidfd2 >= 0)
+ EXPECT_EQ(0, close(self->child_pidfd2));
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0);
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0);
+}
+
+static int preserve_ns(const int pid, const char *ns)
+{
+ int ret;
+ char path[50];
+
+ ret = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns);
+ if (ret < 0 || (size_t)ret >= sizeof(path))
+ return -EIO;
+
+ return open(path, O_RDONLY | O_CLOEXEC);
+}
+
+static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns)
+{
+ int ns_fd2 = -EBADF;
+ int ret = -1;
+ struct stat ns_st1, ns_st2;
+
+ ret = fstat(ns_fd1, &ns_st1);
+ if (ret < 0)
+ return -1;
+
+ ns_fd2 = preserve_ns(pid2, ns);
+ if (ns_fd2 < 0)
+ return -1;
+
+ ret = fstat(ns_fd2, &ns_st2);
+ close(ns_fd2);
+ if (ret < 0)
+ return -1;
+
+ /* processes are in the same namespace */
+ if ((ns_st1.st_dev == ns_st2.st_dev) &&
+ (ns_st1.st_ino == ns_st2.st_ino))
+ return 1;
+
+ /* processes are in different namespaces */
+ return 0;
+}
+
+/* Test that we can't pass garbage to the kernel. */
+TEST_F(current_nsset, invalid_flags)
+{
+ ASSERT_NE(setns(self->pidfd, 0), 0);
+ EXPECT_EQ(errno, EINVAL);
+
+ ASSERT_NE(setns(self->pidfd, -1), 0);
+ EXPECT_EQ(errno, EINVAL);
+
+ ASSERT_NE(setns(self->pidfd, CLONE_VM), 0);
+ EXPECT_EQ(errno, EINVAL);
+
+ ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0);
+ EXPECT_EQ(errno, EINVAL);
+}
+
+/* Test that we can't attach to a task that has already exited. */
+TEST_F(current_nsset, pidfd_exited_child)
+{
+ int i;
+ pid_t pid;
+
+ ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET),
+ 0);
+ EXPECT_EQ(errno, ESRCH);
+
+ pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ /* Verify that we haven't changed any namespaces. */
+ if (self->nsfds[i] >= 0)
+ ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1);
+ }
+}
+
+TEST_F(current_nsset, pidfd_incremental_setns)
+{
+ int i;
+ pid_t pid;
+
+ pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ int nsfd;
+
+ if (self->child_nsfds1[i] < 0)
+ continue;
+
+ if (info->flag) {
+ ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) {
+ TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d",
+ info->name, self->child_pid1,
+ self->child_pidfd1);
+ }
+ }
+
+ /* Verify that we have changed to the correct namespaces. */
+ if (info->flag == CLONE_NEWPID)
+ nsfd = self->nsfds[i];
+ else
+ nsfd = self->child_nsfds1[i];
+ ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+ TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d",
+ info->name, self->child_pid1,
+ self->child_pidfd1);
+ }
+ TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d",
+ info->name, self->child_pid1, self->child_pidfd1);
+ }
+}
+
+TEST_F(current_nsset, nsfd_incremental_setns)
+{
+ int i;
+ pid_t pid;
+
+ pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ int nsfd;
+
+ if (self->child_nsfds1[i] < 0)
+ continue;
+
+ if (info->flag) {
+ ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) {
+ TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid1,
+ self->child_nsfds1[i]);
+ }
+ }
+
+ /* Verify that we have changed to the correct namespaces. */
+ if (info->flag == CLONE_NEWPID)
+ nsfd = self->nsfds[i];
+ else
+ nsfd = self->child_nsfds1[i];
+ ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+ TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d",
+ info->name, self->child_pid1,
+ self->child_nsfds1[i]);
+ }
+ TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid1, self->child_nsfds1[i]);
+ }
+}
+
+TEST_F(current_nsset, pidfd_one_shot_setns)
+{
+ unsigned flags = 0;
+ int i;
+ pid_t pid;
+
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+
+ if (self->child_nsfds1[i] < 0)
+ continue;
+
+ flags |= info->flag;
+ TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
+ info->name, self->child_pid1);
+ }
+
+ ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
+ TH_LOG("%m - Failed to setns to namespaces of %d",
+ self->child_pid1);
+ }
+
+ pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ int nsfd;
+
+ if (self->child_nsfds1[i] < 0)
+ continue;
+
+ /* Verify that we have changed to the correct namespaces. */
+ if (info->flag == CLONE_NEWPID)
+ nsfd = self->nsfds[i];
+ else
+ nsfd = self->child_nsfds1[i];
+ ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+ TH_LOG("setns failed to place us correctly into %s namespace of %d",
+ info->name, self->child_pid1);
+ }
+ TH_LOG("Managed to correctly setns to %s namespace of %d",
+ info->name, self->child_pid1);
+ }
+}
+
+TEST_F(current_nsset, no_foul_play)
+{
+ unsigned flags = 0;
+ int i;
+
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+
+ if (self->child_nsfds1[i] < 0)
+ continue;
+
+ flags |= info->flag;
+ if (info->flag) /* No use logging pid_for_children. */
+ TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
+ info->name, self->child_pid1);
+ }
+
+ ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
+ TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d",
+ self->child_pid1, self->child_pidfd1);
+ }
+
+ /*
+ * Can't setns to a user namespace outside of our hierarchy since we
+ * don't have caps in there and didn't create it. That means that under
+ * no circumstances should we be able to setns to any of the other
+ * ones since they aren't owned by our user namespace.
+ */
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+
+ if (self->child_nsfds2[i] < 0 || !info->flag)
+ continue;
+
+ ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) {
+ TH_LOG("Managed to setns to %s namespace of %d via pidfd %d",
+ info->name, self->child_pid2,
+ self->child_pidfd2);
+ }
+ TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d",
+ info->name, self->child_pid2,
+ self->child_pidfd2);
+
+ ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) {
+ TH_LOG("Managed to setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid2,
+ self->child_nsfds2[i]);
+ }
+ TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid2,
+ self->child_nsfds2[i]);
+ }
+}
+
+TEST(setns_einval)
+{
+ int fd;
+
+ fd = sys_memfd_create("rostock", 0);
+ EXPECT_GT(fd, 0);
+
+ ASSERT_NE(setns(fd, 0), 0);
+ EXPECT_EQ(errno, EINVAL);
+ close(fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index 7aff2d3b42c0..c585aaa2acd8 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -8,6 +8,7 @@
#include <sched.h>
#include <signal.h>
#include <stdio.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
@@ -27,6 +28,8 @@
#define MAX_EVENTS 5
+static bool have_pidfd_send_signal;
+
static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *))
{
size_t stack_size = 1024;
@@ -56,6 +59,13 @@ static int test_pidfd_send_signal_simple_success(void)
int pidfd, ret;
const char *test_name = "pidfd_send_signal send SIGUSR1";
+ if (!have_pidfd_send_signal) {
+ ksft_test_result_skip(
+ "%s test: pidfd_send_signal() syscall not supported\n",
+ test_name);
+ return 0;
+ }
+
pidfd = open("/proc/self", O_DIRECTORY | O_CLOEXEC);
if (pidfd < 0)
ksft_exit_fail_msg(
@@ -86,6 +96,13 @@ static int test_pidfd_send_signal_exited_fail(void)
pid_t pid;
const char *test_name = "pidfd_send_signal signal exited process";
+ if (!have_pidfd_send_signal) {
+ ksft_test_result_skip(
+ "%s test: pidfd_send_signal() syscall not supported\n",
+ test_name);
+ return 0;
+ }
+
pid = fork();
if (pid < 0)
ksft_exit_fail_msg("%s test: Failed to create new process\n",
@@ -137,16 +154,34 @@ static int test_pidfd_send_signal_recycled_pid_fail(void)
pid_t pid1;
const char *test_name = "pidfd_send_signal signal recycled pid";
+ if (!have_pidfd_send_signal) {
+ ksft_test_result_skip(
+ "%s test: pidfd_send_signal() syscall not supported\n",
+ test_name);
+ return 0;
+ }
+
ret = unshare(CLONE_NEWPID);
- if (ret < 0)
+ if (ret < 0) {
+ if (errno == EPERM) {
+ ksft_test_result_skip("%s test: Unsharing pid namespace not permitted\n",
+ test_name);
+ return 0;
+ }
ksft_exit_fail_msg("%s test: Failed to unshare pid namespace\n",
test_name);
+ }
ret = unshare(CLONE_NEWNS);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: Failed to unshare mount namespace\n",
- test_name);
+ if (ret < 0) {
+ if (errno == EPERM) {
+ ksft_test_result_skip("%s test: Unsharing mount namespace not permitted\n",
+ test_name);
+ return 0;
+ }
+ ksft_exit_fail_msg("%s test: Failed to unshare mount namespace\n",
+ test_name);
+ }
ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
if (ret < 0)
@@ -325,15 +360,17 @@ static int test_pidfd_send_signal_syscall_support(void)
ret = sys_pidfd_send_signal(pidfd, 0, NULL, 0);
if (ret < 0) {
- if (errno == ENOSYS)
- ksft_exit_skip(
+ if (errno == ENOSYS) {
+ ksft_test_result_skip(
"%s test: pidfd_send_signal() syscall not supported\n",
test_name);
-
+ return 0;
+ }
ksft_exit_fail_msg("%s test: Failed to send signal\n",
test_name);
}
+ have_pidfd_send_signal = true;
close(pidfd);
ksft_test_result_pass(
"%s test: pidfd_send_signal() syscall is supported. Tests can be executed\n",
@@ -521,7 +558,7 @@ static void test_pidfd_poll_leader_exit(int use_waitpid)
int main(int argc, char **argv)
{
ksft_print_header();
- ksft_set_plan(4);
+ ksft_set_plan(8);
test_pidfd_poll_exec(0);
test_pidfd_poll_exec(1);
diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c
index 7079f8eef792..be2943f072f6 100644
--- a/tools/testing/selftests/pidfd/pidfd_wait.c
+++ b/tools/testing/selftests/pidfd/pidfd_wait.c
@@ -17,10 +17,15 @@
#include <unistd.h>
#include "pidfd.h"
-#include "../kselftest.h"
+#include "../kselftest_harness.h"
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
+
static pid_t sys_clone3(struct clone_args *args)
{
return syscall(__NR_clone3, args, sizeof(struct clone_args));
@@ -32,9 +37,8 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options,
return syscall(__NR_waitid, which, pid, info, options, ru);
}
-static int test_pidfd_wait_simple(void)
+TEST(wait_simple)
{
- const char *test_name = "pidfd wait simple";
int pidfd = -1, status = 0;
pid_t parent_tid = -1;
struct clone_args args = {
@@ -50,76 +54,40 @@ static int test_pidfd_wait_simple(void)
};
pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
- if (pidfd < 0)
- ksft_exit_fail_msg("%s test: failed to open /proc/self %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pidfd, 0);
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (pid == 0)
- ksft_exit_fail_msg(
- "%s test: succeeded to wait on invalid pidfd %s\n",
- test_name, strerror(errno));
- close(pidfd);
+ ASSERT_NE(pid, 0);
+ EXPECT_EQ(close(pidfd), 0);
pidfd = -1;
pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC);
- if (pidfd == 0)
- ksft_exit_fail_msg("%s test: failed to open /dev/null %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pidfd, 0);
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (pid == 0)
- ksft_exit_fail_msg(
- "%s test: succeeded to wait on invalid pidfd %s\n",
- test_name, strerror(errno));
- close(pidfd);
+ ASSERT_NE(pid, 0);
+ EXPECT_EQ(close(pidfd), 0);
pidfd = -1;
pid = sys_clone3(&args);
- if (pid < 0)
- ksft_exit_fail_msg("%s test: failed to create new process %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pid, 0);
if (pid == 0)
exit(EXIT_SUCCESS);
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (pid < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (!WIFEXITED(info.si_status) || WEXITSTATUS(info.si_status))
- ksft_exit_fail_msg(
- "%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
- close(pidfd);
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_EXITED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ksft_test_result_pass("%s test: Passed\n", test_name);
- return 0;
+ ASSERT_GE(pid, 0);
+ ASSERT_EQ(WIFEXITED(info.si_status), true);
+ ASSERT_EQ(WEXITSTATUS(info.si_status), 0);
+ EXPECT_EQ(close(pidfd), 0);
+
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_EXITED);
+ ASSERT_EQ(info.si_pid, parent_tid);
}
-static int test_pidfd_wait_states(void)
+TEST(wait_states)
{
- const char *test_name = "pidfd wait states";
int pidfd = -1, status = 0;
pid_t parent_tid = -1;
struct clone_args args = {
@@ -135,9 +103,7 @@ static int test_pidfd_wait_states(void)
};
pid = sys_clone3(&args);
- if (pid < 0)
- ksft_exit_fail_msg("%s test: failed to create new process %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pid, 0);
if (pid == 0) {
kill(getpid(), SIGSTOP);
@@ -145,127 +111,115 @@ static int test_pidfd_wait_states(void)
exit(EXIT_SUCCESS);
}
- ret = sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on WSTOPPED process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_STOPPED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ret = sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to send signal to process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- ret = sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait WCONTINUED on process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_CONTINUED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ret = sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on WUNTRACED process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_STOPPED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to send SIGKILL to process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_STOPPED);
+ ASSERT_EQ(info.si_pid, parent_tid);
- ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on WEXITED process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_KILLED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- close(pidfd);
-
- ksft_test_result_pass("%s test: Passed\n", test_name);
- return 0;
+ ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_CONTINUED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_STOPPED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_KILLED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ EXPECT_EQ(close(pidfd), 0);
}
-int main(int argc, char **argv)
+TEST(wait_nonblock)
{
- ksft_print_header();
- ksft_set_plan(2);
+ int pidfd, status = 0;
+ unsigned int flags = 0;
+ pid_t parent_tid = -1;
+ struct clone_args args = {
+ .parent_tid = ptr_to_u64(&parent_tid),
+ .flags = CLONE_PARENT_SETTID,
+ .exit_signal = SIGCHLD,
+ };
+ int ret;
+ pid_t pid;
+ siginfo_t info = {
+ .si_signo = 0,
+ };
+
+ /*
+ * Callers need to see ECHILD with non-blocking pidfds when no child
+ * processes exist.
+ */
+ pidfd = sys_pidfd_open(getpid(), PIDFD_NONBLOCK);
+ EXPECT_GE(pidfd, 0) {
+ /* pidfd_open() doesn't support PIDFD_NONBLOCK. */
+ ASSERT_EQ(errno, EINVAL);
+ SKIP(return, "Skipping PIDFD_NONBLOCK test");
+ }
+
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, ECHILD);
+ EXPECT_EQ(close(pidfd), 0);
+
+ pid = sys_clone3(&args);
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ kill(getpid(), SIGSTOP);
+ exit(EXIT_SUCCESS);
+ }
- test_pidfd_wait_simple();
- test_pidfd_wait_states();
+ pidfd = sys_pidfd_open(pid, PIDFD_NONBLOCK);
+ EXPECT_GE(pidfd, 0) {
+ /* pidfd_open() doesn't support PIDFD_NONBLOCK. */
+ ASSERT_EQ(errno, EINVAL);
+ SKIP(return, "Skipping PIDFD_NONBLOCK test");
+ }
+
+ flags = fcntl(pidfd, F_GETFL, 0);
+ ASSERT_GT(flags, 0);
+ ASSERT_GT((flags & O_NONBLOCK), 0);
+
+ /*
+ * Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when
+ * child processes exist but none have exited.
+ */
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EAGAIN);
+
+ /*
+ * Callers need to continue seeing 0 with non-blocking pidfd and
+ * WNOHANG raised explicitly when child processes exist but none have
+ * exited.
+ */
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL);
+ ASSERT_EQ(ret, 0);
- return ksft_exit_pass();
+ ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_STOPPED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_EXITED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ EXPECT_EQ(close(pidfd), 0);
}
+
+TEST_HARNESS_MAIN
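
The new wait_nonblock test pins down the PIDFD_NONBLOCK semantics used above: waitid() on a non-blocking pidfd fails with EAGAIN while the child is still running, and clearing O_NONBLOCK with fcntl() restores ordinary blocking behaviour. Below is a stripped-down, standalone sketch of that flow, for illustration only; the fallback __NR_pidfd_open and P_PIDFD values are assumptions for headers that predate them, and the selftest remains the authoritative version.

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef __NR_pidfd_open
#define __NR_pidfd_open 434	/* assumed generic syscall number */
#endif
#ifndef PIDFD_NONBLOCK
#define PIDFD_NONBLOCK O_NONBLOCK
#endif
#ifndef P_PIDFD
#define P_PIDFD 3		/* assumed value for older sys/wait.h */
#endif

int main(void)
{
	siginfo_t info = { 0 };
	int sync_pipe[2], pidfd, flags;
	pid_t pid;
	char c;

	if (pipe(sync_pipe))
		return 1;

	pid = fork();
	if (pid < 0)
		return 1;
	if (pid == 0) {
		/* Child: block until the parent says it may exit. */
		close(sync_pipe[1]);
		read(sync_pipe[0], &c, 1);
		_exit(EXIT_SUCCESS);
	}
	close(sync_pipe[0]);

	pidfd = syscall(__NR_pidfd_open, pid, PIDFD_NONBLOCK);
	if (pidfd < 0)
		return 4;	/* kernel without PIDFD_NONBLOCK: skip */

	/* Child exists but has not exited: non-blocking waitid() sees EAGAIN. */
	if (waitid(P_PIDFD, pidfd, &info, WEXITED) < 0 && errno == EAGAIN)
		printf("non-blocking WEXITED wait: EAGAIN as expected\n");

	/* Drop O_NONBLOCK and the same call becomes a normal blocking wait. */
	flags = fcntl(pidfd, F_GETFL, 0);
	fcntl(pidfd, F_SETFL, flags & ~O_NONBLOCK);
	write(sync_pipe[1], "x", 1);
	waitid(P_PIDFD, pidfd, &info, WEXITED);
	printf("child reaped, si_code=%d (CLD_EXITED=%d)\n", info.si_code, CLD_EXITED);

	close(pidfd);
	close(sync_pipe[1]);
	return 0;
}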
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 644770c3b754..0830e63818c1 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -19,6 +19,7 @@ SUB_DIRS = alignment \
copyloops \
dscr \
mm \
+ nx-gzip \
pmu \
signal \
primitives \
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 0453c50c949c..2a0503bc7e49 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -9,7 +9,17 @@
* This selftest exercises the powerpc alignment fault handler.
*
* We create two sets of source and destination buffers, one in regular memory,
- * the other cache-inhibited (we use /dev/fb0 for this).
+ * the other cache-inhibited (by default we use /dev/fb0 for this, but an
+ * alternative path for cache-inhibited memory may be provided).
+ *
+ * One way to get cache-inhibited memory is to use the "mem" kernel parameter
+ * to limit the kernel to less memory than actually exists. Addresses above
+ * the limit may still be accessed but will be treated as cache-inhibited. For
+ * example, if there is actually 4GB of memory and the parameter "mem=3GB" is
+ * used, memory from address 0xC0000000 onwards is treated as cache-inhibited.
+ * To access this region /dev/mem is used. The kernel should be configured
+ * without CONFIG_STRICT_DEVMEM. In this case use:
+ * ./alignment_handler /dev/mem 0xc0000000
*
* We initialise the source buffers, then use whichever set of load/store
* instructions is under test to copy bytes from the source buffers to the
@@ -45,14 +55,16 @@
#include <setjmp.h>
#include <signal.h>
-#include <asm/cputable.h>
-
#include "utils.h"
+#include "instructions.h"
int bufsize;
int debug;
int testing;
volatile int gotsig;
+bool prefixes_enabled;
+char *cipath = "/dev/fb0";
+long cioffset;
void sighandler(int sig, siginfo_t *info, void *ctx)
{
@@ -64,7 +76,12 @@ void sighandler(int sig, siginfo_t *info, void *ctx)
}
gotsig = sig;
#ifdef __powerpc64__
- ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+ if (prefixes_enabled) {
+ u32 inst = *(u32 *)ucp->uc_mcontext.gp_regs[PT_NIP];
+ ucp->uc_mcontext.gp_regs[PT_NIP] += ((inst >> 26 == 1) ? 8 : 4);
+ } else {
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+ }
#else
ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4;
#endif
@@ -84,6 +101,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx)
} \
rc |= do_test(#name, test_##name)
+#define TESTP(name, ld_op, st_op, ld_reg, st_reg) \
+ void test_##name(char *s, char *d) \
+ { \
+ asm volatile( \
+ ld_op(ld_reg, %0, 0, 0) \
+ st_op(st_reg, %1, 0, 0) \
+ :: "r"(s), "r"(d), "r"(0) \
+ : "memory", "vs0", "vs32", "r31"); \
+ } \
+ rc |= do_test(#name, test_##name)
+
#define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32)
#define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32)
#define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32)
@@ -103,6 +131,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx)
#define LOAD_FLOAT_XFORM_TEST(op) TEST(op, op, stfdx, XFORM, 0, 0)
#define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0)
+#define LOAD_MLS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31)
+#define STORE_MLS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31)
+
+#define LOAD_8LS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31)
+#define STORE_8LS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31)
+
+#define LOAD_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, op, PSTFD, 0, 0)
+#define STORE_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, PLFD, op, 0, 0)
+
+#define LOAD_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, op, PSTXV ## tail, 0, 32)
+#define STORE_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, PLXV ## tail, op, 32, 0)
/* FIXME: Unimplemented tests: */
// STORE_DFORM_TEST(stq) /* FIXME: need two registers for quad */
@@ -195,17 +234,18 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
printf("\tDoing %s:\t", test_name);
- fd = open("/dev/fb0", O_RDWR);
+ fd = open(cipath, O_RDWR);
if (fd < 0) {
printf("\n");
- perror("Can't open /dev/fb0 now?");
+ perror("Can't open ci file now?");
return 1;
}
- ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
- fd, 0x0);
- ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
- fd, bufsize);
+ ci0 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+ fd, cioffset);
+ ci1 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+ fd, cioffset + bufsize);
+
if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) {
printf("\n");
perror("mmap failed");
@@ -270,11 +310,11 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
return rc;
}
-static bool can_open_fb0(void)
+static bool can_open_cifile(void)
{
int fd;
- fd = open("/dev/fb0", O_RDWR);
+ fd = open(cipath, O_RDWR);
if (fd < 0)
return false;
@@ -286,7 +326,7 @@ int test_alignment_handler_vsx_206(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
printf("VSX: 2.06B\n");
@@ -304,7 +344,7 @@ int test_alignment_handler_vsx_207(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
printf("VSX: 2.07B\n");
@@ -320,7 +360,7 @@ int test_alignment_handler_vsx_300(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
printf("VSX: 3.00B\n");
@@ -348,11 +388,30 @@ int test_alignment_handler_vsx_300(void)
return rc;
}
+int test_alignment_handler_vsx_prefix(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_cifile());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ printf("VSX: PREFIX\n");
+ LOAD_VSX_8LS_PREFIX_TEST(PLXSD, 0);
+ LOAD_VSX_8LS_PREFIX_TEST(PLXSSP, 0);
+ LOAD_VSX_8LS_PREFIX_TEST(PLXV0, 0);
+ LOAD_VSX_8LS_PREFIX_TEST(PLXV1, 1);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXSD, 0);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXSSP, 0);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXV0, 0);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXV1, 1);
+ return rc;
+}
+
int test_alignment_handler_integer(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
printf("Integer\n");
LOAD_DFORM_TEST(lbz);
@@ -408,7 +467,7 @@ int test_alignment_handler_integer_206(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
printf("Integer: 2.06\n");
@@ -419,11 +478,32 @@ int test_alignment_handler_integer_206(void)
return rc;
}
+int test_alignment_handler_integer_prefix(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_cifile());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ printf("Integer: PREFIX\n");
+ LOAD_MLS_PREFIX_TEST(PLBZ);
+ LOAD_MLS_PREFIX_TEST(PLHZ);
+ LOAD_MLS_PREFIX_TEST(PLHA);
+ LOAD_MLS_PREFIX_TEST(PLWZ);
+ LOAD_8LS_PREFIX_TEST(PLWA);
+ LOAD_8LS_PREFIX_TEST(PLD);
+ STORE_MLS_PREFIX_TEST(PSTB);
+ STORE_MLS_PREFIX_TEST(PSTH);
+ STORE_MLS_PREFIX_TEST(PSTW);
+ STORE_8LS_PREFIX_TEST(PSTD);
+ return rc;
+}
+
int test_alignment_handler_vmx(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC));
printf("VMX\n");
@@ -451,7 +531,7 @@ int test_alignment_handler_fp(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
printf("Floating point\n");
LOAD_FLOAT_DFORM_TEST(lfd);
@@ -479,7 +559,7 @@ int test_alignment_handler_fp_205(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05));
printf("Floating point: 2.05\n");
@@ -497,7 +577,7 @@ int test_alignment_handler_fp_206(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
printf("Floating point: 2.06\n");
@@ -507,13 +587,32 @@ int test_alignment_handler_fp_206(void)
return rc;
}
+
+int test_alignment_handler_fp_prefix(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_cifile());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ printf("Floating point: PREFIX\n");
+ LOAD_FLOAT_DFORM_TEST(lfs);
+ LOAD_FLOAT_MLS_PREFIX_TEST(PLFS);
+ LOAD_FLOAT_MLS_PREFIX_TEST(PLFD);
+ STORE_FLOAT_MLS_PREFIX_TEST(PSTFS);
+ STORE_FLOAT_MLS_PREFIX_TEST(PSTFD);
+ return rc;
+}
+
void usage(char *prog)
{
- printf("Usage: %s [options]\n", prog);
+ printf("Usage: %s [options] [path [offset]]\n", prog);
printf(" -d Enable debug error output\n");
printf("\n");
- printf("This test requires a POWER8 or POWER9 CPU and a usable ");
- printf("framebuffer at /dev/fb0.\n");
+ printf("This test requires a POWER8, POWER9 or POWER10 CPU ");
+ printf("and either a usable framebuffer at /dev/fb0 or ");
+ printf("the path to usable cache inhibited memory and optional ");
+ printf("offset to be provided\n");
}
int main(int argc, char *argv[])
@@ -533,6 +632,13 @@ int main(int argc, char *argv[])
exit(1);
}
}
+ argc -= optind;
+ argv += optind;
+
+ if (argc > 0)
+ cipath = argv[0];
+ if (argc > 1)
+ cioffset = strtol(argv[1], 0, 0x10);
bufsize = getpagesize();
@@ -546,16 +652,22 @@ int main(int argc, char *argv[])
exit(1);
}
+ prefixes_enabled = have_hwcap2(PPC_FEATURE2_ARCH_3_1);
+
rc |= test_harness(test_alignment_handler_vsx_206,
"test_alignment_handler_vsx_206");
rc |= test_harness(test_alignment_handler_vsx_207,
"test_alignment_handler_vsx_207");
rc |= test_harness(test_alignment_handler_vsx_300,
"test_alignment_handler_vsx_300");
+ rc |= test_harness(test_alignment_handler_vsx_prefix,
+ "test_alignment_handler_vsx_prefix");
rc |= test_harness(test_alignment_handler_integer,
"test_alignment_handler_integer");
rc |= test_harness(test_alignment_handler_integer_206,
"test_alignment_handler_integer_206");
+ rc |= test_harness(test_alignment_handler_integer_prefix,
+ "test_alignment_handler_integer_prefix");
rc |= test_harness(test_alignment_handler_vmx,
"test_alignment_handler_vmx");
rc |= test_harness(test_alignment_handler_fp,
@@ -564,5 +676,7 @@ int main(int argc, char *argv[])
"test_alignment_handler_fp_205");
rc |= test_harness(test_alignment_handler_fp_206,
"test_alignment_handler_fp_206");
+ rc |= test_harness(test_alignment_handler_fp_prefix,
+ "test_alignment_handler_fp_prefix");
return rc;
}
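
For reference, the only behavioural change in the fault handler above is the step size: on ISA v3.1 a prefixed instruction occupies two words and its first word has primary opcode 1, so NIP must be advanced by 8 bytes rather than 4. A minimal sketch of that decision, pulled out of the ucontext plumbing:

#include <stdint.h>

/*
 * Size in bytes of the instruction whose first word is 'word':
 * prefixed instructions (primary opcode 1) are 8 bytes, all others 4.
 */
static inline unsigned long instr_step(uint32_t word)
{
	return (word >> 26) == 1 ? 8 : 4;
}

/* In the handler this becomes, roughly:
 *	regs[PT_NIP] += instr_step(*(uint32_t *)regs[PT_NIP]);
 */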
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
index a2e8c9da7fa5..96554e2794d1 100644
--- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -19,6 +19,7 @@
#include <limits.h>
#include <sys/time.h>
#include <sys/syscall.h>
+#include <sys/sysinfo.h>
#include <sys/types.h>
#include <sys/shm.h>
#include <linux/futex.h>
@@ -104,8 +105,9 @@ static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu)
static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
{
- int pid;
- cpu_set_t cpuset;
+ int pid, ncpus;
+ cpu_set_t *cpuset;
+ size_t size;
pid = fork();
if (pid == -1) {
@@ -116,14 +118,23 @@ static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
if (pid)
return;
- CPU_ZERO(&cpuset);
- CPU_SET(cpu, &cpuset);
+ ncpus = get_nprocs();
+ size = CPU_ALLOC_SIZE(ncpus);
+ cpuset = CPU_ALLOC(ncpus);
+ if (!cpuset) {
+ perror("malloc");
+ exit(1);
+ }
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(cpu, size, cpuset);
- if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) {
+ if (sched_setaffinity(0, size, cpuset)) {
perror("sched_setaffinity");
+ CPU_FREE(cpuset);
exit(1);
}
+ CPU_FREE(cpuset);
fn(arg);
exit(0);
@@ -470,6 +481,12 @@ int main(int argc, char *argv[])
else
printf("futex");
+ if (!have_hwcap(PPC_FEATURE_HAS_ALTIVEC))
+ touch_altivec = 0;
+
+ if (!have_hwcap(PPC_FEATURE_HAS_VSX))
+ touch_vector = 0;
+
printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n",
cpu1, cpu2, touch_fp ? "yes" : "no", touch_altivec ? "yes" : "no",
touch_vector ? "yes" : "no", touch_vdso ? "yes" : "no");
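
The context_switch change above swaps the fixed-size cpu_set_t for a dynamically sized one, so pinning still works when the machine has more CPUs than the static mask can describe. A self-contained sketch of that pattern, using the same glibc calls but trimmed of the benchmark specifics:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <sys/sysinfo.h>

static int pin_to_cpu(unsigned long cpu)
{
	int ncpus = get_nprocs();
	size_t size = CPU_ALLOC_SIZE(ncpus);	/* bytes needed for ncpus bits */
	cpu_set_t *set = CPU_ALLOC(ncpus);
	int rc;

	if (!set)
		return -1;

	CPU_ZERO_S(size, set);
	CPU_SET_S(cpu, size, set);
	rc = sched_setaffinity(0, size, set);	/* 0 == calling thread */
	CPU_FREE(set);
	return rc;
}

int main(void)
{
	if (pin_to_cpu(0))
		perror("sched_setaffinity");
	else
		printf("pinned to CPU 0\n");
	return 0;
}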
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index ddaf140b8255..994b11af765c 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -12,4 +12,4 @@ memcpy_p7_t1
copyuser_64_exc_t0
copyuser_64_exc_t1
copyuser_64_exc_t2
-memcpy_mcsafe_64
+copy_mc_64
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 0917983a1c78..3095b1f1c02b 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4
TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
copyuser_p7_t0 copyuser_p7_t1 \
memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
- memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
+ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
EXTRA_SOURCES := validate.c ../harness.c stubs.S
@@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES)
-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
-o $@ $^
-$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
+$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES)
$(CC) $(CPPFLAGS) $(CFLAGS) \
- -D COPY_LOOP=test_memcpy_mcsafe \
+ -D COPY_LOOP=test_copy_mc_generic \
-o $@ $^
$(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
new file mode 120000
index 000000000000..dcbe06d500fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copy_mc_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
deleted file mode 120000
index f0feef3062f6..000000000000
--- a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
index cfa6eedcb66c..845db6273a1b 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -10,4 +10,4 @@ include ../../lib.mk
$(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
-$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
index 288a4e2ad156..e76611e608af 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
@@ -63,6 +63,8 @@ int dscr_default(void)
unsigned long i, *status[THREADS];
unsigned long orig_dscr_default;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
orig_dscr_default = get_default_dscr();
/* Initial DSCR default */
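
Each DSCR test now bails out early with SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)) so the suite skips cleanly on CPUs without the facility. The helper simply inspects the AT_HWCAP2 auxiliary vector entry; a plain-libc sketch of the same check is below (the PPC_FEATURE2_DSCR fallback value is an assumption for builds without asm/cputable.h).

#include <stdio.h>
#include <sys/auxv.h>

#ifndef PPC_FEATURE2_DSCR
#define PPC_FEATURE2_DSCR 0x20000000	/* assumed to match asm/cputable.h */
#endif

int main(void)
{
	unsigned long hwcap2 = getauxval(AT_HWCAP2);

	if (!(hwcap2 & PPC_FEATURE2_DSCR)) {
		printf("DSCR not advertised by the kernel, skipping\n");
		return 4;	/* kselftest skip exit code */
	}
	printf("DSCR facility available\n");
	return 0;
}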
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
index aefcd8d8759b..32fcf2b324b1 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
@@ -21,6 +21,8 @@ int dscr_explicit(void)
{
unsigned long i, dscr = 0;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
srand(getpid());
set_dscr(dscr);
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
index 7c1cb46397c6..c6a81b2d6b91 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
@@ -44,6 +44,8 @@ int dscr_inherit_exec(void)
unsigned long i, dscr = 0;
pid_t pid;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
for (i = 0; i < COUNT; i++) {
dscr++;
if (dscr > DSCR_MAX)
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
index 04297a69ab59..f9dfd3d3c2d5 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
@@ -22,6 +22,8 @@ int dscr_inherit(void)
unsigned long i, dscr = 0;
pid_t pid;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
srand(getpid());
set_dscr(dscr);
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
index 02f6b4efde14..fbbdffdb2e5d 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
@@ -77,6 +77,8 @@ int dscr_sysfs(void)
unsigned long orig_dscr_default;
int i, j;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
orig_dscr_default = get_default_dscr();
for (i = 0; i < COUNT; i++) {
for (j = 0; j < DSCR_MAX; j++) {
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
index 37be2c25f277..191ed126f118 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
@@ -56,6 +56,8 @@ int dscr_sysfs_thread(void)
unsigned long orig_dscr_default;
int i, j;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
orig_dscr_default = get_default_dscr();
for (i = 0; i < COUNT; i++) {
for (j = 0; j < DSCR_MAX; j++) {
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
index eaf785d11eed..e09072446dd3 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
@@ -36,6 +36,8 @@ int dscr_user(void)
{
int i;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
check_dscr("");
for (i = 0; i < COUNT; i++) {
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
index 8a8d0f456946..0d783e1065c8 100755
--- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
@@ -1,17 +1,19 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
+KSELFTESTS_SKIP=4
+
. ./eeh-functions.sh
if ! eeh_supported ; then
echo "EEH not supported on this system, skipping"
- exit 0;
+ exit $KSELFTESTS_SKIP;
fi
if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \
[ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
echo "debugfs EEH testing files are missing. Is debugfs mounted?"
- exit 1;
+ exit $KSELFTESTS_SKIP;
fi
pre_lspci=`mktemp`
@@ -84,4 +86,5 @@ echo "$failed devices failed to recover ($dev_count tested)"
lspci | diff -u $pre_lspci -
rm -f $pre_lspci
-exit $failed
+test "$failed" == 0
+exit $?
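
The script changes above lean on the kselftest runner's exit-code convention: 4 marks a test as skipped rather than failed, which is why the unsupported-EEH and missing-debugfs cases now exit with $KSELFTESTS_SKIP. A tiny C illustration of that convention (the KSFT_* names mirror tools/testing/selftests/kselftest.h):

#include <stdio.h>
#include <stdlib.h>

#define KSFT_PASS 0
#define KSFT_FAIL 1
#define KSFT_SKIP 4

int main(void)
{
	int eeh_supported = 0;	/* stand-in for the script's eeh_supported check */

	if (!eeh_supported) {
		printf("EEH not supported on this system, skipping\n");
		exit(KSFT_SKIP);	/* runner reports SKIP instead of FAIL */
	}
	exit(KSFT_PASS);
}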
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
index f52ed92b53e7..00dc32c0ed75 100755
--- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
@@ -5,12 +5,17 @@ pe_ok() {
local dev="$1"
local path="/sys/bus/pci/devices/$dev/eeh_pe_state"
- if ! [ -e "$path" ] ; then
+ # If a driver doesn't support the error handling callbacks then the
+ # device is recovered by removing and re-probing it. This causes the
+ # sysfs directory to disappear, so read the PE state once and squash
+ # any potential error messages.
+ local eeh_state="$(cat $path 2>/dev/null)"
+ if [ -z "$eeh_state" ]; then
return 1;
fi
- local fw_state="$(cut -d' ' -f1 < $path)"
- local sw_state="$(cut -d' ' -f2 < $path)"
+ local fw_state="$(echo $eeh_state | cut -d' ' -f1)"
+ local sw_state="$(echo $eeh_state | cut -d' ' -f2)"
# If EEH_PE_ISOLATED or EEH_PE_RECOVERING are set then the PE is in an
# error state or being recovered. Either way, not ok.
diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h
index f36061eb6f0f..4efa6314bd96 100644
--- a/tools/testing/selftests/powerpc/include/instructions.h
+++ b/tools/testing/selftests/powerpc/include/instructions.h
@@ -66,4 +66,81 @@ static inline int paste_last(void *i)
#define PPC_INST_PASTE __PASTE(0, 0, 0, 0)
#define PPC_INST_PASTE_LAST __PASTE(0, 0, 1, 1)
+/* This defines the prefixed load/store instructions */
+#ifdef __ASSEMBLY__
+# define stringify_in_c(...) __VA_ARGS__
+#else
+# define __stringify_in_c(...) #__VA_ARGS__
+# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " "
+#endif
+
+#define __PPC_RA(a) (((a) & 0x1f) << 16)
+#define __PPC_RS(s) (((s) & 0x1f) << 21)
+#define __PPC_RT(t) __PPC_RS(t)
+#define __PPC_PREFIX_R(r) (((r) & 0x1) << 20)
+
+#define PPC_PREFIX_MLS 0x06000000
+#define PPC_PREFIX_8LS 0x04000000
+
+#define PPC_INST_LBZ 0x88000000
+#define PPC_INST_LHZ 0xa0000000
+#define PPC_INST_LHA 0xa8000000
+#define PPC_INST_LWZ 0x80000000
+#define PPC_INST_STB 0x98000000
+#define PPC_INST_STH 0xb0000000
+#define PPC_INST_STW 0x90000000
+#define PPC_INST_STD 0xf8000000
+#define PPC_INST_LFS 0xc0000000
+#define PPC_INST_LFD 0xc8000000
+#define PPC_INST_STFS 0xd0000000
+#define PPC_INST_STFD 0xd8000000
+
+#define PREFIX_MLS(instr, t, a, r, d) stringify_in_c(.balign 64, , 4;) \
+ stringify_in_c(.long PPC_PREFIX_MLS | \
+ __PPC_PREFIX_R(r) | \
+ (((d) >> 16) & 0x3ffff);) \
+ stringify_in_c(.long (instr) | \
+ __PPC_RT(t) | \
+ __PPC_RA(a) | \
+ ((d) & 0xffff);\n)
+
+#define PREFIX_8LS(instr, t, a, r, d) stringify_in_c(.balign 64, , 4;) \
+ stringify_in_c(.long PPC_PREFIX_8LS | \
+ __PPC_PREFIX_R(r) | \
+ (((d) >> 16) & 0x3ffff);) \
+ stringify_in_c(.long (instr) | \
+ __PPC_RT(t) | \
+ __PPC_RA(a) | \
+ ((d) & 0xffff);\n)
+
+/* Prefixed Integer Load/Store instructions */
+#define PLBZ(t, a, r, d) PREFIX_MLS(PPC_INST_LBZ, t, a, r, d)
+#define PLHZ(t, a, r, d) PREFIX_MLS(PPC_INST_LHZ, t, a, r, d)
+#define PLHA(t, a, r, d) PREFIX_MLS(PPC_INST_LHA, t, a, r, d)
+#define PLWZ(t, a, r, d) PREFIX_MLS(PPC_INST_LWZ, t, a, r, d)
+#define PLWA(t, a, r, d) PREFIX_8LS(0xa4000000, t, a, r, d)
+#define PLD(t, a, r, d) PREFIX_8LS(0xe4000000, t, a, r, d)
+#define PLQ(t, a, r, d) PREFIX_8LS(0xe0000000, t, a, r, d)
+#define PSTB(s, a, r, d) PREFIX_MLS(PPC_INST_STB, s, a, r, d)
+#define PSTH(s, a, r, d) PREFIX_MLS(PPC_INST_STH, s, a, r, d)
+#define PSTW(s, a, r, d) PREFIX_MLS(PPC_INST_STW, s, a, r, d)
+#define PSTD(s, a, r, d) PREFIX_8LS(0xf4000000, s, a, r, d)
+#define PSTQ(s, a, r, d) PREFIX_8LS(0xf0000000, s, a, r, d)
+
+/* Prefixed Floating-Point Load/Store Instructions */
+#define PLFS(frt, a, r, d) PREFIX_MLS(PPC_INST_LFS, frt, a, r, d)
+#define PLFD(frt, a, r, d) PREFIX_MLS(PPC_INST_LFD, frt, a, r, d)
+#define PSTFS(frs, a, r, d) PREFIX_MLS(PPC_INST_STFS, frs, a, r, d)
+#define PSTFD(frs, a, r, d) PREFIX_MLS(PPC_INST_STFD, frs, a, r, d)
+
+/* Prefixed VSX Load/Store Instructions */
+#define PLXSD(vrt, a, r, d) PREFIX_8LS(0xa8000000, vrt, a, r, d)
+#define PLXSSP(vrt, a, r, d) PREFIX_8LS(0xac000000, vrt, a, r, d)
+#define PLXV0(s, a, r, d) PREFIX_8LS(0xc8000000, s, a, r, d)
+#define PLXV1(s, a, r, d) PREFIX_8LS(0xcc000000, s, a, r, d)
+#define PSTXSD(vrs, a, r, d) PREFIX_8LS(0xb8000000, vrs, a, r, d)
+#define PSTXSSP(vrs, a, r, d) PREFIX_8LS(0xbc000000, vrs, a, r, d)
+#define PSTXV0(s, a, r, d) PREFIX_8LS(0xd8000000, s, a, r, d)
+#define PSTXV1(s, a, r, d) PREFIX_8LS(0xdc000000, s, a, r, d)
+
#endif /* _SELFTESTS_POWERPC_INSTRUCTIONS_H */
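
The PREFIX_MLS/PREFIX_8LS macros above emit a prefixed load/store as two 32-bit words: a prefix word carrying the R (PC-relative) bit and the upper 18 bits of the 34-bit displacement, followed by a suffix word carrying the base opcode, RT/RS, RA and the low 16 bits of the displacement. A small sketch that builds the same pair of words for pld, mirroring the header's field macros and intended purely as illustration:

#include <stdint.h>
#include <stdio.h>

#define PPC_PREFIX_8LS	0x04000000u

static void encode_pld(uint32_t rt, uint32_t ra, uint32_t r, int64_t d,
		       uint32_t word[2])
{
	/* Prefix: type 8LS, R bit, top 18 bits of the 34-bit displacement. */
	word[0] = (uint32_t)(PPC_PREFIX_8LS | ((r & 0x1) << 20) |
			     ((d >> 16) & 0x3ffff));
	/* Suffix: pld opcode (as in the header), RT, RA, low 16 bits of d. */
	word[1] = (uint32_t)(0xe4000000u |
			     ((rt & 0x1f) << 21) | ((ra & 0x1f) << 16) |
			     (d & 0xffff));
}

int main(void)
{
	uint32_t w[2];

	encode_pld(31, 3, 0, 0, w);	/* pld r31, 0(r3) with R=0 */
	printf("prefix: 0x%08x suffix: 0x%08x\n", w[0], w[1]);
	return 0;
}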
diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h
new file mode 100644
index 000000000000..3312cb1b058d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/pkeys.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PKEYS_H
+#define _SELFTESTS_POWERPC_PKEYS_H
+
+#include <sys/mman.h>
+
+#include "reg.h"
+#include "utils.h"
+
+/*
+ * Older versions of libc use the Intel-specific access rights.
+ * Hence, override the definitions as they might be incorrect.
+ */
+#undef PKEY_DISABLE_ACCESS
+#define PKEY_DISABLE_ACCESS 0x3
+
+#undef PKEY_DISABLE_WRITE
+#define PKEY_DISABLE_WRITE 0x2
+
+#undef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE 0x4
+
+/* Older versions of libc do not define this */
+#ifndef SEGV_PKUERR
+#define SEGV_PKUERR 4
+#endif
+
+#define SI_PKEY_OFFSET 0x20
+
+#define __NR_pkey_mprotect 386
+#define __NR_pkey_alloc 384
+#define __NR_pkey_free 385
+
+#define PKEY_BITS_PER_PKEY 2
+#define NR_PKEYS 32
+#define PKEY_BITS_MASK ((1UL << PKEY_BITS_PER_PKEY) - 1)
+
+inline unsigned long pkeyreg_get(void)
+{
+ return mfspr(SPRN_AMR);
+}
+
+inline void pkeyreg_set(unsigned long amr)
+{
+ set_amr(amr);
+}
+
+void pkey_set_rights(int pkey, unsigned long rights)
+{
+ unsigned long amr, shift;
+
+ shift = (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
+ amr = pkeyreg_get();
+ amr &= ~(PKEY_BITS_MASK << shift);
+ amr |= (rights & PKEY_BITS_MASK) << shift;
+ pkeyreg_set(amr);
+}
+
+int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey)
+{
+ return syscall(__NR_pkey_mprotect, addr, len, prot, pkey);
+}
+
+int sys_pkey_alloc(unsigned long flags, unsigned long rights)
+{
+ return syscall(__NR_pkey_alloc, flags, rights);
+}
+
+int sys_pkey_free(int pkey)
+{
+ return syscall(__NR_pkey_free, pkey);
+}
+
+int pkeys_unsupported(void)
+{
+ bool hash_mmu = false;
+ int pkey;
+
+ /* Protection keys are currently supported on Hash MMU only */
+ FAIL_IF(using_hash_mmu(&hash_mmu));
+ SKIP_IF(!hash_mmu);
+
+ /* Check if the system call is supported */
+ pkey = sys_pkey_alloc(0, 0);
+ SKIP_IF(pkey < 0);
+ sys_pkey_free(pkey);
+
+ return 0;
+}
+
+int siginfo_pkey(siginfo_t *si)
+{
+ /*
+ * In older versions of libc, siginfo_t does not have si_pkey as
+ * a member.
+ */
+#ifdef si_pkey
+ return si->si_pkey;
+#else
+ return *((int *)(((char *) si) + SI_PKEY_OFFSET));
+#endif
+}
+
+#define pkey_rights(r) ({ \
+ static char buf[4] = "rwx"; \
+ unsigned int amr_bits; \
+ if ((r) & PKEY_DISABLE_EXECUTE) \
+ buf[2] = '-'; \
+ amr_bits = (r) & PKEY_BITS_MASK; \
+ if (amr_bits & PKEY_DISABLE_WRITE) \
+ buf[1] = '-'; \
+ if (amr_bits & PKEY_DISABLE_ACCESS & ~PKEY_DISABLE_WRITE) \
+ buf[0] = '-'; \
+ buf; \
+})
+
+unsigned long next_pkey_rights(unsigned long rights)
+{
+ if (rights == PKEY_DISABLE_ACCESS)
+ return PKEY_DISABLE_EXECUTE;
+ else if (rights == (PKEY_DISABLE_ACCESS | PKEY_DISABLE_EXECUTE))
+ return 0;
+
+ if ((rights & PKEY_BITS_MASK) == 0)
+ rights |= PKEY_DISABLE_WRITE;
+ else if ((rights & PKEY_BITS_MASK) == PKEY_DISABLE_WRITE)
+ rights |= PKEY_DISABLE_ACCESS;
+
+ return rights;
+}
+
+#endif /* _SELFTESTS_POWERPC_PKEYS_H */
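
A hedged usage sketch for the helpers above: allocate a key that denies writes, attach it to an anonymous page with pkey_mprotect(), and the next store to that page raises SIGSEGV with si_code SEGV_PKUERR. The syscall numbers below are the powerpc ones defined in this header; on other architectures they differ, and this is only an illustration of the calling pattern the tests rely on.

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef PKEY_DISABLE_WRITE
#define PKEY_DISABLE_WRITE	0x2
#endif
#define __NR_pkey_mprotect	386	/* powerpc numbers, as in pkeys.h */
#define __NR_pkey_alloc		384
#define __NR_pkey_free		385

int main(void)
{
	size_t pgsize = getpagesize();
	char *p = mmap(NULL, pgsize, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	long pkey;

	if (p == MAP_FAILED)
		return 1;

	pkey = syscall(__NR_pkey_alloc, 0, PKEY_DISABLE_WRITE);
	if (pkey < 0)
		return 4;	/* no pkey support: skip */

	/* Reads stay legal; the next write would fault with SEGV_PKUERR. */
	syscall(__NR_pkey_mprotect, p, pgsize, PROT_READ | PROT_WRITE, pkey);
	printf("page %p is now write-disabled via pkey %ld\n", p, pkey);
	/* p[0] = 1; */		/* uncommenting this triggers the pkey fault */

	syscall(__NR_pkey_free, pkey);
	return 0;
}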
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 022c5076b2c5..c0f2742a3a59 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -57,6 +57,12 @@
#define SPRN_PPR 896 /* Program Priority Register */
#define SPRN_AMR 13 /* Authority Mask Register - problem state */
+#define set_amr(v) asm volatile("isync;" \
+ "mtspr " __stringify(SPRN_AMR) ",%0;" \
+ "isync" : \
+ : "r" ((unsigned long)(v)) \
+ : "memory")
+
/* TEXASR register bits */
#define TEXASR_FC 0xFE00000000000000
#define TEXASR_FP 0x0100000000000000
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index e089a0c30d9a..052b5a775dc2 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -12,6 +12,7 @@
#include <stdbool.h>
#include <linux/auxvec.h>
#include <linux/perf_event.h>
+#include <asm/cputable.h>
#include "reg.h"
/* Avoid headaches with PRI?64 - just use %ll? always */
@@ -35,13 +36,22 @@ int pick_online_cpu(void);
int read_debugfs_file(char *debugfs_file, int *result);
int write_debugfs_file(char *debugfs_file, int result);
int read_sysfs_file(char *debugfs_file, char *result, size_t result_size);
-void set_dscr(unsigned long val);
int perf_event_open_counter(unsigned int type,
unsigned long config, int group_fd);
int perf_event_enable(int fd);
int perf_event_disable(int fd);
int perf_event_reset(int fd);
+#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30)
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static inline pid_t gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+#endif
+
static inline bool have_hwcap(unsigned long ftr)
{
return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr;
@@ -60,6 +70,7 @@ static inline bool have_hwcap2(unsigned long ftr2)
#endif
bool is_ppc64le(void);
+int using_hash_mmu(bool *using_hash);
/* Yes, this is evil */
#define FAIL_IF(x) \
@@ -71,6 +82,15 @@ do { \
} \
} while (0)
+#define FAIL_IF_EXIT(x) \
+do { \
+ if ((x)) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d\n", __LINE__); \
+ _exit(1); \
+ } \
+} while (0)
+
/* The test harness uses this, yes it's gross */
#define MAGIC_SKIP_RETURN_VALUE 99
@@ -96,11 +116,20 @@ do { \
#define _str(s) #s
#define str(s) _str(s)
+#define sigsafe_err(msg) ({ \
+ ssize_t nbytes __attribute__((unused)); \
+ nbytes = write(STDERR_FILENO, msg, strlen(msg)); })
+
/* POWER9 feature */
#ifndef PPC_FEATURE2_ARCH_3_00
#define PPC_FEATURE2_ARCH_3_00 0x00800000
#endif
+/* POWER10 feature */
+#ifndef PPC_FEATURE2_ARCH_3_1
+#define PPC_FEATURE2_ARCH_3_1 0x00040000
+#endif
+
#if defined(__powerpc64__)
#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP]
#define UCONTEXT_MSR(UC) (UC)->uc_mcontext.gp_regs[PT_MSR]
diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore
index e31ca6f453ed..d0c23b2e4b60 100644
--- a/tools/testing/selftests/powerpc/math/.gitignore
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -6,3 +6,4 @@ vmx_preempt
fpu_signal
vmx_signal
vsx_preempt
+fpu_denormal
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index 11a10d7a2bbd..fcc91c205984 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
+TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt
top_srcdir = ../../../../..
include ../../lib.mk
@@ -11,9 +11,9 @@ $(OUTPUT)/fpu_syscall: fpu_asm.S
$(OUTPUT)/fpu_preempt: fpu_asm.S
$(OUTPUT)/fpu_signal: fpu_asm.S
-$(OUTPUT)/vmx_syscall: vmx_asm.S
-$(OUTPUT)/vmx_preempt: vmx_asm.S
-$(OUTPUT)/vmx_signal: vmx_asm.S
+$(OUTPUT)/vmx_syscall: vmx_asm.S ../utils.c
+$(OUTPUT)/vmx_preempt: vmx_asm.S ../utils.c
+$(OUTPUT)/vmx_signal: vmx_asm.S ../utils.c
$(OUTPUT)/vsx_preempt: CFLAGS += -mvsx
-$(OUTPUT)/vsx_preempt: vsx_asm.S
+$(OUTPUT)/vsx_preempt: vsx_asm.S ../utils.c
diff --git a/tools/testing/selftests/powerpc/math/fpu_denormal.c b/tools/testing/selftests/powerpc/math/fpu_denormal.c
new file mode 100644
index 000000000000..5f96682abaa8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_denormal.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * This test attempts to cause an FP denormal exception on POWER8 CPUs. Unfortunately,
+ * if the denormal handler is not configured or working properly, this can cause a bad
+ * crash in kernel mode when the kernel tries to save FP registers when the process
+ * exits.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "utils.h"
+
+static int test_denormal_fpu(void)
+{
+ unsigned int m32;
+ unsigned long m64;
+ volatile float f;
+ volatile double d;
+
+ /* try to induce lfs <denormal> ; stfd */
+
+ m32 = 0x00715fcf; /* random denormal */
+ memcpy((float *)&f, &m32, sizeof(f));
+ d = f;
+ memcpy(&m64, (double *)&d, sizeof(d));
+
+ FAIL_IF((long)(m64 != 0x380c57f3c0000000)); /* renormalised value */
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_denormal_fpu, "fpu_denormal");
+}
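
A host-side check, for illustration only, of the constant used above: the single-precision denormal bit pattern 0x00715fcf, widened to double, renormalises to 0x380c57f3c0000000. Any IEEE-754 machine can confirm the arithmetic; the selftest's point is that this conversion must also survive the kernel's FP state save path on POWER8.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint32_t m32 = 0x00715fcf;	/* denormal float bit pattern */
	uint64_t m64;
	float f;
	double d;

	memcpy(&f, &m32, sizeof(f));
	d = f;				/* widen: the value becomes a normal double */
	memcpy(&m64, &d, sizeof(d));

	printf("float bits 0x%08x -> double bits 0x%016llx\n",
	       m32, (unsigned long long)m64);
	return m64 == 0x380c57f3c0000000ull ? 0 : 1;
}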
diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c
index 2e059f154e77..6761d6ce30ec 100644
--- a/tools/testing/selftests/powerpc/math/vmx_preempt.c
+++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c
@@ -57,6 +57,9 @@ int test_preempt_vmx(void)
int i, rc, threads;
pthread_t *tids;
+ // vcmpequd used in vmx_asm.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
tids = malloc(threads * sizeof(pthread_t));
FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c
index 785a48e0976f..b340a5c4e79d 100644
--- a/tools/testing/selftests/powerpc/math/vmx_signal.c
+++ b/tools/testing/selftests/powerpc/math/vmx_signal.c
@@ -96,6 +96,9 @@ int test_signal_vmx(void)
void *rc_p;
pthread_t *tids;
+ // vcmpequd used in vmx_asm.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
tids = malloc(threads * sizeof(pthread_t));
FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c
index 9ee293cc868e..03c78dfe3444 100644
--- a/tools/testing/selftests/powerpc/math/vmx_syscall.c
+++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c
@@ -49,9 +49,14 @@ int test_vmx_syscall(void)
* Setup an environment with much context switching
*/
pid_t pid2;
- pid_t pid = fork();
+ pid_t pid;
int ret;
int child_ret;
+
+ // vcmpequd used in vmx_asm.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
+ pid = fork();
FAIL_IF(pid == -1);
pid2 = fork();
diff --git a/tools/testing/selftests/powerpc/math/vsx_preempt.c b/tools/testing/selftests/powerpc/math/vsx_preempt.c
index 63de9c6e2cd3..d1601bb889d4 100644
--- a/tools/testing/selftests/powerpc/math/vsx_preempt.c
+++ b/tools/testing/selftests/powerpc/math/vsx_preempt.c
@@ -92,6 +92,8 @@ int test_preempt_vsx(void)
int i, rc, threads;
pthread_t *tids;
+ SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX));
+
threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
tids = malloc(threads * sizeof(pthread_t));
FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 2ca523255b1b..aac4a59f9e28 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -8,3 +8,7 @@ wild_bctr
large_vm_fork_separation
bad_accesses
tlbie_test
+pkey_exec_prot
+pkey_siginfo
+stack_expansion_ldst
+stack_expansion_signal
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index b9103c4bb414..defe488d6bf1 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -3,22 +3,32 @@ noarg:
$(MAKE) -C ../
TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
- large_vm_fork_separation bad_accesses
+ large_vm_fork_separation bad_accesses pkey_exec_prot \
+ pkey_siginfo stack_expansion_signal stack_expansion_ldst
+
TEST_GEN_PROGS_EXTENDED := tlbie_test
TEST_GEN_FILES := tempfile
top_srcdir = ../../../../..
include ../../lib.mk
-$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
$(OUTPUT)/prot_sao: ../utils.c
$(OUTPUT)/wild_bctr: CFLAGS += -m64
$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
$(OUTPUT)/bad_accesses: CFLAGS += -m64
+$(OUTPUT)/pkey_exec_prot: CFLAGS += -m64
+$(OUTPUT)/pkey_siginfo: CFLAGS += -m64
+
+$(OUTPUT)/stack_expansion_signal: ../utils.c ../pmu/lib.c
+
+$(OUTPUT)/stack_expansion_ldst: CFLAGS += -fno-stack-protector
+$(OUTPUT)/stack_expansion_ldst: ../utils.c
$(OUTPUT)/tempfile:
dd if=/dev/zero of=$@ bs=64k count=1
$(OUTPUT)/tlbie_test: LDLIBS += -lpthread
+$(OUTPUT)/pkey_siginfo: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c
index adc465f499ef..fd747b2ffcfc 100644
--- a/tools/testing/selftests/powerpc/mm/bad_accesses.c
+++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c
@@ -64,34 +64,6 @@ int bad_access(char *p, bool write)
return 0;
}
-static int using_hash_mmu(bool *using_hash)
-{
- char line[128];
- FILE *f;
- int rc;
-
- f = fopen("/proc/cpuinfo", "r");
- FAIL_IF(!f);
-
- rc = 0;
- while (fgets(line, sizeof(line), f) != NULL) {
- if (strcmp(line, "MMU : Hash\n") == 0) {
- *using_hash = true;
- goto out;
- }
-
- if (strcmp(line, "MMU : Radix\n") == 0) {
- *using_hash = false;
- goto out;
- }
- }
-
- rc = -1;
-out:
- fclose(f);
- return rc;
-}
-
static int test(void)
{
unsigned long i, j, addr, region_shift, page_shift, page_size;
@@ -167,5 +139,6 @@ static int test(void)
int main(void)
{
+ test_harness_set_timeout(300);
return test_harness(test, "bad_accesses");
}
diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
new file mode 100644
index 000000000000..9e5c7f3f498a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ *
+ * Test if applying execute protection on pages using memory
+ * protection keys works as expected.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <unistd.h>
+
+#include "pkeys.h"
+
+#define PPC_INST_NOP 0x60000000
+#define PPC_INST_TRAP 0x7fe00008
+#define PPC_INST_BLR 0x4e800020
+
+static volatile sig_atomic_t fault_pkey, fault_code, fault_type;
+static volatile sig_atomic_t remaining_faults;
+static volatile unsigned int *fault_addr;
+static unsigned long pgsize, numinsns;
+static unsigned int *insns;
+
+static void trap_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+ /* Check if this fault originated from the expected address */
+ if (sinfo->si_addr != (void *) fault_addr)
+ sigsafe_err("got a fault for an unexpected address\n");
+
+ _exit(1);
+}
+
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+ int signal_pkey;
+
+ signal_pkey = siginfo_pkey(sinfo);
+ fault_code = sinfo->si_code;
+
+ /* Check if this fault originated from the expected address */
+ if (sinfo->si_addr != (void *) fault_addr) {
+ sigsafe_err("got a fault for an unexpected address\n");
+ _exit(1);
+ }
+
+ /* Check if too many faults have occurred for a single test case */
+ if (!remaining_faults) {
+ sigsafe_err("got too many faults for the same address\n");
+ _exit(1);
+ }
+
+
+ /* Restore permissions in order to continue */
+ switch (fault_code) {
+ case SEGV_ACCERR:
+ if (mprotect(insns, pgsize, PROT_READ | PROT_WRITE)) {
+ sigsafe_err("failed to set access permissions\n");
+ _exit(1);
+ }
+ break;
+ case SEGV_PKUERR:
+ if (signal_pkey != fault_pkey) {
+ sigsafe_err("got a fault for an unexpected pkey\n");
+ _exit(1);
+ }
+
+ switch (fault_type) {
+ case PKEY_DISABLE_ACCESS:
+ pkey_set_rights(fault_pkey, 0);
+ break;
+ case PKEY_DISABLE_EXECUTE:
+ /*
+ * Reassociate the exec-only pkey with the region
+ * to be able to continue. Unlike AMR, we cannot
+ * set IAMR directly from userspace to restore the
+ * permissions.
+ */
+ if (mprotect(insns, pgsize, PROT_EXEC)) {
+ sigsafe_err("failed to set execute permissions\n");
+ _exit(1);
+ }
+ break;
+ default:
+ sigsafe_err("got a fault with an unexpected type\n");
+ _exit(1);
+ }
+ break;
+ default:
+ sigsafe_err("got a fault with an unexpected code\n");
+ _exit(1);
+ }
+
+ remaining_faults--;
+}
+
+static int test(void)
+{
+ struct sigaction segv_act, trap_act;
+ unsigned long rights;
+ int pkey, ret, i;
+
+ ret = pkeys_unsupported();
+ if (ret)
+ return ret;
+
+ /* Setup SIGSEGV handler */
+ segv_act.sa_handler = 0;
+ segv_act.sa_sigaction = segv_handler;
+ FAIL_IF(sigprocmask(SIG_SETMASK, 0, &segv_act.sa_mask) != 0);
+ segv_act.sa_flags = SA_SIGINFO;
+ segv_act.sa_restorer = 0;
+ FAIL_IF(sigaction(SIGSEGV, &segv_act, NULL) != 0);
+
+ /* Setup SIGTRAP handler */
+ trap_act.sa_handler = 0;
+ trap_act.sa_sigaction = trap_handler;
+ FAIL_IF(sigprocmask(SIG_SETMASK, 0, &trap_act.sa_mask) != 0);
+ trap_act.sa_flags = SA_SIGINFO;
+ trap_act.sa_restorer = 0;
+ FAIL_IF(sigaction(SIGTRAP, &trap_act, NULL) != 0);
+
+ /* Setup executable region */
+ pgsize = getpagesize();
+ numinsns = pgsize / sizeof(unsigned int);
+ insns = (unsigned int *) mmap(NULL, pgsize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ FAIL_IF(insns == MAP_FAILED);
+
+ /* Write the instruction words */
+ for (i = 1; i < numinsns - 1; i++)
+ insns[i] = PPC_INST_NOP;
+
+ /*
+ * Set the first instruction as an unconditional trap. If
+ * the last write to this address succeeds, this should
+ * get overwritten by a no-op.
+ */
+ insns[0] = PPC_INST_TRAP;
+
+ /*
+ * Later, to jump to the executable region, we use a branch
+ * and link instruction (bctrl) which sets the return address
+ * automatically in LR. Use that to return back.
+ */
+ insns[numinsns - 1] = PPC_INST_BLR;
+
+ /* Allocate a pkey that restricts execution */
+ rights = PKEY_DISABLE_EXECUTE;
+ pkey = sys_pkey_alloc(0, rights);
+ FAIL_IF(pkey < 0);
+
+ /*
+ * Pick the first instruction's address from the executable
+ * region.
+ */
+ fault_addr = insns;
+
+ /* The following two cases will avoid SEGV_PKUERR */
+ fault_type = -1;
+ fault_pkey = -1;
+
+ /*
+ * Read an instruction word from the address when AMR bits
+ * are not set i.e. the pkey permits both read and write
+ * access.
+ *
+ * This should not generate a fault as having PROT_EXEC
+ * implies PROT_READ on GNU systems. The pkey currently
+ * restricts execution only based on the IAMR bits. The
+ * AMR bits are cleared.
+ */
+ remaining_faults = 0;
+ FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+ printf("read from %p, pkey permissions are %s\n", fault_addr,
+ pkey_rights(rights));
+ i = *fault_addr;
+ FAIL_IF(remaining_faults != 0);
+
+ /*
+ * Write an instruction word to the address when AMR bits
+ * are not set i.e. the pkey permits both read and write
+ * access.
+ *
+ * This should generate an access fault as having just
+ * PROT_EXEC also restricts writes. The pkey currently
+ * restricts execution only based on the IAMR bits. The
+ * AMR bits are cleared.
+ */
+ remaining_faults = 1;
+ FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+ printf("write to %p, pkey permissions are %s\n", fault_addr,
+ pkey_rights(rights));
+ *fault_addr = PPC_INST_TRAP;
+ FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR);
+
+ /* The following three cases will generate SEGV_PKUERR */
+ rights |= PKEY_DISABLE_ACCESS;
+ fault_type = PKEY_DISABLE_ACCESS;
+ fault_pkey = pkey;
+
+ /*
+ * Read an instruction word from the address when AMR bits
+ * are set i.e. the pkey permits neither read nor write
+ * access.
+ *
+ * This should generate a pkey fault based on AMR bits only
+ * as having PROT_EXEC implicitly allows reads.
+ */
+ remaining_faults = 1;
+ FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+ pkey_set_rights(pkey, rights);
+ printf("read from %p, pkey permissions are %s\n", fault_addr,
+ pkey_rights(rights));
+ i = *fault_addr;
+ FAIL_IF(remaining_faults != 0 || fault_code != SEGV_PKUERR);
+
+ /*
+ * Write an instruction word to the address when AMR bits
+ * are set i.e. the pkey permits neither read nor write
+ * access.
+ *
+ * This should generate two faults. First, a pkey fault
+ * based on AMR bits and then an access fault since
+ * PROT_EXEC does not allow writes.
+ */
+ remaining_faults = 2;
+ FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+ pkey_set_rights(pkey, rights);
+ printf("write to %p, pkey permissions are %s\n", fault_addr,
+ pkey_rights(rights));
+ *fault_addr = PPC_INST_NOP;
+ FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR);
+
+ /* Free the current pkey */
+ sys_pkey_free(pkey);
+
+ rights = 0;
+ do {
+ /*
+ * Allocate pkeys with all valid combinations of read,
+ * write and execute restrictions.
+ */
+ pkey = sys_pkey_alloc(0, rights);
+ FAIL_IF(pkey < 0);
+
+ /*
+ * Jump to the executable region. AMR bits may or may not
+ * be set but they should not affect execution.
+ *
+ * This should generate pkey faults based on IAMR bits which
+ * may be set to restrict execution.
+ *
+ * The first iteration also checks if the overwrite of the
+ * first instruction word from a trap to a no-op succeeded.
+ */
+ fault_pkey = pkey;
+ fault_type = -1;
+ remaining_faults = 0;
+ if (rights & PKEY_DISABLE_EXECUTE) {
+ fault_type = PKEY_DISABLE_EXECUTE;
+ remaining_faults = 1;
+ }
+
+ FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+ printf("execute at %p, pkey permissions are %s\n", fault_addr,
+ pkey_rights(rights));
+ asm volatile("mtctr %0; bctrl" : : "r"(insns));
+ FAIL_IF(remaining_faults != 0);
+ if (rights & PKEY_DISABLE_EXECUTE)
+ FAIL_IF(fault_code != SEGV_PKUERR);
+
+ /* Free the current pkey */
+ sys_pkey_free(pkey);
+
+ /* Find next valid combination of pkey rights */
+ rights = next_pkey_rights(rights);
+ } while (rights);
+
+ /* Cleanup */
+ munmap((void *) insns, pgsize);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test, "pkey_exec_prot");
+}
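
Setting the pkeys aside, the core mechanics of pkey_exec_prot are: fill an anonymous page with no-ops ending in blr, make it executable, and branch into it. A stripped-down, powerpc64-only sketch of just that part, using a plain function-pointer call instead of the test's mtctr/bctrl asm and an explicit icache flush as a precaution; this is an assumption-laden illustration, not part of the patch.

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define PPC_INST_NOP 0x60000000u
#define PPC_INST_BLR 0x4e800020u

int main(void)
{
	size_t pgsize = getpagesize();
	size_t numinsns = pgsize / sizeof(unsigned int);
	unsigned int *insns;
	size_t i;

	insns = mmap(NULL, pgsize, PROT_READ | PROT_WRITE,
		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (insns == MAP_FAILED)
		return 1;

	/* no-ops followed by a blr so the "function" simply returns. */
	for (i = 0; i < numinsns - 1; i++)
		insns[i] = PPC_INST_NOP;
	insns[numinsns - 1] = PPC_INST_BLR;

	/* Make the stores visible to instruction fetch, then flip to execute. */
	__builtin___clear_cache((char *)insns, (char *)insns + pgsize);
	if (mprotect(insns, pgsize, PROT_EXEC))
		return 1;

	((void (*)(void))insns)();	/* runs the nops, blr returns here */
	printf("returned from generated code at %p\n", (void *)insns);

	munmap(insns, pgsize);
	return 0;
}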
diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
new file mode 100644
index 000000000000..4f815d7c1214
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ *
+ * Test if the signal information reports the correct memory protection
+ * key upon getting a key access violation fault for a page that two
+ * competing threads attempted to protect with two different keys at the
+ * same time.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+
+#include "pkeys.h"
+
+#define PPC_INST_NOP 0x60000000
+#define PPC_INST_BLR 0x4e800020
+#define PROT_RWX (PROT_READ | PROT_WRITE | PROT_EXEC)
+
+#define NUM_ITERATIONS 1000000
+
+static volatile sig_atomic_t perm_pkey, rest_pkey;
+static volatile sig_atomic_t rights, fault_count;
+static volatile unsigned int *volatile fault_addr;
+static pthread_barrier_t iteration_barrier;
+
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+ void *pgstart;
+ size_t pgsize;
+ int pkey;
+
+ pkey = siginfo_pkey(sinfo);
+
+ /* Check if this fault originated from a pkey access violation */
+ if (sinfo->si_code != SEGV_PKUERR) {
+ sigsafe_err("got a fault for an unexpected reason\n");
+ _exit(1);
+ }
+
+ /* Check if this fault originated from the expected address */
+ if (sinfo->si_addr != (void *) fault_addr) {
+ sigsafe_err("got a fault for an unexpected address\n");
+ _exit(1);
+ }
+
+ /* Check if this fault originated from the restrictive pkey */
+ if (pkey != rest_pkey) {
+ sigsafe_err("got a fault for an unexpected pkey\n");
+ _exit(1);
+ }
+
+ /* Check if too many faults have occurred for the same iteration */
+ if (fault_count > 0) {
+ sigsafe_err("got too many faults for the same address\n");
+ _exit(1);
+ }
+
+ pgsize = getpagesize();
+ pgstart = (void *) ((unsigned long) fault_addr & ~(pgsize - 1));
+
+ /*
+ * If the current fault occurred due to lack of execute rights,
+ * reassociate the page with the exec-only pkey since execute
+ * rights cannot be changed directly for the faulting pkey as
+ * IAMR is inaccessible from userspace.
+ *
+ * Otherwise, if the current fault occurred due to lack of
+ * read-write rights, change the AMR permission bits for the
+ * pkey.
+ *
+ * This will let the test continue.
+ */
+ if (rights == PKEY_DISABLE_EXECUTE &&
+ mprotect(pgstart, pgsize, PROT_EXEC))
+ _exit(1);
+ else
+ pkey_set_rights(pkey, 0);
+
+ fault_count++;
+}
+
+struct region {
+ unsigned long rights;
+ unsigned int *base;
+ size_t size;
+};
+
+static void *protect(void *p)
+{
+ unsigned long rights;
+ unsigned int *base;
+ size_t size;
+ int tid, i;
+
+ tid = gettid();
+ base = ((struct region *) p)->base;
+ size = ((struct region *) p)->size;
+ FAIL_IF_EXIT(!base);
+
+ /* No read, write and execute restrictions */
+ rights = 0;
+
+ printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights));
+
+ /* Allocate the permissive pkey */
+ perm_pkey = sys_pkey_alloc(0, rights);
+ FAIL_IF_EXIT(perm_pkey < 0);
+
+ /*
+ * Repeatedly try to protect the common region with a permissive
+ * pkey
+ */
+ for (i = 0; i < NUM_ITERATIONS; i++) {
+ /*
+ * Wait until the other thread has finished allocating the
+ * restrictive pkey or until the next iteration has begun
+ */
+ pthread_barrier_wait(&iteration_barrier);
+
+ /* Try to associate the permissive pkey with the region */
+ FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX,
+ perm_pkey));
+ }
+
+ /* Free the permissive pkey */
+ sys_pkey_free(perm_pkey);
+
+ return NULL;
+}
+
+static void *protect_access(void *p)
+{
+ size_t size, numinsns;
+ unsigned int *base;
+ int tid, i;
+
+ tid = gettid();
+ base = ((struct region *) p)->base;
+ size = ((struct region *) p)->size;
+ rights = ((struct region *) p)->rights;
+ numinsns = size / sizeof(base[0]);
+ FAIL_IF_EXIT(!base);
+
+ /* Allocate the restrictive pkey */
+ rest_pkey = sys_pkey_alloc(0, rights);
+ FAIL_IF_EXIT(rest_pkey < 0);
+
+ printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights));
+ printf("tid %d, %s randomly in range [%p, %p]\n", tid,
+ (rights == PKEY_DISABLE_EXECUTE) ? "execute" :
+ (rights == PKEY_DISABLE_WRITE) ? "write" : "read",
+ base, base + numinsns);
+
+ /*
+ * Repeatedly try to protect the common region with a restrictive
+ * pkey and read, write or execute from it
+ */
+ for (i = 0; i < NUM_ITERATIONS; i++) {
+ /*
+ * Wait until the other thread has finished allocating the
+ * permissive pkey or until the next iteration has begun
+ */
+ pthread_barrier_wait(&iteration_barrier);
+
+ /* Try to associate the restrictive pkey with the region */
+ FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX,
+ rest_pkey));
+
+ /* Choose a random instruction word address from the region */
+ fault_addr = base + (rand() % numinsns);
+ fault_count = 0;
+
+ switch (rights) {
+ /* Read protection test */
+ case PKEY_DISABLE_ACCESS:
+ /*
+			 * Read an instruction word from the region and
+			 * verify that it has not been overwritten with
+			 * something unexpected
+ */
+ FAIL_IF_EXIT(*fault_addr != PPC_INST_NOP &&
+ *fault_addr != PPC_INST_BLR);
+ break;
+
+ /* Write protection test */
+ case PKEY_DISABLE_WRITE:
+ /*
+ * Write an instruction word to the region and
+			 * verify that the write has succeeded
+ */
+ *fault_addr = PPC_INST_BLR;
+ FAIL_IF_EXIT(*fault_addr != PPC_INST_BLR);
+ break;
+
+ /* Execute protection test */
+ case PKEY_DISABLE_EXECUTE:
+ /* Jump to the region and execute instructions */
+ asm volatile(
+ "mtctr %0; bctrl"
+ : : "r"(fault_addr) : "ctr", "lr");
+ break;
+ }
+
+ /*
+ * Restore the restrictions originally imposed by the
+ * restrictive pkey as the signal handler would have
+ * cleared out the corresponding AMR bits
+ */
+ pkey_set_rights(rest_pkey, rights);
+ }
+
+ /* Free restrictive pkey */
+ sys_pkey_free(rest_pkey);
+
+ return NULL;
+}
+
+static void reset_pkeys(unsigned long rights)
+{
+ int pkeys[NR_PKEYS], i;
+
+ /* Exhaustively allocate all available pkeys */
+ for (i = 0; i < NR_PKEYS; i++)
+ pkeys[i] = sys_pkey_alloc(0, rights);
+
+ /* Free all allocated pkeys */
+ for (i = 0; i < NR_PKEYS; i++)
+ sys_pkey_free(pkeys[i]);
+}
+
+static int test(void)
+{
+ pthread_t prot_thread, pacc_thread;
+ struct sigaction act;
+ pthread_attr_t attr;
+ size_t numinsns;
+ struct region r;
+ int ret, i;
+
+ srand(time(NULL));
+ ret = pkeys_unsupported();
+ if (ret)
+ return ret;
+
+ /* Allocate the region */
+ r.size = getpagesize();
+ r.base = mmap(NULL, r.size, PROT_RWX,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ FAIL_IF(r.base == MAP_FAILED);
+
+ /*
+ * Fill the region with no-ops with a branch at the end
+ * for returning to the caller
+ */
+ numinsns = r.size / sizeof(r.base[0]);
+ for (i = 0; i < numinsns - 1; i++)
+ r.base[i] = PPC_INST_NOP;
+ r.base[i] = PPC_INST_BLR;
+
+ /* Setup SIGSEGV handler */
+ act.sa_handler = 0;
+ act.sa_sigaction = segv_handler;
+ FAIL_IF(sigprocmask(SIG_SETMASK, 0, &act.sa_mask) != 0);
+ act.sa_flags = SA_SIGINFO;
+ act.sa_restorer = 0;
+ FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0);
+
+ /*
+ * For these tests, the parent process should clear all bits of
+ * AMR and IAMR, i.e. impose no restrictions, for all available
+ * pkeys. This will be the base for the initial AMR and IAMR
+ * values for all the test thread pairs.
+ *
+ * If the AMR and IAMR bits of all available pkeys are cleared
+ * before running the tests and a fault is generated when
+ * attempting to read, write or execute instructions from a
+ * pkey protected region, the pkey responsible for this must be
+ * the one from the protect-and-access thread since the other
+	 * one is fully permissive. Hence, if the pkey reported by
+	 * siginfo is not the restrictive pkey, there must be a kernel
+	 * bug.
+ */
+ reset_pkeys(0);
+
+ /* Setup barrier for protect and protect-and-access threads */
+ FAIL_IF(pthread_attr_init(&attr) != 0);
+ FAIL_IF(pthread_barrier_init(&iteration_barrier, NULL, 2) != 0);
+
+ /* Setup and start protect and protect-and-read threads */
+ puts("starting thread pair (protect, protect-and-read)");
+ r.rights = PKEY_DISABLE_ACCESS;
+ FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+ FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+ FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+ FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+ /* Setup and start protect and protect-and-write threads */
+ puts("starting thread pair (protect, protect-and-write)");
+ r.rights = PKEY_DISABLE_WRITE;
+ FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+ FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+ FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+ FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+ /* Setup and start protect and protect-and-execute threads */
+ puts("starting thread pair (protect, protect-and-execute)");
+ r.rights = PKEY_DISABLE_EXECUTE;
+ FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+ FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+ FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+ FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+ /* Cleanup */
+ FAIL_IF(pthread_attr_destroy(&attr) != 0);
+ FAIL_IF(pthread_barrier_destroy(&iteration_barrier) != 0);
+ munmap(r.base, r.size);
+
+ return 0;
+}
+
+int main(void)
+{
+ test_harness(test, "pkey_siginfo");
+}
diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c
index e2eed65b7735..30b71b1d78d5 100644
--- a/tools/testing/selftests/powerpc/mm/prot_sao.c
+++ b/tools/testing/selftests/powerpc/mm/prot_sao.c
@@ -7,6 +7,7 @@
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
+#include <unistd.h>
#include <asm/cputable.h>
@@ -18,8 +19,13 @@ int test_prot_sao(void)
{
char *p;
- /* 2.06 or later should support SAO */
- SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+ /*
+ * SAO was introduced in 2.06 and removed in 3.1. It's disabled in
+ * guests/LPARs by default, so also skip if we are running in a guest.
+ */
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06) ||
+ have_hwcap2(PPC_FEATURE2_ARCH_3_1) ||
+ access("/proc/device-tree/rtas/ibm,hypertas-functions", F_OK) == 0);
/*
* Ensure we can ask for PROT_SAO.
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
new file mode 100644
index 000000000000..ed9143990888
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that loads/stores expand the stack segment, or trigger a SEGV, in
+ * various conditions.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#undef NDEBUG
+#include <assert.h>
+
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+volatile char *stack_top_ptr;
+volatile unsigned long stack_top_sp;
+volatile char c;
+
+enum access_type {
+ LOAD,
+ STORE,
+};
+
+/*
+ * Consume stack until the stack pointer is below @target_sp, then do an access
+ * (load or store) at @delta bytes below the high end (@stack_high) of the
+ * stack mapping.
+ */
+__attribute__ ((noinline))
+int consume_stack(unsigned long target_sp, unsigned long stack_high, int delta, enum access_type type)
+{
+ unsigned long target;
+ char stack_cur;
+
+ if ((unsigned long)&stack_cur > target_sp)
+ return consume_stack(target_sp, stack_high, delta, type);
+ else {
+ // We don't really need this, but without it GCC might not
+ // generate a recursive call above.
+ stack_top_ptr = &stack_cur;
+
+#ifdef __powerpc__
+ asm volatile ("mr %[sp], %%r1" : [sp] "=r" (stack_top_sp));
+#else
+ asm volatile ("mov %%rsp, %[sp]" : [sp] "=r" (stack_top_sp));
+#endif
+ target = stack_high - delta + 1;
+ volatile char *p = (char *)target;
+
+ if (type == STORE)
+ *p = c;
+ else
+ c = *p;
+
+ // Do something to prevent the stack frame being popped prior to
+ // our access above.
+ getpid();
+ }
+
+ return 0;
+}
+
+static int search_proc_maps(char *needle, unsigned long *low, unsigned long *high)
+{
+ unsigned long start, end;
+ static char buf[4096];
+ char name[128];
+ FILE *f;
+ int rc;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f) {
+ perror("fopen");
+ return -1;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ rc = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*d:%*d %*d %127s\n",
+ &start, &end, name);
+ if (rc == 2)
+ continue;
+
+ if (rc != 3) {
+ printf("sscanf errored\n");
+ rc = -1;
+ break;
+ }
+
+ if (strstr(name, needle)) {
+ *low = start;
+ *high = end - 1;
+ rc = 0;
+ break;
+ }
+ }
+
+ fclose(f);
+
+ return rc;
+}
+
+int child(unsigned int stack_used, int delta, enum access_type type)
+{
+ unsigned long low, stack_high;
+
+ assert(search_proc_maps("[stack]", &low, &stack_high) == 0);
+
+ assert(consume_stack(stack_high - stack_used, stack_high, delta, type) == 0);
+
+ printf("Access OK: %s delta %-7d used size 0x%06x stack high 0x%lx top_ptr %p top sp 0x%lx actual used 0x%lx\n",
+ type == LOAD ? "load" : "store", delta, stack_used, stack_high,
+ stack_top_ptr, stack_top_sp, stack_high - stack_top_sp + 1);
+
+ return 0;
+}
+
+static int test_one(unsigned int stack_used, int delta, enum access_type type)
+{
+ pid_t pid;
+ int rc;
+
+ pid = fork();
+ if (pid == 0)
+ exit(child(stack_used, delta, type));
+
+ assert(waitpid(pid, &rc, 0) != -1);
+
+ if (WIFEXITED(rc) && WEXITSTATUS(rc) == 0)
+ return 0;
+
+ // We don't expect a non-zero exit that's not a signal
+ assert(!WIFEXITED(rc));
+
+ printf("Faulted: %s delta %-7d used size 0x%06x signal %d\n",
+ type == LOAD ? "load" : "store", delta, stack_used,
+ WTERMSIG(rc));
+
+ return 1;
+}
+
+// This is fairly arbitrary but is well below any of the targets below,
+// so that the delta between the stack pointer and the target is large.
+#define DEFAULT_SIZE (32 * _KB)
+
+static void test_one_type(enum access_type type, unsigned long page_size, unsigned long rlim_cur)
+{
+ unsigned long delta;
+
+ // We should be able to access anywhere within the rlimit
+ for (delta = page_size; delta <= rlim_cur; delta += page_size)
+ assert(test_one(DEFAULT_SIZE, delta, type) == 0);
+
+ assert(test_one(DEFAULT_SIZE, rlim_cur, type) == 0);
+
+ // But if we go past the rlimit it should fail
+ assert(test_one(DEFAULT_SIZE, rlim_cur + 1, type) != 0);
+}
+
+static int test(void)
+{
+ unsigned long page_size;
+ struct rlimit rlimit;
+
+ page_size = getpagesize();
+ getrlimit(RLIMIT_STACK, &rlimit);
+ printf("Stack rlimit is 0x%lx\n", rlimit.rlim_cur);
+
+ printf("Testing loads ...\n");
+ test_one_type(LOAD, page_size, rlimit.rlim_cur);
+ printf("Testing stores ...\n");
+ test_one_type(STORE, page_size, rlimit.rlim_cur);
+
+ printf("All OK\n");
+
+ return 0;
+}
+
+#ifdef __powerpc__
+#include "utils.h"
+
+int main(void)
+{
+ return test_harness(test, "stack_expansion_ldst");
+}
+#else
+int main(void)
+{
+ return test();
+}
+#endif
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
new file mode 100644
index 000000000000..c8b32a29e274
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that signal delivery is able to expand the stack segment without
+ * triggering a SEGV.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../pmu/lib.h"
+#include "utils.h"
+
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+static char *stack_base_ptr;
+static char *stack_top_ptr;
+
+static volatile sig_atomic_t sig_occurred = 0;
+
+static void sigusr1_handler(int signal_arg)
+{
+ sig_occurred = 1;
+}
+
+static int consume_stack(unsigned int stack_size, union pipe write_pipe)
+{
+ char stack_cur;
+
+ if ((stack_base_ptr - &stack_cur) < stack_size)
+ return consume_stack(stack_size, write_pipe);
+ else {
+ stack_top_ptr = &stack_cur;
+
+ FAIL_IF(notify_parent(write_pipe));
+
+ while (!sig_occurred)
+ barrier();
+ }
+
+ return 0;
+}
+
+static int child(unsigned int stack_size, union pipe write_pipe)
+{
+ struct sigaction act;
+ char stack_base;
+
+ act.sa_handler = sigusr1_handler;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ if (sigaction(SIGUSR1, &act, NULL) < 0)
+ err(1, "sigaction");
+
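+	// Round the address of a local variable up to a 64K boundary so the
+	// consumed stack size is measured from an aligned reference point.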
+ stack_base_ptr = (char *) (((size_t) &stack_base + 65535) & ~65535UL);
+
+ FAIL_IF(consume_stack(stack_size, write_pipe));
+
+ printf("size 0x%06x: OK, stack base %p top %p (%zx used)\n",
+ stack_size, stack_base_ptr, stack_top_ptr,
+ stack_base_ptr - stack_top_ptr);
+
+ return 0;
+}
+
+static int test_one_size(unsigned int stack_size)
+{
+ union pipe read_pipe, write_pipe;
+ pid_t pid;
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ close(read_pipe.read_fd);
+ close(write_pipe.write_fd);
+ exit(child(stack_size, read_pipe));
+ }
+
+ close(read_pipe.write_fd);
+ close(write_pipe.read_fd);
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+ kill(pid, SIGUSR1);
+
+ FAIL_IF(wait_for_child(pid));
+
+ close(read_pipe.read_fd);
+ close(write_pipe.write_fd);
+
+ return 0;
+}
+
+int test(void)
+{
+ unsigned int i, size;
+
+ // Test with used stack from 1MB - 64K to 1MB + 64K
+ // Increment by 64 to get more coverage of odd sizes
+ for (i = 0; i < (128 * _KB); i += 64) {
+ size = i + (1 * _MB) - (64 * _KB);
+ FAIL_IF(test_one_size(size));
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test, "stack_expansion_signal");
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules b/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules
new file mode 100644
index 000000000000..5a7118495cb3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules
@@ -0,0 +1 @@
+SUBSYSTEM=="nxgzip", KERNEL=="nx-gzip", MODE="0666"
diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
new file mode 100644
index 000000000000..640fad6cc2c7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
@@ -0,0 +1,8 @@
+CFLAGS = -O3 -m64 -I./include
+
+TEST_GEN_FILES := gzfht_test gunz_test
+TEST_PROGS := nx-gzip-test.sh
+
+include ../../lib.mk
+
+$(TEST_GEN_FILES): gzip_vas.c
diff --git a/tools/testing/selftests/powerpc/nx-gzip/README b/tools/testing/selftests/powerpc/nx-gzip/README
new file mode 100644
index 000000000000..9809dbaa1905
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/README
@@ -0,0 +1,45 @@
+Test the nx-gzip function:
+=========================
+
+Verify that the following device exists:
+ /dev/crypto/nx-gzip
+If you get a permission error run as sudo or set the device permissions:
+ sudo chmod go+rw /dev/crypto/nx-gzip
+However, the chmod setting may not survive across reboots. You may instead
+create a udev rules file such as:
+ /etc/udev/rules.d/99-nx-gzip.rules
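+The 99-nx-gzip.rules file shipped with these selftests contains such an entry.
+Depending on your distribution, you may also need to reload the rules after
+copying the file, for example:
+ sudo udevadm control --reload-rules && sudo udevadm trigger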
+
+
+To manually build and run:
+$ gcc -O3 -I./include -o gzfht_test gzfht_test.c gzip_vas.c
+$ gcc -O3 -I./include -o gunz_test gunz_test.c gzip_vas.c
+
+
+Compress any file using Fixed Huffman mode. Output will have a .nx.gz suffix:
+$ ./gzfht_test gzip_vas.c
+file gzip_vas.c read, 6413 bytes
+compressed 6413 to 3124 bytes total, crc32 checksum = abd15e8a
+
+
+Uncompress the previous output. Output will have a .nx.gunzip suffix:
+$ ./gunz_test gzip_vas.c.nx.gz
+gzHeader FLG 0
+00 00 00 00 04 03
+gzHeader MTIME, XFL, OS ignored
+computed checksum abd15e8a isize 0000190d
+stored checksum abd15e8a isize 0000190d
+decomp is complete: fclose
+
+
+Compare two files:
+$ sha1sum gzip_vas.c.nx.gz.nx.gunzip gzip_vas.c
+bf43e3c0c3651f5f22b6f9784cd9b1eeab4120b6 gzip_vas.c.nx.gz.nx.gunzip
+bf43e3c0c3651f5f22b6f9784cd9b1eeab4120b6 gzip_vas.c
+
+
+Note that the code here is intended for testing the nx-gzip hardware function.
+It is not intended for demonstrating performance or compression ratio.
+Because these selftests are simplistic, they allocate the entire set of source
+and target pages in memory, so enough memory is needed for them to work.
+For more information and source code consider using:
+https://github.com/libnxz/power-gzip
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c b/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c
new file mode 100644
index 000000000000..7c23d3dd7d6d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c
@@ -0,0 +1,1028 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* P9 gunzip sample code for demonstrating the P9 NX hardware
+ * interface. Not intended for production use or for performance or
+ * compression ratio measurements. Note also that /dev/crypto/gzip,
+ * VAS and skiboot support are required
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ * https://github.com/libnxz/power-gzip for zlib api and other utils
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce: completion extension
+ * cpb: coprocessor parameter block (metadata)
+ * crb: coprocessor request block (command)
+ * csb: coprocessor status block (status)
+ * dht: dynamic huffman table
+ * dde: data descriptor element (address, length)
+ * ddl: list of ddes
+ * dh/fh: dynamic and fixed huffman types
+ * fc: coprocessor function code
+ * histlen: history/dictionary length
+ * history: sliding window of up to 32KB of data
+ * lzcount: Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt: source final block type; last block's type during decomp
+ * spbc: source processed byte count
+ * subc: source unprocessed bit count
+ * tebc: target ending bit count; valid bits in the last byte
+ * tpbc: target processed byte count
+ * vas: virtual accelerator switch; the user mode interface
+ */
+
+#define _ISOC11_SOURCE // For aligned_alloc()
+#define _DEFAULT_SOURCE // For endian.h
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "nxu.h"
+#include "nx.h"
+#include "crb.h"
+
+int nx_dbg;
+FILE *nx_gzip_log;
+
+#define NX_MIN(X, Y) (((X) < (Y))?(X):(Y))
+#define NX_MAX(X, Y) (((X) > (Y))?(X):(Y))
+
+#define GETINPC(X) fgetc(X)
+#define FNAME_MAX 1024
+
+/* fifo queue management */
+#define fifo_used_bytes(used) (used)
+#define fifo_free_bytes(used, len) ((len)-(used))
+/* amount of free bytes in the first and last parts */
+#define fifo_free_first_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
+ ? (len)-((cur)+(used)) : 0)
+#define fifo_free_last_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
+ ? (cur) : (len)-(used))
+/* amount of used bytes in the first and last parts */
+#define fifo_used_first_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
+ ? (used) : (len)-(cur))
+#define fifo_used_last_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
+ ? 0 : ((used)+(cur))-(len))
+/* first and last free parts start here */
+#define fifo_free_first_offset(cur, used) ((cur)+(used))
+#define fifo_free_last_offset(cur, used, len) \
+ fifo_used_last_bytes(cur, used, len)
+/* first and last used parts start here */
+#define fifo_used_first_offset(cur) (cur)
+#define fifo_used_last_offset(cur) (0)
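+/*
+ * Illustrative example (not used by the code): with len=16, cur=12 and
+ * used=6 the used data wraps around; used_first is 4 bytes at offsets
+ * 12..15, used_last is 2 bytes at offsets 0..1, and the 10 free bytes
+ * start at offset 2 (free_first is 0, free_last is 10).
+ */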
+
+const int fifo_in_len = 1<<24;
+const int fifo_out_len = 1<<24;
+const int page_sz = 1<<16;
+const int line_sz = 1<<7;
+const int window_max = 1<<15;
+
+/*
+ * Adds an (address, len) pair to the list of ddes (ddl) and updates
+ * the base dde. ddl[0] is the only dde in a direct dde which
+ * contains a single (addr,len) pair. For more pairs, ddl[0] becomes
+ * the indirect (base) dde that points to a list of direct ddes.
+ * See Section 6.4 of the NX-gzip user manual for DDE description.
+ * Addr=NULL, len=0 clears the ddl[0]. Returns the total number of
+ * bytes in ddl. Caller is responsible for allocating the array of
+ * nx_dde_t *ddl. If N addresses are required in the scatter-gather
+ * list, the ddl array must have N+1 entries minimum.
+ */
+static inline uint32_t nx_append_dde(struct nx_dde_t *ddl, void *addr,
+ uint32_t len)
+{
+ uint32_t ddecnt;
+ uint32_t bytes;
+
+ if (addr == NULL && len == 0) {
+ clearp_dde(ddl);
+ return 0;
+ }
+
+	NXPRT(fprintf(stderr, "%d: %s addr %p len %x\n", __LINE__,
+		      __func__, addr, len));
+
+ /* Number of ddes in the dde list ; == 0 when it is a direct dde */
+ ddecnt = getpnn(ddl, dde_count);
+ bytes = getp32(ddl, ddebc);
+
+ if (ddecnt == 0 && bytes == 0) {
+ /* First dde is unused; make it a direct dde */
+ bytes = len;
+ putp32(ddl, ddebc, bytes);
+ putp64(ddl, ddead, (uint64_t) addr);
+ } else if (ddecnt == 0) {
+ /* Converting direct to indirect dde
+ * ddl[0] becomes head dde of ddl
+ * copy direct to indirect first.
+ */
+ ddl[1] = ddl[0];
+
+ /* Add the new dde next */
+ clear_dde(ddl[2]);
+ put32(ddl[2], ddebc, len);
+ put64(ddl[2], ddead, (uint64_t) addr);
+
+ /* Ddl head points to 2 direct ddes */
+ ddecnt = 2;
+ putpnn(ddl, dde_count, ddecnt);
+ bytes = bytes + len;
+ putp32(ddl, ddebc, bytes);
+ /* Pointer to the first direct dde */
+ putp64(ddl, ddead, (uint64_t) &ddl[1]);
+ } else {
+ /* Append a dde to an existing indirect ddl */
+ ++ddecnt;
+ clear_dde(ddl[ddecnt]);
+ put64(ddl[ddecnt], ddead, (uint64_t) addr);
+ put32(ddl[ddecnt], ddebc, len);
+
+ putpnn(ddl, dde_count, ddecnt);
+ bytes = bytes + len;
+ putp32(ddl, ddebc, bytes); /* byte sum of all dde */
+ }
+ return bytes;
+}
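+
+/*
+ * Illustrative usage (a sketch only, not called by these tests): with a
+ * caller-allocated array of at least four entries,
+ *
+ *	nx_append_dde(ddl, NULL, 0);     clears ddl[0]
+ *	nx_append_dde(ddl, buf0, len0);  ddl[0] becomes a direct dde
+ *	nx_append_dde(ddl, buf1, len1);  converts to an indirect ddl
+ *	nx_append_dde(ddl, buf2, len2);  appends ddl[3]; dde_count == 3
+ *
+ * leaving ddl[0] as the head dde that points at the three direct ddes.
+ */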
+
+/*
+ * Touch the specified number of pages, given as a byte count, beginning
+ * from the first buffer in a dde list.
+ * Do not touch pages past the page containing the buf_sz-th byte.
+ *
+ * Set buf_sz = 0 to touch all pages described by the ddep.
+ */
+static int nx_touch_pages_dde(struct nx_dde_t *ddep, long buf_sz, long page_sz,
+ int wr)
+{
+ uint32_t indirect_count;
+ uint32_t buf_len;
+ long total;
+ uint64_t buf_addr;
+ struct nx_dde_t *dde_list;
+ int i;
+
+ assert(!!ddep);
+
+ indirect_count = getpnn(ddep, dde_count);
+
+ NXPRT(fprintf(stderr, "%s dde_count %d request len ", __func__,
+ indirect_count));
+ NXPRT(fprintf(stderr, "0x%lx\n", buf_sz));
+
+ if (indirect_count == 0) {
+ /* Direct dde */
+ buf_len = getp32(ddep, ddebc);
+ buf_addr = getp64(ddep, ddead);
+
+ NXPRT(fprintf(stderr, "touch direct ddebc 0x%x ddead %p\n",
+ buf_len, (void *)buf_addr));
+
+ if (buf_sz == 0)
+ nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+ else
+ nxu_touch_pages((void *)buf_addr, NX_MIN(buf_len,
+ buf_sz), page_sz, wr);
+
+ return ERR_NX_OK;
+ }
+
+ /* Indirect dde */
+ if (indirect_count > MAX_DDE_COUNT)
+ return ERR_NX_EXCESSIVE_DDE;
+
+ /* First address of the list */
+ dde_list = (struct nx_dde_t *) getp64(ddep, ddead);
+
+ if (buf_sz == 0)
+ buf_sz = getp32(ddep, ddebc);
+
+ total = 0;
+ for (i = 0; i < indirect_count; i++) {
+ buf_len = get32(dde_list[i], ddebc);
+ buf_addr = get64(dde_list[i], ddead);
+ total += buf_len;
+
+ NXPRT(fprintf(stderr, "touch loop len 0x%x ddead %p total ",
+ buf_len, (void *)buf_addr));
+ NXPRT(fprintf(stderr, "0x%lx\n", total));
+
+ /* Touching fewer pages than encoded in the ddebc */
+ if (total > buf_sz) {
+ buf_len = NX_MIN(buf_len, total - buf_sz);
+ nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+ NXPRT(fprintf(stderr, "touch loop break len 0x%x ",
+ buf_len));
+ NXPRT(fprintf(stderr, "ddead %p\n", (void *)buf_addr));
+ break;
+ }
+ nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+ }
+ return ERR_NX_OK;
+}
+
+/*
+ * Src and dst buffers are supplied in scatter gather lists.
+ * NX function code and other parameters supplied in cmdp.
+ */
+static int nx_submit_job(struct nx_dde_t *src, struct nx_dde_t *dst,
+ struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+ uint64_t csbaddr;
+
+ memset((void *)&cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
+
+ cmdp->crb.source_dde = *src;
+ cmdp->crb.target_dde = *dst;
+
+ /* Status, output byte count in tpbc */
+ csbaddr = ((uint64_t) &cmdp->crb.csb) & csb_address_mask;
+ put64(cmdp->crb, csb_address, csbaddr);
+
+ /* NX reports input bytes in spbc; cleared */
+ cmdp->cpb.out_spbc_comp_wrap = 0;
+ cmdp->cpb.out_spbc_comp_with_count = 0;
+ cmdp->cpb.out_spbc_decomp = 0;
+
+ /* Clear output */
+ put32(cmdp->cpb, out_crc, INIT_CRC);
+ put32(cmdp->cpb, out_adler, INIT_ADLER);
+
+ /* Submit the crb, the job descriptor, to the accelerator. */
+ return nxu_submit_job(cmdp, handle);
+}
+
+int decompress_file(int argc, char **argv, void *devhandle)
+{
+ FILE *inpf = NULL;
+ FILE *outf = NULL;
+
+ int c, expect, i, cc, rc = 0;
+ char gzfname[FNAME_MAX];
+
+ /* Queuing, file ops, byte counting */
+ char *fifo_in, *fifo_out;
+ int used_in, cur_in, used_out, cur_out, read_sz, n;
+ int first_free, last_free, first_used, last_used;
+ int first_offset, last_offset;
+ int write_sz, free_space, source_sz;
+ int source_sz_estimate, target_sz_estimate;
+ uint64_t last_comp_ratio = 0; /* 1000 max */
+ uint64_t total_out = 0;
+ int is_final, is_eof;
+
+ /* nx hardware */
+ int sfbt, subc, spbc, tpbc, nx_ce, fc, resuming = 0;
+ int history_len = 0;
+ struct nx_gzip_crb_cpb_t cmd, *cmdp;
+ struct nx_dde_t *ddl_in;
+ struct nx_dde_t dde_in[6] __aligned(128);
+ struct nx_dde_t *ddl_out;
+ struct nx_dde_t dde_out[6] __aligned(128);
+ int pgfault_retries;
+
+ /* when using mmap'ed files */
+ off_t input_file_offset;
+
+ if (argc > 2) {
+ fprintf(stderr, "usage: %s <fname> or stdin\n", argv[0]);
+ fprintf(stderr, " writes to stdout or <fname>.nx.gunzip\n");
+ return -1;
+ }
+
+ if (argc == 1) {
+ inpf = stdin;
+ outf = stdout;
+ } else if (argc == 2) {
+ char w[1024];
+ char *wp;
+
+ inpf = fopen(argv[1], "r");
+ if (inpf == NULL) {
+ perror(argv[1]);
+ return -1;
+ }
+
+ /* Make a new file name to write to. Ignoring '.gz' */
+ wp = (NULL != (wp = strrchr(argv[1], '/'))) ? (wp+1) : argv[1];
+ strcpy(w, wp);
+ strcat(w, ".nx.gunzip");
+
+ outf = fopen(w, "w");
+ if (outf == NULL) {
+ perror(w);
+ return -1;
+ }
+ }
+
+ /* Decode the gzip header */
+ c = GETINPC(inpf); expect = 0x1f; /* ID1 */
+ if (c != expect)
+ goto err1;
+
+ c = GETINPC(inpf); expect = 0x8b; /* ID2 */
+ if (c != expect)
+ goto err1;
+
+ c = GETINPC(inpf); expect = 0x08; /* CM */
+ if (c != expect)
+ goto err1;
+
+ int flg = GETINPC(inpf); /* FLG */
+
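+	/* RFC 1952 FLG bits: FTEXT=0x01, FHCRC=0x02, FEXTRA=0x04, FNAME=0x08,
+	 * FCOMMENT=0x10; bits 5-7 are reserved. This sample rejects the
+	 * reserved bits and FEXTRA, which it does not parse.
+	 */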
+ if (flg & 0xE0 || flg & 0x4 || flg == EOF)
+ goto err2;
+
+ fprintf(stderr, "gzHeader FLG %x\n", flg);
+
+ /* Read 6 bytes; ignoring the MTIME, XFL, OS fields in this
+ * sample code.
+ */
+ for (i = 0; i < 6; i++) {
+ char tmp[10];
+
+ tmp[i] = GETINPC(inpf);
+ if (tmp[i] == EOF)
+ goto err3;
+ fprintf(stderr, "%02x ", tmp[i]);
+ if (i == 5)
+ fprintf(stderr, "\n");
+ }
+ fprintf(stderr, "gzHeader MTIME, XFL, OS ignored\n");
+
+ /* FNAME */
+ if (flg & 0x8) {
+ int k = 0;
+
+ do {
+ c = GETINPC(inpf);
+ if (c == EOF || k >= FNAME_MAX)
+ goto err3;
+ gzfname[k++] = c;
+ } while (c);
+ fprintf(stderr, "gzHeader FNAME: %s\n", gzfname);
+ }
+
+ /* FHCRC */
+ if (flg & 0x2) {
+ c = GETINPC(inpf);
+ if (c == EOF)
+ goto err3;
+ c = GETINPC(inpf);
+ if (c == EOF)
+ goto err3;
+ fprintf(stderr, "gzHeader FHCRC: ignored\n");
+ }
+
+ used_in = cur_in = used_out = cur_out = 0;
+ is_final = is_eof = 0;
+
+ /* Allocate one page larger to prevent page faults due to NX
+ * overfetching.
+	 * Either cast as (char *)(uintptr_t)aligned_alloc(...) or build
+	 * with the -std=c11 flag to make the int-to-pointer warning go away.
+ */
+ assert((fifo_in = (char *)(uintptr_t)aligned_alloc(line_sz,
+ fifo_in_len + page_sz)) != NULL);
+ assert((fifo_out = (char *)(uintptr_t)aligned_alloc(line_sz,
+ fifo_out_len + page_sz + line_sz)) != NULL);
+ /* Leave unused space due to history rounding rules */
+ fifo_out = fifo_out + line_sz;
+ nxu_touch_pages(fifo_out, fifo_out_len, page_sz, 1);
+
+ ddl_in = &dde_in[0];
+ ddl_out = &dde_out[0];
+ cmdp = &cmd;
+ memset(&cmdp->crb, 0, sizeof(cmdp->crb));
+
+read_state:
+
+ /* Read from .gz file */
+
+ NXPRT(fprintf(stderr, "read_state:\n"));
+
+ if (is_eof != 0)
+ goto write_state;
+
+	/* We read into fifo_in in two steps: first, read from cur_in to the
+	 * end of the buffer; last, if the free space wrapped around, read
+	 * from fifo_in offset 0 to offset cur_in.
+ */
+
+ /* Reset fifo head to reduce unnecessary wrap arounds */
+ cur_in = (used_in == 0) ? 0 : cur_in;
+
+ /* Free space total is reduced by a gap */
+ free_space = NX_MAX(0, fifo_free_bytes(used_in, fifo_in_len)
+ - line_sz);
+
+ /* Free space may wrap around as first and last */
+ first_free = fifo_free_first_bytes(cur_in, used_in, fifo_in_len);
+ last_free = fifo_free_last_bytes(cur_in, used_in, fifo_in_len);
+
+ /* Start offsets of the free memory */
+ first_offset = fifo_free_first_offset(cur_in, used_in);
+ last_offset = fifo_free_last_offset(cur_in, used_in, fifo_in_len);
+
+ /* Reduce read_sz because of the line_sz gap */
+ read_sz = NX_MIN(free_space, first_free);
+ n = 0;
+ if (read_sz > 0) {
+ /* Read in to offset cur_in + used_in */
+ n = fread(fifo_in + first_offset, 1, read_sz, inpf);
+ used_in = used_in + n;
+ free_space = free_space - n;
+ assert(n <= read_sz);
+ if (n != read_sz) {
+ /* Either EOF or error; exit the read loop */
+ is_eof = 1;
+ goto write_state;
+ }
+ }
+
+ /* If free space wrapped around */
+ if (last_free > 0) {
+ /* Reduce read_sz because of the line_sz gap */
+ read_sz = NX_MIN(free_space, last_free);
+ n = 0;
+ if (read_sz > 0) {
+ n = fread(fifo_in + last_offset, 1, read_sz, inpf);
+ used_in = used_in + n; /* Increase used space */
+ free_space = free_space - n; /* Decrease free space */
+ assert(n <= read_sz);
+ if (n != read_sz) {
+ /* Either EOF or error; exit the read loop */
+ is_eof = 1;
+ goto write_state;
+ }
+ }
+ }
+
+ /* At this point we have used_in bytes in fifo_in with the
+ * data head starting at cur_in and possibly wrapping around.
+ */
+
+write_state:
+
+ /* Write decompressed data to output file */
+
+ NXPRT(fprintf(stderr, "write_state:\n"));
+
+ if (used_out == 0)
+ goto decomp_state;
+
+	/* If fifo_out has data waiting, write it out to the file to make
+	 * free target space for the accelerator. The used bytes may be
+	 * split across the first and last parts of fifo_out.
+ */
+
+ first_used = fifo_used_first_bytes(cur_out, used_out, fifo_out_len);
+ last_used = fifo_used_last_bytes(cur_out, used_out, fifo_out_len);
+
+ write_sz = first_used;
+
+ n = 0;
+ if (write_sz > 0) {
+ n = fwrite(fifo_out + cur_out, 1, write_sz, outf);
+ used_out = used_out - n;
+ /* Move head of the fifo */
+ cur_out = (cur_out + n) % fifo_out_len;
+ assert(n <= write_sz);
+ if (n != write_sz) {
+ fprintf(stderr, "error: write\n");
+ rc = -1;
+ goto err5;
+ }
+ }
+
+ if (last_used > 0) { /* If more data available in the last part */
+ write_sz = last_used; /* Keep it here for later */
+ n = 0;
+ if (write_sz > 0) {
+ n = fwrite(fifo_out, 1, write_sz, outf);
+ used_out = used_out - n;
+ cur_out = (cur_out + n) % fifo_out_len;
+ assert(n <= write_sz);
+ if (n != write_sz) {
+ fprintf(stderr, "error: write\n");
+ rc = -1;
+ goto err5;
+ }
+ }
+ }
+
+decomp_state:
+
+ /* NX decompresses input data */
+
+ NXPRT(fprintf(stderr, "decomp_state:\n"));
+
+ if (is_final)
+ goto finish_state;
+
+ /* Address/len lists */
+ clearp_dde(ddl_in);
+ clearp_dde(ddl_out);
+
+ /* FC, CRC, HistLen, Table 6-6 */
+ if (resuming) {
+ /* Resuming a partially decompressed input.
+ * The key to resume is supplying the 32KB
+ * dictionary (history) to NX, which is basically
+ * the last 32KB of output produced.
+ */
+ fc = GZIP_FC_DECOMPRESS_RESUME;
+
+ cmdp->cpb.in_crc = cmdp->cpb.out_crc;
+ cmdp->cpb.in_adler = cmdp->cpb.out_adler;
+
+ /* Round up the history size to quadword. Section 2.10 */
+ history_len = (history_len + 15) / 16;
+ putnn(cmdp->cpb, in_histlen, history_len);
+ history_len = history_len * 16; /* bytes */
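+		/* e.g. a 1000 byte history rounds up to 63 quadwords,
+		 * i.e. 1008 bytes are supplied to the accelerator.
+		 */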
+
+ if (history_len > 0) {
+ /* Chain in the history buffer to the DDE list */
+ if (cur_out >= history_len) {
+ nx_append_dde(ddl_in, fifo_out
+ + (cur_out - history_len),
+ history_len);
+ } else {
+ nx_append_dde(ddl_in, fifo_out
+ + ((fifo_out_len + cur_out)
+ - history_len),
+ history_len - cur_out);
+ /* Up to 32KB history wraps around fifo_out */
+ nx_append_dde(ddl_in, fifo_out, cur_out);
+ }
+
+ }
+ } else {
+ /* First decompress job */
+ fc = GZIP_FC_DECOMPRESS;
+
+ history_len = 0;
+ /* Writing 0 clears out subc as well */
+ cmdp->cpb.in_histlen = 0;
+ total_out = 0;
+
+ put32(cmdp->cpb, in_crc, INIT_CRC);
+ put32(cmdp->cpb, in_adler, INIT_ADLER);
+ put32(cmdp->cpb, out_crc, INIT_CRC);
+ put32(cmdp->cpb, out_adler, INIT_ADLER);
+
+ /* Assuming 10% compression ratio initially; use the
+ * most recently measured compression ratio as a
+ * heuristic to estimate the input and output
+ * sizes. If we give too much input, the target buffer
+ * overflows and NX cycles are wasted, and then we
+ * must retry with smaller input size. 1000 is 100%.
+ */
+ last_comp_ratio = 100UL;
+ }
+ cmdp->crb.gzip_fc = 0;
+ putnn(cmdp->crb, gzip_fc, fc);
+
+ /*
+ * NX source buffers
+ */
+ first_used = fifo_used_first_bytes(cur_in, used_in, fifo_in_len);
+ last_used = fifo_used_last_bytes(cur_in, used_in, fifo_in_len);
+
+ if (first_used > 0)
+ nx_append_dde(ddl_in, fifo_in + cur_in, first_used);
+
+ if (last_used > 0)
+ nx_append_dde(ddl_in, fifo_in, last_used);
+
+ /*
+ * NX target buffers
+ */
+ first_free = fifo_free_first_bytes(cur_out, used_out, fifo_out_len);
+ last_free = fifo_free_last_bytes(cur_out, used_out, fifo_out_len);
+
+ /* Reduce output free space amount not to overwrite the history */
+ int target_max = NX_MAX(0, fifo_free_bytes(used_out, fifo_out_len)
+ - (1<<16));
+
+ NXPRT(fprintf(stderr, "target_max %d (0x%x)\n", target_max,
+ target_max));
+
+ first_free = NX_MIN(target_max, first_free);
+ if (first_free > 0) {
+ first_offset = fifo_free_first_offset(cur_out, used_out);
+ nx_append_dde(ddl_out, fifo_out + first_offset, first_free);
+ }
+
+ if (last_free > 0) {
+ last_free = NX_MIN(target_max - first_free, last_free);
+ if (last_free > 0) {
+ last_offset = fifo_free_last_offset(cur_out, used_out,
+ fifo_out_len);
+ nx_append_dde(ddl_out, fifo_out + last_offset,
+ last_free);
+ }
+ }
+
+ /* Target buffer size is used to limit the source data size
+ * based on previous measurements of compression ratio.
+ */
+
+ /* source_sz includes history */
+ source_sz = getp32(ddl_in, ddebc);
+ assert(source_sz > history_len);
+ source_sz = source_sz - history_len;
+
+ /* Estimating how much source is needed to 3/4 fill a
+ * target_max size target buffer. If we overshoot, then NX
+ * must repeat the job with smaller input and we waste
+ * bandwidth. If we undershoot then we use more NX calls than
+ * necessary.
+ */
+
+ source_sz_estimate = ((uint64_t)target_max * last_comp_ratio * 3UL)
+ / 4000;
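+	/* Illustrative numbers only: with target_max of 8 MB and
+	 * last_comp_ratio of 100 (10x expansion observed so far), the
+	 * estimate is 8 MB * 100 * 3 / 4000, roughly 614 KB of compressed
+	 * input, expected to fill about 3/4 of the target buffer.
+	 */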
+
+ if (source_sz_estimate < source_sz) {
+ /* Target might be small, therefore limiting the
+ * source data.
+ */
+ source_sz = source_sz_estimate;
+ target_sz_estimate = target_max;
+ } else {
+ /* Source file might be small, therefore limiting target
+ * touch pages to a smaller value to save processor cycles.
+ */
+ target_sz_estimate = ((uint64_t)source_sz * 1000UL)
+ / (last_comp_ratio + 1);
+ target_sz_estimate = NX_MIN(2 * target_sz_estimate,
+ target_max);
+ }
+
+ source_sz = source_sz + history_len;
+
+ /* Some NX condition codes require submitting the NX job again.
+	 * The kernel doesn't handle NX page faults; it expects user code
+	 * to touch the pages.
+ */
+ pgfault_retries = NX_MAX_FAULTS;
+
+restart_nx:
+
+ putp32(ddl_in, ddebc, source_sz);
+
+ /* Fault in pages */
+ nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), page_sz, 1);
+ nx_touch_pages_dde(ddl_in, 0, page_sz, 0);
+ nx_touch_pages_dde(ddl_out, target_sz_estimate, page_sz, 1);
+
+ /* Send job to NX */
+ cc = nx_submit_job(ddl_in, ddl_out, cmdp, devhandle);
+
+ switch (cc) {
+
+ case ERR_NX_AT_FAULT:
+
+ /* We touched the pages ahead of time. In the most common case
+	 * we shouldn't be here. But maybe some pages were paged out.
+	 * The kernel should have placed the faulting address in fsaddr.
+ */
+ NXPRT(fprintf(stderr, "ERR_NX_AT_FAULT %p\n",
+ (void *)cmdp->crb.csb.fsaddr));
+
+ if (pgfault_retries == NX_MAX_FAULTS) {
+ /* Try once with exact number of pages */
+ --pgfault_retries;
+ goto restart_nx;
+ } else if (pgfault_retries > 0) {
+ /* If still faulting try fewer input pages
+ * assuming memory outage
+ */
+ if (source_sz > page_sz)
+ source_sz = NX_MAX(source_sz / 2, page_sz);
+ --pgfault_retries;
+ goto restart_nx;
+ } else {
+ fprintf(stderr, "cannot make progress; too many ");
+ fprintf(stderr, "page fault retries cc= %d\n", cc);
+ rc = -1;
+ goto err5;
+ }
+
+ case ERR_NX_DATA_LENGTH:
+
+ NXPRT(fprintf(stderr, "ERR_NX_DATA_LENGTH; "));
+ NXPRT(fprintf(stderr, "stream may have trailing data\n"));
+
+ /* Not an error in the most common case; it just says
+ * there is trailing data that we must examine.
+ *
+ * CC=3 CE(1)=0 CE(0)=1 indicates partial completion
+ * Fig.6-7 and Table 6-8.
+ */
+ nx_ce = get_csb_ce_ms3b(cmdp->crb.csb);
+
+ if (!csb_ce_termination(nx_ce) &&
+ csb_ce_partial_completion(nx_ce)) {
+ /* Check CPB for more information
+ * spbc and tpbc are valid
+ */
+ sfbt = getnn(cmdp->cpb, out_sfbt); /* Table 6-4 */
+ subc = getnn(cmdp->cpb, out_subc); /* Table 6-4 */
+ spbc = get32(cmdp->cpb, out_spbc_decomp);
+ tpbc = get32(cmdp->crb.csb, tpbc);
+ assert(target_max >= tpbc);
+
+ goto ok_cc3; /* not an error */
+ } else {
+ /* History length error when CE(1)=1 CE(0)=0. */
+ rc = -1;
+ fprintf(stderr, "history length error cc= %d\n", cc);
+ goto err5;
+ }
+
+ case ERR_NX_TARGET_SPACE:
+
+ /* Target buffer not large enough; retry smaller input
+ * data; give at least 1 byte. SPBC/TPBC are not valid.
+ */
+ assert(source_sz > history_len);
+ source_sz = ((source_sz - history_len + 2) / 2) + history_len;
+ NXPRT(fprintf(stderr, "ERR_NX_TARGET_SPACE; retry with "));
+ NXPRT(fprintf(stderr, "smaller input data src %d hist %d\n",
+ source_sz, history_len));
+ goto restart_nx;
+
+ case ERR_NX_OK:
+
+ /* This should not happen for gzip formatted data;
+ * we need trailing crc and isize
+ */
+ fprintf(stderr, "ERR_NX_OK\n");
+ spbc = get32(cmdp->cpb, out_spbc_decomp);
+ tpbc = get32(cmdp->crb.csb, tpbc);
+ assert(target_max >= tpbc);
+ assert(spbc >= history_len);
+ source_sz = spbc - history_len;
+ goto offsets_state;
+
+ default:
+ fprintf(stderr, "error: cc= %d\n", cc);
+ rc = -1;
+ goto err5;
+ }
+
+ok_cc3:
+
+ NXPRT(fprintf(stderr, "cc3: sfbt: %x\n", sfbt));
+
+ assert(spbc > history_len);
+ source_sz = spbc - history_len;
+
+ /* Table 6-4: Source Final Block Type (SFBT) describes the
+ * last processed deflate block and clues the software how to
+ * resume the next job. SUBC indicates how many input bits NX
+ * consumed but did not process. SPBC indicates how many
+ * bytes of source were given to the accelerator including
+ * history bytes.
+ */
+
+ switch (sfbt) {
+ int dhtlen;
+
+ case 0x0: /* Deflate final EOB received */
+
+ /* Calculating the checksum start position. */
+
+ source_sz = source_sz - subc / 8;
+ is_final = 1;
+ break;
+
+ /* Resume decompression cases are below. Basically
+ * indicates where NX has suspended and how to resume
+ * the input stream.
+ */
+
+ case 0x8: /* Within a literal block; use rembytecount */
+ case 0x9: /* Within a literal block; use rembytecount; bfinal=1 */
+
+ /* Supply the partially processed source byte again */
+ source_sz = source_sz - ((subc + 7) / 8);
+
+		/* SUBC LS 3 bits: number of bits in the first source byte
+		 * that need to be processed.
+ * 000 means all 8 bits; Table 6-3
+ * Clear subc, histlen, sfbt, rembytecnt, dhtlen
+ */
+ cmdp->cpb.in_subc = 0;
+ cmdp->cpb.in_sfbt = 0;
+ putnn(cmdp->cpb, in_subc, subc % 8);
+ putnn(cmdp->cpb, in_sfbt, sfbt);
+ putnn(cmdp->cpb, in_rembytecnt, getnn(cmdp->cpb,
+ out_rembytecnt));
+ break;
+
+ case 0xA: /* Within a FH block; */
+ case 0xB: /* Within a FH block; bfinal=1 */
+
+ source_sz = source_sz - ((subc + 7) / 8);
+
+ /* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+ cmdp->cpb.in_subc = 0;
+ cmdp->cpb.in_sfbt = 0;
+ putnn(cmdp->cpb, in_subc, subc % 8);
+ putnn(cmdp->cpb, in_sfbt, sfbt);
+ break;
+
+ case 0xC: /* Within a DH block; */
+ case 0xD: /* Within a DH block; bfinal=1 */
+
+ source_sz = source_sz - ((subc + 7) / 8);
+
+ /* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+ cmdp->cpb.in_subc = 0;
+ cmdp->cpb.in_sfbt = 0;
+ putnn(cmdp->cpb, in_subc, subc % 8);
+ putnn(cmdp->cpb, in_sfbt, sfbt);
+
+ dhtlen = getnn(cmdp->cpb, out_dhtlen);
+ putnn(cmdp->cpb, in_dhtlen, dhtlen);
+ assert(dhtlen >= 42);
+
+ /* Round up to a qword */
+ dhtlen = (dhtlen + 127) / 128;
+
+ while (dhtlen > 0) { /* Copy dht from cpb.out to cpb.in */
+ --dhtlen;
+ cmdp->cpb.in_dht[dhtlen] = cmdp->cpb.out_dht[dhtlen];
+ }
+ break;
+
+ case 0xE: /* Within a block header; bfinal=0; */
+ /* Also given if source data exactly ends (SUBC=0) with
+ * EOB code with BFINAL=0. Means the next byte will
+ * contain a block header.
+ */
+ case 0xF: /* within a block header with BFINAL=1. */
+
+ source_sz = source_sz - ((subc + 7) / 8);
+
+ /* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+ cmdp->cpb.in_subc = 0;
+ cmdp->cpb.in_sfbt = 0;
+ putnn(cmdp->cpb, in_subc, subc % 8);
+ putnn(cmdp->cpb, in_sfbt, sfbt);
+
+ /* Engine did not process any data */
+ if (is_eof && (source_sz == 0))
+ is_final = 1;
+ }
+
+offsets_state:
+
+ /* Adjust the source and target buffer offsets and lengths */
+
+ NXPRT(fprintf(stderr, "offsets_state:\n"));
+
+ /* Delete input data from fifo_in */
+ used_in = used_in - source_sz;
+ cur_in = (cur_in + source_sz) % fifo_in_len;
+ input_file_offset = input_file_offset + source_sz;
+
+ /* Add output data to fifo_out */
+ used_out = used_out + tpbc;
+
+ assert(used_out <= fifo_out_len);
+
+ total_out = total_out + tpbc;
+
+ /* Deflate history is 32KB max. No need to supply more
+ * than 32KB on a resume.
+ */
+ history_len = (total_out > window_max) ? window_max : total_out;
+
+ /* To estimate expected expansion in the next NX job; 500 means 50%.
+ * Deflate best case is around 1 to 1000.
+ */
+ last_comp_ratio = (1000UL * ((uint64_t)source_sz + 1))
+ / ((uint64_t)tpbc + 1);
+ last_comp_ratio = NX_MAX(NX_MIN(1000UL, last_comp_ratio), 1);
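+	/* e.g. 1000 source bytes expanding to 10000 output bytes gives a
+	 * ratio of about 100, so the next job assumes roughly 10x expansion.
+	 */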
+ NXPRT(fprintf(stderr, "comp_ratio %ld source_sz %d spbc %d tpbc %d\n",
+ last_comp_ratio, source_sz, spbc, tpbc));
+
+ resuming = 1;
+
+finish_state:
+
+ NXPRT(fprintf(stderr, "finish_state:\n"));
+
+ if (is_final) {
+ if (used_out)
+ goto write_state; /* More data to write out */
+ else if (used_in < 8) {
+ /* Need at least 8 more bytes containing gzip crc
+ * and isize.
+ */
+ rc = -1;
+ goto err4;
+ } else {
+ /* Compare checksums and exit */
+ int i;
+ unsigned char tail[8];
+ uint32_t cksum, isize;
+
+ for (i = 0; i < 8; i++)
+ tail[i] = fifo_in[(cur_in + i) % fifo_in_len];
+ fprintf(stderr, "computed checksum %08x isize %08x\n",
+ cmdp->cpb.out_crc, (uint32_t) (total_out
+ % (1ULL<<32)));
+ cksum = ((uint32_t) tail[0] | (uint32_t) tail[1]<<8
+ | (uint32_t) tail[2]<<16
+ | (uint32_t) tail[3]<<24);
+ isize = ((uint32_t) tail[4] | (uint32_t) tail[5]<<8
+ | (uint32_t) tail[6]<<16
+ | (uint32_t) tail[7]<<24);
+ fprintf(stderr, "stored checksum %08x isize %08x\n",
+ cksum, isize);
+
+ if (cksum == cmdp->cpb.out_crc && isize == (uint32_t)
+ (total_out % (1ULL<<32))) {
+ rc = 0; goto ok1;
+ } else {
+ rc = -1; goto err4;
+ }
+ }
+ } else
+ goto read_state;
+
+ return -1;
+
+err1:
+ fprintf(stderr, "error: not a gzip file, expect %x, read %x\n",
+ expect, c);
+ return -1;
+
+err2:
+ fprintf(stderr, "error: the FLG byte is wrong or not being handled\n");
+ return -1;
+
+err3:
+ fprintf(stderr, "error: gzip header\n");
+ return -1;
+
+err4:
+ fprintf(stderr, "error: checksum missing or mismatch\n");
+
+err5:
+ok1:
+ fprintf(stderr, "decomp is complete: fclose\n");
+ fclose(outf);
+
+ return rc;
+}
+
+
+int main(int argc, char **argv)
+{
+ int rc;
+ struct sigaction act;
+ void *handle;
+
+ nx_dbg = 0;
+ nx_gzip_log = NULL;
+ act.sa_handler = 0;
+ act.sa_sigaction = nxu_sigsegv_handler;
+ act.sa_flags = SA_SIGINFO;
+ act.sa_restorer = 0;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGSEGV, &act, NULL);
+
+ handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
+ if (!handle) {
+ fprintf(stderr, "Unable to init NX, errno %d\n", errno);
+ exit(-1);
+ }
+
+ rc = decompress_file(argc, argv, handle);
+
+ nx_function_end(handle);
+
+ return rc;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
new file mode 100644
index 000000000000..02dffb65de48
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* P9 gzip sample code for demonstrating the P9 NX hardware interface.
+ * Not intended for production use or for performance or compression
+ * ratio measurements. For simplicity of demonstration, this sample
+ * code compresses in to fixed Huffman blocks only (Deflate btype=1)
+ * and has very simple memory management. Dynamic Huffman blocks
+ * (Deflate btype=2) are more involved as detailed in the user guide.
+ * Note also that /dev/crypto/gzip, VAS and skiboot support are
+ * required.
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ * https://github.com/libnxz/power-gzip for zlib api and other utils
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce: completion extension
+ * cpb: coprocessor parameter block (metadata)
+ * crb: coprocessor request block (command)
+ * csb: coprocessor status block (status)
+ * dht: dynamic huffman table
+ * dde: data descriptor element (address, length)
+ * ddl: list of ddes
+ * dh/fh: dynamic and fixed huffman types
+ * fc: coprocessor function code
+ * histlen: history/dictionary length
+ * history: sliding window of up to 32KB of data
+ * lzcount: Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt: source final block type; last block's type during decomp
+ * spbc: source processed byte count
+ * subc: source unprocessed bit count
+ * tebc: target ending bit count; valid bits in the last byte
+ * tpbc: target processed byte count
+ * vas: virtual accelerator switch; the user mode interface
+ */
+
+#define _ISOC11_SOURCE // For aligned_alloc()
+#define _DEFAULT_SOURCE // For endian.h
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "nxu.h"
+#include "nx.h"
+
+int nx_dbg;
+FILE *nx_gzip_log;
+
+#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+#define FNAME_MAX 1024
+#define FEXT ".nx.gz"
+
+/*
+ * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
+ */
+static int compress_fht_sample(char *src, uint32_t srclen, char *dst,
+ uint32_t dstlen, int with_count,
+ struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+ uint32_t fc;
+
+ assert(!!cmdp);
+
+ put32(cmdp->crb, gzip_fc, 0); /* clear */
+ fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT :
+ GZIP_FC_COMPRESS_RESUME_FHT;
+ putnn(cmdp->crb, gzip_fc, fc);
+ putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */
+ memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
+
+ /* Section 6.6 programming notes; spbc may be in two different
+ * places depending on FC.
+ */
+ if (!with_count)
+ put32(cmdp->cpb, out_spbc_comp, 0);
+ else
+ put32(cmdp->cpb, out_spbc_comp_with_count, 0);
+
+ /* Figure 6-3 6-4; CSB location */
+ put64(cmdp->crb, csb_address, 0);
+ put64(cmdp->crb, csb_address,
+ (uint64_t) &cmdp->crb.csb & csb_address_mask);
+
+ /* Source direct dde (scatter-gather list) */
+ clear_dde(cmdp->crb.source_dde);
+ putnn(cmdp->crb.source_dde, dde_count, 0);
+ put32(cmdp->crb.source_dde, ddebc, srclen);
+ put64(cmdp->crb.source_dde, ddead, (uint64_t) src);
+
+ /* Target direct dde (scatter-gather list) */
+ clear_dde(cmdp->crb.target_dde);
+ putnn(cmdp->crb.target_dde, dde_count, 0);
+ put32(cmdp->crb.target_dde, ddebc, dstlen);
+ put64(cmdp->crb.target_dde, ddead, (uint64_t) dst);
+
+ /* Submit the crb, the job descriptor, to the accelerator */
+ return nxu_submit_job(cmdp, handle);
+}
+
+/*
+ * Prepares a blank gzip header (no filename, no timestamp) and returns
+ * the number of bytes written to buf.
+ * Gzip specification at https://tools.ietf.org/html/rfc1952
+ */
+int gzip_header_blank(char *buf)
+{
+ int i = 0;
+
+ buf[i++] = 0x1f; /* ID1 */
+ buf[i++] = 0x8b; /* ID2 */
+ buf[i++] = 0x08; /* CM */
+ buf[i++] = 0x00; /* FLG */
+ buf[i++] = 0x00; /* MTIME */
+ buf[i++] = 0x00; /* MTIME */
+ buf[i++] = 0x00; /* MTIME */
+ buf[i++] = 0x00; /* MTIME */
+ buf[i++] = 0x04; /* XFL 4=fastest */
+ buf[i++] = 0x03; /* OS UNIX */
+
+ return i;
+}
+
+/* Caller must free the allocated buffer. Returns nonzero on error. */
+int read_alloc_input_file(char *fname, char **buf, size_t *bufsize)
+{
+ struct stat statbuf;
+ FILE *fp;
+ char *p;
+ size_t num_bytes;
+
+ if (stat(fname, &statbuf)) {
+ perror(fname);
+ return(-1);
+ }
+ fp = fopen(fname, "r");
+ if (fp == NULL) {
+ perror(fname);
+ return(-1);
+ }
+ assert(NULL != (p = (char *) malloc(statbuf.st_size)));
+ num_bytes = fread(p, 1, statbuf.st_size, fp);
+ if (ferror(fp) || (num_bytes != statbuf.st_size)) {
+ perror(fname);
+ return(-1);
+ }
+ *buf = p;
+ *bufsize = num_bytes;
+ return 0;
+}
+
+/* Returns nonzero on error */
+int write_output_file(char *fname, char *buf, size_t bufsize)
+{
+ FILE *fp;
+ size_t num_bytes;
+
+ fp = fopen(fname, "w");
+ if (fp == NULL) {
+ perror(fname);
+ return(-1);
+ }
+ num_bytes = fwrite(buf, 1, bufsize, fp);
+ if (ferror(fp) || (num_bytes != bufsize)) {
+ perror(fname);
+ return(-1);
+ }
+ fclose(fp);
+ return 0;
+}
+
+/*
+ * Z_SYNC_FLUSH as described in zlib.h.
+ * Returns number of appended bytes
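+ * A sync flush byte-aligns the stream by appending an empty stored
+ * (BTYPE=00) block; its LEN/NLEN trailer is the byte sequence
+ * 00 00 FF FF, matching what zlib emits for Z_SYNC_FLUSH.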
+ */
+int append_sync_flush(char *buf, int tebc, int final)
+{
+ uint64_t flush;
+ int shift = (tebc & 0x7);
+
+ if (tebc > 0) {
+ /* Last byte is partially full */
+ buf = buf - 1;
+ *buf = *buf & (unsigned char) ((1<<tebc)-1);
+ } else
+ *buf = 0;
+ flush = ((0x1ULL & final) << shift) | *buf;
+ shift = shift + 3; /* BFINAL and BTYPE written */
+ shift = (shift <= 8) ? 8 : 16;
+ flush |= (0xFFFF0000ULL) << shift; /* Zero length block */
+ shift = shift + 32;
+ while (shift > 0) {
+ *buf++ = (unsigned char) (flush & 0xffULL);
+ flush = flush >> 8;
+ shift = shift - 8;
+ }
+ return(((tebc > 5) || (tebc == 0)) ? 5 : 4);
+}
+
+/*
+ * Final deflate block bit. This call assumes the block
+ * beginning is byte aligned.
+ */
+static void set_bfinal(void *buf, int bfinal)
+{
+ char *b = buf;
+
+ if (bfinal)
+ *b = *b | (unsigned char) 0x01;
+ else
+ *b = *b & (unsigned char) 0xfe;
+}
+
+int compress_file(int argc, char **argv, void *handle)
+{
+ char *inbuf, *outbuf, *srcbuf, *dstbuf;
+ char outname[FNAME_MAX];
+ uint32_t srclen, dstlen;
+ uint32_t flushlen, chunk;
+ size_t inlen, outlen, dsttotlen, srctotlen;
+ uint32_t crc, spbc, tpbc, tebc;
+ int lzcounts = 0;
+ int cc;
+ int num_hdr_bytes;
+ struct nx_gzip_crb_cpb_t *cmdp;
+ uint32_t pagelen = 65536;
+ int fault_tries = NX_MAX_FAULTS;
+
+ cmdp = (void *)(uintptr_t)
+ aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
+ sizeof(struct nx_gzip_crb_cpb_t));
+
+ if (argc != 2) {
+ fprintf(stderr, "usage: %s <fname>\n", argv[0]);
+ exit(-1);
+ }
+ if (read_alloc_input_file(argv[1], &inbuf, &inlen))
+ exit(-1);
+ fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen);
+
+ /* Generous output buffer for header/trailer */
+ outlen = 2 * inlen + 1024;
+
+ assert(NULL != (outbuf = (char *)malloc(outlen)));
+ nxu_touch_pages(outbuf, outlen, pagelen, 1);
+
+ /* Compress piecemeal in smallish chunks */
+ chunk = 1<<22;
+
+ /* Write the gzip header to the stream */
+ num_hdr_bytes = gzip_header_blank(outbuf);
+ dstbuf = outbuf + num_hdr_bytes;
+ outlen = outlen - num_hdr_bytes;
+ dsttotlen = num_hdr_bytes;
+
+ srcbuf = inbuf;
+ srctotlen = 0;
+
+ /* Init the CRB, the coprocessor request block */
+ memset(&cmdp->crb, 0, sizeof(cmdp->crb));
+
+ /* Initial gzip crc32 */
+ put32(cmdp->cpb, in_crc, 0);
+
+ while (inlen > 0) {
+
+ /* Submit chunk size source data per job */
+ srclen = NX_MIN(chunk, inlen);
+ /* Supply large target in case data expands */
+ dstlen = NX_MIN(2*srclen, outlen);
+
+ /* Page faults are handled by the user code */
+
+		/* Fault in pages; improved code wouldn't touch so
+ * many pages but would try to estimate the
+ * compression ratio and adjust both the src and dst
+ * touch amounts.
+ */
+ nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen,
+ 1);
+ nxu_touch_pages(srcbuf, srclen, pagelen, 0);
+ nxu_touch_pages(dstbuf, dstlen, pagelen, 1);
+
+ cc = compress_fht_sample(
+ srcbuf, srclen,
+ dstbuf, dstlen,
+ lzcounts, cmdp, handle);
+
+ if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC &&
+ cc != ERR_NX_AT_FAULT) {
+ fprintf(stderr, "nx error: cc= %d\n", cc);
+ exit(-1);
+ }
+
+ /* Page faults are handled by the user code */
+ if (cc == ERR_NX_AT_FAULT) {
+ NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc));
+ NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n",
+ fault_tries,
+ (unsigned long long) cmdp->crb.csb.fsaddr));
+ fault_tries--;
+ if (fault_tries > 0) {
+ continue;
+ } else {
+ fprintf(stderr, "error: cannot progress; ");
+ fprintf(stderr, "too many faults\n");
+ exit(-1);
+ };
+ }
+
+ fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */
+
+ inlen = inlen - srclen;
+ srcbuf = srcbuf + srclen;
+ srctotlen = srctotlen + srclen;
+
+ /* Two possible locations for spbc depending on the function
+ * code.
+ */
+ spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) :
+ get32(cmdp->cpb, out_spbc_comp_with_count);
+ assert(spbc == srclen);
+
+ /* Target byte count */
+ tpbc = get32(cmdp->crb.csb, tpbc);
+ /* Target ending bit count */
+ tebc = getnn(cmdp->cpb, out_tebc);
+ NXPRT(fprintf(stderr, "compressed chunk %d ", spbc));
+ NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc));
+
+ if (inlen > 0) { /* More chunks to go */
+ set_bfinal(dstbuf, 0);
+ dstbuf = dstbuf + tpbc;
+ dsttotlen = dsttotlen + tpbc;
+ outlen = outlen - tpbc;
+ /* Round up to the next byte with a flush
+			 * block; do not set the BFINAL bit.
+ */
+ flushlen = append_sync_flush(dstbuf, tebc, 0);
+ dsttotlen = dsttotlen + flushlen;
+ outlen = outlen - flushlen;
+ dstbuf = dstbuf + flushlen;
+ NXPRT(fprintf(stderr, "added sync_flush %d bytes\n",
+ flushlen));
+ } else { /* Done */
+ /* Set the BFINAL bit of the last block per Deflate
+ * specification.
+ */
+ set_bfinal(dstbuf, 1);
+ dstbuf = dstbuf + tpbc;
+ dsttotlen = dsttotlen + tpbc;
+ outlen = outlen - tpbc;
+ }
+
+ /* Resuming crc32 for the next chunk */
+ crc = get32(cmdp->cpb, out_crc);
+ put32(cmdp->cpb, in_crc, crc);
+ crc = be32toh(crc);
+ }
+
+ /* Append crc32 and ISIZE to the end */
+ memcpy(dstbuf, &crc, 4);
+ memcpy(dstbuf+4, &srctotlen, 4);
+ dsttotlen = dsttotlen + 8;
+ outlen = outlen - 8;
+
+ assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT)));
+ strcpy(outname, argv[1]);
+ strcat(outname, FEXT);
+ if (write_output_file(outname, outbuf, dsttotlen)) {
+ fprintf(stderr, "write error: %s\n", outname);
+ exit(-1);
+ }
+
+ fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen,
+ dsttotlen);
+ fprintf(stderr, "crc32 checksum = %08x\n", crc);
+
+ if (inbuf != NULL)
+ free(inbuf);
+
+ if (outbuf != NULL)
+ free(outbuf);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int rc;
+ struct sigaction act;
+ void *handle;
+
+ nx_dbg = 0;
+ nx_gzip_log = NULL;
+ act.sa_handler = 0;
+ act.sa_sigaction = nxu_sigsegv_handler;
+ act.sa_flags = SA_SIGINFO;
+ act.sa_restorer = 0;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGSEGV, &act, NULL);
+
+ handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
+ if (!handle) {
+ fprintf(stderr, "Unable to init NX, errno %d\n", errno);
+ exit(-1);
+ }
+
+ rc = compress_file(argc, argv, handle);
+
+ nx_function_end(handle);
+
+ return rc;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
new file mode 100644
index 000000000000..c055885da40a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/*
+ * Copyright 2020 IBM Corp.
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "vas-api.h"
+#include "nx.h"
+#include "copy-paste.h"
+#include "nxu.h"
+#include "nx_dbg.h"
+#include <sys/platform/ppc.h>
+
+#define barrier()
+#define hwsync() ({ asm volatile("sync" ::: "memory"); })
+
+#ifndef NX_NO_CPU_PRI
+#define cpu_pri_default() ({ asm volatile ("or 2, 2, 2"); })
+#define cpu_pri_low() ({ asm volatile ("or 31, 31, 31"); })
+#else
+#define cpu_pri_default()
+#define cpu_pri_low()
+#endif
+
+void *nx_fault_storage_address;
+
+struct nx_handle {
+ int fd;
+ int function;
+ void *paste_addr;
+};
+
+static int open_device_nodes(char *devname, int pri, struct nx_handle *handle)
+{
+ int rc, fd;
+ void *addr;
+ struct vas_tx_win_open_attr txattr;
+
+ fd = open(devname, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, " open device name %s\n", devname);
+ return -errno;
+ }
+
+ memset(&txattr, 0, sizeof(txattr));
+ txattr.version = 1;
+ txattr.vas_id = pri;
+ rc = ioctl(fd, VAS_TX_WIN_OPEN, (unsigned long)&txattr);
+ if (rc < 0) {
+ fprintf(stderr, "ioctl() n %d, error %d\n", rc, errno);
+ rc = -errno;
+ goto out;
+ }
+
+ addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0ULL);
+ if (addr == MAP_FAILED) {
+ fprintf(stderr, "mmap() failed, errno %d\n", errno);
+ rc = -errno;
+ goto out;
+ }
+ handle->fd = fd;
+ handle->paste_addr = (void *)((char *)addr + 0x400);
+
+ rc = 0;
+out:
+ close(fd);
+ return rc;
+}
+
+void *nx_function_begin(int function, int pri)
+{
+ int rc;
+ char *devname = "/dev/crypto/nx-gzip";
+ struct nx_handle *nxhandle;
+
+ if (function != NX_FUNC_COMP_GZIP) {
+ errno = EINVAL;
+ fprintf(stderr, " NX_FUNC_COMP_GZIP not found\n");
+ return NULL;
+ }
+
+
+ nxhandle = malloc(sizeof(*nxhandle));
+ if (!nxhandle) {
+ errno = ENOMEM;
+ fprintf(stderr, " No memory\n");
+ return NULL;
+ }
+
+ nxhandle->function = function;
+ rc = open_device_nodes(devname, pri, nxhandle);
+ if (rc < 0) {
+ errno = -rc;
+ fprintf(stderr, " open_device_nodes failed\n");
+ return NULL;
+ }
+
+ return nxhandle;
+}
+
+int nx_function_end(void *handle)
+{
+ int rc = 0;
+ struct nx_handle *nxhandle = handle;
+
+ rc = munmap(nxhandle->paste_addr - 0x400, 4096);
+ if (rc < 0) {
+ fprintf(stderr, "munmap() failed, errno %d\n", errno);
+ return rc;
+ }
+ close(nxhandle->fd);
+ free(nxhandle);
+
+ return rc;
+}
+
+static int nx_wait_for_csb(struct nx_gzip_crb_cpb_t *cmdp)
+{
+ long poll = 0;
+ uint64_t t;
+
+ /* Save power and let other threads use the h/w. top may show
+	 * 100% but only because the OS doesn't know we slowed this
+ * h/w thread while polling. We're letting other threads have
+ * higher throughput on the core.
+ */
+ cpu_pri_low();
+
+#define CSB_MAX_POLL 200000000UL
+#define USLEEP_TH 300000UL
+
+ t = __ppc_get_timebase();
+
+ while (getnn(cmdp->crb.csb, csb_v) == 0) {
+ ++poll;
+ hwsync();
+
+ cpu_pri_low();
+
+		/* usleep(0) takes around 29000 timebase ticks (~60 us).
+		 * With USLEEP_TH = 300000 we spin for about 600 us and
+		 * then start sleeping.
+		 */
+ if ((__ppc_get_timebase() - t) > USLEEP_TH) {
+ cpu_pri_default();
+ usleep(1);
+ }
+
+ if (poll > CSB_MAX_POLL)
+ break;
+
+ /* Fault address from signal handler */
+ if (nx_fault_storage_address) {
+ cpu_pri_default();
+ return -EAGAIN;
+ }
+
+ }
+
+ cpu_pri_default();
+
+ /* hw has updated csb and output buffer */
+ hwsync();
+
+ /* Check CSB flags. */
+ if (getnn(cmdp->crb.csb, csb_v) == 0) {
+ fprintf(stderr, "CSB still not valid after %d polls.\n",
+ (int) poll);
+ prt_err("CSB still not valid after %d polls, giving up.\n",
+ (int) poll);
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static int nxu_run_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+ int i, ret, retries;
+ struct nx_handle *nxhandle = handle;
+
+ assert(handle != NULL);
+ i = 0;
+ retries = 5000;
+ while (i++ < retries) {
+ hwsync();
+ vas_copy(&cmdp->crb, 0);
+ ret = vas_paste(nxhandle->paste_addr, 0);
+ hwsync();
+
+ NXPRT(fprintf(stderr, "Paste attempt %d/%d returns 0x%x\n",
+ i, retries, ret));
+
+ if ((ret == 2) || (ret == 3)) {
+
+ ret = nx_wait_for_csb(cmdp);
+ if (!ret) {
+ goto out;
+ } else if (ret == -EAGAIN) {
+ long x;
+
+ prt_err("Touching address %p, 0x%lx\n",
+ nx_fault_storage_address,
+ *(long *) nx_fault_storage_address);
+ x = *(long *) nx_fault_storage_address;
+ *(long *) nx_fault_storage_address = x;
+ nx_fault_storage_address = 0;
+ continue;
+ } else {
+ prt_err("wait_for_csb() returns %d\n", ret);
+ break;
+ }
+ } else {
+ if (i < 10) {
+				/* Spin for a few ticks. */
+#define SPIN_TH 500UL
+ uint64_t fail_spin;
+
+ fail_spin = __ppc_get_timebase();
+ while ((__ppc_get_timebase() - fail_spin) <
+ SPIN_TH)
+ ;
+ } else {
+ /* sleep */
+ unsigned int pr = 0;
+
+ if (pr++ % 100 == 0) {
+ prt_err("Paste attempt %d/", i);
+ prt_err("%d, failed pid= %d\n", retries,
+ getpid());
+ }
+ usleep(1);
+ }
+ continue;
+ }
+ }
+
+out:
+ cpu_pri_default();
+
+ return ret;
+}
+
+int nxu_submit_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+ int cc;
+
+ cc = nxu_run_job(cmdp, handle);
+
+ if (!cc)
+ cc = getnn(cmdp->crb.csb, csb_cc); /* CC Table 6-8 */
+
+ return cc;
+}
+
+
+void nxu_sigsegv_handler(int sig, siginfo_t *info, void *ctx)
+{
+ fprintf(stderr, "%d: Got signal %d si_code %d, si_addr %p\n", getpid(),
+ sig, info->si_code, info->si_addr);
+
+ nx_fault_storage_address = info->si_addr;
+}
+
+/*
+ * Fault in pages prior to NX job submission. wr=1 may be required to
+ * touch writeable pages. System zero pages do not fault-in the page as
+ * intended. Typically set wr=1 for NX target pages and set wr=0 for NX
+ * source pages.
+ */
+int nxu_touch_pages(void *buf, long buf_len, long page_len, int wr)
+{
+ char *begin = buf;
+ char *end = (char *) buf + buf_len - 1;
+ volatile char t;
+
+ assert(buf_len >= 0 && !!buf);
+
+ NXPRT(fprintf(stderr, "touch %p %p len 0x%lx wr=%d\n", buf,
+ (buf + buf_len), buf_len, wr));
+
+ if (buf_len <= 0 || buf == NULL)
+ return -1;
+
+ do {
+ t = *begin;
+ if (wr)
+ *begin = t;
+ begin = begin + page_len;
+ } while (begin < end);
+
+	/* When buf_len is small or the buffer tail is in another page */
+ t = *end;
+ if (wr)
+ *end = t;
+
+ return 0;
+}
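
For orientation, here is a minimal caller sketch for the helpers defined in gzip_vas.c above (nx_function_begin, nxu_touch_pages, nxu_submit_job, nx_function_end), plus ERR_NX_OK from the nxu.h header added later in this series. It is an illustration only, not code from the patch; the CRB/CPB field setup is elided and is done in full by gzfht_test.c.

	struct nx_gzip_crb_cpb_t cmd;
	void *handle;
	int cc;

	handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
	if (!handle)
		return -1;	/* errno is set by nx_function_begin() */

	memset(&cmd, 0, sizeof(cmd));
	/* ... fill in cmd.crb (function code, source/target DDEs,
	 * csb_address) and cmd.cpb as gzfht_test.c does ...
	 */

	/* Fault the command block in before submitting; wr=1 since the
	 * accelerator writes status back into the CSB/CPB output words.
	 */
	nxu_touch_pages(&cmd, sizeof(cmd), 4096, 1);

	cc = nxu_submit_job(&cmd, handle);
	if (cc != ERR_NX_OK)
		fprintf(stderr, "NX job failed, cc %d\n", cc);

	nx_function_end(handle);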
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h b/tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h
new file mode 100644
index 000000000000..0db2d6485037
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/* From asm-compat.h */
+#define __stringify_in_c(...) #__VA_ARGS__
+#define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " "
+
+/*
+ * Macros taken from arch/powerpc/include/asm/ppc-opcode.h and other
+ * header files.
+ */
+#define ___PPC_RA(a) (((a) & 0x1f) << 16)
+#define ___PPC_RB(b) (((b) & 0x1f) << 11)
+
+#define PPC_INST_COPY 0x7c20060c
+#define PPC_INST_PASTE 0x7c20070d
+
+#define PPC_COPY(a, b) stringify_in_c(.long PPC_INST_COPY | \
+ ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_PASTE(a, b) stringify_in_c(.long PPC_INST_PASTE | \
+ ___PPC_RA(a) | ___PPC_RB(b))
+#define CR0_SHIFT 28
+#define CR0_MASK 0xF
+/*
+ * Copy/paste instructions:
+ *
+ * copy RA,RB
+ * Copy contents of address (RA) + effective_address(RB)
+ * to internal copy-buffer.
+ *
+ * paste RA,RB
+ * Paste contents of internal copy-buffer to the address
+ * (RA) + effective_address(RB)
+ */
+static inline int vas_copy(void *crb, int offset)
+{
+ asm volatile(PPC_COPY(%0, %1)";"
+ :
+ : "b" (offset), "b" (crb)
+ : "memory");
+
+ return 0;
+}
+
+static inline int vas_paste(void *paste_address, int offset)
+{
+ __u32 cr;
+
+ cr = 0;
+ asm volatile(PPC_PASTE(%1, %2)";"
+ "mfocrf %0, 0x80;"
+ : "=r" (cr)
+ : "b" (offset), "b" (paste_address)
+ : "memory", "cr0");
+
+ return (cr >> CR0_SHIFT) & CR0_MASK;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/crb.h b/tools/testing/selftests/powerpc/nx-gzip/include/crb.h
new file mode 100644
index 000000000000..ab101085fa7e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/crb.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __CRB_H
+#define __CRB_H
+#include <linux/types.h>
+#include "nx.h"
+
+/* CCW 842 CI/FC masks
+ * NX P8 workbook, section 4.3.1, figure 4-6
+ * "CI/FC Boundary by NX CT type"
+ */
+#define CCW_CI_842 (0x00003ff8)
+#define CCW_FC_842 (0x00000007)
+
+/* Chapter 6.5.8 Coprocessor-Completion Block (CCB) */
+
+#define CCB_VALUE (0x3fffffffffffffff)
+#define CCB_ADDRESS (0xfffffffffffffff8)
+#define CCB_CM (0x0000000000000007)
+#define CCB_CM0 (0x0000000000000004)
+#define CCB_CM12 (0x0000000000000003)
+
+#define CCB_CM0_ALL_COMPLETIONS (0x0)
+#define CCB_CM0_LAST_IN_CHAIN (0x4)
+#define CCB_CM12_STORE (0x0)
+#define CCB_CM12_INTERRUPT (0x1)
+
+#define CCB_SIZE (0x10)
+#define CCB_ALIGN CCB_SIZE
+
+struct coprocessor_completion_block {
+ __be64 value;
+ __be64 address;
+} __aligned(CCB_ALIGN);
+
+
+/* Chapter 6.5.7 Coprocessor-Status Block (CSB) */
+
+#define CSB_V (0x80)
+#define CSB_F (0x04)
+#define CSB_CH (0x03)
+#define CSB_CE_INCOMPLETE (0x80)
+#define CSB_CE_TERMINATION (0x40)
+#define CSB_CE_TPBC (0x20)
+
+#define CSB_CC_SUCCESS (0)
+#define CSB_CC_INVALID_ALIGN (1)
+#define CSB_CC_OPERAND_OVERLAP (2)
+#define CSB_CC_DATA_LENGTH (3)
+#define CSB_CC_TRANSLATION (5)
+#define CSB_CC_PROTECTION (6)
+#define CSB_CC_RD_EXTERNAL (7)
+#define CSB_CC_INVALID_OPERAND (8)
+#define CSB_CC_PRIVILEGE (9)
+#define CSB_CC_INTERNAL (10)
+#define CSB_CC_WR_EXTERNAL (12)
+#define CSB_CC_NOSPC (13)
+#define CSB_CC_EXCESSIVE_DDE (14)
+#define CSB_CC_WR_TRANSLATION (15)
+#define CSB_CC_WR_PROTECTION (16)
+#define CSB_CC_UNKNOWN_CODE (17)
+#define CSB_CC_ABORT (18)
+#define CSB_CC_TRANSPORT (20)
+#define CSB_CC_SEGMENTED_DDL (31)
+#define CSB_CC_PROGRESS_POINT (32)
+#define CSB_CC_DDE_OVERFLOW (33)
+#define CSB_CC_SESSION (34)
+#define CSB_CC_PROVISION (36)
+#define CSB_CC_CHAIN (37)
+#define CSB_CC_SEQUENCE (38)
+#define CSB_CC_HW (39)
+
+#define CSB_SIZE (0x10)
+#define CSB_ALIGN CSB_SIZE
+
+struct coprocessor_status_block {
+ __u8 flags;
+ __u8 cs;
+ __u8 cc;
+ __u8 ce;
+ __be32 count;
+ __be64 address;
+} __aligned(CSB_ALIGN);
+
+
+/* Chapter 6.5.10 Data-Descriptor List (DDL)
+ * each list contains one or more Data-Descriptor Entries (DDE)
+ */
+
+#define DDE_P (0x8000)
+
+#define DDE_SIZE (0x10)
+#define DDE_ALIGN DDE_SIZE
+
+struct data_descriptor_entry {
+ __be16 flags;
+ __u8 count;
+ __u8 index;
+ __be32 length;
+ __be64 address;
+} __aligned(DDE_ALIGN);
+
+
+/* Chapter 6.5.2 Coprocessor-Request Block (CRB) */
+
+#define CRB_SIZE (0x80)
+#define CRB_ALIGN (0x100) /* Errata: requires 256 alignment */
+
+
+/* Coprocessor Status Block field
+ * ADDRESS address of CSB
+ * C CCB is valid
+ * AT 0 = addrs are virtual, 1 = addrs are phys
+ * M enable perf monitor
+ */
+#define CRB_CSB_ADDRESS (0xfffffffffffffff0)
+#define CRB_CSB_C (0x0000000000000008)
+#define CRB_CSB_AT (0x0000000000000002)
+#define CRB_CSB_M (0x0000000000000001)
+
+struct coprocessor_request_block {
+ __be32 ccw;
+ __be32 flags;
+ __be64 csb_addr;
+
+ struct data_descriptor_entry source;
+ struct data_descriptor_entry target;
+
+ struct coprocessor_completion_block ccb;
+
+ __u8 reserved[48];
+
+ struct coprocessor_status_block csb;
+} __aligned(CRB_ALIGN);
+
+#define crb_csb_addr(c) __be64_to_cpu(c->csb_addr)
+#define crb_nx_fault_addr(c) __be64_to_cpu(c->stamp.nx.fault_storage_addr)
+#define crb_nx_flags(c) c->stamp.nx.flags
+#define crb_nx_fault_status(c) c->stamp.nx.fault_status
+#define crb_nx_pswid(c) c->stamp.nx.pswid
+
+
+/* RFC02167 Initiate Coprocessor Instructions document
+ * Chapter 8.2.1.1.1 RS
+ * Chapter 8.2.3 Coprocessor Directive
+ * Chapter 8.2.4 Execution
+ *
+ * The CCW must be converted to BE before passing to icswx()
+ */
+
+#define CCW_PS (0xff000000)
+#define CCW_CT (0x00ff0000)
+#define CCW_CD (0x0000ffff)
+#define CCW_CL (0x0000c000)
+
+#endif
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/nx.h b/tools/testing/selftests/powerpc/nx-gzip/include/nx.h
new file mode 100644
index 000000000000..1abe23fc29e8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/nx.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2020 IBM Corp.
+ *
+ */
+#ifndef _NX_H
+#define _NX_H
+
+#include <stdbool.h>
+
+#define NX_FUNC_COMP_842 1
+#define NX_FUNC_COMP_GZIP 2
+
+#ifndef __aligned
+#define __aligned(x) __attribute__((aligned(x)))
+#endif
+
+struct nx842_func_args {
+ bool use_crc;
+ bool decompress; /* true decompress; false compress */
+ bool move_data;
+ int timeout; /* seconds */
+};
+
+struct nxbuf_t {
+ int len;
+ char *buf;
+};
+
+/* @function should be EFT (aka 842), GZIP etc */
+void *nx_function_begin(int function, int pri);
+
+int nx_function(void *handle, struct nxbuf_t *in, struct nxbuf_t *out,
+ void *arg);
+
+int nx_function_end(void *handle);
+
+#endif /* _NX_H */
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h b/tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h
new file mode 100644
index 000000000000..16464e19c47f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2020 IBM Corporation
+ *
+ */
+
+#ifndef _NXU_DBG_H_
+#define _NXU_DBG_H_
+
+#include <sys/file.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+#include <pthread.h>
+
+extern FILE * nx_gzip_log;
+extern int nx_gzip_trace;
+extern unsigned int nx_gzip_inflate_impl;
+extern unsigned int nx_gzip_deflate_impl;
+extern unsigned int nx_gzip_inflate_flags;
+extern unsigned int nx_gzip_deflate_flags;
+
+extern int nx_dbg;
+pthread_mutex_t mutex_log;
+
+#define nx_gzip_trace_enabled() (nx_gzip_trace & 0x1)
+#define nx_gzip_hw_trace_enabled() (nx_gzip_trace & 0x2)
+#define nx_gzip_sw_trace_enabled() (nx_gzip_trace & 0x4)
+#define nx_gzip_gather_statistics() (nx_gzip_trace & 0x8)
+#define nx_gzip_per_stream_stat() (nx_gzip_trace & 0x10)
+
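+/* Note: the prt* macros below log through nx_gzip_log, so it must point to
+ * an open FILE before any of them runs.
+ */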
+#define prt(fmt, ...) do { \
+ pthread_mutex_lock(&mutex_log); \
+ flock(nx_gzip_log->_fileno, LOCK_EX); \
+ time_t t; struct tm *m; time(&t); m = localtime(&t); \
+ fprintf(nx_gzip_log, "[%04d/%02d/%02d %02d:%02d:%02d] " \
+ "pid %d: " fmt, \
+ (int)m->tm_year + 1900, (int)m->tm_mon+1, (int)m->tm_mday, \
+ (int)m->tm_hour, (int)m->tm_min, (int)m->tm_sec, \
+ (int)getpid(), ## __VA_ARGS__); \
+ fflush(nx_gzip_log); \
+ flock(nx_gzip_log->_fileno, LOCK_UN); \
+ pthread_mutex_unlock(&mutex_log); \
+} while (0)
+
+/* Use in case of an error */
+#define prt_err(fmt, ...) do { if (nx_dbg >= 0) { \
+ prt("%s:%u: Error: "fmt, \
+ __FILE__, __LINE__, ## __VA_ARGS__); \
+}} while (0)
+
+/* Use in case of a warning */
+#define prt_warn(fmt, ...) do { if (nx_dbg >= 1) { \
+ prt("%s:%u: Warning: "fmt, \
+ __FILE__, __LINE__, ## __VA_ARGS__); \
+}} while (0)
+
+/* Informational printouts */
+#define prt_info(fmt, ...) do { if (nx_dbg >= 2) { \
+ prt("Info: "fmt, ## __VA_ARGS__); \
+}} while (0)
+
+/* Trace zlib wrapper code */
+#define prt_trace(fmt, ...) do { if (nx_gzip_trace_enabled()) { \
+ prt("### "fmt, ## __VA_ARGS__); \
+}} while (0)
+
+/* Trace statistics */
+#define prt_stat(fmt, ...) do { if (nx_gzip_gather_statistics()) { \
+ prt("### "fmt, ## __VA_ARGS__); \
+}} while (0)
+
+/* Trace zlib hardware implementation */
+#define hw_trace(fmt, ...) do { \
+ if (nx_gzip_hw_trace_enabled()) \
+ fprintf(nx_gzip_log, "hhh " fmt, ## __VA_ARGS__); \
+ } while (0)
+
+/* Trace zlib software implementation */
+#define sw_trace(fmt, ...) do { \
+ if (nx_gzip_sw_trace_enabled()) \
+ fprintf(nx_gzip_log, "sss " fmt, ## __VA_ARGS__); \
+ } while (0)
+
+
+/**
+ * str_to_num - Convert a string into a number, handling size suffixes:
+ * KiB for kilobyte
+ * MiB for megabyte
+ * GiB for gigabyte
+ */
+uint64_t str_to_num(char *str);
+void nx_lib_debug(int onoff);
+
+#endif /* _NXU_DBG_H_ */
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/nxu.h b/tools/testing/selftests/powerpc/nx-gzip/include/nxu.h
new file mode 100644
index 000000000000..20a4e883e0d3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/nxu.h
@@ -0,0 +1,650 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Hardware interface of the NX-GZIP compression accelerator
+ *
+ * Copyright (C) IBM Corporation, 2020
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ */
+
+#ifndef _NXU_H
+#define _NXU_H
+
+#include <stdint.h>
+#include <endian.h>
+#include "nx.h"
+
+/* deflate */
+#define LLSZ 286
+#define DSZ 30
+
+/* nx */
+#define DHTSZ 18
+#define DHT_MAXSZ 288
+#define MAX_DDE_COUNT 256
+
+/* util */
+#ifdef NXDBG
+#define NXPRT(X) X
+#else
+#define NXPRT(X)
+#endif
+
+#ifdef NXTIMER
+#include <sys/platform/ppc.h>
+#define NX_CLK(X) X
+#define nx_get_time() __ppc_get_timebase()
+#define nx_get_freq() __ppc_get_timebase_freq()
+#else
+#define NX_CLK(X)
+#define nx_get_time() (-1)
+#define nx_get_freq() (-1)
+#endif
+
+#define NX_MAX_FAULTS 500
+
+/*
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce: completion extension
+ * cpb: coprocessor parameter block (metadata)
+ * crb: coprocessor request block (command)
+ * csb: coprocessor status block (status)
+ * dht: dynamic huffman table
+ * dde: data descriptor element (address, length)
+ * ddl: list of ddes
+ * dh/fh: dynamic and fixed huffman types
+ * fc: coprocessor function code
+ * histlen: history/dictionary length
+ * history: sliding window of up to 32KB of data
+ * lzcount: Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt: source final block type; last block's type during decomp
+ * spbc: source processed byte count
+ * subc: source unprocessed bit count
+ * tebc: target ending bit count; valid bits in the last byte
+ * tpbc: target processed byte count
+ * vas: virtual accelerator switch; the user mode interface
+ */
+
+union nx_qw_t {
+ uint32_t word[4];
+ uint64_t dword[2];
+} __aligned(16);
+
+/*
+ * Note: NX registers with fewer than 32 bits are declared by
+ * convention as uint32_t variables in unions. If *_offset and *_mask
+ * are defined for a variable, then use get_ put_ macros to
+ * conveniently access the register fields for endian conversions.
+ */
+
+struct nx_dde_t {
+ /* Data Descriptor Element, Section 6.4 */
+ union {
+ uint32_t dde_count;
+ /* When dde_count == 0 ddead is a pointer to a data buffer;
+		 * ddebc is the buffer length in bytes.
+		 * When dde_count > 0 this is an indirect dde; ddead is a
+ * pointer to a contiguous list of direct ddes; ddebc is the
+ * total length of all data pointed to by the list of direct
+ * ddes. Note that only one level of indirection is permitted.
+ * See Section 6.4 of the user manual for additional details.
+ */
+ };
+ uint32_t ddebc; /* dde byte count */
+ uint64_t ddead; /* dde address */
+} __aligned(16);
+
+struct nx_csb_t {
+ /* Coprocessor Status Block, Section 6.6 */
+ union {
+ uint32_t csb_v;
+ /* Valid bit. v must be set to 0 by the program
+ * before submitting the coprocessor command.
+ * Software can poll for the v bit
+ */
+
+ uint32_t csb_f;
+ /* 16B CSB size. Written to 0 by DMA when it writes the CPB */
+
+ uint32_t csb_cs;
+ /* cs completion sequence; unused */
+
+ uint32_t csb_cc;
+ /* cc completion code; cc != 0 exception occurred */
+
+ uint32_t csb_ce;
+ /* ce completion extension */
+
+ };
+ uint32_t tpbc;
+ /* target processed byte count TPBC */
+
+ uint64_t fsaddr;
+ /* Section 6.12.1 CSB NonZero error summary. FSA Failing storage
+ * address. Address where error occurred. When available, written
+ * to A field of CSB
+ */
+} __aligned(16);
+
+struct nx_ccb_t {
+ /* Coprocessor Completion Block, Section 6.7 */
+
+ uint32_t reserved[3];
+ union {
+ /* When crb.c==0 (no ccb defined) it is reserved;
+ * When crb.c==1 (ccb defined) it is cm
+ */
+
+ uint32_t ccb_cm;
+		/* Signal interrupt if crb.c==1 and cm==1 */
+
+ uint32_t word;
+ /* generic access to the 32bit word */
+ };
+} __aligned(16);
+
+struct vas_stamped_crb_t {
+ /*
+ * CRB operand of the paste coprocessor instruction is stamped
+	 * in quadword 4 with the information shown here as it is written
+	 * into the receive FIFO of the coprocessor.
+ */
+
+ union {
+ uint32_t vas_buf_num;
+ /* Verification only vas buffer number which correlates to
+ * the low order bits of the atag in the paste command
+ */
+
+ uint32_t send_wc_id;
+ /* Pointer to Send Window Context that provides for NX address
+ * translation information, such as MSR and LPCR bits, job
+ * completion interrupt RA, PSWID, and job utilization counter.
+ */
+
+ };
+ union {
+ uint32_t recv_wc_id;
+ /* Pointer to Receive Window Context. NX uses this to return
+ * credits to a Receive FIFO as entries are dequeued.
+ */
+
+ };
+ uint32_t reserved2;
+ union {
+ uint32_t vas_invalid;
+ /* Invalid bit. If this bit is 1 the CRB is discarded by
+ * NX upon fetching from the receive FIFO. If this bit is 0
+ * the CRB is processed normally. The bit is stamped to 0
+ * by VAS and may be written to 1 by hypervisor while
+ * the CRB is in the receive FIFO (in memory).
+ */
+
+ };
+};
+
+struct nx_stamped_fault_crb_t {
+ /*
+ * A CRB that has a translation fault is stamped by NX in quadword 4
+ * and pasted to the Fault Send Window in VAS.
+ */
+ uint64_t fsa;
+ union {
+ uint32_t nxsf_t;
+ uint32_t nxsf_fs;
+ };
+ uint32_t pswid;
+};
+
+union stamped_crb_t {
+ struct vas_stamped_crb_t vas;
+ struct nx_stamped_fault_crb_t nx;
+};
+
+struct nx_gzip_cpb_t {
+ /*
+ * Coprocessor Parameter Block In/Out are used to pass metadata
+ * to/from accelerator. Tables 6.5 and 6.6 of the user manual.
+ */
+
+ /* CPBInput */
+
+ struct {
+ union {
+ union nx_qw_t qw0;
+ struct {
+ uint32_t in_adler; /* bits 0:31 */
+ uint32_t in_crc; /* bits 32:63 */
+ union {
+ uint32_t in_histlen; /* bits 64:75 */
+ uint32_t in_subc; /* bits 93:95 */
+ };
+ union {
+ /* bits 108:111 */
+ uint32_t in_sfbt;
+ /* bits 112:127 */
+ uint32_t in_rembytecnt;
+ /* bits 116:127 */
+ uint32_t in_dhtlen;
+ };
+ };
+ };
+ union {
+ union nx_qw_t in_dht[DHTSZ]; /* qw[1:18] */
+ char in_dht_char[DHT_MAXSZ]; /* byte access */
+ };
+ union nx_qw_t reserved[5]; /* qw[19:23] */
+ };
+
+ /* CPBOutput */
+
+ volatile struct {
+ union {
+ union nx_qw_t qw24;
+ struct {
+ uint32_t out_adler; /* bits 0:31 qw[24] */
+ uint32_t out_crc; /* bits 32:63 qw[24] */
+ union {
+ /* bits 77:79 qw[24] */
+ uint32_t out_tebc;
+ /* bits 80:95 qw[24] */
+ uint32_t out_subc;
+ };
+ union {
+ /* bits 108:111 qw[24] */
+ uint32_t out_sfbt;
+ /* bits 112:127 qw[24] */
+ uint32_t out_rembytecnt;
+ /* bits 116:127 qw[24] */
+ uint32_t out_dhtlen;
+ };
+ };
+ };
+ union {
+ union nx_qw_t qw25[79]; /* qw[25:103] */
+ /* qw[25] compress no lzcounts or wrap */
+ uint32_t out_spbc_comp_wrap;
+ uint32_t out_spbc_wrap; /* qw[25] wrap */
+ /* qw[25] compress no lzcounts */
+ uint32_t out_spbc_comp;
+ /* 286 LL and 30 D symbol counts */
+ uint32_t out_lzcount[LLSZ+DSZ];
+ struct {
+ union nx_qw_t out_dht[DHTSZ]; /* qw[25:42] */
+ /* qw[43] decompress */
+ uint32_t out_spbc_decomp;
+ };
+ };
+ /* qw[104] compress with lzcounts */
+ uint32_t out_spbc_comp_with_count;
+ };
+} __aligned(128);
+
+struct nx_gzip_crb_t {
+ union { /* byte[0:3] */
+ uint32_t gzip_fc; /* bits[24-31] */
+ };
+ uint32_t reserved1; /* byte[4:7] */
+ union {
+ uint64_t csb_address; /* byte[8:15] */
+ struct {
+ uint32_t reserved2;
+ union {
+ uint32_t crb_c;
+ /* c==0 no ccb defined */
+
+ uint32_t crb_at;
+ /* at==0 address type is ignored;
+ * all addrs effective assumed.
+ */
+
+ };
+ };
+ };
+ struct nx_dde_t source_dde; /* byte[16:31] */
+ struct nx_dde_t target_dde; /* byte[32:47] */
+ volatile struct nx_ccb_t ccb; /* byte[48:63] */
+ volatile union {
+ /* byte[64:239] shift csb by 128 bytes out of the crb; csb was
+ * in crb earlier; JReilly says csb written with partial inject
+ */
+ union nx_qw_t reserved64[11];
+ union stamped_crb_t stamp; /* byte[64:79] */
+ };
+ volatile struct nx_csb_t csb;
+} __aligned(128);
+
+struct nx_gzip_crb_cpb_t {
+ struct nx_gzip_crb_t crb;
+ struct nx_gzip_cpb_t cpb;
+} __aligned(2048);
+
+
+/*
+ * The NX hardware convention numbers the msb on the left as bit 0.
+ * The defines below have *_offset defined as the rightmost bit
+ * position of a field. x of size_mask(x) is the field width in bits.
+ */
+
+#define size_mask(x) ((1U<<(x))-1)
+
+/*
+ * Offsets and Widths within the containing 32 bits of the various NX
+ * gzip hardware registers. Use the getnn/putnn macros to access
+ * these regs
+ */
+
+#define dde_count_mask size_mask(8)
+#define dde_count_offset 23
+
+/* CSB */
+
+#define csb_v_mask size_mask(1)
+#define csb_v_offset 0
+#define csb_f_mask size_mask(1)
+#define csb_f_offset 6
+#define csb_cs_mask size_mask(8)
+#define csb_cs_offset 15
+#define csb_cc_mask size_mask(8)
+#define csb_cc_offset 23
+#define csb_ce_mask size_mask(8)
+#define csb_ce_offset 31
+
+/* CCB */
+
+#define ccb_cm_mask size_mask(3)
+#define ccb_cm_offset 31
+
+/* VAS stamped CRB fields */
+
+#define vas_buf_num_mask size_mask(6)
+#define vas_buf_num_offset 5
+#define send_wc_id_mask size_mask(16)
+#define send_wc_id_offset 31
+#define recv_wc_id_mask size_mask(16)
+#define recv_wc_id_offset 31
+#define vas_invalid_mask size_mask(1)
+#define vas_invalid_offset 31
+
+/* NX stamped fault CRB fields */
+
+#define nxsf_t_mask size_mask(1)
+#define nxsf_t_offset 23
+#define nxsf_fs_mask size_mask(8)
+#define nxsf_fs_offset 31
+
+/* CPB input */
+
+#define in_histlen_mask size_mask(12)
+#define in_histlen_offset 11
+#define in_dhtlen_mask size_mask(12)
+#define in_dhtlen_offset 31
+#define in_subc_mask size_mask(3)
+#define in_subc_offset 31
+#define in_sfbt_mask size_mask(4)
+#define in_sfbt_offset 15
+#define in_rembytecnt_mask size_mask(16)
+#define in_rembytecnt_offset 31
+
+/* CPB output */
+
+#define out_tebc_mask size_mask(3)
+#define out_tebc_offset 15
+#define out_subc_mask size_mask(16)
+#define out_subc_offset 31
+#define out_sfbt_mask size_mask(4)
+#define out_sfbt_offset 15
+#define out_rembytecnt_mask size_mask(16)
+#define out_rembytecnt_offset 31
+#define out_dhtlen_mask size_mask(12)
+#define out_dhtlen_offset 31
+
+/* CRB */
+
+#define gzip_fc_mask size_mask(8)
+#define gzip_fc_offset 31
+#define crb_c_mask size_mask(1)
+#define crb_c_offset 28
+#define crb_at_mask size_mask(1)
+#define crb_at_offset 30
+#define csb_address_mask ~(15UL) /* mask off bottom 4b */
+
+/*
+ * Access macros for the registers. Do not access registers directly
+ * because of the endian conversion. The P9 processor may run either
+ * little or big endian, but the NX coprocessor regs are always
+ * big endian.
+ * Use the 32 and 64b macros to access registers of the respective
+ * sizes.
+ * Use the nn forms for register fields shorter than 32 bits.
+ */
+
+#define getnn(ST, REG) ((be32toh(ST.REG) >> (31-REG##_offset)) \
+ & REG##_mask)
+#define getpnn(ST, REG) ((be32toh((ST)->REG) >> (31-REG##_offset)) \
+ & REG##_mask)
+#define get32(ST, REG) (be32toh(ST.REG))
+#define getp32(ST, REG) (be32toh((ST)->REG))
+#define get64(ST, REG) (be64toh(ST.REG))
+#define getp64(ST, REG) (be64toh((ST)->REG))
+
+#define unget32(ST, REG) (get32(ST, REG) & ~((REG##_mask) \
+ << (31-REG##_offset)))
+/* get 32bits less the REG field */
+
+#define ungetp32(ST, REG) (getp32(ST, REG) & ~((REG##_mask) \
+ << (31-REG##_offset)))
+/* get 32bits less the REG field */
+
+#define clear_regs(ST) memset((void *)(&(ST)), 0, sizeof(ST))
+#define clear_dde(ST) do { ST.dde_count = ST.ddebc = 0; ST.ddead = 0; \
+ } while (0)
+#define clearp_dde(ST) do { (ST)->dde_count = (ST)->ddebc = 0; \
+ (ST)->ddead = 0; \
+ } while (0)
+#define clear_struct(ST) memset((void *)(&(ST)), 0, sizeof(ST))
+#define putnn(ST, REG, X) (ST.REG = htobe32(unget32(ST, REG) | (((X) \
+ & REG##_mask) << (31-REG##_offset))))
+#define putpnn(ST, REG, X) ((ST)->REG = htobe32(ungetp32(ST, REG) \
+ | (((X) & REG##_mask) << (31-REG##_offset))))
+
+#define put32(ST, REG, X) (ST.REG = htobe32(X))
+#define putp32(ST, REG, X) ((ST)->REG = htobe32(X))
+#define put64(ST, REG, X) (ST.REG = htobe64(X))
+#define putp64(ST, REG, X) ((ST)->REG = htobe64(X))
+
+/*
+ * Completion extension ce(0) ce(1) ce(2). Bits ce(3-7)
+ * unused. Section 6.6 Figure 6.7.
+ */
+
+#define get_csb_ce(ST) ((uint32_t)getnn(ST, csb_ce))
+#define get_csb_ce_ms3b(ST) (get_csb_ce(ST) >> 5)
+#define put_csb_ce_ms3b(ST, X) putnn(ST, csb_ce, ((uint32_t)(X) << 5))
+
+#define CSB_CE_PARTIAL 0x4
+#define CSB_CE_TERMINATE 0x2
+#define CSB_CE_TPBC_VALID 0x1
+
+#define csb_ce_termination(X) (!!((X) & CSB_CE_TERMINATE))
+/* termination, output buffers may be modified, SPBC/TPBC invalid Fig.6-7 */
+
+#define csb_ce_check_completion(X) (!csb_ce_termination(X))
+/* if not terminated then check full or partial completion */
+
+#define csb_ce_partial_completion(X) (!!((X) & CSB_CE_PARTIAL))
+#define csb_ce_full_completion(X) (!csb_ce_partial_completion(X))
+#define csb_ce_tpbc_valid(X) (!!((X) & CSB_CE_TPBC_VALID))
+/* TPBC indicates successfully stored data count */
+
+#define csb_ce_default_err(X) csb_ce_termination(X)
+/* most error CEs have CE(0)=0 and CE(1)=1 */
+
+#define csb_ce_cc3_partial(X) csb_ce_partial_completion(X)
+/* some CC=3 are partially completed, Table 6-8 */
+
+#define csb_ce_cc64(X)		(((X) & (CSB_CE_PARTIAL			\
+					| CSB_CE_TERMINATE)) == 0)
+/* Compression: when TPBC>SPBC then CC=64 Table 6-8; target didn't
+ * compress smaller than source.
+ */
+
+/* Decompress SFBT combinations Tables 5-3, 6-4, 6-6 */
+
+#define SFBT_BFINAL 0x1
+#define SFBT_LIT 0x4
+#define SFBT_FHT 0x5
+#define SFBT_DHT 0x6
+#define SFBT_HDR 0x7
+
+/*
+ * NX gzip function codes. Table 6.2.
+ * Bits 0:4 are the FC. Bit 5 is used by the DMA controller to
+ * select one of the two Byte Count Limits.
+ */
+
+#define GZIP_FC_LIMIT_MASK 0x01
+#define GZIP_FC_COMPRESS_FHT 0x00
+#define GZIP_FC_COMPRESS_DHT 0x02
+#define GZIP_FC_COMPRESS_FHT_COUNT 0x04
+#define GZIP_FC_COMPRESS_DHT_COUNT 0x06
+#define GZIP_FC_COMPRESS_RESUME_FHT 0x08
+#define GZIP_FC_COMPRESS_RESUME_DHT 0x0a
+#define GZIP_FC_COMPRESS_RESUME_FHT_COUNT 0x0c
+#define GZIP_FC_COMPRESS_RESUME_DHT_COUNT 0x0e
+#define GZIP_FC_DECOMPRESS 0x10
+#define GZIP_FC_DECOMPRESS_SINGLE_BLK_N_SUSPEND 0x12
+#define GZIP_FC_DECOMPRESS_RESUME 0x14
+#define GZIP_FC_DECOMPRESS_RESUME_SINGLE_BLK_N_SUSPEND 0x16
+#define GZIP_FC_WRAP 0x1e
+
+#define fc_is_compress(fc) (((fc) & 0x10) == 0)
+#define fc_has_count(fc) (fc_is_compress(fc) && (((fc) & 0x4) != 0))
+
+/* CSB.CC Error codes */
+
+#define ERR_NX_OK 0
+#define ERR_NX_ALIGNMENT 1
+#define ERR_NX_OPOVERLAP 2
+#define ERR_NX_DATA_LENGTH 3
+#define ERR_NX_TRANSLATION 5
+#define ERR_NX_PROTECTION 6
+#define ERR_NX_EXTERNAL_UE7 7
+#define ERR_NX_INVALID_OP 8
+#define ERR_NX_PRIVILEGE 9
+#define ERR_NX_INTERNAL_UE 10
+#define ERR_NX_EXTERN_UE_WR 12
+#define ERR_NX_TARGET_SPACE 13
+#define ERR_NX_EXCESSIVE_DDE 14
+#define ERR_NX_TRANSL_WR 15
+#define ERR_NX_PROTECT_WR 16
+#define ERR_NX_SUBFUNCTION 17
+#define ERR_NX_FUNC_ABORT 18
+#define ERR_NX_BYTE_MAX 19
+#define ERR_NX_CORRUPT_CRB 20
+#define ERR_NX_INVALID_CRB 21
+#define ERR_NX_INVALID_DDE 30
+#define ERR_NX_SEGMENTED_DDL 31
+#define ERR_NX_DDE_OVERFLOW 33
+#define ERR_NX_TPBC_GT_SPBC 64
+#define ERR_NX_MISSING_CODE 66
+#define ERR_NX_INVALID_DIST 67
+#define ERR_NX_INVALID_DHT 68
+#define ERR_NX_EXTERNAL_UE90 90
+#define ERR_NX_WDOG_TIMER 224
+#define ERR_NX_AT_FAULT 250
+#define ERR_NX_INTR_SERVER 252
+#define ERR_NX_UE253 253
+#define ERR_NX_NO_HW 254
+#define ERR_NX_HUNG_OP 255
+#define ERR_NX_END 256
+
+/* initial values for non-resume operations */
+#define INIT_CRC 0 /* crc32(0L, Z_NULL, 0) */
+#define INIT_ADLER 1 /* adler32(0L, Z_NULL, 0) adler is initialized to 1 */
+
+/* prototypes */
+int nxu_submit_job(struct nx_gzip_crb_cpb_t *c, void *handle);
+
+extern void nxu_sigsegv_handler(int sig, siginfo_t *info, void *ctx);
+extern int nxu_touch_pages(void *buf, long buf_len, long page_len, int wr);
+
+/* caller supplies a print buffer 4*sizeof(crb) */
+
+char *nx_crb_str(struct nx_gzip_crb_t *crb, char *prbuf);
+char *nx_cpb_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
+char *nx_prt_hex(void *cp, int sz, char *prbuf);
+char *nx_lzcount_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
+char *nx_strerror(int e);
+
+#ifdef NX_SIM
+#include <stdio.h>
+int nx_sim_init(void *ctx);
+int nx_sim_end(void *ctx);
+int nxu_run_sim_job(struct nx_gzip_crb_cpb_t *c, void *ctx);
+#endif /* NX_SIM */
+
+/* Deflate stream manipulation */
+
+#define set_final_bit(x) (x |= (unsigned char)1)
+#define clr_final_bit(x) (x &= ~(unsigned char)1)
+
+#define append_empty_fh_blk(p, b) do { *(p) = (2 | (1&(b))); *((p)+1) = 0; \
+ } while (0)
+/* append 10 bits 0000001b 00...... ;
+ * assumes appending starts on a byte boundary; b is the final bit.
+ */
+
+
+#ifdef NX_842
+
+/* 842 Engine */
+
+struct nx_eft_crb_t {
+ union { /* byte[0:3] */
+ uint32_t eft_fc; /* bits[29-31] */
+ };
+ uint32_t reserved1; /* byte[4:7] */
+ union {
+ uint64_t csb_address; /* byte[8:15] */
+ struct {
+ uint32_t reserved2;
+ union {
+ uint32_t crb_c;
+ /* c==0 no ccb defined */
+
+ uint32_t crb_at;
+ /* at==0 address type is ignored;
+ * all addrs effective assumed.
+ */
+
+ };
+ };
+ };
+ struct nx_dde_t source_dde; /* byte[16:31] */
+ struct nx_dde_t target_dde; /* byte[32:47] */
+ struct nx_ccb_t ccb; /* byte[48:63] */
+ union {
+ union nx_qw_t reserved64[3]; /* byte[64:96] */
+ };
+ struct nx_csb_t csb;
+} __aligned(128);
+
+/* 842 CRB */
+
+#define EFT_FC_MASK size_mask(3)
+#define EFT_FC_OFFSET 31
+#define EFT_FC_COMPRESS 0x0
+#define EFT_FC_COMPRESS_WITH_CRC 0x1
+#define EFT_FC_DECOMPRESS 0x2
+#define EFT_FC_DECOMPRESS_WITH_CRC 0x3
+#define EFT_FC_BLK_DATA_MOVE 0x4
+#endif /* NX_842 */
+
+#endif /* _NXU_H */
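
A short usage sketch of the access-macro convention above (illustrative only, not code from the patch): fields narrower than 32 bits go through getnn/putnn, which apply the big-endian conversion and the left-numbered bit offsets, while full-width registers go through get32/put32 and get64/put64.

	struct nx_gzip_crb_cpb_t cmd = { 0 };
	uint32_t cc, tpbc;

	/* Write the 8-bit function code field within the 32-bit gzip_fc word. */
	putnn(cmd.crb, gzip_fc, GZIP_FC_COMPRESS_FHT);

	/* After completion, read the 8-bit completion code and the full
	 * 32-bit target processed byte count from the CSB.
	 */
	cc = getnn(cmd.crb.csb, csb_cc);
	tpbc = get32(cmd.crb.csb, tpbc);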
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h b/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h
new file mode 120000
index 000000000000..77fb4c7236d0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/uapi/asm/vas-api.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh b/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh
new file mode 100755
index 000000000000..c7b46c5fd7b3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+if [[ ! -w /dev/crypto/nx-gzip ]]; then
+ echo "Can't access /dev/crypto/nx-gzip, skipping"
+ echo "skip: $0"
+ exit 4
+fi
+
+set -e
+
+function cleanup
+{
+ rm -f nx-tempfile*
+}
+
+trap cleanup EXIT
+
+function test_sizes
+{
+ local n=$1
+ local fname="nx-tempfile.$n"
+
+ for size in 4K 64K 1M 64M
+ do
+ echo "Testing $size ($n) ..."
+ dd if=/dev/urandom of=$fname bs=$size count=1
+ ./gzfht_test $fname
+ ./gunz_test ${fname}.nx.gz
+ done
+}
+
+echo "Doing basic test of different sizes ..."
+test_sizes 0
+
+echo "Running tests in parallel ..."
+for i in {1..16}
+do
+ test_sizes $i &
+done
+
+wait
+
+echo "OK"
+
+exit 0
diff --git a/tools/testing/selftests/powerpc/pmu/.gitignore b/tools/testing/selftests/powerpc/pmu/.gitignore
index ff7896903d7b..f69b1e2641a1 100644
--- a/tools/testing/selftests/powerpc/pmu/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/.gitignore
@@ -2,3 +2,4 @@
count_instructions
l3_bank_test
per_event_excludes
+count_stcx_fail
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index 19046db995fe..904672fb78dd 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -2,7 +2,7 @@
noarg:
$(MAKE) -C ../
-TEST_GEN_PROGS := count_instructions l3_bank_test per_event_excludes
+TEST_GEN_PROGS := count_instructions count_stcx_fail l3_bank_test per_event_excludes
EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c
top_srcdir = ../../../../..
@@ -13,8 +13,12 @@ all: $(TEST_GEN_PROGS) ebb
$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
# loop.S can only be built 64-bit
+$(OUTPUT)/count_instructions: CFLAGS += -m64
$(OUTPUT)/count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
- $(CC) $(CFLAGS) -m64 -o $@ $^
+
+$(OUTPUT)/count_stcx_fail: CFLAGS += -m64
+$(OUTPUT)/count_stcx_fail: loop.S $(EXTRA_SOURCES)
+
$(OUTPUT)/per_event_excludes: ../utils.c
diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
new file mode 100644
index 000000000000..2070a1e2b3a5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "event.h"
+#include "utils.h"
+#include "lib.h"
+
+extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target);
+
+static void setup_event(struct event *e, u64 config, int type, char *name)
+{
+ event_init_opts(e, config, type, name);
+
+ e->attr.disabled = 1;
+ e->attr.exclude_kernel = 1;
+ e->attr.exclude_hv = 1;
+ e->attr.exclude_idle = 1;
+}
+
+static int do_count_loop(struct event *events, u64 instructions,
+ u64 overhead, bool report)
+{
+ s64 difference, expected;
+ double percentage;
+ u64 dummy;
+
+ prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+	/* The loop body is 32 instructions, so run (instructions >> 5) iterations */
+ thirty_two_instruction_loop_with_ll_sc(instructions >> 5, &dummy);
+
+ prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+ event_read(&events[0]);
+ event_read(&events[1]);
+ event_read(&events[2]);
+
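+	/* Each failed stcx. replays the ldarx ... bne- sequence in loop.S,
+	 * which is 10 instructions, hence the "* 10" correction.
+	 */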
+ expected = instructions + overhead + (events[2].result.value * 10);
+ difference = events[0].result.value - expected;
+ percentage = (double)difference / events[0].result.value * 100;
+
+ if (report) {
+ printf("-----\n");
+ event_report(&events[0]);
+ event_report(&events[1]);
+ event_report(&events[2]);
+
+ printf("Looped for %llu instructions, overhead %llu\n", instructions, overhead);
+ printf("Expected %llu\n", expected);
+ printf("Actual %llu\n", events[0].result.value);
+ printf("Delta %lld, %f%%\n", difference, percentage);
+ }
+
+ event_reset(&events[0]);
+ event_reset(&events[1]);
+ event_reset(&events[2]);
+
+ if (difference < 0)
+ difference = -difference;
+
+ /* Tolerate a difference below 0.0001 % */
+ difference *= 10000 * 100;
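+	/* difference/actual >= 1e-6 (0.0001%) iff this scaled ratio is non-zero */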
+ if (difference / events[0].result.value)
+ return -1;
+
+ return 0;
+}
+
+/* Count how many instructions it takes to do a null loop */
+static u64 determine_overhead(struct event *events)
+{
+ u64 current, overhead;
+ int i;
+
+ do_count_loop(events, 0, 0, false);
+ overhead = events[0].result.value;
+
+ for (i = 0; i < 100; i++) {
+ do_count_loop(events, 0, 0, false);
+ current = events[0].result.value;
+ if (current < overhead) {
+ printf("Replacing overhead %llu with %llu\n", overhead, current);
+ overhead = current;
+ }
+ }
+
+ return overhead;
+}
+
+#define PM_MRK_STCX_FAIL 0x03e158
+#define PM_STCX_FAIL 0x01e058
+
+static int test_body(void)
+{
+ struct event events[3];
+ u64 overhead;
+
+ // The STCX_FAIL event we use works on Power8 or later
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
+ setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "instructions");
+ setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "cycles");
+ setup_event(&events[2], PM_STCX_FAIL, PERF_TYPE_RAW, "stcx_fail");
+
+ if (event_open(&events[0])) {
+ perror("perf_event_open");
+ return -1;
+ }
+
+ if (event_open_with_group(&events[1], events[0].fd)) {
+ perror("perf_event_open");
+ return -1;
+ }
+
+ if (event_open_with_group(&events[2], events[0].fd)) {
+ perror("perf_event_open");
+ return -1;
+ }
+
+ overhead = determine_overhead(events);
+ printf("Overhead of null loop: %llu instructions\n", overhead);
+
+ /* Run for 1Mi instructions */
+ FAIL_IF(do_count_loop(events, 1000000, overhead, true));
+
+ /* Run for 10Mi instructions */
+ FAIL_IF(do_count_loop(events, 10000000, overhead, true));
+
+ /* Run for 100Mi instructions */
+ FAIL_IF(do_count_loop(events, 100000000, overhead, true));
+
+ /* Run for 1Bi instructions */
+ FAIL_IF(do_count_loop(events, 1000000000, overhead, true));
+
+ /* Run for 16Bi instructions */
+ FAIL_IF(do_count_loop(events, 16000000000, overhead, true));
+
+ /* Run for 64Bi instructions */
+ FAIL_IF(do_count_loop(events, 64000000000, overhead, true));
+
+ event_close(&events[0]);
+ event_close(&events[1]);
+
+ return 0;
+}
+
+static int count_ll_sc(void)
+{
+ return eat_cpu(test_body);
+}
+
+int main(void)
+{
+ return test_harness(count_ll_sc, "count_ll_sc");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index ca35dd8848b0..af3df79d8163 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -7,7 +7,7 @@ noarg:
# The EBB handler is 64-bit code and everything links against it
CFLAGS += -m64
-TMPOUT = $(OUTPUT)/
+TMPOUT = $(OUTPUT)/TMPDIR/
# Toolchains may build PIE by default which breaks the assembly
no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
index a2d7b0e3dca9..a26ac122c759 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
@@ -91,8 +91,6 @@ int back_to_back_ebbs(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
index bc893813483e..bb9f587fa76e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
@@ -42,8 +42,6 @@ int cycles(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
index dcd351d20328..9ae795ce314e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
@@ -99,8 +99,6 @@ int cycles_with_freeze(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
printf("EBBs while frozen %d\n", ebbs_while_frozen);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
index 94c99c12c0f2..4b45a2e70f62 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
@@ -71,8 +71,6 @@ int cycles_with_mmcr2(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
index dfbc5c3ad52d..21537d6eb6b7 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
@@ -396,8 +396,6 @@ int ebb_child(union pipe read_pipe, union pipe write_pipe)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
index ca2f7d729155..b208bf6ad58d 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
@@ -38,8 +38,6 @@ static int victim_child(union pipe read_pipe, union pipe write_pipe)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
FAIL_IF(ebb_state.stats.ebb_count == 0);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
index ac3e6e182614..ba2681a12cc7 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
@@ -75,7 +75,6 @@ static int test_body(void)
ebb_freeze_pmcs();
ebb_global_disable();
- count_pmc(4, sample_period);
mtspr(SPRN_PMC4, 0xdead);
dump_summary_ebb_state();
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
index b8242e9d97d2..791d37ba327b 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
@@ -70,13 +70,6 @@ int multi_counter(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
- count_pmc(2, sample_period);
- count_pmc(3, sample_period);
- count_pmc(4, sample_period);
- count_pmc(5, sample_period);
- count_pmc(6, sample_period);
-
dump_ebb_state();
for (i = 0; i < 6; i++)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
index a05c0e18ded6..9b0f70d59702 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
@@ -61,8 +61,6 @@ static int cycles_child(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_summary_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
index 153ebc92234f..2904c741e04e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
@@ -82,8 +82,6 @@ static int test_body(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
if (mmcr0_mismatch)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
index eadad75ed7e6..b29f8ba22d1e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
@@ -76,8 +76,6 @@ int pmc56_overflow(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(2, sample_period);
-
dump_ebb_state();
printf("PMC5/6 overflow %d\n", pmc56_overflowed);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.h b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
index 7c0fb5d2bdb1..da2a3be5441f 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/trace.h
+++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
@@ -18,7 +18,7 @@ struct trace_entry
{
u8 type;
u8 length;
- u8 data[0];
+ u8 data[];
};
struct trace_buffer
@@ -26,7 +26,7 @@ struct trace_buffer
u64 size;
bool overflow;
void *tail;
- u8 data[0];
+ u8 data[];
};
struct trace_buffer *trace_buffer_allocate(u64 size);
diff --git a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
index a96d512a18c4..a5dfa9bf3b9f 100644
--- a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
+++ b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
@@ -20,6 +20,9 @@ static int l3_bank_test(void)
char *p;
int i;
+ // The L3 bank logic is only used on Power8 or later
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
p = malloc(MALLOC_SIZE);
FAIL_IF(!p);
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h
index fa12e7d0b4d3..bf1bec013bbb 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.h
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -6,6 +6,7 @@
#ifndef __SELFTESTS_POWERPC_PMU_LIB_H
#define __SELFTESTS_POWERPC_PMU_LIB_H
+#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
diff --git a/tools/testing/selftests/powerpc/pmu/loop.S b/tools/testing/selftests/powerpc/pmu/loop.S
index 8cc9b5e2c9de..c52ba09b6fed 100644
--- a/tools/testing/selftests/powerpc/pmu/loop.S
+++ b/tools/testing/selftests/powerpc/pmu/loop.S
@@ -41,3 +41,38 @@ FUNC_START(thirty_two_instruction_loop)
subi r3,r3,1
b FUNC_NAME(thirty_two_instruction_loop)
FUNC_END(thirty_two_instruction_loop)
+
+FUNC_START(thirty_two_instruction_loop_with_ll_sc)
+ cmpdi r3,0
+ beqlr
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1 # 5
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+1: ldarx r6,0,r4 # 10
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1 # 15
+ addi r5,r5,1
+ addi r5,r5,1
+ stdcx. r6,0,r4
+ bne- 1b
+ addi r5,r5,1 # 20
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1 # 25
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1 # 30
+ subi r3,r3,1
+ b FUNC_NAME(thirty_two_instruction_loop_with_ll_sc)
+FUNC_END(thirty_two_instruction_loop_with_ll_sc)
diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
index 2756fe2efdc5..ad32a09a6540 100644
--- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
+++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
@@ -23,12 +23,9 @@
static int per_event_excludes(void)
{
struct event *e, events[4];
- char *platform;
int i;
- platform = (char *)get_auxv_entry(AT_BASE_PLATFORM);
- FAIL_IF(!platform);
- SKIP_IF(strcmp(platform, "power8") != 0);
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
/*
* We need to create the events disabled, otherwise the running/enabled
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index d5c64fee032d..bbc05ffc5860 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -150,7 +150,7 @@ static int child(struct shared_info *info)
printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
user_write, info->amr, pkey1, pkey2, pkey3);
- mtspr(SPRN_AMR, info->amr);
+ set_amr(info->amr);
/*
* We won't use pkey3. This tests whether the kernel restores the UAMOR
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
index fc477dfe86a2..2e0d86e0687e 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -20,6 +20,8 @@
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <linux/limits.h>
#include "ptrace.h"
#define SPRN_PVR 0x11F
@@ -44,6 +46,7 @@ struct gstruct {
};
static volatile struct gstruct gstruct __attribute__((aligned(512)));
+static volatile char cwd[PATH_MAX] __attribute__((aligned(8)));
static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo)
{
@@ -138,6 +141,9 @@ static void test_workload(void)
write_var(len);
}
+ /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */
+ syscall(__NR_getcwd, &cwd, PATH_MAX);
+
/* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */
write_var(1);
@@ -150,6 +156,9 @@ static void test_workload(void)
else
read_var(1);
+ /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */
+ syscall(__NR_getcwd, &cwd, PATH_MAX);
+
/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */
gstruct.a[rand() % A_LEN] = 'a';
@@ -293,6 +302,24 @@ static int test_set_debugreg(pid_t child_pid)
return 0;
}
+static int test_set_debugreg_kernel_userspace(pid_t child_pid)
+{
+ unsigned long wp_addr = (unsigned long)cwd;
+ char *name = "PTRACE_SET_DEBUGREG";
+
+ /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */
+ wp_addr &= ~0x7UL;
+ wp_addr |= (1Ul << DABR_READ_SHIFT);
+ wp_addr |= (1UL << DABR_WRITE_SHIFT);
+ wp_addr |= (1UL << DABR_TRANSLATION_SHIFT);
+ ptrace_set_debugreg(child_pid, wp_addr);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "Kernel Access Userspace", wp_addr, 8);
+
+ ptrace_set_debugreg(child_pid, 0);
+ return 0;
+}
+
static void get_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type,
unsigned long addr, int len)
{
@@ -338,6 +365,22 @@ static void test_sethwdebug_exact(pid_t child_pid)
ptrace_delhwdebug(child_pid, wh);
}
+static void test_sethwdebug_exact_kernel_userspace(pid_t child_pid)
+{
+ struct ppc_hw_breakpoint info;
+ unsigned long wp_addr = (unsigned long)&cwd;
+ char *name = "PPC_PTRACE_SETHWDEBUG, MODE_EXACT";
+ int len = 1; /* hardcoded in kernel */
+ int wh;
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */
+ get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, 0);
+ wh = ptrace_sethwdebug(child_pid, &info);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "Kernel Access Userspace", wp_addr, len);
+ ptrace_delhwdebug(child_pid, wh);
+}
+
static void test_sethwdebug_range_aligned(pid_t child_pid)
{
struct ppc_hw_breakpoint info;
@@ -452,9 +495,10 @@ static void
run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr)
{
test_set_debugreg(child_pid);
+ test_set_debugreg_kernel_userspace(child_pid);
+ test_sethwdebug_exact(child_pid);
+ test_sethwdebug_exact_kernel_userspace(child_pid);
if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) {
- test_sethwdebug_exact(child_pid);
-
test_sethwdebug_range_aligned(child_pid);
if (dawr || is_8xx) {
test_sethwdebug_range_unaligned(child_pid);
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
index bdbbbe8431e0..bc454f899124 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -44,7 +44,7 @@ struct shared_info {
unsigned long amr2;
/* AMR value that ptrace should refuse to write to the child. */
- unsigned long amr3;
+ unsigned long invalid_amr;
/* IAMR value the parent expects to read from the child. */
unsigned long expected_iamr;
@@ -57,8 +57,8 @@ struct shared_info {
* (even though they're valid ones) because userspace doesn't have
* access to those registers.
*/
- unsigned long new_iamr;
- unsigned long new_uamor;
+ unsigned long invalid_iamr;
+ unsigned long invalid_uamor;
};
static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
@@ -66,11 +66,6 @@ static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
return syscall(__NR_pkey_alloc, flags, init_access_rights);
}
-static int sys_pkey_free(int pkey)
-{
- return syscall(__NR_pkey_free, pkey);
-}
-
static int child(struct shared_info *info)
{
unsigned long reg;
@@ -100,33 +95,37 @@ static int child(struct shared_info *info)
info->amr1 |= 3ul << pkeyshift(pkey1);
info->amr2 |= 3ul << pkeyshift(pkey2);
- info->amr3 |= info->amr2 | 3ul << pkeyshift(pkey3);
+ /*
+	 * Invalid AMR value where we try to force write
+	 * bits which are denied by the UAMOR setting.
+ */
+ info->invalid_amr = info->amr2 | (~0x0UL & ~info->expected_uamor);
+ /*
+ * if PKEY_DISABLE_EXECUTE succeeded we should update the expected_iamr
+ */
if (disable_execute)
info->expected_iamr |= 1ul << pkeyshift(pkey1);
else
info->expected_iamr &= ~(1ul << pkeyshift(pkey1));
- info->expected_iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3));
-
- info->expected_uamor |= 3ul << pkeyshift(pkey1) |
- 3ul << pkeyshift(pkey2);
- info->new_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2);
- info->new_uamor |= 3ul << pkeyshift(pkey1);
+ /*
+	 * We allocated pkey2 and pkey3 above. Clear the IAMR bits.
+ */
+ info->expected_iamr &= ~(1ul << pkeyshift(pkey2));
+ info->expected_iamr &= ~(1ul << pkeyshift(pkey3));
/*
- * We won't use pkey3. We just want a plausible but invalid key to test
- * whether ptrace will let us write to AMR bits we are not supposed to.
- *
- * This also tests whether the kernel restores the UAMOR permissions
- * after a key is freed.
+ * Create an IAMR value different from expected value.
+ * Kernel will reject an IAMR and UAMOR change.
*/
- sys_pkey_free(pkey3);
+ info->invalid_iamr = info->expected_iamr | (1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2));
+ info->invalid_uamor = info->expected_uamor & ~(0x3ul << pkeyshift(pkey1));
printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
user_write, info->amr1, pkey1, pkey2, pkey3);
- mtspr(SPRN_AMR, info->amr1);
+ set_amr(info->amr1);
/* Wait for parent to read our AMR value and write a new one. */
ret = prod_parent(&info->child_sync);
@@ -196,9 +195,9 @@ static int parent(struct shared_info *info, pid_t pid)
PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
PARENT_FAIL_IF(ret, &info->child_sync);
- info->amr1 = info->amr2 = info->amr3 = regs[0];
- info->expected_iamr = info->new_iamr = regs[1];
- info->expected_uamor = info->new_uamor = regs[2];
+ info->amr1 = info->amr2 = regs[0];
+ info->expected_iamr = regs[1];
+ info->expected_uamor = regs[2];
/* Wake up child so that it can set itself up. */
ret = prod_child(&info->child_sync);
@@ -234,10 +233,10 @@ static int parent(struct shared_info *info, pid_t pid)
return ret;
/* Write invalid AMR value in child. */
- ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr3, 1);
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->invalid_amr, 1);
PARENT_FAIL_IF(ret, &info->child_sync);
- printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3);
+ printf("%-30s AMR: %016lx\n", ptrace_write_running, info->invalid_amr);
/* Wake up child so that it can verify it didn't change. */
ret = prod_child(&info->child_sync);
@@ -249,7 +248,7 @@ static int parent(struct shared_info *info, pid_t pid)
/* Try to write to IAMR. */
regs[0] = info->amr1;
- regs[1] = info->new_iamr;
+ regs[1] = info->invalid_iamr;
ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2);
PARENT_FAIL_IF(!ret, &info->child_sync);
@@ -257,7 +256,7 @@ static int parent(struct shared_info *info, pid_t pid)
ptrace_write_running, regs[0], regs[1]);
/* Try to write to IAMR and UAMOR. */
- regs[2] = info->new_uamor;
+ regs[2] = info->invalid_uamor;
ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3);
PARENT_FAIL_IF(!ret, &info->child_sync);
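For reference, a minimal sketch (not part of the patch) of how the invalid AMR value in the hunk above is built from per-key masks. The pkeyshift() definition and the key numbers here are assumptions for illustration; the selftest obtains its keys from sys_pkey_alloc() and reads the expected UAMOR via ptrace.

/*
 * Sketch only: derive an "invalid" AMR by setting every bit that the
 * UAMOR says userspace (and hence ptrace) may not modify.
 */
#include <stdio.h>

#define NR_PKEYS		32
#define PKEY_BITS_PER_PKEY	2
/* key 0 owns the top two bits of AMR/IAMR/UAMOR */
#define pkeyshift(pkey)	((NR_PKEYS - (pkey) - 1) * PKEY_BITS_PER_PKEY)

int main(void)
{
	int pkey1 = 2, pkey2 = 3;		/* hypothetical allocated keys */
	unsigned long expected_uamor = 0, amr2 = 0;

	expected_uamor |= 3ul << pkeyshift(pkey1);
	expected_uamor |= 3ul << pkeyshift(pkey2);
	amr2 |= 3ul << pkeyshift(pkey2);

	/* Same construction as the patch's invalid_amr */
	unsigned long invalid_amr = amr2 | (~0x0UL & ~expected_uamor);

	printf("uamor=%016lx amr2=%016lx invalid_amr=%016lx\n",
	       expected_uamor, amr2, invalid_amr);
	return 0;
}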
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
index 58cb1a860cc9..4436ca9d3caf 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
@@ -78,6 +78,9 @@ int ptrace_tar(void)
pid_t pid;
int ret, status;
+ // TAR was added in v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
pid = fork();
if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
index c4fe0e893306..cb9875f764ca 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
@@ -61,6 +61,8 @@ int ptrace_vsx(void)
pid_t pid;
int ret, status, i;
+ SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX));
+
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
for (i = 0; i < VEC_MAX; i++)
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
index 0a7d0afb26b8..93a65bd1f231 100644
--- a/tools/testing/selftests/powerpc/security/rfi_flush.c
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -10,6 +10,7 @@
#include <stdint.h>
#include <malloc.h>
#include <unistd.h>
+#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
@@ -41,6 +42,40 @@ static void syscall_loop(char *p, unsigned long iterations,
}
}
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+ static int warned = 0;
+ ucontext_t *ctx = (ucontext_t *)unused;
+ unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+ /* mtspr 3,RS to check for move to DSCR below */
+ if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
+ if (!warned++)
+ printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+ *pc += 4;
+ } else {
+ printf("SIGILL at %p\n", pc);
+ abort();
+ }
+}
+
+static void set_dscr(unsigned long val)
+{
+ static int init = 0;
+ struct sigaction sa;
+
+ if (!init) {
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = sigill_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGILL, &sa, NULL))
+ perror("sigill_handler");
+ init = 1;
+ }
+
+ asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
+
int rfi_flush_test(void)
{
char *p;
@@ -54,6 +89,9 @@ int rfi_flush_test(void)
SKIP_IF(geteuid() != 0);
+ // The PMU event we use only works on Power7 or later
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) {
perror("Unable to read powerpc/rfi_flush debugfs file");
SKIP_IF(1);
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index 8c6b982af2a8..adc2b7294e5f 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -134,6 +134,9 @@ int spectre_v2_test(void)
s64 miss_percent;
bool is_p9;
+ // The PMU events we use only work on Power8 or later
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
state = get_sysfs_state();
if (state == UNKNOWN) {
printf("Error: couldn't determine spectre_v2 mitigation state?\n");
@@ -183,6 +186,16 @@ int spectre_v2_test(void)
if (miss_percent > 15) {
printf("Branch misses > 15%% unexpected in this configuration!\n");
printf("Possible mis-match between reported & actual mitigation\n");
+ /*
+ * Such a mismatch may be caused by a guest system
+ * reporting as vulnerable when the host is mitigated.
+ * Return skip code to avoid detecting this as an error.
+ * We are not vulnerable and reporting otherwise, so
+ * missing such a mismatch is safe.
+ */
+ if (state == VULNERABLE)
+ return 4;
+
return 1;
}
break;
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index 932a032bf036..d6ae54663aed 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso
+TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart
CFLAGS += -maltivec
$(OUTPUT)/signal_tm: CFLAGS += -mhtm
diff --git a/tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c b/tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c
new file mode 100644
index 000000000000..e3972264615b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test that a syscall does not get restarted twice, handled by trap_norestart()
+ *
+ * Based on Al's description, and a test for the bug fixed in this commit:
+ *
+ * commit 9a81c16b527528ad307843be5571111aa8d35a80
+ * Author: Al Viro <viro@zeniv.linux.org.uk>
+ * Date: Mon Sep 20 21:48:57 2010 +0100
+ *
+ * powerpc: fix double syscall restarts
+ *
+ * Make sigreturn zero regs->trap, make do_signal() do the same on all
+ * paths. As it is, signal interrupting e.g. read() from fd 512 (==
+ * ERESTARTSYS) with another signal getting unblocked when the first
+ * handler finishes will lead to restart one insn earlier than it ought
+ * to. Same for multiple signals with in-kernel handlers interrupting
+ * that sucker at the same time. Same for multiple signals of any kind
+ * interrupting that sucker on 64bit...
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <signal.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "utils.h"
+
+static void SIGUSR1_handler(int sig)
+{
+ kill(getpid(), SIGUSR2);
+ /*
+ * SIGUSR2 is blocked until the handler exits, at which point it will
+ * be raised again and think there is a restart to be done because the
+ * pending restarted syscall has 512 (ERESTARTSYS) in r3. The second
+ * restart will retreat NIP another 4 bytes, to the fail case branch.
+ */
+}
+
+static void SIGUSR2_handler(int sig)
+{
+}
+
+static ssize_t raw_read(int fd, void *buf, size_t count)
+{
+ register long nr asm("r0") = __NR_read;
+ register long _fd asm("r3") = fd;
+ register void *_buf asm("r4") = buf;
+ register size_t _count asm("r5") = count;
+
+ asm volatile(
+" b 0f \n"
+" b 1f \n"
+" 0: sc 0 \n"
+" bns 2f \n"
+" neg %0,%0 \n"
+" b 2f \n"
+" 1: \n"
+" li %0,%4 \n"
+" 2: \n"
+ : "+r"(_fd), "+r"(nr), "+r"(_buf), "+r"(_count)
+ : "i"(-ENOANO)
+ : "memory", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "ctr", "cr0");
+
+ if (_fd < 0) {
+ errno = -_fd;
+ _fd = -1;
+ }
+
+ return _fd;
+}
+
+#define DATA "test 123"
+#define DLEN (strlen(DATA)+1)
+
+int test_restart(void)
+{
+ int pipefd[2];
+ pid_t pid;
+ char buf[512];
+
+ if (pipe(pipefd) == -1) {
+ perror("pipe");
+ exit(EXIT_FAILURE);
+ }
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ exit(EXIT_FAILURE);
+ }
+
+ if (pid == 0) { /* Child reads from pipe */
+ struct sigaction act;
+ int fd;
+
+ memset(&act, 0, sizeof(act));
+ sigaddset(&act.sa_mask, SIGUSR2);
+ act.sa_handler = SIGUSR1_handler;
+ act.sa_flags = SA_RESTART;
+ if (sigaction(SIGUSR1, &act, NULL) == -1) {
+ perror("sigaction");
+ exit(EXIT_FAILURE);
+ }
+
+ memset(&act, 0, sizeof(act));
+ act.sa_handler = SIGUSR2_handler;
+ act.sa_flags = SA_RESTART;
+ if (sigaction(SIGUSR2, &act, NULL) == -1) {
+ perror("sigaction");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Let's get ERESTARTSYS into r3 */
+ while ((fd = dup(pipefd[0])) != 512) {
+ if (fd == -1) {
+ perror("dup");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ if (raw_read(fd, buf, 512) == -1) {
+ if (errno == ENOANO) {
+ fprintf(stderr, "Double restart moved restart before sc instruction.\n");
+ _exit(EXIT_FAILURE);
+ }
+ perror("read");
+ exit(EXIT_FAILURE);
+ }
+
+ if (strncmp(buf, DATA, DLEN)) {
+ fprintf(stderr, "bad test string %s\n", buf);
+ exit(EXIT_FAILURE);
+ }
+
+ return 0;
+
+ } else {
+ int wstatus;
+
+ usleep(100000); /* Hack to get reader waiting */
+ kill(pid, SIGUSR1);
+ usleep(100000);
+ if (write(pipefd[1], DATA, DLEN) != DLEN) {
+ perror("write");
+ exit(EXIT_FAILURE);
+ }
+ close(pipefd[0]);
+ close(pipefd[1]);
+ if (wait(&wstatus) == -1) {
+ perror("wait");
+ exit(EXIT_FAILURE);
+ }
+ if (!WIFEXITED(wstatus)) {
+ fprintf(stderr, "child exited abnormally\n");
+ exit(EXIT_FAILURE);
+ }
+
+ FAIL_IF(WEXITSTATUS(wstatus) != EXIT_SUCCESS);
+
+ return 0;
+ }
+}
+
+int main(void)
+{
+ test_harness_set_timeout(10);
+ return test_harness(test_restart, "sig sys restart");
+}
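A minimal sketch (not from the patch) of the signal-masking mechanism the test's SIGUSR1 handler relies on: a signal listed in sa_mask is held pending while the handler runs and is only delivered once the handler returns, which is the window in which the second restart can be provoked. The handler names and messages here are illustrative.

#include <signal.h>
#include <string.h>
#include <unistd.h>

static void usr1(int sig)
{
	(void)sig;
	/* SIGUSR2 is in sa_mask, so this kill() is queued, not delivered. */
	kill(getpid(), SIGUSR2);
	write(STDOUT_FILENO, "in USR1, USR2 pending\n", 22);
}

static void usr2(int sig)
{
	(void)sig;
	write(STDOUT_FILENO, "USR2 delivered after USR1 returned\n", 35);
}

int main(void)
{
	struct sigaction act;

	memset(&act, 0, sizeof(act));
	sigaddset(&act.sa_mask, SIGUSR2);	/* block USR2 while in USR1 */
	act.sa_handler = usr1;
	sigaction(SIGUSR1, &act, NULL);

	memset(&act, 0, sizeof(act));
	act.sa_handler = usr2;
	sigaction(SIGUSR2, &act, NULL);

	kill(getpid(), SIGUSR1);
	return 0;
}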
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
index 7fc0623d85c3..9c39f55a58ff 100644
--- a/tools/testing/selftests/powerpc/stringloops/Makefile
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -8,7 +8,7 @@ build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null
TEST_GEN_PROGS := memcmp_64 strlen
-$(OUTPUT)/memcmp_64: memcmp.c
+$(OUTPUT)/memcmp_64: memcmp.c ../utils.c
$(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec
ifeq ($(build_32bit),1)
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
index b1fa7546957f..cb2f18855c8d 100644
--- a/tools/testing/selftests/powerpc/stringloops/memcmp.c
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -2,7 +2,9 @@
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/mman.h>
#include <time.h>
+
#include "utils.h"
#define SIZE 256
@@ -13,6 +15,9 @@
#define LARGE_MAX_OFFSET 32
#define LARGE_SIZE_START 4096
+/* This is big enough to fit LARGE_SIZE and works on 4K & 64K kernels */
+#define MAP_SIZE (64 * 1024)
+
#define MAX_OFFSET_DIFF_S1_S2 48
int vmx_count;
@@ -68,25 +73,25 @@ static void test_one(char *s1, char *s2, unsigned long max_offset,
static int testcase(bool islarge)
{
- char *s1;
- char *s2;
- unsigned long i;
-
- unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE);
- unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
- int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
-
- s1 = memalign(128, alloc_size);
- if (!s1) {
- perror("memalign");
- exit(1);
- }
+ unsigned long i, comp_size, alloc_size;
+ char *p, *s1, *s2;
+ int iterations;
- s2 = memalign(128, alloc_size);
- if (!s2) {
- perror("memalign");
- exit(1);
- }
+ comp_size = (islarge ? LARGE_SIZE : SIZE);
+ alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
+ iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
+
+ p = mmap(NULL, 4 * MAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ FAIL_IF(p == MAP_FAILED);
+
+ /* Put s1/s2 at the end of a page */
+ s1 = p + MAP_SIZE - alloc_size;
+ s2 = p + 3 * MAP_SIZE - alloc_size;
+
+ /* And unmap the subsequent page to force a fault if we overread */
+ munmap(p + MAP_SIZE, MAP_SIZE);
+ munmap(p + 3 * MAP_SIZE, MAP_SIZE);
srandom(time(0));
@@ -147,6 +152,11 @@ static int testcase(bool islarge)
static int testcases(void)
{
+#ifdef __powerpc64__
+ // vcmpequd used in memcmp_64.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+#endif
+
testcase(0);
testcase(1);
return 0;
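The rewritten testcase() above places the comparison buffers at the end of a mapping and unmaps the following page, so any overread by the assembly memcmp faults instead of silently reading garbage. A minimal sketch of that guard-page technique, assuming the same 64K MAP_SIZE; the buffer size and fill value are arbitrary:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define MAP_SIZE (64 * 1024)

int main(void)
{
	size_t alloc = 256;
	char *p = mmap(NULL, 2 * MAP_SIZE, PROT_READ | PROT_WRITE,
		       MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	/* Buffer ends exactly at the page boundary ... */
	char *buf = p + MAP_SIZE - alloc;
	/* ... and the next page is unmapped, so buf[alloc] would SIGSEGV. */
	munmap(p + MAP_SIZE, MAP_SIZE);

	memset(buf, 0xa5, alloc);	/* in-bounds access is fine */
	printf("guarded buffer at %p\n", (void *)buf);
	return 0;
}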
diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
index cc4930467235..7887f78cf072 100644
--- a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
+++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
@@ -3,9 +3,13 @@
.data
.balign 8
-message:
+success_message:
.ascii "success: switch_endian_test\n\0"
+ .balign 8
+failure_message:
+ .ascii "failure: switch_endian_test\n\0"
+
.section ".toc"
.balign 8
pattern:
@@ -64,6 +68,9 @@ FUNC_START(_start)
li r0, __NR_switch_endian
sc
+ tdi 0, 0, 0x48 // b +8 if the endian was switched
+ b .Lfail // exit if endian didn't switch
+
#include "check-reversed.S"
/* Flip back, r0 already has the switch syscall number */
@@ -71,12 +78,20 @@ FUNC_START(_start)
#include "check.S"
+ ld r4, success_message@got(%r2)
+ li r5, 28 // strlen(success_message)
+ li r14, 0 // exit status
+.Lout:
li r0, __NR_write
li r3, 1 /* stdout */
- ld r4, message@got(%r2)
- li r5, 28 /* strlen(message3) */
sc
li r0, __NR_exit
- li r3, 0
+ mr r3, r14
sc
b .
+
+.Lfail:
+ ld r4, failure_message@got(%r2)
+ li r5, 28 // strlen(failure_message)
+ li r14, 1
+ b .Lout
diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile
index 01b22775ca87..b63f8459c704 100644
--- a/tools/testing/selftests/powerpc/syscalls/Makefile
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-TEST_GEN_PROGS := ipc_unmuxed
+TEST_GEN_PROGS := ipc_unmuxed rtas_filter
CFLAGS += -I../../../../../usr/include
diff --git a/tools/testing/selftests/powerpc/syscalls/rtas_filter.c b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c
new file mode 100644
index 000000000000..03b487f18d00
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2005-2020 IBM Corporation.
+ *
+ * Includes code from librtas (https://github.com/ibm-power-utilities/librtas/)
+ */
+
+#include <byteswap.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <errno.h>
+#include "utils.h"
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define cpu_to_be32(x) bswap_32(x)
+#define be32_to_cpu(x) bswap_32(x)
+#else
+#define cpu_to_be32(x) (x)
+#define be32_to_cpu(x) (x)
+#endif
+
+#define RTAS_IO_ASSERT -1098 /* Unexpected I/O Error */
+#define RTAS_UNKNOWN_OP -1099 /* No Firmware Implementation of Function */
+#define BLOCK_SIZE 4096
+#define PAGE_SIZE 4096
+#define MAX_PAGES 64
+
+static const char *ofdt_rtas_path = "/proc/device-tree/rtas";
+
+typedef uint32_t __be32;
+struct rtas_args {
+ __be32 token;
+ __be32 nargs;
+ __be32 nret;
+ __be32 args[16];
+ __be32 *rets; /* Pointer to return values in args[]. */
+};
+
+struct region {
+ uint64_t addr;
+ uint32_t size;
+ struct region *next;
+};
+
+int read_entire_file(int fd, char **buf, size_t *len)
+{
+ size_t buf_size = 0;
+ size_t off = 0;
+ int rc;
+
+ *buf = NULL;
+ do {
+ buf_size += BLOCK_SIZE;
+ if (*buf == NULL)
+ *buf = malloc(buf_size);
+ else
+ *buf = realloc(*buf, buf_size);
+
+ if (*buf == NULL)
+ return -ENOMEM;
+
+ rc = read(fd, *buf + off, BLOCK_SIZE);
+ if (rc < 0)
+ return -EIO;
+
+ off += rc;
+ } while (rc == BLOCK_SIZE);
+
+ if (len)
+ *len = off;
+
+ return 0;
+}
+
+static int open_prop_file(const char *prop_path, const char *prop_name, int *fd)
+{
+ char *path;
+ int len;
+
+ /* allocate enough for two strings, a slash and a trailing NULL */
+ len = strlen(prop_path) + strlen(prop_name) + 1 + 1;
+ path = malloc(len);
+ if (path == NULL)
+ return -ENOMEM;
+
+ snprintf(path, len, "%s/%s", prop_path, prop_name);
+
+ *fd = open(path, O_RDONLY);
+ free(path);
+ if (*fd < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int get_property(const char *prop_path, const char *prop_name,
+ char **prop_val, size_t *prop_len)
+{
+ int rc, fd;
+
+ rc = open_prop_file(prop_path, prop_name, &fd);
+ if (rc)
+ return rc;
+
+ rc = read_entire_file(fd, prop_val, prop_len);
+ close(fd);
+
+ return rc;
+}
+
+int rtas_token(const char *call_name)
+{
+ char *prop_buf = NULL;
+ size_t len;
+ int rc;
+
+ rc = get_property(ofdt_rtas_path, call_name, &prop_buf, &len);
+ if (rc < 0) {
+ rc = RTAS_UNKNOWN_OP;
+ goto err;
+ }
+
+ rc = be32_to_cpu(*(int *)prop_buf);
+
+err:
+ free(prop_buf);
+ return rc;
+}
+
+static int read_kregion_bounds(struct region *kregion)
+{
+ char *buf;
+ int fd;
+ int rc;
+
+ fd = open("/proc/ppc64/rtas/rmo_buffer", O_RDONLY);
+ if (fd < 0) {
+ printf("Could not open rmo_buffer file\n");
+ return RTAS_IO_ASSERT;
+ }
+
+ rc = read_entire_file(fd, &buf, NULL);
+ close(fd);
+ if (rc) {
+ free(buf);
+ return rc;
+ }
+
+ sscanf(buf, "%" SCNx64 " %x", &kregion->addr, &kregion->size);
+ free(buf);
+
+ if (!(kregion->size && kregion->addr) ||
+ (kregion->size > (PAGE_SIZE * MAX_PAGES))) {
+ printf("Unexpected kregion bounds\n");
+ return RTAS_IO_ASSERT;
+ }
+
+ return 0;
+}
+
+static int rtas_call(const char *name, int nargs,
+ int nrets, ...)
+{
+ struct rtas_args args;
+ __be32 *rets[16];
+ int i, rc, token;
+ va_list ap;
+
+ va_start(ap, nrets);
+
+ token = rtas_token(name);
+ if (token == RTAS_UNKNOWN_OP) {
+ // We don't care if the call doesn't exist
+ printf("call '%s' not available, skipping...", name);
+ rc = RTAS_UNKNOWN_OP;
+ goto err;
+ }
+
+ args.token = cpu_to_be32(token);
+ args.nargs = cpu_to_be32(nargs);
+ args.nret = cpu_to_be32(nrets);
+
+ for (i = 0; i < nargs; i++)
+ args.args[i] = (__be32) va_arg(ap, unsigned long);
+
+ for (i = 0; i < nrets; i++)
+ rets[i] = (__be32 *) va_arg(ap, unsigned long);
+
+ rc = syscall(__NR_rtas, &args);
+ if (rc) {
+ rc = -errno;
+ goto err;
+ }
+
+ if (nrets) {
+ *(rets[0]) = be32_to_cpu(args.args[nargs]);
+
+ for (i = 1; i < nrets; i++) {
+ *(rets[i]) = args.args[nargs + i];
+ }
+ }
+
+err:
+ va_end(ap);
+ return rc;
+}
+
+static int test(void)
+{
+ struct region rmo_region;
+ uint32_t rmo_start;
+ uint32_t rmo_end;
+ __be32 rets[1];
+ int rc;
+
+ // Test a legitimate harmless call
+ // Expected: call succeeds
+ printf("Test a permitted call, no parameters... ");
+ rc = rtas_call("get-time-of-day", 0, 1, rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP);
+
+ // Test a prohibited call
+ // Expected: call returns -EINVAL
+ printf("Test a prohibited call... ");
+ rc = rtas_call("nvram-fetch", 0, 1, rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ // Get RMO
+ rc = read_kregion_bounds(&rmo_region);
+ if (rc) {
+ printf("Couldn't read RMO region bounds, skipping remaining cases\n");
+ return 0;
+ }
+ rmo_start = rmo_region.addr;
+ rmo_end = rmo_start + rmo_region.size - 1;
+ printf("RMO range: %08x - %08x\n", rmo_start, rmo_end);
+
+ // Test a permitted call, user-supplied size, buffer inside RMO
+ // Expected: call succeeds
+ printf("Test a permitted call, user-supplied size, buffer inside RMO... ");
+ rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start),
+ cpu_to_be32(rmo_end - rmo_start + 1), rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP);
+
+ // Test a permitted call, user-supplied size, buffer start outside RMO
+ // Expected: call returns -EINVAL
+ printf("Test a permitted call, user-supplied size, buffer start outside RMO... ");
+ rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_end + 1),
+ cpu_to_be32(4000), rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ // Test a permitted call, user-supplied size, buffer end outside RMO
+ // Expected: call returns -EINVAL
+ printf("Test a permitted call, user-supplied size, buffer end outside RMO... ");
+ rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start),
+ cpu_to_be32(rmo_end - rmo_start + 2), rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ // Test a permitted call, fixed size, buffer end outside RMO
+ // Expected: call returns -EINVAL
+ printf("Test a permitted call, fixed size, buffer end outside RMO... ");
+ rc = rtas_call("ibm,configure-connector", 2, 1, cpu_to_be32(rmo_end - 4000), 0, rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test, "rtas_filter");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c
index 977558497c16..29e5f26af7b9 100644
--- a/tools/testing/selftests/powerpc/tm/tm-poison.c
+++ b/tools/testing/selftests/powerpc/tm/tm-poison.c
@@ -26,7 +26,7 @@
int tm_poison_test(void)
{
- int pid;
+ int cpu, pid;
cpu_set_t cpuset;
uint64_t poison = 0xdeadbeefc0dec0fe;
uint64_t unknown = 0;
@@ -35,10 +35,13 @@ int tm_poison_test(void)
SKIP_IF(!have_htm());
- /* Attach both Child and Parent to CPU 0 */
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+
+ // Attach both Child and Parent to the same CPU
CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
- sched_setaffinity(0, sizeof(cpuset), &cpuset);
+ CPU_SET(cpu, &cpuset);
+ FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0);
pid = fork();
if (!pid) {
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
index 17becf3dcee4..794d574db784 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -33,19 +33,13 @@
#include "utils.h"
#include "tm.h"
-int num_loops = 10000;
+int num_loops = 1000000;
int passed = 1;
void tfiar_tfhar(void *in)
{
- int i, cpu;
unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd;
- cpu_set_t cpuset;
-
- CPU_ZERO(&cpuset);
- cpu = (unsigned long)in >> 1;
- CPU_SET(cpu, &cpuset);
- sched_setaffinity(0, sizeof(cpuset), &cpuset);
+ int i;
/* TFIAR: Last bit has to be high so userspace can read register */
tfiar = ((unsigned long)in) + 1;
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
index 601f0c1d450d..c75960af8018 100644
--- a/tools/testing/selftests/powerpc/tm/tm-trap.c
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -247,8 +247,7 @@ void *pong(void *not_used)
int tm_trap_test(void)
{
uint16_t k = 1;
-
- int rc;
+ int cpu, rc;
pthread_attr_t attr;
cpu_set_t cpuset;
@@ -267,9 +266,12 @@ int tm_trap_test(void)
usr1_sa.sa_sigaction = usr1_signal_handler;
sigaction(SIGUSR1, &usr1_sa, NULL);
- /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+
+ // Set only one CPU in the mask. Both threads will be bound to that CPU.
CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
+ CPU_SET(cpu, &cpuset);
/* Init pthread attribute */
rc = pthread_attr_init(&attr);
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index 2ca2fccb0a3e..a1348a5f721a 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -338,16 +338,19 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
int tm_unavailable_test(void)
{
- int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
+ int cpu, rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
pthread_t t1;
pthread_attr_t attr;
cpu_set_t cpuset;
SKIP_IF(!have_htm());
- /* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+
+ // Set only one CPU in the mask. Both threads will be bound to that CPU.
CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
+ CPU_SET(cpu, &cpuset);
/* Init pthread attribute. */
rc = pthread_attr_init(&attr);
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
index c402464b038f..c5a1e5c163fc 100644
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -6,9 +6,8 @@
#ifndef _SELFTESTS_POWERPC_TM_TM_H
#define _SELFTESTS_POWERPC_TM_TM_H
-#include <asm/tm.h>
-#include <asm/cputable.h>
#include <stdbool.h>
+#include <asm/tm.h>
#include "utils.h"
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index 5ee0e98c4896..1f36ee1a909a 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -10,12 +10,12 @@
#include <fcntl.h>
#include <link.h>
#include <sched.h>
-#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
+#include <sys/sysinfo.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <unistd.h>
@@ -88,28 +88,40 @@ void *get_auxv_entry(int type)
int pick_online_cpu(void)
{
- cpu_set_t mask;
- int cpu;
+ int ncpus, cpu = -1;
+ cpu_set_t *mask;
+ size_t size;
+
+ ncpus = get_nprocs_conf();
+ size = CPU_ALLOC_SIZE(ncpus);
+ mask = CPU_ALLOC(ncpus);
+ if (!mask) {
+ perror("malloc");
+ return -1;
+ }
- CPU_ZERO(&mask);
+ CPU_ZERO_S(size, mask);
- if (sched_getaffinity(0, sizeof(mask), &mask)) {
+ if (sched_getaffinity(0, size, mask)) {
perror("sched_getaffinity");
- return -1;
+ goto done;
}
/* We prefer a primary thread, but skip 0 */
- for (cpu = 8; cpu < CPU_SETSIZE; cpu += 8)
- if (CPU_ISSET(cpu, &mask))
- return cpu;
+ for (cpu = 8; cpu < ncpus; cpu += 8)
+ if (CPU_ISSET_S(cpu, size, mask))
+ goto done;
/* Search for anything, but in reverse */
- for (cpu = CPU_SETSIZE - 1; cpu >= 0; cpu--)
- if (CPU_ISSET(cpu, &mask))
- return cpu;
+ for (cpu = ncpus - 1; cpu >= 0; cpu--)
+ if (CPU_ISSET_S(cpu, size, mask))
+ goto done;
printf("No cpus in affinity mask?!\n");
- return -1;
+
+done:
+ CPU_FREE(mask);
+ return cpu;
}
bool is_ppc64le(void)
@@ -260,36 +272,32 @@ int perf_event_reset(int fd)
return 0;
}
-static void sigill_handler(int signr, siginfo_t *info, void *unused)
+int using_hash_mmu(bool *using_hash)
{
- static int warned = 0;
- ucontext_t *ctx = (ucontext_t *)unused;
- unsigned long *pc = &UCONTEXT_NIA(ctx);
-
- /* mtspr 3,RS to check for move to DSCR below */
- if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
- if (!warned++)
- printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
- *pc += 4;
- } else {
- printf("SIGILL at %p\n", pc);
- abort();
- }
-}
+ char line[128];
+ FILE *f;
+ int rc;
-void set_dscr(unsigned long val)
-{
- static int init = 0;
- struct sigaction sa;
-
- if (!init) {
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = sigill_handler;
- sa.sa_flags = SA_SIGINFO;
- if (sigaction(SIGILL, &sa, NULL))
- perror("sigill_handler");
- init = 1;
+ f = fopen("/proc/cpuinfo", "r");
+ FAIL_IF(!f);
+
+ rc = 0;
+ while (fgets(line, sizeof(line), f) != NULL) {
+ if (!strcmp(line, "MMU : Hash\n") ||
+ !strcmp(line, "platform : Cell\n") ||
+ !strcmp(line, "platform : PowerMac\n")) {
+ *using_hash = true;
+ goto out;
+ }
+
+ if (strcmp(line, "MMU : Radix\n") == 0) {
+ *using_hash = false;
+ goto out;
+ }
}
- asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+ rc = -1;
+out:
+ fclose(f);
+ return rc;
}
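The pick_online_cpu() rewrite above switches from a fixed-size cpu_set_t to dynamically sized sets, so it keeps working on systems with more CPUs than CPU_SETSIZE. A minimal sketch of that CPU_ALLOC pattern, not taken from the patch; it just lists the CPUs in the caller's affinity mask:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <sys/sysinfo.h>

int main(void)
{
	int ncpus = get_nprocs_conf();
	size_t size = CPU_ALLOC_SIZE(ncpus);
	cpu_set_t *mask = CPU_ALLOC(ncpus);

	if (!mask)
		return 1;

	CPU_ZERO_S(size, mask);
	if (sched_getaffinity(0, size, mask) == 0) {
		for (int cpu = 0; cpu < ncpus; cpu++)
			if (CPU_ISSET_S(cpu, size, mask))
				printf("cpu %d is usable\n", cpu);
	}

	CPU_FREE(mask);
	return 0;
}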
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index 4bca5a9327a4..bed4b5318a86 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -2,7 +2,9 @@
/fd-001-lookup
/fd-002-posix-eq
/fd-003-kthread
+/proc-fsconfig-hidepid
/proc-loadavg-001
+/proc-multiple-procfs
/proc-pid-vm
/proc-self-map-files-001
/proc-self-map-files-002
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index a8ed0f684829..8be8a03d2973 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -19,5 +19,7 @@ TEST_GEN_PROGS += self
TEST_GEN_PROGS += setns-dcache
TEST_GEN_PROGS += setns-sysvipc
TEST_GEN_PROGS += thread-self
+TEST_GEN_PROGS += proc-multiple-procfs
+TEST_GEN_PROGS += proc-fsconfig-hidepid
include ../lib.mk
diff --git a/tools/testing/selftests/proc/proc-fsconfig-hidepid.c b/tools/testing/selftests/proc/proc-fsconfig-hidepid.c
new file mode 100644
index 000000000000..b9af8f537185
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-fsconfig-hidepid.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2020 Alexey Gladkov <gladkov.alexey@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <linux/mount.h>
+#include <linux/unistd.h>
+
+static inline int fsopen(const char *fsname, unsigned int flags)
+{
+ return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int fsconfig(int fd, unsigned int cmd, const char *key, const void *val, int aux)
+{
+ return syscall(__NR_fsconfig, fd, cmd, key, val, aux);
+}
+
+int main(void)
+{
+ int fsfd, ret;
+ int hidepid = 2;
+
+ assert((fsfd = fsopen("proc", 0)) != -1);
+
+ ret = fsconfig(fsfd, FSCONFIG_SET_BINARY, "hidepid", &hidepid, 0);
+ assert(ret == -1);
+ assert(errno == EINVAL);
+
+ assert(!fsconfig(fsfd, FSCONFIG_SET_STRING, "hidepid", "2", 0));
+ assert(!fsconfig(fsfd, FSCONFIG_SET_STRING, "hidepid", "invisible", 0));
+
+ assert(!close(fsfd));
+
+ return 0;
+}
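The test above only exercises fsopen()/fsconfig(); for context, a minimal sketch (not part of the test) of the full new-mount-API sequence those calls are a prefix of. It assumes a kernel and uapi headers new enough to provide __NR_fsmount and __NR_move_mount, requires root, and uses /tmp/proc as an arbitrary, pre-existing target directory:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/mount.h>

static int fsopen(const char *fsname, unsigned int flags)
{
	return syscall(__NR_fsopen, fsname, flags);
}

static int fsconfig(int fd, unsigned int cmd, const char *key,
		    const void *val, int aux)
{
	return syscall(__NR_fsconfig, fd, cmd, key, val, aux);
}

static int fsmount(int fd, unsigned int flags, unsigned int mount_attrs)
{
	return syscall(__NR_fsmount, fd, flags, mount_attrs);
}

static int move_mount(int from_dfd, const char *from_path,
		      int to_dfd, const char *to_path, unsigned int flags)
{
	return syscall(__NR_move_mount, from_dfd, from_path,
		       to_dfd, to_path, flags);
}

int main(void)
{
	int fsfd, mfd;

	fsfd = fsopen("proc", 0);
	if (fsfd < 0 ||
	    fsconfig(fsfd, FSCONFIG_SET_STRING, "hidepid", "2", 0) ||
	    fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)) {
		perror("fsopen/fsconfig");
		return 1;
	}

	mfd = fsmount(fsfd, 0, 0);
	if (mfd < 0 ||
	    move_mount(mfd, "", AT_FDCWD, "/tmp/proc", MOVE_MOUNT_F_EMPTY_PATH)) {
		perror("fsmount/move_mount");
		return 1;
	}

	puts("proc mounted at /tmp/proc with hidepid=2");
	return 0;
}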
diff --git a/tools/testing/selftests/proc/proc-multiple-procfs.c b/tools/testing/selftests/proc/proc-multiple-procfs.c
new file mode 100644
index 000000000000..ab912ad95dab
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-multiple-procfs.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2020 Alexey Gladkov <gladkov.alexey@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+int main(void)
+{
+ struct stat proc_st1, proc_st2;
+ char procbuff[] = "/tmp/proc.XXXXXX/meminfo";
+ char procdir1[] = "/tmp/proc.XXXXXX";
+ char procdir2[] = "/tmp/proc.XXXXXX";
+
+ assert(mkdtemp(procdir1) != NULL);
+ assert(mkdtemp(procdir2) != NULL);
+
+ assert(!mount("proc", procdir1, "proc", 0, "hidepid=1"));
+ assert(!mount("proc", procdir2, "proc", 0, "hidepid=2"));
+
+ snprintf(procbuff, sizeof(procbuff), "%s/meminfo", procdir1);
+ assert(!stat(procbuff, &proc_st1));
+
+ snprintf(procbuff, sizeof(procbuff), "%s/meminfo", procdir2);
+ assert(!stat(procbuff, &proc_st2));
+
+ umount(procdir1);
+ umount(procdir2);
+
+ assert(proc_st1.st_dev != proc_st2.st_dev);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/pstore/pstore_tests b/tools/testing/selftests/pstore/pstore_tests
index 1cef54458aff..2aa9a3852a84 100755
--- a/tools/testing/selftests/pstore/pstore_tests
+++ b/tools/testing/selftests/pstore/pstore_tests
@@ -10,7 +10,7 @@
. ./common_tests
prlog -n "Checking pstore console is registered ... "
-dmesg | grep -q "console \[pstore"
+dmesg | grep -Eq "console \[(pstore|${backend})"
show_result $?
prlog -n "Checking /dev/pmsg0 exists ... "
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index c0dd10257df5..f7911aaeb007 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -35,6 +35,8 @@
#define CLOCK_INVALID -1
#endif
+#define NSEC_PER_SEC 1000000000LL
+
/* clock_adjtime is not available in GLIBC < 2.14 */
#if !__GLIBC_PREREQ(2, 14)
#include <sys/syscall.h>
@@ -132,6 +134,8 @@ static void usage(char *progname)
" 1 - external time stamp\n"
" 2 - periodic output\n"
" -p val enable output with a period of 'val' nanoseconds\n"
+ " -H val set output phase to 'val' nanoseconds (requires -p)\n"
+ " -w val set output pulse width to 'val' nanoseconds (requires -p)\n"
" -P val enable or disable (val=1|0) the system clock PPS\n"
" -s set the ptp clock time from the system time\n"
" -S set the system time from the ptp clock time\n"
@@ -169,7 +173,6 @@ int main(int argc, char *argv[])
int list_pins = 0;
int pct_offset = 0;
int n_samples = 0;
- int perout = -1;
int pin_index = -1, pin_func;
int pps = -1;
int seconds = 0;
@@ -177,10 +180,13 @@ int main(int argc, char *argv[])
int64_t t1, t2, tp;
int64_t interval, offset;
+ int64_t perout_phase = -1;
+ int64_t pulsewidth = -1;
+ int64_t perout = -1;
progname = strrchr(argv[0], '/');
progname = progname ? 1+progname : argv[0];
- while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:z"))) {
+ while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:p:P:sSt:T:w:z"))) {
switch (c) {
case 'c':
capabilities = 1;
@@ -197,6 +203,9 @@ int main(int argc, char *argv[])
case 'g':
gettime = 1;
break;
+ case 'H':
+ perout_phase = atoll(optarg);
+ break;
case 'i':
index = atoi(optarg);
break;
@@ -215,7 +224,7 @@ int main(int argc, char *argv[])
}
break;
case 'p':
- perout = atoi(optarg);
+ perout = atoll(optarg);
break;
case 'P':
pps = atoi(optarg);
@@ -233,6 +242,9 @@ int main(int argc, char *argv[])
settime = 3;
seconds = atoi(optarg);
break;
+ case 'w':
+ pulsewidth = atoi(optarg);
+ break;
case 'z':
flagtest = 1;
break;
@@ -269,14 +281,16 @@ int main(int argc, char *argv[])
" %d programmable periodic signals\n"
" %d pulse per second\n"
" %d programmable pins\n"
- " %d cross timestamping\n",
+ " %d cross timestamping\n"
+ " %d adjust_phase\n",
caps.max_adj,
caps.n_alarm,
caps.n_ext_ts,
caps.n_per_out,
caps.pps,
caps.n_pins,
- caps.cross_timestamping);
+ caps.cross_timestamping,
+ caps.adjust_phase);
}
}
@@ -389,6 +403,16 @@ int main(int argc, char *argv[])
}
}
+ if (pulsewidth >= 0 && perout < 0) {
+ puts("-w can only be specified together with -p");
+ return -1;
+ }
+
+ if (perout_phase >= 0 && perout < 0) {
+ puts("-H can only be specified together with -p");
+ return -1;
+ }
+
if (perout >= 0) {
if (clock_gettime(clkid, &ts)) {
perror("clock_gettime");
@@ -396,11 +420,24 @@ int main(int argc, char *argv[])
}
memset(&perout_request, 0, sizeof(perout_request));
perout_request.index = index;
- perout_request.start.sec = ts.tv_sec + 2;
- perout_request.start.nsec = 0;
- perout_request.period.sec = 0;
- perout_request.period.nsec = perout;
- if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) {
+ perout_request.period.sec = perout / NSEC_PER_SEC;
+ perout_request.period.nsec = perout % NSEC_PER_SEC;
+ perout_request.flags = 0;
+ if (pulsewidth >= 0) {
+ perout_request.flags |= PTP_PEROUT_DUTY_CYCLE;
+ perout_request.on.sec = pulsewidth / NSEC_PER_SEC;
+ perout_request.on.nsec = pulsewidth % NSEC_PER_SEC;
+ }
+ if (perout_phase >= 0) {
+ perout_request.flags |= PTP_PEROUT_PHASE;
+ perout_request.phase.sec = perout_phase / NSEC_PER_SEC;
+ perout_request.phase.nsec = perout_phase % NSEC_PER_SEC;
+ } else {
+ perout_request.start.sec = ts.tv_sec + 2;
+ perout_request.start.nsec = 0;
+ }
+
+ if (ioctl(fd, PTP_PEROUT_REQUEST2, &perout_request)) {
perror("PTP_PEROUT_REQUEST");
} else {
puts("periodic output request okay");
diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore
index 7bebf9534a86..792318aaa30c 100644
--- a/tools/testing/selftests/ptrace/.gitignore
+++ b/tools/testing/selftests/ptrace/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
get_syscall_info
peeksiginfo
+vmaccess
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
index 93e80a42249a..d6e5ce084b1c 100755
--- a/tools/testing/selftests/rcutorture/bin/configinit.sh
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -32,11 +32,11 @@ if test -z "$TORTURE_TRUST_MAKE"
then
make clean > $resdir/Make.clean 2>&1
fi
-make $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
+make $TORTURE_KMAKE_ARG $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
mv .config .config.sav
sh $T/upd.sh < .config.sav > .config
cp .config .config.new
-yes '' | make oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
+yes '' | make $TORTURE_KMAKE_ARG oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
# verify new config matches specification.
configcheck.sh .config $c
diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh
new file mode 100755
index 000000000000..0e4c0b2eb7f0
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Scan standard input for error messages, dumping any found to standard
+# output.
+#
+# Usage: console-badness.sh
+#
+# Copyright (C) 2020 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
+grep -v 'ODEBUG: ' |
+grep -v 'This means that this is a DEBUG kernel and it is' |
+grep -v 'Warning: unable to open an initial console'
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index 12810229fddc..51f3464b96d3 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -215,9 +215,6 @@ identify_qemu_args () {
then
echo -device spapr-vlan,netdev=net0,mac=$TORTURE_QEMU_MAC
echo -netdev bridge,br=br0,id=net0
- elif test -n "$TORTURE_QEMU_INTERACTIVE"
- then
- echo -net nic -net user
fi
;;
esac
@@ -234,7 +231,7 @@ identify_qemu_args () {
# Returns the number of virtual CPUs available to the aggregate of the
# guest OSes.
identify_qemu_vcpus () {
- lscpu | grep '^CPU(s):' | sed -e 's/CPU(s)://'
+ lscpu | grep '^CPU(s):' | sed -e 's/CPU(s)://' -e 's/[ ]*//g'
}
# print_bug
@@ -275,3 +272,21 @@ specify_qemu_cpus () {
esac
fi
}
+
+# specify_qemu_net qemu-args
+#
+# Appends a string containing "-net none" to qemu-args, unless the incoming
+# qemu-args already contains "-net" or unless the TORTURE_QEMU_INTERACTIVE
+# environment variable is set, in which case the string that is to be added is
+# instead "-net nic -net user".
+specify_qemu_net () {
+ if echo $1 | grep -q -e -net
+ then
+ echo $1
+ elif test -n "$TORTURE_QEMU_INTERACTIVE"
+ then
+ echo $1 -net nic -net user
+ else
+ echo $1 -net none
+ fi
+}
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
index 30cb5b27d32e..188b864bc4bf 100755
--- a/tools/testing/selftests/rcutorture/bin/jitter.sh
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -46,6 +46,12 @@ do
exit 0;
fi
+ # Check for stop request.
+ if test -f "$TORTURE_STOPFILE"
+ then
+ exit 1;
+ fi
+
# Set affinity to randomly selected online CPU
if cpus=`grep 1 /sys/devices/system/cpu/*/online 2>&1 |
sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//'`
diff --git a/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
new file mode 100755
index 000000000000..e5cc6b2f195e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# If this was a KCSAN run, collapse the reports in the various console.log
+# files onto pairs of functions.
+#
+# Usage: kcsan-collapse.sh resultsdir
+#
+# Copyright (C) 2020 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+if test -z "$TORTURE_KCONFIG_KCSAN_ARG"
+then
+ exit 0
+fi
+cat $1/*/console.log |
+ grep "BUG: KCSAN: " |
+ sed -e 's/^\[[^]]*] //' |
+ sort |
+ uniq -c |
+ sort -k1nr > $1/kcsan.sum
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index 18d6518504ee..115e1822b26f 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -9,6 +9,12 @@
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
+if test -f "$TORTURE_STOPFILE"
+then
+ echo "kvm-build.sh early exit due to run STOP request"
+ exit 1
+fi
+
config_template=${1}
if test -z "$config_template" -o ! -f "$config_template" -o ! -r "$config_template"
then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
new file mode 100755
index 000000000000..6e65c134e5f1
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
@@ -0,0 +1,108 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Run a group of kvm.sh tests on the specified commits. This currently
+# unconditionally does three-minute runs on each scenario in CFLIST,
+# taking advantage of all available CPUs and trusting the "make" utility.
+# In the short term, adjustments can be made by editing this script and
+# CFLIST. If some adjustments appear to have ongoing value, this script
+# might grow some command-line arguments.
+#
+# Usage: kvm-check-branches.sh commit1 commit2..commit3 commit4 ...
+#
+# This script considers its arguments one at a time. If more elaborate
+# specification of commits is needed, please use "git rev-list" to
+# produce something that this simple script can understand. The reason
+# for retaining the simplicity is that it allows the user to more easily
+# see which commit came from which branch.
+#
+# This script creates a yyyy.mm.dd-hh.mm.ss-group entry in the "res"
+# directory. The calls to kvm.sh create the usual entries, but this script
+# moves them under the yyyy.mm.dd-hh.mm.ss-group entry, each in its own
+# directory numbered in run order, that is, "0001", "0002", and so on.
+# For successful runs, the large build artifacts are removed. Doing this
+# reduces the disk space required by about two orders of magnitude for
+# successful runs.
+#
+# Copyright (C) Facebook, 2020
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+if ! git status > /dev/null 2>&1
+then
+ echo '!!!' This script needs to run in a git archive. 1>&2
+ echo '!!!' Giving up. 1>&2
+ exit 1
+fi
+
+# Remember where we started so that we can get back at the end.
+curcommit="`git status | head -1 | awk '{ print $NF }'`"
+
+nfail=0
+ntry=0
+resdir="tools/testing/selftests/rcutorture/res"
+ds="`date +%Y.%m.%d-%H.%M.%S`-group"
+if ! test -e $resdir
+then
+ mkdir $resdir || :
+fi
+mkdir $resdir/$ds
+echo Results directory: $resdir/$ds
+
+KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
+PATH=${KVM}/bin:$PATH; export PATH
+. functions.sh
+cpus="`identify_qemu_vcpus`"
+echo Using up to $cpus CPUs.
+
+# Each pass through this loop does one command-line argument.
+for gitbr in $@
+do
+ echo ' --- git branch ' $gitbr
+
+ # Each pass through this loop tests one commit.
+ for i in `git rev-list "$gitbr"`
+ do
+ ntry=`expr $ntry + 1`
+ idir=`awk -v ntry="$ntry" 'END { printf "%04d", ntry; }' < /dev/null`
+ echo ' --- commit ' $i from branch $gitbr
+ date
+ mkdir $resdir/$ds/$idir
+ echo $gitbr > $resdir/$ds/$idir/gitbr
+ echo $i >> $resdir/$ds/$idir/gitbr
+
+ # Test the specified commit.
+ git checkout $i > $resdir/$ds/$idir/git-checkout.out 2>&1
+ echo git checkout return code: $? "(Commit $ntry: $i)"
+ kvm.sh --cpus $cpus --duration 3 --trust-make > $resdir/$ds/$idir/kvm.sh.out 2>&1
+ ret=$?
+ echo kvm.sh return code $ret for commit $i from branch $gitbr
+
+ # Move the build products to their resting place.
+ runresdir="`grep -m 1 '^Results directory:' < $resdir/$ds/$idir/kvm.sh.out | sed -e 's/^Results directory://'`"
+ mv $runresdir $resdir/$ds/$idir
+ rrd="`echo $runresdir | sed -e 's,^.*/,,'`"
+ echo Run results: $resdir/$ds/$idir/$rrd
+ if test "$ret" -ne 0
+ then
+ # Failure, so leave all evidence intact.
+ nfail=`expr $nfail + 1`
+ else
+ # Success, so remove large files to save about 1GB.
+ ( cd $resdir/$ds/$idir/$rrd; rm -f */vmlinux */bzImage */System.map */Module.symvers )
+ fi
+ done
+done
+date
+
+# Go back to the original commit.
+git checkout "$curcommit"
+
+if test $nfail -ne 0
+then
+ echo '!!! ' $nfail failures in $ntry 'runs!!!'
+ exit 1
+else
+ echo No failures in $ntry runs.
+ exit 0
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index 9d9a41625dd9..1706cd4466b4 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -41,7 +41,21 @@ else
title="$title ($ngpsps/s)"
fi
echo $title $stopstate $fwdprog
- nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
+ nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | \
+ awk -v sum=0 '
+ {
+ for (i = 0; i <= NF; i++) {
+ sum += $i;
+ if ($i ~ /Batch:/) {
+ sum = 0;
+ i = i + 2;
+ }
+ }
+ }
+
+ END {
+ print sum
+ }'`
if test -z "$nclosecalls"
then
exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh
index 7d3c2be66c64..d4bec538086d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh
@@ -1,12 +1,12 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
-# Analyze a given results directory for rcuperf performance measurements,
+# Analyze a given results directory for rcuscale performance measurements,
# looking for ftrace data. Exits with 0 if data was found, analyzed, and
-# printed. Intended to be invoked from kvm-recheck-rcuperf.sh after
+# printed. Intended to be invoked from kvm-recheck-rcuscale.sh after
# argument checking.
#
-# Usage: kvm-recheck-rcuperf-ftrace.sh resdir
+# Usage: kvm-recheck-rcuscale-ftrace.sh resdir
#
# Copyright (C) IBM Corporation, 2016
#
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
index db0375a57f28..aa745152a525 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
@@ -1,9 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
-# Analyze a given results directory for rcuperf performance measurements.
+# Analyze a given results directory for rcuscale scalability measurements.
#
-# Usage: kvm-recheck-rcuperf.sh resdir
+# Usage: kvm-recheck-rcuscale.sh resdir
#
# Copyright (C) IBM Corporation, 2016
#
@@ -20,7 +20,7 @@ fi
PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
. functions.sh
-if kvm-recheck-rcuperf-ftrace.sh $i
+if kvm-recheck-rcuscale-ftrace.sh $i
then
# ftrace data was successfully analyzed, call it good!
exit 0
@@ -30,12 +30,12 @@ configfile=`echo $i | sed -e 's/^.*\///'`
sed -e 's/^\[[^]]*]//' < $i/console.log |
awk '
-/-perf: .* gps: .* batches:/ {
+/-scale: .* gps: .* batches:/ {
ngps = $9;
nbatches = $11;
}
-/-perf: .*writer-duration/ {
+/-scale: .*writer-duration/ {
gptimes[++n] = $5 / 1000.;
sum += $5 / 1000.;
}
@@ -43,7 +43,7 @@ awk '
END {
newNR = asort(gptimes);
if (newNR <= 0) {
- print "No rcuperf records found???"
+ print "No rcuscale records found???"
exit;
}
pct50 = int(newNR * 50 / 100);
@@ -79,5 +79,5 @@ END {
print "99th percentile grace-period duration: " gptimes[pct99];
print "Maximum grace-period duration: " gptimes[newNR];
print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches;
- print "Computed from rcuperf printk output.";
+ print "Computed from rcuscale printk output.";
}'
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-refscale.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-refscale.sh
new file mode 100755
index 000000000000..35a463dddffe
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-refscale.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Analyze a given results directory for refscale performance measurements.
+#
+# Usage: kvm-recheck-refscale.sh resdir
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+ :
+else
+ echo Unreadable results directory: $i
+ exit 1
+fi
+PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
+. functions.sh
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+
+sed -e 's/^\[[^]]*]//' < $i/console.log | tr -d '\015' |
+awk -v configfile="$configfile" '
+/^[ ]*Runs Time\(ns\) *$/ {
+ if (dataphase + 0 == 0) {
+ dataphase = 1;
+ # print configfile, $0;
+ }
+ next;
+}
+
+/[^ ]*[0-9][0-9]* [0-9][0-9]*\.[0-9][0-9]*$/ {
+ if (dataphase == 1) {
+ # print $0;
+ readertimes[++n] = $2;
+ sum += $2;
+ }
+ next;
+}
+
+{
+ if (dataphase == 1)
+ dataphase = 2;
+ next;
+}
+
+END {
+ print configfile " results:";
+ newNR = asort(readertimes);
+ if (newNR <= 0) {
+ print "No refscale records found???"
+ exit;
+ }
+ medianidx = int(newNR / 2);
+ if (newNR == medianidx * 2)
+ medianvalue = (readertimes[medianidx - 1] + readertimes[medianidx]) / 2;
+ else
+ medianvalue = readertimes[medianidx];
+ points = "Points:";
+ for (i = 1; i <= newNR; i++)
+ points = points " " readertimes[i];
+ print points;
+ print "Average reader duration: " sum / newNR " nanoseconds";
+ print "Minimum reader duration: " readertimes[1];
+ print "Median reader duration: " medianvalue;
+ print "Maximum reader duration: " readertimes[newNR];
+ print "Computed from refscale printk output.";
+}'
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
new file mode 100755
index 000000000000..671bfee4fcef
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Analyze a given results directory for scftorture progress.
+#
+# Usage: kvm-recheck-scf.sh resdir
+#
+# Copyright (C) Facebook, 2020
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+ :
+else
+ echo Unreadable results directory: $i
+ exit 1
+fi
+. functions.sh
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+nscfs="`grep 'scf_invoked_count ver:' $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* scf_invoked_count ver: //' -e 's/ .*$//' | tr -d '\015'`"
+if test -z "$nscfs"
+then
+ echo "$configfile ------- "
+else
+ dur="`sed -e 's/^.* scftorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`"
+ if test -z "$dur"
+ then
+ rate=""
+ else
+ nscfss=`awk -v nscfs=$nscfs -v dur=$dur '
+ BEGIN { print nscfs / dur }' < /dev/null`
+ rate=" ($nscfss/s)"
+ fi
+ echo "${configfile} ------- ${nscfs} SCF handler invocations$rate"
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 0326f4a5ff9c..840a4679a0d7 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -31,6 +31,7 @@ do
head -1 $resdir/log
fi
TORTURE_SUITE="`cat $i/../TORTURE_SUITE`"
+ configfile=`echo $i | sed -e 's,^.*/,,'`
rm -f $i/console.log.*.diags
kvm-recheck-${TORTURE_SUITE}.sh $i
if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137
@@ -43,7 +44,8 @@ do
then
echo QEMU killed
fi
- configcheck.sh $i/.config $i/ConfigFragment
+ configcheck.sh $i/.config $i/ConfigFragment > $T 2>&1
+ cat $T
if test -r $i/Make.oldconfig.err
then
cat $i/Make.oldconfig.err
@@ -55,21 +57,34 @@ do
cat $i/Warnings
fi
else
- if test -f "$i/qemu-cmd"
- then
- print_bug qemu failed
- echo " $i"
- elif test -f "$i/buildonly"
+ if test -f "$i/buildonly"
then
echo Build-only run, no boot/test
configcheck.sh $i/.config $i/ConfigFragment
parse-build.sh $i/Make.out $configfile
+ elif test -f "$i/qemu-cmd"
+ then
+ print_bug qemu failed
+ echo " $i"
else
print_bug Build failed
echo " $i"
fi
fi
done
+ if test -f "$rd/kcsan.sum"
+ then
+ if grep -q CONFIG_KCSAN=y $T
+ then
+ echo "Compiler or architecture does not support KCSAN!"
+ echo Did you forget to switch your compiler with '--kmake-arg CC=<cc-that-supports-kcsan>'?
+ elif test -s "$rd/kcsan.sum"
+ then
+ echo KCSAN summary in $rd/kcsan.sum
+ else
+ echo Clean KCSAN run in $rd
+ fi
+ fi
done
EDITOR=echo kvm-find-errors.sh "${@: -1}" > $T 2>&1
ret=$?
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index e0352304b98b..6dc2b49b85ea 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -44,30 +44,33 @@ then
fi
echo ' ---' `date`: Starting build
echo ' ---' Kconfig fragment at: $config_template >> $resdir/log
-touch $resdir/ConfigFragment.input $resdir/ConfigFragment
-if test -r "$config_dir/CFcommon"
-then
- echo " --- $config_dir/CFcommon" >> $resdir/ConfigFragment.input
- cat < $config_dir/CFcommon >> $resdir/ConfigFragment.input
- config_override.sh $config_dir/CFcommon $config_template > $T/Kc1
- grep '#CHECK#' $config_dir/CFcommon >> $resdir/ConfigFragment
-else
- cp $config_template $T/Kc1
-fi
-echo " --- $config_template" >> $resdir/ConfigFragment.input
-cat $config_template >> $resdir/ConfigFragment.input
-grep '#CHECK#' $config_template >> $resdir/ConfigFragment
-if test -n "$TORTURE_KCONFIG_ARG"
-then
- echo $TORTURE_KCONFIG_ARG | tr -s " " "\012" > $T/cmdline
- echo " --- --kconfig argument" >> $resdir/ConfigFragment.input
- cat $T/cmdline >> $resdir/ConfigFragment.input
- config_override.sh $T/Kc1 $T/cmdline > $T/Kc2
- # Note that "#CHECK#" is not permitted on commandline.
-else
- cp $T/Kc1 $T/Kc2
-fi
-cat $T/Kc2 >> $resdir/ConfigFragment
+touch $resdir/ConfigFragment.input
+
+# Combine additional Kconfig options into an existing set such that
+# newer options win. The first argument is the Kconfig source ID, the
+# second the to-be-updated file within $T, and the third and final the
+# list of additional Kconfig options. Note that a $2.tmp file is
+# created when doing the update.
+config_override_param () {
+ if test -n "$3"
+ then
+ echo $3 | sed -e 's/^ *//' -e 's/ *$//' | tr -s " " "\012" > $T/Kconfig_args
+ echo " --- $1" >> $resdir/ConfigFragment.input
+ cat $T/Kconfig_args >> $resdir/ConfigFragment.input
+ config_override.sh $T/$2 $T/Kconfig_args > $T/$2.tmp
+ mv $T/$2.tmp $T/$2
+ # Note that "#CHECK#" is not permitted on commandline.
+ fi
+}
+
+echo > $T/KcList
+config_override_param "$config_dir/CFcommon" KcList "`cat $config_dir/CFcommon 2> /dev/null`"
+config_override_param "$config_template" KcList "`cat $config_template 2> /dev/null`"
+config_override_param "--gdb options" KcList "$TORTURE_KCONFIG_GDB_ARG"
+config_override_param "--kasan options" KcList "$TORTURE_KCONFIG_KASAN_ARG"
+config_override_param "--kcsan options" KcList "$TORTURE_KCONFIG_KCSAN_ARG"
+config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG"
+cp $T/KcList $resdir/ConfigFragment
base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
@@ -80,7 +83,7 @@ then
ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
# Arch-independent indicator
touch $resdir/builtkernel
-elif kvm-build.sh $T/Kc2 $resdir
+elif kvm-build.sh $T/KcList $resdir
then
# Had to build a kernel for this test.
QEMU="`identify_qemu vmlinux`"
@@ -122,7 +125,6 @@ seconds=$4
qemu_args=$5
boot_args=$6
-cd $KVM
kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
if test -z "$TORTURE_BUILDONLY"
then
@@ -139,6 +141,7 @@ then
cpu_count=$TORTURE_ALLOTED_CPUS
fi
qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`"
+qemu_args="`specify_qemu_net "$qemu_args"`"
# Generate architecture-specific and interaction-specific qemu arguments
qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$resdir/console.log"`"
@@ -150,6 +153,11 @@ qemu_append="`identify_qemu_append "$QEMU"`"
boot_args="`configfrag_boot_params "$boot_args" "$config_template"`"
# Generate kernel-version-specific boot parameters
boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`"
+if test -n "$TORTURE_BOOT_GDB_ARG"
+then
+ boot_args="$boot_args $TORTURE_BOOT_GDB_ARG"
+fi
+echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" $TORTURE_QEMU_GDB_ARG > $resdir/qemu-cmd
if test -n "$TORTURE_BUILDONLY"
then
@@ -157,18 +165,37 @@ then
touch $resdir/buildonly
exit 0
fi
+
+# Decorate qemu-cmd with redirection, backgrounding, and PID capture
+sed -e 's/$/ 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd
+echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd
+
+# In case qemu refuses to run...
echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
-echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
-( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args" > $resdir/qemu-output 2>&1 & echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
+
+# Attempt to run qemu
+( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
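
The sed and echo above turn the one-line command recorded in $resdir/qemu-cmd into a tiny script that backgrounds qemu and saves its PID, which the subshell then sources. A stand-alone illustration of that decoration step, using an assumed command line and /tmp paths:

------------------------------------------------------------------------
#!/bin/sh
# Hypothetical one-line qemu command, as it would appear in qemu-cmd.
echo 'qemu-system-x86_64 -nographic -kernel bzImage -append "console=ttyS0"' > /tmp/qemu-cmd
# Append "2>&1 &" to background the command, then add a line that
# captures the background PID -- the same transformation used above.
sed -e 's/$/ 2>\&1 \&/' < /tmp/qemu-cmd > /tmp/qemu-cmd.decorated
echo 'echo $! > /tmp/qemu_pid' >> /tmp/qemu-cmd.decorated
cat /tmp/qemu-cmd.decorated
------------------------------------------------------------------------
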
commandcompleted=0
-sleep 10 # Give qemu's pid a chance to reach the file
-if test -s "$resdir/qemu_pid"
+if test -z "$TORTURE_KCONFIG_GDB_ARG"
then
- qemu_pid=`cat "$resdir/qemu_pid"`
- echo Monitoring qemu job at pid $qemu_pid
-else
- qemu_pid=""
- echo Monitoring qemu job at yet-as-unknown pid
+ sleep 10 # Give qemu's pid a chance to reach the file
+ if test -s "$resdir/qemu_pid"
+ then
+ qemu_pid=`cat "$resdir/qemu_pid"`
+ echo Monitoring qemu job at pid $qemu_pid
+ else
+ qemu_pid=""
+ echo Monitoring qemu job at yet-as-unknown pid
+ fi
+fi
+if test -n "$TORTURE_KCONFIG_GDB_ARG"
+then
+ echo Waiting for you to attach a debug session, for example: > /dev/tty
+ echo " gdb $base_resdir/vmlinux" > /dev/tty
+ echo 'After symbols load and the "(gdb)" prompt appears:' > /dev/tty
+ echo " target remote :1234" > /dev/tty
+ echo " continue" > /dev/tty
+ kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
fi
while :
do
@@ -179,7 +206,7 @@ do
kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
then
- if test $kruntime -ge $seconds
+ if test $kruntime -ge $seconds -o -f "$TORTURE_STOPFILE"
then
break;
fi
@@ -208,10 +235,19 @@ then
fi
if test $commandcompleted -eq 0 -a -n "$qemu_pid"
then
- echo Grace period for qemu job at pid $qemu_pid
+ if ! test -f "$TORTURE_STOPFILE"
+ then
+ echo Grace period for qemu job at pid $qemu_pid
+ fi
oldline="`tail $resdir/console.log`"
while :
do
+ if test -f "$TORTURE_STOPFILE"
+ then
+ echo "PID $qemu_pid killed due to run STOP request" >> $resdir/Warnings 2>&1
+ kill -KILL $qemu_pid
+ break
+ fi
kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
if kill -0 $qemu_pid > /dev/null 2>&1
then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
new file mode 100755
index 000000000000..c45a953ef393
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Transform a qemu-cmd file to allow reuse.
+#
+# Usage: kvm-transform.sh bzImage console.log < qemu-cmd-in > qemu-cmd-out
+#
+# bzImage: Kernel and initrd from the same prior kvm.sh run.
+# console.log: File into which to place console output.
+#
+# The original qemu-cmd file is provided on standard input.
+# The transformed qemu-cmd file is on standard output.
+# The transformation assumes that the qemu command is confined to a
+# single line. It also assumes no whitespace in filenames.
+#
+# Copyright (C) 2020 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+image="$1"
+if test -z "$image"
+then
+ echo Need kernel image file.
+ exit 1
+fi
+consolelog="$2"
+if test -z "$consolelog"
+then
+ echo "Need console log file name."
+ exit 1
+fi
+
+awk -v image="$image" -v consolelog="$consolelog" '
+{
+ line = "";
+ for (i = 1; i <= NF; i++) {
+ if (line == "")
+ line = $i;
+ else
+ line = line " " $i;
+ if ($i == "-serial") {
+ i++;
+ line = line " file:" consolelog;
+ }
+ if ($i == "-kernel") {
+ i++;
+ line = line " " image;
+ }
+ }
+ print line;
+}'
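
As the header comment states, the awk script rewrites whatever follows -serial and -kernel so that a recorded qemu command can be replayed with a different console log and kernel image. A hypothetical invocation (the res/ path below is only an example):

------------------------------------------------------------------------
resdir=tools/testing/selftests/rcutorture/res/2020.10.01-00.00.00/TREE01
kvm-transform.sh "$resdir/bzImage" /tmp/console.log \
	< "$resdir/qemu-cmd" > /tmp/qemu-cmd.new
sh /tmp/qemu-cmd.new	# assumes the transformed command is runnable as-is
------------------------------------------------------------------------
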
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 2315e2ec12d6..6eb1d3f6524d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -31,6 +31,11 @@ TORTURE_DEFCONFIG=defconfig
TORTURE_BOOT_IMAGE=""
TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
TORTURE_KCONFIG_ARG=""
+TORTURE_KCONFIG_GDB_ARG=""
+TORTURE_BOOT_GDB_ARG=""
+TORTURE_QEMU_GDB_ARG=""
+TORTURE_KCONFIG_KASAN_ARG=""
+TORTURE_KCONFIG_KCSAN_ARG=""
TORTURE_KMAKE_ARG=""
TORTURE_QEMU_MEM=512
TORTURE_SHUTDOWN_GRACE=180
@@ -44,6 +49,7 @@ jitter="-1"
usage () {
echo "Usage: $scriptname optional arguments:"
+ echo " --allcpus"
echo " --bootargs kernel-boot-arguments"
echo " --bootimage relative-path-to-kernel-boot-image"
echo " --buildonly"
@@ -53,17 +59,19 @@ usage () {
echo " --defconfig string"
echo " --dryrun sched|script"
echo " --duration minutes"
+ echo " --gdb"
+ echo " --help"
echo " --interactive"
echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]"
echo " --kconfig Kconfig-options"
echo " --kmake-arg kernel-make-arguments"
echo " --mac nn:nn:nn:nn:nn:nn"
- echo " --memory megabytes | nnnG"
+ echo " --memory megabytes|nnnG"
echo " --no-initrd"
echo " --qemu-args qemu-arguments"
echo " --qemu-cmd qemu-system-..."
echo " --results absolute-pathname"
- echo " --torture rcu"
+ echo " --torture lock|rcu|rcuscale|refscale|scf"
echo " --trust-make"
exit 1
}
@@ -71,6 +79,10 @@ usage () {
while test $# -gt 0
do
case "$1" in
+ --allcpus)
+ cpus=$TORTURE_ALLOTED_CPUS
+ max_cpus=$TORTURE_ALLOTED_CPUS
+ ;;
--bootargs|--bootarg)
checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--'
TORTURE_BOOTARGS="$2"
@@ -120,6 +132,14 @@ do
dur=$(($2*60))
shift
;;
+ --gdb)
+ TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO=y"; export TORTURE_KCONFIG_GDB_ARG
+ TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG
+ TORTURE_QEMU_GDB_ARG="-s -S"; export TORTURE_QEMU_GDB_ARG
+ ;;
+ --help|-h)
+ usage
+ ;;
--interactive)
TORTURE_QEMU_INTERACTIVE=1; export TORTURE_QEMU_INTERACTIVE
;;
@@ -133,6 +153,12 @@ do
TORTURE_KCONFIG_ARG="$2"
shift
;;
+ --kasan)
+ TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
+ ;;
+ --kcsan)
+ TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_KCSAN_INTERRUPT_WATCHER=y"; export TORTURE_KCONFIG_KCSAN_ARG
+ ;;
--kmake-arg)
checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
TORTURE_KMAKE_ARG="$2"
@@ -172,13 +198,14 @@ do
shift
;;
--torture)
- checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--'
+ checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--'
TORTURE_SUITE=$2
shift
- if test "$TORTURE_SUITE" = rcuperf
+ if test "$TORTURE_SUITE" = rcuscale || test "$TORTURE_SUITE" = refscale
then
- # If you really want jitter for rcuperf, specify
- # it after specifying rcuperf. (But why?)
+ # If you really want jitter for refscale or
+ # rcuscale, specify it after specifying the rcuscale
+ # or the refscale. (But why jitter in these cases?)
jitter=0
fi
;;
@@ -235,6 +262,15 @@ do
done
touch $T/cfgcpu
configs_derep="`echo $configs_derep | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`"
+if test -n "$TORTURE_KCONFIG_GDB_ARG"
+then
+ if test "`echo $configs_derep | wc -w`" -gt 1
+ then
+ echo "The --config list is: $configs_derep."
+ echo "Only one --config permitted with --gdb, terminating."
+ exit 1
+ fi
+fi
for CF1 in $configs_derep
do
if test -f "$CONFIGFRAG/$CF1"
@@ -310,6 +346,11 @@ TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD
TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG
+TORTURE_KCONFIG_GDB_ARG="$TORTURE_KCONFIG_GDB_ARG"; export TORTURE_KCONFIG_GDB_ARG
+TORTURE_BOOT_GDB_ARG="$TORTURE_BOOT_GDB_ARG"; export TORTURE_BOOT_GDB_ARG
+TORTURE_QEMU_GDB_ARG="$TORTURE_QEMU_GDB_ARG"; export TORTURE_QEMU_GDB_ARG
+TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_KASAN_ARG
+TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG
TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
@@ -323,6 +364,8 @@ then
mkdir -p "$resdir" || :
fi
mkdir $resdir/$ds
+TORTURE_RESDIR="$resdir/$ds"; export TORTURE_RESDIR
+TORTURE_STOPFILE="$resdir/$ds/STOP"; export TORTURE_STOPFILE
echo Results directory: $resdir/$ds
echo $scriptname $args
touch $resdir/$ds/log
@@ -464,6 +507,7 @@ echo
echo
echo " --- `date` Test summary:"
echo Results directory: $resdir/$ds
+kcsan-collapse.sh $resdir/$ds
kvm-recheck.sh $resdir/$ds
___EOF___
@@ -486,3 +530,7 @@ fi
# Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier
# Function-graph tracing: ftrace=function_graph ftrace_graph_filter=sched_setaffinity,migration_cpu_stop
# Also --kconfig "CONFIG_FUNCTION_TRACER=y CONFIG_FUNCTION_GRAPH_TRACER=y"
+# Control buffer size: --bootargs trace_buf_size=3k
+# Get trace-buffer dumps on all oopses: --bootargs ftrace_dump_on_oops
+# Ditto, but dump only the oopsing CPU: --bootargs ftrace_dump_on_oops=orig_cpu
+# Heavy-handed way to also dump on warnings: --bootargs panic_on_warn
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 4bf62d7b1cbc..e03338091a06 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -33,8 +33,8 @@ then
fi
cat /dev/null > $file.diags
-# Check for proper termination, except that rcuperf runs don't indicate this.
-if test "$TORTURE_SUITE" != rcuperf
+# Check for proper termination, except for rcuscale and refscale.
+if test "$TORTURE_SUITE" != rcuscale && test "$TORTURE_SUITE" != refscale
then
# check for abject failure
@@ -44,17 +44,30 @@ then
tail -1 |
awk '
{
- for (i=NF-8;i<=NF;i++)
+ normalexit = 1;
+ for (i=NF-8;i<=NF;i++) {
+ if (i <= 0 || $i !~ /^[0-9]*$/) {
+ bangstring = $0;
+ gsub(/^\[[^]]*] /, "", bangstring);
+ print bangstring;
+ normalexit = 0;
+ exit 0;
+ }
sum+=$i;
+ }
}
- END { print sum }'`
- print_bug $title FAILURE, $nerrs instances
+ END {
+ if (normalexit)
+ print sum " instances"
+ }'`
+ print_bug $title FAILURE, $nerrs
exit
fi
grep --binary-files=text 'torture:.*ver:' $file |
egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
+ sed -e 's/^.*ver: //' |
awk '
BEGIN {
ver = 0;
@@ -62,13 +75,13 @@ then
}
{
- if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
+ if (!badseq && ($1 + 0 != $1 || $1 <= ver)) {
badseqno1 = ver;
- badseqno2 = $5;
+ badseqno2 = $1;
badseqnr = NR;
badseq = 1;
}
- ver = $5
+ ver = $1
}
END {
@@ -104,10 +117,7 @@ then
fi
fi | tee -a $file.diags
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
-grep -v 'ODEBUG: ' |
-grep -v 'This means that this is a DEBUG kernel and it is' |
-grep -v 'Warning: unable to open an initial console' > $T.diags
+console-badness.sh < $file > $T.diags
if test -s $T.diags
then
print_warning "Assertion failure in $file $title"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index c3c1fb5a9e1f..f2b20db9e296 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -14,3 +14,6 @@ TINY02
TASKS01
TASKS02
TASKS03
+RUDE01
+TRACE01
+TRACE02
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
new file mode 100644
index 000000000000..bafe94cbd739
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
@@ -0,0 +1,10 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot
new file mode 100644
index 000000000000..9363708c9075
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks-rude
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
new file mode 100644
index 000000000000..12e7661b86f5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -0,0 +1,11 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_TASKS_TRACE_RCU_READ_MB=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot
new file mode 100644
index 000000000000..9675ad632dcc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks-tracing
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
new file mode 100644
index 000000000000..b69ed6673c41
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
@@ -0,0 +1,11 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+#CHECK#CONFIG_PROVE_RCU=n
+CONFIG_TASKS_TRACE_RCU_READ_MB=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot
new file mode 100644
index 000000000000..9675ad632dcc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks-tracing
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
index 2dde0d9964e3..4f95f8544f3f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -16,5 +16,6 @@ CONFIG_RCU_NOCB_CPU=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_PROVE_RCU_LIST=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
index 2debe7891aeb..7311f84a5876 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=100
+CONFIG_NR_CPUS=56
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
deleted file mode 100644
index a09816b8c0f3..000000000000
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
+++ /dev/null
@@ -1,2 +0,0 @@
-CONFIG_RCU_PERF_TEST=y
-CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST b/tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST
index c9f56cf20775..c9f56cf20775 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
new file mode 100644
index 000000000000..87caa0e932c7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_RCU_SCALE_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY
index fb05ef5279b4..fb05ef5279b4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
index 721cfda76ab2..721cfda76ab2 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54
index 7629f5dd73b2..7629f5dd73b2 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
index 777d5b0c190f..0333e9b18522 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
@@ -11,6 +11,6 @@
#
# Adds per-version torture-module parameters to kernels supporting them.
per_version_boot_params () {
- echo $1 rcuperf.shutdown=1 \
- rcuperf.verbose=1
+ echo $1 rcuscale.shutdown=1 \
+ rcuscale.verbose=1
}
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/CFLIST b/tools/testing/selftests/rcutorture/configs/refscale/CFLIST
new file mode 100644
index 000000000000..4d62eb4a39f9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/refscale/CFLIST
@@ -0,0 +1,2 @@
+NOPREEMPT
+PREEMPT
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/CFcommon b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
new file mode 100644
index 000000000000..a98b58b54bb1
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_RCU_REF_SCALE_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
new file mode 100644
index 000000000000..1cd25b7314e3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
@@ -0,0 +1,18 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_PREEMPT_RCU=n
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT
new file mode 100644
index 000000000000..d10bc694f42c
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT
@@ -0,0 +1,18 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh
new file mode 100644
index 000000000000..321e82641287
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Torture-suite-dependent shell functions for the rest of the scripts.
+#
+# Copyright (C) IBM Corporation, 2015
+#
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+ echo $1 refscale.shutdown=1 \
+ refscale.verbose=1
+}
diff --git a/tools/testing/selftests/rcutorture/configs/scf/CFLIST b/tools/testing/selftests/rcutorture/configs/scf/CFLIST
new file mode 100644
index 000000000000..4d62eb4a39f9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/CFLIST
@@ -0,0 +1,2 @@
+NOPREEMPT
+PREEMPT
diff --git a/tools/testing/selftests/rcutorture/configs/scf/CFcommon b/tools/testing/selftests/rcutorture/configs/scf/CFcommon
new file mode 100644
index 000000000000..c11ab91f49f5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_SCF_TORTURE_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
new file mode 100644
index 000000000000..b8429d6c6ebc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
@@ -0,0 +1,9 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot
new file mode 100644
index 000000000000..d6a7fa097c2e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot
@@ -0,0 +1 @@
+nohz_full=1
diff --git a/tools/testing/selftests/rcutorture/configs/scf/PREEMPT b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
new file mode 100644
index 000000000000..ae4992b141b0
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
@@ -0,0 +1,9 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
diff --git a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
new file mode 100644
index 000000000000..d3d9e35d3d55
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Torture-suite-dependent shell functions for the rest of the scripts.
+#
+# Copyright (C) Facebook, 2020
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+# scftorture_param_onoff bootparam-string config-file
+#
+# Adds onoff scftorture module parameters to kernels having it.
+scftorture_param_onoff () {
+ if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2"
+ then
+ echo CPU-hotplug kernel, adding scftorture onoff. 1>&2
+ echo scftorture.onoff_interval=1000 scftorture.onoff_holdoff=30
+ fi
+}
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+ echo $1 `scftorture_param_onoff "$1" "$2"` \
+ scftorture.stat_interval=15 \
+ scftorture.shutdown_secs=$3 \
+ scftorture.verbose=1 \
+ scf
+}
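
To see what boot parameters these functions contribute, one can stub the two hotplug helpers (which normally come from functions.sh) and source the file. A sketch under those assumptions:

------------------------------------------------------------------------
#!/bin/sh
# Illustration only: pretend CONFIG_HOTPLUG_CPU=y and no onoff= boot
# parameter, then ask for the boot arguments of a 1800-second run.
bootparam_hotplug_cpu () { false; }	# stub: no explicit onoff= parameter
configfrag_hotplug_cpu () { true; }	# stub: hotplug-capable config fragment
. tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
per_version_boot_params "console=ttyS0" /dev/null 1800
# Expected output (stderr note plus one stdout line), approximately:
#   CPU-hotplug kernel, adding scftorture onoff.
#   console=ttyS0 scftorture.onoff_interval=1000 scftorture.onoff_holdoff=30
#     scftorture.stat_interval=15 scftorture.shutdown_secs=1800 scftorture.verbose=1 scf
------------------------------------------------------------------------
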
diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt
index 933b4fd12327..41a4255865d4 100644
--- a/tools/testing/selftests/rcutorture/doc/initrd.txt
+++ b/tools/testing/selftests/rcutorture/doc/initrd.txt
@@ -1,12 +1,11 @@
-The rcutorture scripting tools automatically create the needed initrd
-directory using dracut. Failing that, this tool will create an initrd
-containing a single statically linked binary named "init" that loops
-over a very long sleep() call. In both cases, this creation is done
-by tools/testing/selftests/rcutorture/bin/mkinitrd.sh.
+The rcutorture scripting tools automatically create an initrd containing
+a single statically linked binary named "init" that loops over a
+very long sleep() call. In both cases, this creation is done by
+tools/testing/selftests/rcutorture/bin/mkinitrd.sh.
-However, if you are attempting to run rcutorture on a system that does
-not have dracut installed, and if you don't like the notion of static
-linking, you might wish to press an existing initrd into service:
+However, if you don't like the notion of statically linked bare-bones
+userspace environments, you might wish to press an existing initrd
+into service:
------------------------------------------------------------------------
cd tools/testing/selftests/rcutorture
@@ -15,24 +14,3 @@ mkdir initrd
cd initrd
cpio -id < /tmp/initrd.img.zcat
# Manually verify that initrd contains needed binaries and libraries.
-------------------------------------------------------------------------
-
-Interestingly enough, if you are running rcutorture, you don't really
-need userspace in many cases. Running without userspace has the
-advantage of allowing you to test your kernel independently of the
-distro in place, the root-filesystem layout, and so on. To make this
-happen, put the following script in the initrd's tree's "/init" file,
-with 0755 mode.
-
-------------------------------------------------------------------------
-#!/bin/sh
-
-while :
-do
- sleep 10
-done
-------------------------------------------------------------------------
-
-This approach also allows most of the binaries and libraries in the
-initrd filesystem to be dispensed with, which can save significant
-space in rcutorture's "res" directory.
diff --git a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
index 449cf579d6f9..b2fc247976b1 100644
--- a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
+++ b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
@@ -1,8 +1,33 @@
-This document describes one way to create the rcu-test-image file
-that contains the filesystem used by the guest-OS kernel. There are
-probably much better ways of doing this, and this filesystem could no
-doubt be smaller. It is probably also possible to simply download
-an appropriate image from any number of places.
+Normally, a minimal initrd is created automatically by the rcutorture
+scripting. But minimal really does mean "minimal", namely just a single
+root directory with a single statically linked executable named "init":
+
+$ size tools/testing/selftests/rcutorture/initrd/init
+ text data bss dec hex filename
+ 328 0 8 336 150 tools/testing/selftests/rcutorture/initrd/init
+
+Suppose you need to run some scripts, perhaps to monitor or control
+some aspect of the rcutorture testing. This will require a more fully
+filled-out userspace, perhaps containing libraries, executables for
+the shell and other utilities, and so forth. In that case, place your
+desired filesystem here:
+
+ tools/testing/selftests/rcutorture/initrd
+
+For example, your tools/testing/selftests/rcutorture/initrd/init might
+be a script that does any needed mount operations and starts whatever
+scripts need starting to properly monitor or control your testing.
+The next rcutorture build will then incorporate this filesystem into
+the kernel image that is passed to qemu.
+
+Or maybe you need a real root filesystem for some reason, in which case
+please read on!
+
+The remainder of this document describes one way to create the
+rcu-test-image file that contains the filesystem used by the guest-OS
+kernel. There are probably much better ways of doing this, and this
+filesystem could no doubt be smaller. It is probably also possible to
+simply download an appropriate image from any number of places.
That said, here are the commands:
@@ -36,7 +61,7 @@ References:
https://help.ubuntu.com/community/JeOSVMBuilder
http://wiki.libvirt.org/page/UbuntuKVMWalkthrough
http://www.moe.co.uk/2011/01/07/pci_add_option_rom-failed-to-find-romfile-pxe-rtl8139-bin/ -- "apt-get install kvm-pxe"
- http://www.landley.net/writing/rootfs-howto.html
- http://en.wikipedia.org/wiki/Initrd
- http://en.wikipedia.org/wiki/Cpio
+ https://www.landley.net/writing/rootfs-howto.html
+ https://en.wikipedia.org/wiki/Initrd
+ https://en.wikipedia.org/wiki/Cpio
http://wiki.libvirt.org/page/UbuntuKVMWalkthrough
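
The new text above suggests dropping a custom script into tools/testing/selftests/rcutorture/initrd/init when monitoring or control is needed. A minimal sketch of such an init, with the monitoring script name purely hypothetical:

------------------------------------------------------------------------
#!/bin/sh
# Hypothetical initrd/init: mount the pseudo-filesystems, start a
# monitoring script in the background, then sleep forever so that
# PID 1 never exits while rcutorture runs.
mount -t proc proc /proc
mount -t sysfs sysfs /sys
/monitor.sh &		# placeholder for your own monitoring/control script
while :
do
	sleep 10
done
------------------------------------------------------------------------
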
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index e8a657a5f48a..384589095864 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
+#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
+#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@@ -1131,6 +1133,220 @@ static int set_signal_handler(void)
return ret;
}
+struct test_membarrier_thread_args {
+ int stop;
+ intptr_t percpu_list_ptr;
+};
+
+/* Worker threads modify data in their "active" percpu lists. */
+void *test_membarrier_worker_thread(void *arg)
+{
+ struct test_membarrier_thread_args *args =
+ (struct test_membarrier_thread_args *)arg;
+ const int iters = opt_reps;
+ int i;
+
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ /* Wait for initialization. */
+ while (!atomic_load(&args->percpu_list_ptr)) {}
+
+ for (i = 0; i < iters; ++i) {
+ int ret;
+
+ do {
+ int cpu = rseq_cpu_start();
+
+ ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
+ sizeof(struct percpu_list_entry) * cpu, 1, cpu);
+ } while (rseq_unlikely(ret));
+ }
+
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+ return NULL;
+}
+
+void test_membarrier_init_percpu_list(struct percpu_list *list)
+{
+ int i;
+
+ memset(list, 0, sizeof(*list));
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ struct percpu_list_node *node;
+
+ node = malloc(sizeof(*node));
+ assert(node);
+ node->data = 0;
+ node->next = NULL;
+ list->c[i].head = node;
+ }
+}
+
+void test_membarrier_free_percpu_list(struct percpu_list *list)
+{
+ int i;
+
+ for (i = 0; i < CPU_SETSIZE; i++)
+ free(list->c[i].head);
+}
+
+static int sys_membarrier(int cmd, int flags, int cpu_id)
+{
+ return syscall(__NR_membarrier, cmd, flags, cpu_id);
+}
+
+/*
+ * The manager thread swaps per-cpu lists that worker threads see,
+ * and validates that there are no unexpected modifications.
+ */
+void *test_membarrier_manager_thread(void *arg)
+{
+ struct test_membarrier_thread_args *args =
+ (struct test_membarrier_thread_args *)arg;
+ struct percpu_list list_a, list_b;
+ intptr_t expect_a = 0, expect_b = 0;
+ int cpu_a = 0, cpu_b = 0;
+
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ /* Init lists. */
+ test_membarrier_init_percpu_list(&list_a);
+ test_membarrier_init_percpu_list(&list_b);
+
+ atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+
+ while (!atomic_load(&args->stop)) {
+ /* list_a is "active". */
+ cpu_a = rand() % CPU_SETSIZE;
+ /*
+ * As list_b is "inactive", we should never see changes
+ * to list_b.
+ */
+ if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
+ fprintf(stderr, "Membarrier test failed\n");
+ abort();
+ }
+
+ /* Make list_b "active". */
+ atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
+ if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+ MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
+ errno != ENXIO /* missing CPU */) {
+ perror("sys_membarrier");
+ abort();
+ }
+ /*
+ * Cpu A should now only modify list_b, so the values
+ * in list_a should be stable.
+ */
+ expect_a = atomic_load(&list_a.c[cpu_a].head->data);
+
+ cpu_b = rand() % CPU_SETSIZE;
+ /*
+ * As list_a is "inactive", we should never see changes
+ * to list_a.
+ */
+ if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
+ fprintf(stderr, "Membarrier test failed\n");
+ abort();
+ }
+
+ /* Make list_a "active". */
+ atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+ if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+ MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
+ errno != ENXIO /* missing CPU */) {
+ perror("sys_membarrier");
+ abort();
+ }
+ /* Remember a value from list_b. */
+ expect_b = atomic_load(&list_b.c[cpu_b].head->data);
+ }
+
+ test_membarrier_free_percpu_list(&list_a);
+ test_membarrier_free_percpu_list(&list_b);
+
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+ return NULL;
+}
+
+/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
+#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+void test_membarrier(void)
+{
+ const int num_threads = opt_threads;
+ struct test_membarrier_thread_args thread_args;
+ pthread_t worker_threads[num_threads];
+ pthread_t manager_thread;
+ int i, ret;
+
+ if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
+ perror("sys_membarrier");
+ abort();
+ }
+
+ thread_args.stop = 0;
+ thread_args.percpu_list_ptr = 0;
+ ret = pthread_create(&manager_thread, NULL,
+ test_membarrier_manager_thread, &thread_args);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_create(&worker_threads[i], NULL,
+ test_membarrier_worker_thread, &thread_args);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+ }
+
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_join(worker_threads[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+ }
+
+ atomic_store(&thread_args.stop, 1);
+ ret = pthread_join(manager_thread, NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+}
+#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
+void test_membarrier(void)
+{
+ fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
+ "Skipping membarrier test.\n");
+}
+#endif
+
static void show_usage(int argc, char **argv)
{
printf("Usage : %s <OPTIONS>\n",
@@ -1153,7 +1369,7 @@ static void show_usage(int argc, char **argv)
printf(" [-r N] Number of repetitions per thread (default 5000)\n");
printf(" [-d] Disable rseq system call (no initialization)\n");
printf(" [-D M] Disable rseq for each M threads\n");
- printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
+ printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
printf(" [-v] Verbose output.\n");
printf(" [-h] Show this help.\n");
@@ -1268,6 +1484,7 @@ int main(int argc, char **argv)
case 'i':
case 'b':
case 'm':
+ case 'r':
break;
default:
show_usage(argc, argv);
@@ -1320,6 +1537,10 @@ int main(int argc, char **argv)
printf_verbose("counter increment\n");
test_percpu_inc();
break;
+ case 'r':
+ printf_verbose("membarrier\n");
+ test_membarrier();
+ break;
}
if (!opt_disable_rseq && rseq_unregister_current_thread())
abort();
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index b2da6004fe30..640411518e46 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -279,6 +279,63 @@ error1:
#endif
}
+#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+
+/*
+ * pval = *(ptr+off)
+ * *pval += inc;
+ */
+static inline __attribute__((always_inline))
+int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+#endif
+ /* get p+v */
+ "movq %[ptr], %%rbx\n\t"
+ "addq %[off], %%rbx\n\t"
+ /* get pv */
+ "movq (%%rbx), %%rcx\n\t"
+ /* *pv += inc */
+ "addq %[inc], (%%rcx)\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_abi] "r" (&__rseq_abi),
+ /* final store input */
+ [ptr] "m" (*ptr),
+ [off] "er" (off),
+ [inc] "er" (inc)
+ : "memory", "cc", "rax", "rbx", "rcx"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
intptr_t *v2, intptr_t newv2,
diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh
index e426304fd4a0..f51bc83c9e41 100755
--- a/tools/testing/selftests/rseq/run_param_test.sh
+++ b/tools/testing/selftests/rseq/run_param_test.sh
@@ -15,6 +15,7 @@ TEST_LIST=(
"-T m"
"-T m -M"
"-T i"
+ "-T r"
)
TEST_NAME=(
@@ -25,6 +26,7 @@ TEST_NAME=(
"memcpy"
"memcpy with barrier"
"increment"
+ "membarrier"
)
IFS="$OLDIFS"
diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh
new file mode 100755
index 000000000000..609a4ef9300e
--- /dev/null
+++ b/tools/testing/selftests/run_kselftest.sh
@@ -0,0 +1,93 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run installed kselftest tests.
+#
+BASE_DIR=$(realpath $(dirname $0))
+cd $BASE_DIR
+TESTS="$BASE_DIR"/kselftest-list.txt
+if [ ! -r "$TESTS" ] ; then
+ echo "$0: Could not find list of tests to run ($TESTS)" >&2
+ available=""
+else
+ available="$(cat "$TESTS")"
+fi
+
+. ./kselftest/runner.sh
+ROOT=$PWD
+
+usage()
+{
+ cat <<EOF
+Usage: $0 [OPTIONS]
+ -s | --summary Print summary with detailed log in output.log
+ -t | --test COLLECTION:TEST Run TEST from COLLECTION
+ -c | --collection COLLECTION Run all tests from COLLECTION
+ -l | --list List the available collection:test entries
+ -n | --dry-run Don't actually run any tests
+ -h | --help Show this usage info
+EOF
+ exit $1
+}
+
+COLLECTIONS=""
+TESTS=""
+dryrun=""
+while true; do
+ case "$1" in
+ -s | --summary)
+ logfile="$BASE_DIR"/output.log
+ cat /dev/null > $logfile
+ shift ;;
+ -t | --test)
+ TESTS="$TESTS $2"
+ shift 2 ;;
+ -c | --collection)
+ COLLECTIONS="$COLLECTIONS $2"
+ shift 2 ;;
+ -l | --list)
+ echo "$available"
+ exit 0 ;;
+ -n | --dry-run)
+ dryrun="echo"
+ shift ;;
+ -h | --help)
+ usage 0 ;;
+ "")
+ break ;;
+ *)
+ usage 1 ;;
+ esac
+done
+
+# Add all selected collections to the explicit test list.
+if [ -n "$COLLECTIONS" ]; then
+ for collection in $COLLECTIONS ; do
+ found="$(echo "$available" | grep "^$collection:")"
+ if [ -z "$found" ] ; then
+ echo "No such collection '$collection'" >&2
+ exit 1
+ fi
+ TESTS="$TESTS $found"
+ done
+fi
+# Replace available test list with explicitly selected tests.
+if [ -n "$TESTS" ]; then
+ valid=""
+ for test in $TESTS ; do
+ found="$(echo "$available" | grep "^${test}$")"
+ if [ -z "$found" ] ; then
+ echo "No such test '$test'" >&2
+ exit 1
+ fi
+ valid="$valid $found"
+ done
+ available="$(echo "$valid" | sed -e 's/ /\n/g')"
+fi
+
+collections=$(echo "$available" | cut -d: -f1 | uniq)
+for collection in $collections ; do
+ [ -w /dev/kmsg ] && echo "kselftest: Running tests in $collection" >> /dev/kmsg
+ tests=$(echo "$available" | grep "^$collection:" | cut -d: -f2)
+ ($dryrun cd "$collection" && $dryrun run_many $tests)
+done
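
The runner reads kselftest-list.txt from its own directory, filters it by any -c/-t selections, and hands each collection to run_many from runner.sh. Hypothetical invocations from an installed kselftest tree (the install path and the collection/test names are examples only):

------------------------------------------------------------------------
cd /opt/kselftests			# wherever "make -C tools/testing/selftests install" landed
./run_kselftest.sh -l			# list available collection:test entries
./run_kselftest.sh -c seccomp -s	# run one whole collection, details to output.log
./run_kselftest.sh -t timers:posix_timers -n	# dry-run a single test
------------------------------------------------------------------------
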
diff --git a/tools/testing/selftests/seccomp/config b/tools/testing/selftests/seccomp/config
index db1e11b08c8a..64c19d8eba79 100644
--- a/tools/testing/selftests/seccomp/config
+++ b/tools/testing/selftests/seccomp/config
@@ -1,2 +1,3 @@
CONFIG_SECCOMP=y
CONFIG_SECCOMP_FILTER=y
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index 5838c8697ec3..91f5a89cadac 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -18,9 +18,9 @@
unsigned long long timing(clockid_t clk_id, unsigned long long samples)
{
- pid_t pid, ret;
- unsigned long long i;
struct timespec start, finish;
+ unsigned long long i;
+ pid_t pid, ret;
pid = getpid();
assert(clock_gettime(clk_id, &start) == 0);
@@ -31,30 +31,43 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples)
assert(clock_gettime(clk_id, &finish) == 0);
i = finish.tv_sec - start.tv_sec;
- i *= 1000000000;
+ i *= 1000000000ULL;
i += finish.tv_nsec - start.tv_nsec;
- printf("%lu.%09lu - %lu.%09lu = %llu\n",
+ printf("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n",
finish.tv_sec, finish.tv_nsec,
start.tv_sec, start.tv_nsec,
- i);
+ i, (double)i / 1000000000.0);
return i;
}
unsigned long long calibrate(void)
{
- unsigned long long i;
-
- printf("Calibrating reasonable sample size...\n");
+ struct timespec start, finish;
+ unsigned long long i, samples, step = 9973;
+ pid_t pid, ret;
+ int seconds = 15;
- for (i = 5; ; i++) {
- unsigned long long samples = 1 << i;
+ printf("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds);
- /* Find something that takes more than 5 seconds to run. */
- if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5)
- return samples;
- }
+ samples = 0;
+ pid = getpid();
+ assert(clock_gettime(CLOCK_MONOTONIC, &start) == 0);
+ do {
+ for (i = 0; i < step; i++) {
+ ret = syscall(__NR_getpid);
+ assert(pid == ret);
+ }
+ assert(clock_gettime(CLOCK_MONOTONIC, &finish) == 0);
+
+ samples += step;
+ i = finish.tv_sec - start.tv_sec;
+ i *= 1000000000ULL;
+ i += finish.tv_nsec - start.tv_nsec;
+ } while (i < 1000000000ULL);
+
+ return samples * seconds;
}
int main(int argc, char *argv[])
@@ -68,32 +81,55 @@ int main(int argc, char *argv[])
};
long ret;
unsigned long long samples;
- unsigned long long native, filtered;
+ unsigned long long native, filter1, filter2;
+
+ printf("Current BPF sysctl settings:\n");
+ system("sysctl net.core.bpf_jit_enable");
+ system("sysctl net.core.bpf_jit_harden");
if (argc > 1)
samples = strtoull(argv[1], NULL, 0);
else
samples = calibrate();
- printf("Benchmarking %llu samples...\n", samples);
+ printf("Benchmarking %llu syscalls...\n", samples);
+ /* Native call */
native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
printf("getpid native: %llu ns\n", native);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
assert(ret == 0);
+ /* One filter */
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
assert(ret == 0);
- filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
- printf("getpid RET_ALLOW: %llu ns\n", filtered);
+ filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+ printf("getpid RET_ALLOW 1 filter: %llu ns\n", filter1);
+
+ if (filter1 == native)
+ printf("No overhead measured!? Try running again with more samples.\n");
+
+ /* Two filters */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ assert(ret == 0);
+
+ filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+ printf("getpid RET_ALLOW 2 filters: %llu ns\n", filter2);
+
+ /* Calculations */
+ printf("Estimated total seccomp overhead for 1 filter: %llu ns\n",
+ filter1 - native);
+
+ printf("Estimated total seccomp overhead for 2 filters: %llu ns\n",
+ filter2 - native);
- printf("Estimated seccomp overhead per syscall: %llu ns\n",
- filtered - native);
+ printf("Estimated seccomp per-filter overhead: %llu ns\n",
+ filter2 - filter1);
- if (filtered == native)
- printf("Trying running again with more samples.\n");
+ printf("Estimated seccomp entry overhead: %llu ns\n",
+ filter1 - native - (filter2 - filter1));
return 0;
}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index c0aa46ce14f6..4a180439ee9e 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -45,12 +45,19 @@
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/kcmp.h>
+#include <sys/resource.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <poll.h>
#include "../kselftest_harness.h"
+#include "../clone3/clone3_selftests.h"
+
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
@@ -116,12 +123,18 @@ struct seccomp_data {
# define __NR_seccomp 277
# elif defined(__riscv)
# define __NR_seccomp 277
+# elif defined(__csky__)
+# define __NR_seccomp 277
# elif defined(__hppa__)
# define __NR_seccomp 338
# elif defined(__powerpc__)
# define __NR_seccomp 358
# elif defined(__s390__)
# define __NR_seccomp 348
+# elif defined(__xtensa__)
+# define __NR_seccomp 337
+# elif defined(__sh__)
+# define __NR_seccomp 372
# else
# warning "seccomp syscall number unknown for this architecture"
# define __NR_seccomp 0xffff
@@ -167,7 +180,9 @@ struct seccomp_metadata {
#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
+#endif
+#ifndef SECCOMP_RET_USER_NOTIF
#define SECCOMP_RET_USER_NOTIF 0x7fc00000U
#define SECCOMP_IOC_MAGIC '!'
@@ -180,7 +195,7 @@ struct seccomp_metadata {
#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif)
#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \
struct seccomp_notif_resp)
-#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64)
+#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64)
struct seccomp_notif {
__u64 id;
@@ -203,6 +218,39 @@ struct seccomp_notif_sizes {
};
#endif
+#ifndef SECCOMP_IOCTL_NOTIF_ADDFD
+/* On success, the return value is the remote process's added fd number */
+#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \
+ struct seccomp_notif_addfd)
+
+/* valid flags for seccomp_notif_addfd */
+#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
+
+struct seccomp_notif_addfd {
+ __u64 id;
+ __u32 flags;
+ __u32 srcfd;
+ __u32 newfd;
+ __u32 newfd_flags;
+};
+#endif
+
+struct seccomp_notif_addfd_small {
+ __u64 id;
+ char weird[4];
+};
+#define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \
+ SECCOMP_IOW(3, struct seccomp_notif_addfd_small)
+
+struct seccomp_notif_addfd_big {
+ union {
+ struct seccomp_notif_addfd addfd;
+ char buf[sizeof(struct seccomp_notif_addfd) + 8];
+ };
+};
+#define SECCOMP_IOCTL_NOTIF_ADDFD_BIG \
+ SECCOMP_IOWR(3, struct seccomp_notif_addfd_big)
+
#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
#define PTRACE_EVENTMSG_SYSCALL_ENTRY 1
#define PTRACE_EVENTMSG_SYSCALL_EXIT 2
@@ -236,6 +284,40 @@ int seccomp(unsigned int op, unsigned int flags, void *args)
#define SIBLING_EXIT_FAILURE 0xbadface
#define SIBLING_EXIT_NEWPRIVS 0xbadfeed
+static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
+{
+#ifdef __NR_kcmp
+ errno = 0;
+ return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+/* Have TH_LOG report actual location filecmp() is used. */
+#define filecmp(pid1, pid2, fd1, fd2) ({ \
+ int _ret; \
+ \
+ _ret = __filecmp(pid1, pid2, fd1, fd2); \
+ if (_ret != 0) { \
+ if (_ret < 0 && errno == ENOSYS) { \
+ TH_LOG("kcmp() syscall missing (test is less accurate)");\
+ _ret = 0; \
+ } \
+ } \
+ _ret; })
+
+TEST(kcmp)
+{
+ int ret;
+
+ ret = __filecmp(getpid(), getpid(), 1, 1);
+ EXPECT_EQ(ret, 0);
+ if (ret != 0 && errno == ENOSYS)
+ SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)");
+}
+
TEST(mode_strict_support)
{
long ret;
@@ -692,8 +774,15 @@ void *kill_thread(void *data)
return (void *)SIBLING_EXIT_UNKILLED;
}
+enum kill_t {
+ KILL_THREAD,
+ KILL_PROCESS,
+ RET_UNKNOWN
+};
+
/* Prepare a thread that will kill itself or both of us. */
-void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
+void kill_thread_or_group(struct __test_metadata *_metadata,
+ enum kill_t kill_how)
{
pthread_t thread;
void *status;
@@ -709,11 +798,12 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
.len = (unsigned short)ARRAY_SIZE(filter_thread),
.filter = filter_thread,
};
+ int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA;
struct sock_filter filter_process[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
+ BPF_STMT(BPF_RET|BPF_K, kill),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog_process = {
@@ -726,13 +816,15 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
}
ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
- kill_process ? &prog_process : &prog_thread));
+ kill_how == KILL_THREAD ? &prog_thread
+ : &prog_process));
/*
* Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
* flag cannot be downgraded by a new filter.
*/
- ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
+ if (kill_how == KILL_PROCESS)
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
/* Start a thread that will exit immediately. */
ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
@@ -760,7 +852,7 @@ TEST(KILL_thread)
child_pid = fork();
ASSERT_LE(0, child_pid);
if (child_pid == 0) {
- kill_thread_or_group(_metadata, false);
+ kill_thread_or_group(_metadata, KILL_THREAD);
_exit(38);
}
@@ -779,7 +871,7 @@ TEST(KILL_process)
child_pid = fork();
ASSERT_LE(0, child_pid);
if (child_pid == 0) {
- kill_thread_or_group(_metadata, true);
+ kill_thread_or_group(_metadata, KILL_PROCESS);
_exit(38);
}
@@ -790,6 +882,27 @@ TEST(KILL_process)
ASSERT_EQ(SIGSYS, WTERMSIG(status));
}
+TEST(KILL_unknown)
+{
+ int status;
+ pid_t child_pid;
+
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ kill_thread_or_group(_metadata, RET_UNKNOWN);
+ _exit(38);
+ }
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+ /* If the entire process was killed, we'll see SIGSYS. */
+ EXPECT_TRUE(WIFSIGNALED(status)) {
+ TH_LOG("Unknown SECCOMP_RET is only killing the thread?");
+ }
+ ASSERT_EQ(SIGSYS, WTERMSIG(status));
+}
+
/* TODO(wad) add 64-bit versus 32-bit arg tests. */
TEST(arg_out_of_range)
{
@@ -1470,6 +1583,7 @@ pid_t setup_trace_fixture(struct __test_metadata *_metadata,
return tracer_pid;
}
+
void teardown_trace_fixture(struct __test_metadata *_metadata,
pid_t tracer)
{
@@ -1584,49 +1698,148 @@ TEST_F(TRACE_poke, getpid_runs_normally)
}
#if defined(__x86_64__)
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM orig_rax
-# define SYSCALL_RET rax
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).orig_rax
+# define SYSCALL_RET(_regs) (_regs).rax
#elif defined(__i386__)
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM orig_eax
-# define SYSCALL_RET eax
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).orig_eax
+# define SYSCALL_RET(_regs) (_regs).eax
#elif defined(__arm__)
-# define ARCH_REGS struct pt_regs
-# define SYSCALL_NUM ARM_r7
-# define SYSCALL_RET ARM_r0
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) (_regs).ARM_r7
+# ifndef PTRACE_SET_SYSCALL
+# define PTRACE_SET_SYSCALL 23
+# endif
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
+# define SYSCALL_RET(_regs) (_regs).ARM_r0
#elif defined(__aarch64__)
-# define ARCH_REGS struct user_pt_regs
-# define SYSCALL_NUM regs[8]
-# define SYSCALL_RET regs[0]
+# define ARCH_REGS struct user_pt_regs
+# define SYSCALL_NUM(_regs) (_regs).regs[8]
+# ifndef NT_ARM_SYSTEM_CALL
+# define NT_ARM_SYSTEM_CALL 0x404
+# endif
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ do { \
+ struct iovec __v; \
+ typeof(_nr) __nr = (_nr); \
+ __v.iov_base = &__nr; \
+ __v.iov_len = sizeof(__nr); \
+ EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \
+ NT_ARM_SYSTEM_CALL, &__v)); \
+ } while (0)
+# define SYSCALL_RET(_regs) (_regs).regs[0]
#elif defined(__riscv) && __riscv_xlen == 64
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM a7
-# define SYSCALL_RET a0
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).a7
+# define SYSCALL_RET(_regs) (_regs).a0
+#elif defined(__csky__)
+# define ARCH_REGS struct pt_regs
+# if defined(__CSKYABIV2__)
+# define SYSCALL_NUM(_regs) (_regs).regs[3]
+# else
+# define SYSCALL_NUM(_regs) (_regs).regs[9]
+# endif
+# define SYSCALL_RET(_regs) (_regs).a0
#elif defined(__hppa__)
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM gr[20]
-# define SYSCALL_RET gr[28]
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).gr[20]
+# define SYSCALL_RET(_regs) (_regs).gr[28]
#elif defined(__powerpc__)
-# define ARCH_REGS struct pt_regs
-# define SYSCALL_NUM gpr[0]
-# define SYSCALL_RET gpr[3]
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) (_regs).gpr[0]
+# define SYSCALL_RET(_regs) (_regs).gpr[3]
+# define SYSCALL_RET_SET(_regs, _val) \
+ do { \
+ typeof(_val) _result = (_val); \
+ /* \
+ * A syscall error is signaled by CR0 SO bit \
+ * and the code is stored as a positive value. \
+ */ \
+ if (_result < 0) { \
+ SYSCALL_RET(_regs) = -_result; \
+ (_regs).ccr |= 0x10000000; \
+ } else { \
+ SYSCALL_RET(_regs) = _result; \
+ (_regs).ccr &= ~0x10000000; \
+ } \
+ } while (0)
+# define SYSCALL_RET_SET_ON_PTRACE_EXIT
#elif defined(__s390__)
-# define ARCH_REGS s390_regs
-# define SYSCALL_NUM gprs[2]
-# define SYSCALL_RET gprs[2]
+# define ARCH_REGS s390_regs
+# define SYSCALL_NUM(_regs) (_regs).gprs[2]
+# define SYSCALL_RET_SET(_regs, _val) \
+ TH_LOG("Can't modify syscall return on this architecture")
#elif defined(__mips__)
-# define ARCH_REGS struct pt_regs
-# define SYSCALL_NUM regs[2]
-# define SYSCALL_SYSCALL_NUM regs[4]
-# define SYSCALL_RET regs[2]
-# define SYSCALL_NUM_RET_SHARE_REG
+# include <asm/unistd_nr_n32.h>
+# include <asm/unistd_nr_n64.h>
+# include <asm/unistd_nr_o32.h>
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) \
+ ({ \
+ typeof((_regs).regs[2]) _nr; \
+ if ((_regs).regs[2] == __NR_O32_Linux) \
+ _nr = (_regs).regs[4]; \
+ else \
+ _nr = (_regs).regs[2]; \
+ _nr; \
+ })
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ do { \
+ if ((_regs).regs[2] == __NR_O32_Linux) \
+ (_regs).regs[4] = _nr; \
+ else \
+ (_regs).regs[2] = _nr; \
+ } while (0)
+# define SYSCALL_RET_SET(_regs, _val) \
+ TH_LOG("Can't modify syscall return on this architecture")
+#elif defined(__xtensa__)
+# define ARCH_REGS struct user_pt_regs
+# define SYSCALL_NUM(_regs) (_regs).syscall
+/*
+ * On xtensa the syscall return value is in register a2 of the
+ * current register window, which is not fixed.
+ */
+# define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2]
+#elif defined(__sh__)
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) (_regs).gpr[3]
+# define SYSCALL_RET(_regs) (_regs).gpr[0]
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
+/*
+ * Most architectures can change the syscall by just updating the
+ * associated register. This is the default if not defined above.
+ */
+#ifndef SYSCALL_NUM_SET
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ do { \
+ SYSCALL_NUM(_regs) = (_nr); \
+ } while (0)
+#endif
+/*
+ * Most architectures can change the syscall return value by just
+ * writing to the SYSCALL_RET register. This is the default if not
+ * defined above. If an architecture cannot set the return value
+ * (for example, when the syscall number and return value share a
+ * register), report it with TH_LOG() in an arch-specific definition
+ * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
+ */
+#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET)
+# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch"
+#endif
+#ifndef SYSCALL_RET_SET
+# define SYSCALL_RET_SET(_regs, _val) \
+ do { \
+ SYSCALL_RET(_regs) = (_val); \
+ } while (0)
+#endif
+
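As a quick illustration of the two fallback macros above (a sketch, not part of the patch): on an architecture that defines SYSCALL_RET and takes the defaults, both setters reduce to a plain register-field assignment.

```c
/*
 * Hypothetical fragment, assumed to sit inside some tracer function in
 * this file; "regs" stands for an ARCH_REGS value already fetched.
 */
ARCH_REGS regs;

SYSCALL_NUM_SET(regs, __NR_getppid); /* default expands to: SYSCALL_NUM(regs) = __NR_getppid */
SYSCALL_RET_SET(regs, -ESRCH);       /* default expands to: SYSCALL_RET(regs) = -ESRCH */
```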
/* When the syscall return can't be changed, stub out the tests for it. */
-#ifdef SYSCALL_NUM_RET_SHARE_REG
+#ifndef SYSCALL_RET
# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action) \
@@ -1641,115 +1854,95 @@ TEST_F(TRACE_poke, getpid_runs_normally)
} while (0)
#endif
-/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
+/*
+ * Some architectures (e.g. powerpc) can only set syscall
+ * return values on syscall exit during ptrace.
+ */
+const bool ptrace_entry_set_syscall_nr = true;
+const bool ptrace_entry_set_syscall_ret =
+#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT
+ true;
+#else
+ false;
+#endif
+
+/*
+ * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
* architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
*/
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
-#define HAVE_GETREGS
+# define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
+# define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
+#else
+# define ARCH_GETREGS(_regs) ({ \
+ struct iovec __v; \
+ __v.iov_base = &(_regs); \
+ __v.iov_len = sizeof(_regs); \
+ ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \
+ })
+# define ARCH_SETREGS(_regs) ({ \
+ struct iovec __v; \
+ __v.iov_base = &(_regs); \
+ __v.iov_len = sizeof(_regs); \
+ ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \
+ })
#endif
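A minimal sketch of how these accessors combine with the setters defined earlier. It assumes this file's context: the macros reference `tracee` and (on some architectures) `_metadata` directly, which is why both appear as parameters; the helper name is hypothetical, and it mirrors what __change_syscall() below actually does.

```c
/* Sketch only: read, rewrite, and write back a stopped tracee's registers. */
static void skip_syscall_sketch(struct __test_metadata *_metadata,
				pid_t tracee)
{
	ARCH_REGS regs;

	if (ARCH_GETREGS(regs))        /* non-zero return means ptrace failed */
		return;
	SYSCALL_NUM_SET(regs, -1);     /* syscall -1 is skipped */
	SYSCALL_RET_SET(regs, -ESRCH); /* fake an error return */
	ARCH_SETREGS(regs);            /* flush the changes to the tracee */
}
```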
/* Architecture-specific syscall fetching routine. */
int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
{
ARCH_REGS regs;
-#ifdef HAVE_GETREGS
- EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
- TH_LOG("PTRACE_GETREGS failed");
- return -1;
- }
-#else
- struct iovec iov;
- iov.iov_base = &regs;
- iov.iov_len = sizeof(regs);
- EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
- TH_LOG("PTRACE_GETREGSET failed");
+ EXPECT_EQ(0, ARCH_GETREGS(regs)) {
return -1;
}
-#endif
-#if defined(__mips__)
- if (regs.SYSCALL_NUM == __NR_O32_Linux)
- return regs.SYSCALL_SYSCALL_NUM;
-#endif
- return regs.SYSCALL_NUM;
+ return SYSCALL_NUM(regs);
}
/* Architecture-specific syscall changing routine. */
-void change_syscall(struct __test_metadata *_metadata,
- pid_t tracee, int syscall, int result)
+void __change_syscall(struct __test_metadata *_metadata,
+ pid_t tracee, long *syscall, long *ret)
{
- int ret;
- ARCH_REGS regs;
-#ifdef HAVE_GETREGS
- ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
-#else
- struct iovec iov;
- iov.iov_base = &regs;
- iov.iov_len = sizeof(regs);
- ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
-#endif
- EXPECT_EQ(0, ret) {}
+ ARCH_REGS orig, regs;
-#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
- defined(__s390__) || defined(__hppa__) || defined(__riscv)
- {
- regs.SYSCALL_NUM = syscall;
- }
-#elif defined(__mips__)
- {
- if (regs.SYSCALL_NUM == __NR_O32_Linux)
- regs.SYSCALL_SYSCALL_NUM = syscall;
- else
- regs.SYSCALL_NUM = syscall;
- }
+ /* Do not get/set registers if we have nothing to do. */
+ if (!syscall && !ret)
+ return;
-#elif defined(__arm__)
-# ifndef PTRACE_SET_SYSCALL
-# define PTRACE_SET_SYSCALL 23
-# endif
- {
- ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
- EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, ARCH_GETREGS(regs)) {
+ return;
}
+ orig = regs;
-#elif defined(__aarch64__)
-# ifndef NT_ARM_SYSTEM_CALL
-# define NT_ARM_SYSTEM_CALL 0x404
-# endif
- {
- iov.iov_base = &syscall;
- iov.iov_len = sizeof(syscall);
- ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
- &iov);
- EXPECT_EQ(0, ret);
- }
+ if (syscall)
+ SYSCALL_NUM_SET(regs, *syscall);
-#else
- ASSERT_EQ(1, 0) {
- TH_LOG("How is the syscall changed on this architecture?");
- }
-#endif
+ if (ret)
+ SYSCALL_RET_SET(regs, *ret);
- /* If syscall is skipped, change return value. */
- if (syscall == -1)
-#ifdef SYSCALL_NUM_RET_SHARE_REG
- TH_LOG("Can't modify syscall return on this architecture");
-#else
- regs.SYSCALL_RET = result;
-#endif
+ /* Flush any register changes made. */
+ if (memcmp(&orig, &regs, sizeof(orig)) != 0)
+ EXPECT_EQ(0, ARCH_SETREGS(regs));
+}
-#ifdef HAVE_GETREGS
- ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
-#else
- iov.iov_base = &regs;
- iov.iov_len = sizeof(regs);
- ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
-#endif
- EXPECT_EQ(0, ret);
+/* Change only syscall number. */
+void change_syscall_nr(struct __test_metadata *_metadata,
+ pid_t tracee, long syscall)
+{
+ __change_syscall(_metadata, tracee, &syscall, NULL);
+}
+
+/* Change syscall return value (and set syscall number to -1). */
+void change_syscall_ret(struct __test_metadata *_metadata,
+ pid_t tracee, long ret)
+{
+ long syscall = -1;
+
+ __change_syscall(_metadata, tracee, &syscall, &ret);
}
-void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
+void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
int status, void *args)
{
int ret;
@@ -1764,17 +1957,17 @@ void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
case 0x1002:
/* change getpid to getppid. */
EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
- change_syscall(_metadata, tracee, __NR_getppid, 0);
+ change_syscall_nr(_metadata, tracee, __NR_getppid);
break;
case 0x1003:
/* skip gettid with valid return code. */
EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
- change_syscall(_metadata, tracee, -1, 45000);
+ change_syscall_ret(_metadata, tracee, 45000);
break;
case 0x1004:
/* skip openat with error. */
EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
- change_syscall(_metadata, tracee, -1, -ESRCH);
+ change_syscall_ret(_metadata, tracee, -ESRCH);
break;
case 0x1005:
/* do nothing (allow getppid) */
@@ -1789,12 +1982,21 @@ void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
}
+FIXTURE(TRACE_syscall) {
+ struct sock_fprog prog;
+ pid_t tracer, mytid, mypid, parent;
+ long syscall_nr;
+};
+
void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
int status, void *args)
{
- int ret, nr;
+ int ret;
unsigned long msg;
static bool entry;
+ long syscall_nr_val, syscall_ret_val;
+ long *syscall_nr = NULL, *syscall_ret = NULL;
+ FIXTURE_DATA(TRACE_syscall) *self = args;
/*
* The traditional way to tell PTRACE_SYSCALL entry/exit
@@ -1808,22 +2010,64 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
- if (!entry)
- return;
+ /*
+ * Some architectures only support setting return values during
+ * syscall exit under ptrace, and on exit the syscall number may
+ * no longer be available. Therefore, save the initial syscall
+ * number here, so it can be examined during both entry and exit
+ * phases.
+ */
+ if (entry)
+ self->syscall_nr = get_syscall(_metadata, tracee);
- nr = get_syscall(_metadata, tracee);
+ /*
+ * Depending on the architecture's syscall setting abilities, we
+ * pick which things to set during this phase (entry or exit).
+ */
+ if (entry == ptrace_entry_set_syscall_nr)
+ syscall_nr = &syscall_nr_val;
+ if (entry == ptrace_entry_set_syscall_ret)
+ syscall_ret = &syscall_ret_val;
+
+ /* Now handle the actual rewriting cases. */
+ switch (self->syscall_nr) {
+ case __NR_getpid:
+ syscall_nr_val = __NR_getppid;
+ /* Never change syscall return for this case. */
+ syscall_ret = NULL;
+ break;
+ case __NR_gettid:
+ syscall_nr_val = -1;
+ syscall_ret_val = 45000;
+ break;
+ case __NR_openat:
+ syscall_nr_val = -1;
+ syscall_ret_val = -ESRCH;
+ break;
+ default:
+ /* Unhandled, do nothing. */
+ return;
+ }
- if (nr == __NR_getpid)
- change_syscall(_metadata, tracee, __NR_getppid, 0);
- if (nr == __NR_gettid)
- change_syscall(_metadata, tracee, -1, 45000);
- if (nr == __NR_openat)
- change_syscall(_metadata, tracee, -1, -ESRCH);
+ __change_syscall(_metadata, tracee, syscall_nr, syscall_ret);
}
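For background on the entry/exit detection used above (a sketch under the assumption that the child is stopped in a PTRACE_SYSCALL syscall-stop, with `tracee` in scope as in the callback):

```c
/* Sketch: classify a syscall-stop for a child stopped under PTRACE_SYSCALL. */
unsigned long msg;

if (ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg) == 0) {
	if (msg == PTRACE_EVENTMSG_SYSCALL_ENTRY) {
		/* Registers still hold the syscall number and arguments. */
	} else if (msg == PTRACE_EVENTMSG_SYSCALL_EXIT) {
		/* Registers hold the return value; the number may be gone. */
	}
}
```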
-FIXTURE(TRACE_syscall) {
- struct sock_fprog prog;
- pid_t tracer, mytid, mypid, parent;
+FIXTURE_VARIANT(TRACE_syscall) {
+ /*
+ * All of the SECCOMP_RET_TRACE behaviors can be tested with either
+ * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL.
+ * This indicates if we should use SECCOMP_RET_TRACE (false), or
+ * ptrace (true).
+ */
+ bool use_ptrace;
+};
+
+FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) {
+ .use_ptrace = true,
+};
+
+FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) {
+ .use_ptrace = false,
};
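With this variant table, every TEST_F(TRACE_syscall, ...) below runs once per variant, and a test body can branch on the variant data if it needs to. A hypothetical sketch (the test name is made up, and it assumes the harness exposes `variant` to TEST_F bodies as it does to FIXTURE_SETUP):

```c
TEST_F(TRACE_syscall, variant_sketch) /* hypothetical, not in the patch */
{
	if (variant->use_ptrace)
		TH_LOG("running under plain ptrace()+PTRACE_SYSCALL");
	else
		TH_LOG("running under SECCOMP_RET_TRACE");
}
```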
FIXTURE_SETUP(TRACE_syscall)
@@ -1841,12 +2085,11 @@ FIXTURE_SETUP(TRACE_syscall)
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
-
- memset(&self->prog, 0, sizeof(self->prog));
- self->prog.filter = malloc(sizeof(filter));
- ASSERT_NE(NULL, self->prog.filter);
- memcpy(self->prog.filter, filter, sizeof(filter));
- self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
/* Prepare some testable syscall results. */
self->mytid = syscall(__NR_gettid);
@@ -1864,60 +2107,48 @@ FIXTURE_SETUP(TRACE_syscall)
ASSERT_NE(self->parent, self->mypid);
/* Launch tracer. */
- self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
- false);
-}
+ self->tracer = setup_trace_fixture(_metadata,
+ variant->use_ptrace ? tracer_ptrace
+ : tracer_seccomp,
+ self, variant->use_ptrace);
-FIXTURE_TEARDOWN(TRACE_syscall)
-{
- teardown_trace_fixture(_metadata, self->tracer);
- if (self->prog.filter)
- free(self->prog.filter);
-}
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
-TEST_F(TRACE_syscall, ptrace_syscall_redirected)
-{
- /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
- teardown_trace_fixture(_metadata, self->tracer);
- self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
- true);
+ if (variant->use_ptrace)
+ return;
- /* Tracer will redirect getpid to getppid. */
- EXPECT_NE(self->mypid, syscall(__NR_getpid));
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
}
-TEST_F(TRACE_syscall, ptrace_syscall_errno)
+FIXTURE_TEARDOWN(TRACE_syscall)
{
- /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
teardown_trace_fixture(_metadata, self->tracer);
- self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
- true);
-
- /* Tracer should skip the open syscall, resulting in ESRCH. */
- EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
}
-TEST_F(TRACE_syscall, ptrace_syscall_faked)
+TEST(negative_ENOSYS)
{
- /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
- teardown_trace_fixture(_metadata, self->tracer);
- self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
- true);
+ /*
+ * There should be no difference between an "internal" skip
+ * and userspace asking for syscall "-1".
+ */
+ errno = 0;
+ EXPECT_EQ(-1, syscall(-1));
+ EXPECT_EQ(errno, ENOSYS);
+ /* And no difference for "still not valid but not -1". */
+ errno = 0;
+ EXPECT_EQ(-1, syscall(-101));
+ EXPECT_EQ(errno, ENOSYS);
+}
- /* Tracer should skip the gettid syscall, resulting fake pid. */
- EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
+TEST_F(TRACE_syscall, negative_ENOSYS)
+{
+ negative_ENOSYS(_metadata);
}
TEST_F(TRACE_syscall, syscall_allowed)
{
- long ret;
-
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
- ASSERT_EQ(0, ret);
-
/* getppid works as expected (no changes). */
EXPECT_EQ(self->parent, syscall(__NR_getppid));
EXPECT_NE(self->mypid, syscall(__NR_getppid));
@@ -1925,14 +2156,6 @@ TEST_F(TRACE_syscall, syscall_allowed)
TEST_F(TRACE_syscall, syscall_redirected)
{
- long ret;
-
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
- ASSERT_EQ(0, ret);
-
/* getpid has been redirected to getppid as expected. */
EXPECT_EQ(self->parent, syscall(__NR_getpid));
EXPECT_NE(self->mypid, syscall(__NR_getpid));
@@ -1940,33 +2163,17 @@ TEST_F(TRACE_syscall, syscall_redirected)
TEST_F(TRACE_syscall, syscall_errno)
{
- long ret;
-
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* openat has been skipped and an errno return. */
+ /* Tracer should skip the open syscall, resulting in ESRCH. */
EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
}
TEST_F(TRACE_syscall, syscall_faked)
{
- long ret;
-
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* gettid has been skipped and an altered return value stored. */
+ /* Tracer skips the gettid syscall and stores an altered return value. */
EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
}
-TEST_F(TRACE_syscall, skip_after_RET_TRACE)
+TEST_F(TRACE_syscall, skip_after)
{
struct sock_filter filter[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
@@ -1981,14 +2188,7 @@ TEST_F(TRACE_syscall, skip_after_RET_TRACE)
};
long ret;
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* Install fixture filter. */
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* Install "errno on getppid" filter. */
+ /* Install additional "errno on getppid" filter. */
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
ASSERT_EQ(0, ret);
@@ -1998,69 +2198,7 @@ TEST_F(TRACE_syscall, skip_after_RET_TRACE)
EXPECT_EQ(EPERM, errno);
}
-TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
-{
- struct sock_filter filter[] = {
- BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
- offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
- };
- struct sock_fprog prog = {
- .len = (unsigned short)ARRAY_SIZE(filter),
- .filter = filter,
- };
- long ret;
-
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* Install fixture filter. */
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* Install "death on getppid" filter. */
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* Tracer will redirect getpid to getppid, and we should die. */
- EXPECT_NE(self->mypid, syscall(__NR_getpid));
-}
-
-TEST_F(TRACE_syscall, skip_after_ptrace)
-{
- struct sock_filter filter[] = {
- BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
- offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
- };
- struct sock_fprog prog = {
- .len = (unsigned short)ARRAY_SIZE(filter),
- .filter = filter,
- };
- long ret;
-
- /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
- teardown_trace_fixture(_metadata, self->tracer);
- self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
- true);
-
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* Install "errno on getppid" filter. */
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* Tracer will redirect getpid to getppid, and we should see EPERM. */
- EXPECT_EQ(-1, syscall(__NR_getpid));
- EXPECT_EQ(EPERM, errno);
-}
-
-TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
+TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS)
{
struct sock_filter filter[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
@@ -2075,15 +2213,7 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
};
long ret;
- /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
- teardown_trace_fixture(_metadata, self->tracer);
- self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
- true);
-
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* Install "death on getppid" filter. */
+ /* Install additional "death on getppid" filter. */
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
ASSERT_EQ(0, ret);
@@ -3068,7 +3198,7 @@ TEST(get_metadata)
/* Only real root can get metadata. */
if (geteuid()) {
- XFAIL(return, "get_metadata requires real root");
+ SKIP(return, "get_metadata requires real root");
return;
}
@@ -3111,7 +3241,7 @@ TEST(get_metadata)
ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
EXPECT_EQ(sizeof(md), ret) {
if (errno == EINVAL)
- XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
+ SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
}
EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
@@ -3127,14 +3257,14 @@ skip:
ASSERT_EQ(0, kill(pid, SIGKILL));
}
-static int user_trap_syscall(int nr, unsigned int flags)
+static int user_notif_syscall(int nr, unsigned int flags)
{
struct sock_filter filter[] = {
- BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
- BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
- BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog = {
@@ -3173,7 +3303,7 @@ TEST(user_notification_basic)
/* Check that we get -ENOSYS with no listener attached */
if (pid == 0) {
- if (user_trap_syscall(__NR_getppid, 0) < 0)
+ if (user_notif_syscall(__NR_getppid, 0) < 0)
exit(1);
ret = syscall(__NR_getppid);
exit(ret >= 0 || errno != ENOSYS);
@@ -3190,13 +3320,13 @@ TEST(user_notification_basic)
EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
/* Check that the basic notification machinery works */
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
/* Installing a second listener in the chain should EBUSY */
- EXPECT_EQ(user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER),
+ EXPECT_EQ(user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER),
-1);
EXPECT_EQ(errno, EBUSY);
@@ -3257,15 +3387,20 @@ TEST(user_notification_with_tsync)
int ret;
unsigned int flags;
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
/* these were exclusive */
flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
SECCOMP_FILTER_FLAG_TSYNC;
- ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags));
+ ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags));
ASSERT_EQ(EINVAL, errno);
/* but now they're not */
flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
- ret = user_trap_syscall(__NR_getppid, flags);
+ ret = user_notif_syscall(__NR_getppid, flags);
close(ret);
ASSERT_LE(0, ret);
}
@@ -3283,8 +3418,8 @@ TEST(user_notification_kill_in_middle)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
/*
@@ -3337,8 +3472,8 @@ TEST(user_notification_signal)
ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
- listener = user_trap_syscall(__NR_gettid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_gettid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
@@ -3407,8 +3542,8 @@ TEST(user_notification_closed_listener)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
/*
@@ -3439,10 +3574,13 @@ TEST(user_notification_child_pid_ns)
struct seccomp_notif req = {};
struct seccomp_notif_resp resp = {};
- ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0);
+ ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) {
+ if (errno == EINVAL)
+ SKIP(return, "kernel missing CLONE_NEWUSER support");
+ };
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
@@ -3481,8 +3619,8 @@ TEST(user_notification_sibling_pid_ns)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
@@ -3504,7 +3642,10 @@ TEST(user_notification_sibling_pid_ns)
}
/* Create the sibling ns, and sibling in it. */
- ASSERT_EQ(unshare(CLONE_NEWPID), 0);
+ ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
+ if (errno == EPERM)
+ SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
+ }
ASSERT_EQ(errno, 0);
pid2 = fork();
@@ -3546,8 +3687,8 @@ TEST(user_notification_fault_recv)
ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
@@ -3584,16 +3725,6 @@ TEST(seccomp_get_notif_sizes)
EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
}
-static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
-{
-#ifdef __NR_kcmp
- return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
-#else
- errno = ENOSYS;
- return -1;
-#endif
-}
-
TEST(user_notification_continue)
{
pid_t pid;
@@ -3608,7 +3739,7 @@ TEST(user_notification_continue)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
- listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
@@ -3618,20 +3749,14 @@ TEST(user_notification_continue)
int dup_fd, pipe_fds[2];
pid_t self;
- ret = pipe(pipe_fds);
- if (ret < 0)
- exit(1);
+ ASSERT_GE(pipe(pipe_fds), 0);
dup_fd = dup(pipe_fds[0]);
- if (dup_fd < 0)
- exit(1);
+ ASSERT_GE(dup_fd, 0);
+ EXPECT_NE(pipe_fds[0], dup_fd);
self = getpid();
-
- ret = filecmp(self, self, pipe_fds[0], dup_fd);
- if (ret)
- exit(2);
-
+ ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0);
exit(0);
}
@@ -3672,7 +3797,7 @@ TEST(user_notification_continue)
resp.val = 0;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
if (errno == EINVAL)
- XFAIL(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
+ SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
}
skip:
@@ -3680,15 +3805,342 @@ skip:
EXPECT_EQ(true, WIFEXITED(status));
EXPECT_EQ(0, WEXITSTATUS(status)) {
if (WEXITSTATUS(status) == 2) {
- XFAIL(return, "Kernel does not support kcmp() syscall");
+ SKIP(return, "Kernel does not support kcmp() syscall");
return;
}
}
}
+TEST(user_notification_filter_empty)
+{
+ pid_t pid;
+ long ret;
+ int status;
+ struct pollfd pollfd;
+ struct __clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ pid = sys_clone3(&args, sizeof(args));
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int listener;
+
+ listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ if (listener < 0)
+ _exit(EXIT_FAILURE);
+
+ if (dup2(listener, 200) != 200)
+ _exit(EXIT_FAILURE);
+
+ close(listener);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ /*
+ * The seccomp filter has become unused, so we should be notified once
+ * the kernel gets around to cleaning up the task struct.
+ */
+ pollfd.fd = 200;
+ pollfd.events = POLLHUP;
+
+ EXPECT_GT(poll(&pollfd, 1, 2000), 0);
+ EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
+}
+
+static void *do_thread(void *data)
+{
+ return NULL;
+}
+
+TEST(user_notification_filter_empty_threaded)
+{
+ pid_t pid;
+ long ret;
+ int status;
+ struct pollfd pollfd;
+ struct __clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ pid = sys_clone3(&args, sizeof(args));
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ pid_t pid1, pid2;
+ int listener, status;
+ pthread_t thread;
+
+ listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ if (listener < 0)
+ _exit(EXIT_FAILURE);
+
+ if (dup2(listener, 200) != 200)
+ _exit(EXIT_FAILURE);
+
+ close(listener);
+
+ pid1 = fork();
+ if (pid1 < 0)
+ _exit(EXIT_FAILURE);
+
+ if (pid1 == 0)
+ _exit(EXIT_SUCCESS);
+
+ pid2 = fork();
+ if (pid2 < 0)
+ _exit(EXIT_FAILURE);
+
+ if (pid2 == 0)
+ _exit(EXIT_SUCCESS);
+
+ if (pthread_create(&thread, NULL, do_thread, NULL) ||
+ pthread_join(thread, NULL))
+ _exit(EXIT_FAILURE);
+
+ if (pthread_create(&thread, NULL, do_thread, NULL) ||
+ pthread_join(thread, NULL))
+ _exit(EXIT_FAILURE);
+
+ if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) ||
+ WEXITSTATUS(status))
+ _exit(EXIT_FAILURE);
+
+ if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) ||
+ WEXITSTATUS(status))
+ _exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ /*
+ * The seccomp filter has become unused, so we should be notified once
+ * the kernel gets around to cleaning up the task struct.
+ */
+ pollfd.fd = 200;
+ pollfd.events = POLLHUP;
+
+ EXPECT_GT(poll(&pollfd, 1, 2000), 0);
+ EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
+}
+
+TEST(user_notification_addfd)
+{
+ pid_t pid;
+ long ret;
+ int status, listener, memfd, fd;
+ struct seccomp_notif_addfd addfd = {};
+ struct seccomp_notif_addfd_small small = {};
+ struct seccomp_notif_addfd_big big = {};
+ struct seccomp_notif req = {};
+ struct seccomp_notif_resp resp = {};
+ /* 100 ms */
+ struct timespec delay = { .tv_nsec = 100000000 };
+
+ memfd = memfd_create("test", 0);
+ ASSERT_GE(memfd, 0);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Check that the basic notification machinery works */
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ ASSERT_GE(listener, 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
+ exit(1);
+ exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
+ }
+
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+ addfd.srcfd = memfd;
+ addfd.newfd = 0;
+ addfd.id = req.id;
+ addfd.flags = 0x0;
+
+ /* Verify bad newfd_flags cannot be set */
+ addfd.newfd_flags = ~O_CLOEXEC;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EINVAL);
+ addfd.newfd_flags = O_CLOEXEC;
+
+ /* Verify bad flags cannot be set */
+ addfd.flags = 0xff;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EINVAL);
+ addfd.flags = 0;
+
+ /* Verify that remote_fd cannot be set without setting flags */
+ addfd.newfd = 1;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EINVAL);
+ addfd.newfd = 0;
+
+ /* Verify small size cannot be set */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* Verify we can't send bits filled in unknown buffer area */
+ memset(&big, 0xAA, sizeof(big));
+ big.addfd = addfd;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1);
+ EXPECT_EQ(errno, E2BIG);
+
+
+ /* Verify we can set an arbitrary remote fd */
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
+ /*
+ * The child has fds 0(stdin), 1(stdout), 2(stderr), 3(memfd),
+ * 4(listener), so the newly allocated fd should be 5.
+ */
+ EXPECT_EQ(fd, 5);
+ EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
+
+ /* Verify we can set an arbitrary remote fd with large size */
+ memset(&big, 0x0, sizeof(big));
+ big.addfd = addfd;
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
+ EXPECT_EQ(fd, 6);
+
+ /* Verify we can set a specific remote fd */
+ addfd.newfd = 42;
+ addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
+ EXPECT_EQ(fd, 42);
+ EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
+
+ /* Resume syscall */
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ /*
+ * This sets the ID of the ADD FD request to the last request plus 1.
+ * Notification IDs increment by 1 per notification.
+ */
+ addfd.id = req.id + 1;
+
+ /* This spins until the underlying notification is generated */
+ while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) == -1 &&
+ errno != EINPROGRESS)
+ nanosleep(&delay, NULL);
+
+ memset(&req, 0, sizeof(req));
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ ASSERT_EQ(addfd.id, req.id);
+
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ /* Wait for child to finish. */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ close(memfd);
+}
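Condensing the supervisor-side flow the test above exercises (a sketch that assumes `listener` and `memfd` are set up as in the test, with the harness's EXPECT_EQ in scope):

```c
/* Sketch: inject an fd into a tracee blocked in SECCOMP_RET_USER_NOTIF. */
struct seccomp_notif req = {};
struct seccomp_notif_addfd addfd = {
	.srcfd = memfd,            /* supervisor-side fd to copy over */
	.newfd_flags = O_CLOEXEC,
};
int remote_fd;

EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
addfd.id = req.id;                 /* tie the injection to this notification */
remote_fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
/* remote_fd is the descriptor number as seen by the tracee. */
```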
+
+TEST(user_notification_addfd_rlimit)
+{
+ pid_t pid;
+ long ret;
+ int status, listener, memfd;
+ struct seccomp_notif_addfd addfd = {};
+ struct seccomp_notif req = {};
+ struct seccomp_notif_resp resp = {};
+ const struct rlimit lim = {
+ .rlim_cur = 0,
+ .rlim_max = 0,
+ };
+
+ memfd = memfd_create("test", 0);
+ ASSERT_GE(memfd, 0);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Check that the basic notification machinery works */
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ ASSERT_GE(listener, 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0)
+ exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
+
+
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+ ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0);
+
+ addfd.srcfd = memfd;
+ addfd.newfd_flags = O_CLOEXEC;
+ addfd.newfd = 0;
+ addfd.id = req.id;
+ addfd.flags = 0;
+
+ /* Should probably spot check /proc/sys/fs/file-nr */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EMFILE);
+
+ addfd.newfd = 100;
+ addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EBADF);
+
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ /* Wait for child to finish. */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ close(memfd);
+}
+
/*
* TODO:
- * - add microbenchmarks
* - expand NNP testing
* - better arch-specific TRACE and TRAP handlers.
* - endianness checking when appropriate
@@ -3696,7 +4148,6 @@ skip:
* - arch value testing (x86 modes especially)
* - verify that FILTER_FLAG_LOG filters generate log messages
* - verify that RET_LOG generates log messages
- * - ...
*/
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/seccomp/settings b/tools/testing/selftests/seccomp/settings
new file mode 100644
index 000000000000..ba4d85f74cd6
--- /dev/null
+++ b/tools/testing/selftests/seccomp/settings
@@ -0,0 +1 @@
+timeout=90
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
index ad0f8df2ca0a..8934a3766d20 100644
--- a/tools/testing/selftests/sigaltstack/sas.c
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -71,7 +71,7 @@ void my_usr1(int sig, siginfo_t *si, void *u)
swapcontext(&sc, &uc);
ksft_print_msg("%s\n", p->msg);
if (!p->flag) {
- ksft_exit_skip("[RUN]\tAborting\n");
+ ksft_exit_fail_msg("[RUN]\tAborting\n");
exit(EXIT_FAILURE);
}
}
@@ -144,7 +144,7 @@ int main(void)
err = sigaltstack(&stk, NULL);
if (err) {
if (errno == EINVAL) {
- ksft_exit_skip(
+ ksft_test_result_skip(
"[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n");
/*
* If test cases for the !SS_AUTODISARM variant were
diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore
index d5a2da428752..be8266f5d04c 100644
--- a/tools/testing/selftests/splice/.gitignore
+++ b/tools/testing/selftests/splice/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
default_file_splice_read
+splice_read
diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile
index e519b159b60d..541cd826d5a5 100644
--- a/tools/testing/selftests/splice/Makefile
+++ b/tools/testing/selftests/splice/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := default_file_splice_read.sh
-TEST_GEN_PROGS_EXTENDED := default_file_splice_read
+TEST_PROGS := default_file_splice_read.sh short_splice_read.sh
+TEST_GEN_PROGS_EXTENDED := default_file_splice_read splice_read
include ../lib.mk
diff --git a/tools/testing/selftests/splice/config b/tools/testing/selftests/splice/config
new file mode 100644
index 000000000000..058c928368b8
--- /dev/null
+++ b/tools/testing/selftests/splice/config
@@ -0,0 +1 @@
+CONFIG_TEST_LKM=m
diff --git a/tools/testing/selftests/splice/settings b/tools/testing/selftests/splice/settings
new file mode 100644
index 000000000000..89cedfc0d12b
--- /dev/null
+++ b/tools/testing/selftests/splice/settings
@@ -0,0 +1 @@
+timeout=5
diff --git a/tools/testing/selftests/splice/short_splice_read.sh b/tools/testing/selftests/splice/short_splice_read.sh
new file mode 100755
index 000000000000..7810d3589d9a
--- /dev/null
+++ b/tools/testing/selftests/splice/short_splice_read.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+set -e
+
+ret=0
+
+do_splice()
+{
+ filename="$1"
+ bytes="$2"
+ expected="$3"
+
+ out=$(./splice_read "$filename" "$bytes" | cat)
+ if [ "$out" = "$expected" ] ; then
+ echo "ok: $filename $bytes"
+ else
+ echo "FAIL: $filename $bytes"
+ ret=1
+ fi
+}
+
+test_splice()
+{
+ filename="$1"
+
+ full=$(cat "$filename")
+ two=$(echo "$full" | grep -m1 . | cut -c-2)
+
+ # Make sure full splice has the same contents as a standard read.
+ do_splice "$filename" 4096 "$full"
+
+ # Make sure a partial splice see the first two characters.
+ do_splice "$filename" 2 "$two"
+}
+
+# proc_single_open(), seq_read()
+test_splice /proc/$$/limits
+# special open, seq_read()
+test_splice /proc/$$/comm
+
+# proc_handler, proc_dointvec_minmax
+test_splice /proc/sys/fs/nr_open
+# proc_handler, proc_dostring
+test_splice /proc/sys/kernel/modprobe
+# proc_handler, special read
+test_splice /proc/sys/kernel/version
+
+if ! [ -d /sys/module/test_module/sections ] ; then
+ modprobe test_module
+fi
+# kernfs, attr
+test_splice /sys/module/test_module/coresize
+# kernfs, binattr
+test_splice /sys/module/test_module/sections/.init.text
+
+exit $ret
diff --git a/tools/testing/selftests/splice/splice_read.c b/tools/testing/selftests/splice/splice_read.c
new file mode 100644
index 000000000000..46dae6a25cfb
--- /dev/null
+++ b/tools/testing/selftests/splice/splice_read.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+int main(int argc, char *argv[])
+{
+ int fd;
+ size_t size;
+ ssize_t spliced;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s INPUT [BYTES]\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ fd = open(argv[1], O_RDONLY);
+ if (fd < 0) {
+ perror(argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ if (argc == 3)
+ size = atol(argv[2]);
+ else {
+ struct stat statbuf;
+
+ if (fstat(fd, &statbuf) < 0) {
+ perror(argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ if (statbuf.st_size > INT_MAX) {
+ fprintf(stderr, "%s: Too big\n", argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ size = statbuf.st_size;
+ }
+
+ /* splice(2) file to stdout. */
+ spliced = splice(fd, NULL, STDOUT_FILENO, NULL,
+ size, SPLICE_F_MOVE);
+ if (spliced < 0) {
+ perror("splice");
+ return EXIT_FAILURE;
+ }
+
+ close(fd);
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c
index 3824b66f41a0..414a617db993 100644
--- a/tools/testing/selftests/sync/sync_test.c
+++ b/tools/testing/selftests/sync/sync_test.c
@@ -86,9 +86,9 @@ int main(void)
int err;
ksft_print_header();
- ksft_set_plan(3 + 7);
sync_api_supported();
+ ksft_set_plan(3 + 7);
ksft_print_msg("[RUN]\tTesting sync framework\n");
diff --git a/tools/testing/selftests/sysctl/config b/tools/testing/selftests/sysctl/config
index 6ca14800d755..fc263efd1fad 100644
--- a/tools/testing/selftests/sysctl/config
+++ b/tools/testing/selftests/sysctl/config
@@ -1 +1 @@
-CONFIG_TEST_SYSCTL=y
+CONFIG_TEST_SYSCTL=m
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index 6a970b127c9b..19515dcb7d04 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -39,16 +39,7 @@ ALL_TESTS="$ALL_TESTS 0003:1:1:int_0002"
ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001"
ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003"
ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001"
-
-test_modprobe()
-{
- if [ ! -d $DIR ]; then
- echo "$0: $DIR not present" >&2
- echo "You must have the following enabled in your kernel:" >&2
- cat $TEST_DIR/config >&2
- exit $ksft_skip
- fi
-}
+ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int"
function allow_user_defaults()
{
@@ -122,13 +113,15 @@ test_reqs()
function load_req_mod()
{
- if [ ! -d $DIR ]; then
+ if [ ! -d $SYSCTL ]; then
if ! modprobe -q -n $TEST_DRIVER; then
echo "$0: module $TEST_DRIVER not found [SKIP]"
+ echo "You must set CONFIG_TEST_SYSCTL=m in your kernel" >&2
exit $ksft_skip
fi
modprobe $TEST_DRIVER
if [ $? -ne 0 ]; then
+ echo "$0: modprobe $TEST_DRIVER failed."
exit
fi
fi
@@ -752,6 +745,46 @@ sysctl_test_0006()
run_bitmaptest
}
+sysctl_test_0007()
+{
+ TARGET="${SYSCTL}/boot_int"
+ if [ ! -f $TARGET ]; then
+ echo "Skipping test for $TARGET as it is not present ..."
+ return $ksft_skip
+ fi
+
+ if [ -d $DIR ]; then
+ echo "Boot param test only possible sysctl_test is built-in, not module:"
+ cat $TEST_DIR/config >&2
+ return $ksft_skip
+ fi
+
+ echo -n "Testing if $TARGET is set to 1 ..."
+ ORIG=$(cat "${TARGET}")
+
+ if [ x$ORIG = "x1" ]; then
+ echo "ok"
+ return 0
+ fi
+ echo "FAIL"
+ echo "Checking if /proc/cmdline contains setting of the expected parameter ..."
+ if [ ! -f /proc/cmdline ]; then
+ echo "/proc/cmdline does not exist, test inconclusive"
+ return 0
+ fi
+
+ FOUND=$(grep -c "sysctl[./]debug[./]test_sysctl[./]boot_int=1" /proc/cmdline)
+ if [ $FOUND = "1" ]; then
+ echo "Kernel param found but $TARGET is not 1, TEST FAILED"
+ rc=1
+ test_rc
+ fi
+
+ echo "Skipping test, expected kernel parameter missing."
+ echo "To perform this test, make sure kernel is booted with parameter: sysctl.debug.test_sysctl.boot_int=1"
+ return $ksft_skip
+}
+
list_tests()
{
echo "Test ID list:"
@@ -766,6 +799,7 @@ list_tests()
echo "0004 x $(get_test_count 0004) - tests proc_douintvec()"
echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array"
echo "0006 x $(get_test_count 0006) - tests proc_do_large_bitmap()"
+ echo "0007 x $(get_test_count 0007) - tests setting sysctl from kernel boot param"
}
usage()
@@ -929,7 +963,6 @@ test_reqs
allow_user_defaults
check_production_sysctl_writes_strict
load_req_mod
-test_modprobe
trap "test_finish" EXIT
diff --git a/tools/testing/selftests/tc-testing/bpf/Makefile b/tools/testing/selftests/tc-testing/Makefile
index be5a5e542804..91fee5c43274 100644
--- a/tools/testing/selftests/tc-testing/bpf/Makefile
+++ b/tools/testing/selftests/tc-testing/Makefile
@@ -1,11 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
-APIDIR := ../../../../include/uapi
+top_srcdir = $(abspath ../../../..)
+APIDIR := $(top_srcdir)/include/uapi
TEST_GEN_FILES = action.o
-top_srcdir = ../../../../..
KSFT_KHDR_INSTALL := 1
-include ../../lib.mk
+include ../lib.mk
CLANG ?= clang
LLC ?= llc
@@ -28,3 +28,6 @@ $(OUTPUT)/%.o: %.c
$(CLANG) $(CLANG_FLAGS) \
-O2 -target bpf -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
+
+TEST_PROGS += ./tdc.sh
+TEST_FILES := tdc*.py Tdc*.py plugins plugin-lib tc-tests
diff --git a/tools/testing/selftests/tc-testing/bpf/action.c b/tools/testing/selftests/tc-testing/action.c
index c32b99b80e19..c32b99b80e19 100644
--- a/tools/testing/selftests/tc-testing/bpf/action.c
+++ b/tools/testing/selftests/tc-testing/action.c
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index 47a3082b6661..503982b8f295 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -260,10 +260,10 @@
255
]
],
- "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967296 cookie 12345",
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967296 cookie 123456",
"expExitCode": "255",
"verifyCmd": "$TC action ls action bpf",
- "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*cookie 12345",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*cookie 123456",
"matchCount": "0",
"teardown": [
"$TC action flush action bpf"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
index 88ec134872e4..072febf25f55 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
@@ -469,7 +469,7 @@
255
]
],
- "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie 123456789abcde \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
"expExitCode": "0",
"verifyCmd": "$TC actions ls action csum",
"matchPattern": "^[ \t]+index [0-9]* ref",
@@ -492,7 +492,7 @@
1,
255
],
- "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
+ "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie 123456789abcde \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
],
"cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
"expExitCode": "0",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
index f8ea6f5fa8e9..72cdc3c800a5 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
@@ -1472,6 +1472,31 @@
]
},
{
+ "id": "94bb",
+ "name": "Add pedit action with LAYERED_OP ip6 traffic_class",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit ex munge ip6 traffic_class set 0x40 continue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "ipv6\\+0: val 04000000 mask f00fffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
"id": "6f5e",
"name": "Add pedit action with LAYERED_OP ip6 flow_lbl",
"category": [
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
index fbeb9197697d..d06346968bcb 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
@@ -629,7 +629,7 @@
"cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022 index 1",
"expExitCode": "0",
"verifyCmd": "$TC actions get action tunnel_key index 1",
- "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022.*index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt[s]? 0102:80:00880022.*index 1",
"matchCount": "1",
"teardown": [
"$TC actions flush action tunnel_key"
@@ -653,7 +653,7 @@
"cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344 index 1",
"expExitCode": "0",
"verifyCmd": "$TC actions get action tunnel_key index 1",
- "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344.*index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt[s]? 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344.*index 1",
"matchCount": "1",
"teardown": [
"$TC actions flush action tunnel_key"
@@ -677,7 +677,7 @@
"cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 824212:80:00880022 index 1",
"expExitCode": "255",
"verifyCmd": "$TC actions get action tunnel_key index 1",
- "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 824212:80:00880022.*index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt[s]? 824212:80:00880022.*index 1",
"matchCount": "0",
"teardown": [
"$TC actions flush action tunnel_key"
@@ -701,7 +701,7 @@
"cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:4224:00880022 index 1",
"expExitCode": "255",
"verifyCmd": "$TC actions get action tunnel_key index 1",
- "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:4224:00880022.*index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt[s]? 0102:4224:00880022.*index 1",
"matchCount": "0",
"teardown": [
"$TC actions flush action tunnel_key"
@@ -725,7 +725,7 @@
"cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288 index 1",
"expExitCode": "255",
"verifyCmd": "$TC actions get action tunnel_key index 1",
- "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288.*index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt[s]? 0102:80:4288.*index 1",
"matchCount": "0",
"teardown": [
"$TC actions flush action tunnel_key"
@@ -749,7 +749,7 @@
"cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288428822 index 1",
"expExitCode": "255",
"verifyCmd": "$TC actions get action tunnel_key index 1",
- "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288428822.*index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt[s]? 0102:80:4288428822.*index 1",
"matchCount": "0",
"teardown": [
"$TC actions flush action tunnel_key"
@@ -773,7 +773,7 @@
"cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42: index 1",
"expExitCode": "255",
"verifyCmd": "$TC actions get action tunnel_key index 1",
- "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:.*index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt[s]? 0102:80:00880022,0408:42:.*index 1",
"matchCount": "0",
"teardown": [
"$TC actions flush action tunnel_key"
@@ -818,12 +818,12 @@
1,
255
],
- "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 index 1 cookie aabbccddeeff112233445566778800a"
+ "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 index 1 cookie 123456"
],
- "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 id 11 csum reclassify index 1 cookie a1b1c1d1",
+ "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 id 11 csum reclassify index 1 cookie 123456",
"expExitCode": "0",
"verifyCmd": "$TC actions get action tunnel_key index 1",
- "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*csum reclassify.*index 1.*cookie a1b1c1d1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*csum reclassify.*index 1.*cookie 123456",
"matchCount": "1",
"teardown": [
"$TC actions flush action tunnel_key"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index 8877f7b2b809..bb543bf69d69 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -32,7 +32,7 @@
"setup": [
"$TC qdisc add dev $DEV2 ingress"
],
- "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 parent ffff: handle 0xffffffff flower action ok",
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress handle 0xffffffff flower action ok",
"expExitCode": "0",
"verifyCmd": "$TC filter show dev $DEV2 ingress",
"matchPattern": "filter protocol ip pref 1 flower.*handle 0xffffffff",
@@ -77,9 +77,9 @@
},
"setup": [
"$TC qdisc add dev $DEV2 ingress",
- "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop"
+ "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop"
],
- "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop",
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop",
"expExitCode": "2",
"verifyCmd": "$TC -s filter show dev $DEV2 ingress",
"matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
@@ -87,5 +87,43 @@
"teardown": [
"$TC qdisc del dev $DEV2 ingress"
]
+ },
+ {
+ "id": "7c65",
+ "name": "Add flower filter and then terse dump it",
+ "category": [
+ "filter",
+ "flower"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV2 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+ "matchPattern": "filter protocol ip pref 1 flower.*handle",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV2 ingress"
+ ]
+ },
+ {
+ "id": "d45e",
+ "name": "Add flower filter and verify that terse dump doesn't output filter key",
+ "category": [
+ "filter",
+ "flower"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV2 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+ "matchPattern": " dst_mac e4:11:22:11:4a:51",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV2 ingress"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
new file mode 100755
index 000000000000..7fe38c76db44
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+./tdc.py -c actions --nobuildebpf
+./tdc.py -c qdisc
diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py
index 6a2bd2cf528e..995f66ce43eb 100755
--- a/tools/testing/selftests/tc-testing/tdc_batch.py
+++ b/tools/testing/selftests/tc-testing/tdc_batch.py
@@ -72,21 +72,21 @@ mac_prefix = args.mac_prefix
def format_add_filter(device, prio, handle, skip, src_mac, dst_mac,
share_action):
- return ("filter add dev {} {} protocol ip parent ffff: handle {} "
+ return ("filter add dev {} {} protocol ip ingress handle {} "
" flower {} src_mac {} dst_mac {} action drop {}".format(
device, prio, handle, skip, src_mac, dst_mac, share_action))
def format_rep_filter(device, prio, handle, skip, src_mac, dst_mac,
share_action):
- return ("filter replace dev {} {} protocol ip parent ffff: handle {} "
+ return ("filter replace dev {} {} protocol ip ingress handle {} "
" flower {} src_mac {} dst_mac {} action drop {}".format(
device, prio, handle, skip, src_mac, dst_mac, share_action))
def format_del_filter(device, prio, handle, skip, src_mac, dst_mac,
share_action):
- return ("filter del dev {} {} protocol ip parent ffff: handle {} "
+ return ("filter del dev {} {} protocol ip ingress handle {} "
"flower".format(device, prio, handle))
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
index 080709cc4297..cd4a27ee1466 100644
--- a/tools/testing/selftests/tc-testing/tdc_config.py
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -24,7 +24,7 @@ NAMES = {
# Name of the namespace to use
'NS': 'tcut',
# Directory containing eBPF test programs
- 'EBPFDIR': './bpf'
+ 'EBPFDIR': './'
}
diff --git a/tools/testing/selftests/timens/clock_nanosleep.c b/tools/testing/selftests/timens/clock_nanosleep.c
index 8e7b7c72ef65..72d41b955fb2 100644
--- a/tools/testing/selftests/timens/clock_nanosleep.c
+++ b/tools/testing/selftests/timens/clock_nanosleep.c
@@ -119,7 +119,7 @@ int main(int argc, char *argv[])
ksft_set_plan(4);
- check_config_posix_timers();
+ check_supported_timers();
if (unshare_timens())
return 1;
diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c
index 098be7c83be3..52b6a1185f52 100644
--- a/tools/testing/selftests/timens/timens.c
+++ b/tools/testing/selftests/timens/timens.c
@@ -155,7 +155,7 @@ int main(int argc, char *argv[])
nscheck();
- check_config_posix_timers();
+ check_supported_timers();
ksft_set_plan(ARRAY_SIZE(clocks) * 2);
diff --git a/tools/testing/selftests/timens/timens.h b/tools/testing/selftests/timens/timens.h
index e09e7e39bc52..d4fc52d47146 100644
--- a/tools/testing/selftests/timens/timens.h
+++ b/tools/testing/selftests/timens/timens.h
@@ -14,15 +14,26 @@
#endif
static int config_posix_timers = true;
+static int config_alarm_timers = true;
-static inline void check_config_posix_timers(void)
+static inline void check_supported_timers(void)
{
+ struct timespec ts;
+
if (timer_create(-1, 0, 0) == -1 && errno == ENOSYS)
config_posix_timers = false;
+
+ if (clock_gettime(CLOCK_BOOTTIME_ALARM, &ts) == -1 && errno == EINVAL)
+ config_alarm_timers = false;
}
static inline bool check_skip(int clockid)
{
+ if (!config_alarm_timers && clockid == CLOCK_BOOTTIME_ALARM) {
+ ksft_test_result_skip("CLOCK_BOOTTIME_ALARM isn't supported\n");
+ return true;
+ }
+
if (config_posix_timers)
return false;
diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c
index 96dba11ebe44..5e7f0051bd7b 100644
--- a/tools/testing/selftests/timens/timer.c
+++ b/tools/testing/selftests/timens/timer.c
@@ -22,6 +22,9 @@ int run_test(int clockid, struct timespec now)
timer_t fd;
int i;
+ if (check_skip(clockid))
+ return 0;
+
for (i = 0; i < 2; i++) {
struct sigevent sevp = {.sigev_notify = SIGEV_NONE};
int flags = 0;
@@ -74,6 +77,8 @@ int main(int argc, char *argv[])
nscheck();
+ check_supported_timers();
+
ksft_set_plan(3);
clock_gettime(CLOCK_MONOTONIC, &mtime_now);
diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c
index eff1ec5ff215..9edd43d6b2c1 100644
--- a/tools/testing/selftests/timens/timerfd.c
+++ b/tools/testing/selftests/timens/timerfd.c
@@ -28,6 +28,9 @@ int run_test(int clockid, struct timespec now)
long long elapsed;
int fd, i;
+ if (check_skip(clockid))
+ return 0;
+
if (tclock_gettime(clockid, &now))
return pr_perror("clock_gettime(%d)", clockid);
@@ -81,6 +84,8 @@ int main(int argc, char *argv[])
nscheck();
+ check_supported_timers();
+
ksft_set_plan(3);
clock_gettime(CLOCK_MONOTONIC, &mtime_now);
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index 7656c7ce79d9..0e73a16874c4 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -13,6 +13,7 @@ DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \
TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS)
+TEST_FILES := settings
include ../lib.mk
diff --git a/tools/testing/selftests/timers/settings b/tools/testing/selftests/timers/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/timers/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh
index 8155c2ea7ccb..3e5ff29ee1dd 100755
--- a/tools/testing/selftests/tpm2/test_smoke.sh
+++ b/tools/testing/selftests/tpm2/test_smoke.sh
@@ -1,10 +1,10 @@
-#!/bin/bash
+#!/bin/sh
# SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
-python -m unittest -v tpm2_tests.SmokeTest
-python -m unittest -v tpm2_tests.AsyncTest
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
-CLEAR_CMD=$(which tpm2_clear)
-if [ -n $CLEAR_CMD ]; then
- tpm2_clear -T device
-fi
+[ -e /dev/tpm0 ] || exit $ksft_skip
+
+python3 -m unittest -v tpm2_tests.SmokeTest
+python3 -m unittest -v tpm2_tests.AsyncTest
diff --git a/tools/testing/selftests/tpm2/test_space.sh b/tools/testing/selftests/tpm2/test_space.sh
index a6f5e346635e..04c47b13fe8a 100755
--- a/tools/testing/selftests/tpm2/test_space.sh
+++ b/tools/testing/selftests/tpm2/test_space.sh
@@ -1,4 +1,9 @@
-#!/bin/bash
+#!/bin/sh
# SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
-python -m unittest -v tpm2_tests.SpaceTest
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+[ -e /dev/tpmrm0 ] || exit $ksft_skip
+
+python3 -m unittest -v tpm2_tests.SpaceTest
diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py
index d0fcb66a88a6..f34486cd7342 100644
--- a/tools/testing/selftests/tpm2/tpm2.py
+++ b/tools/testing/selftests/tpm2/tpm2.py
@@ -247,14 +247,14 @@ class ProtocolError(Exception):
class AuthCommand(object):
"""TPMS_AUTH_COMMAND"""
- def __init__(self, session_handle=TPM2_RS_PW, nonce='', session_attributes=0,
- hmac=''):
+ def __init__(self, session_handle=TPM2_RS_PW, nonce=bytes(),
+ session_attributes=0, hmac=bytes()):
self.session_handle = session_handle
self.nonce = nonce
self.session_attributes = session_attributes
self.hmac = hmac
- def __str__(self):
+ def __bytes__(self):
fmt = '>I H%us B H%us' % (len(self.nonce), len(self.hmac))
return struct.pack(fmt, self.session_handle, len(self.nonce),
self.nonce, self.session_attributes, len(self.hmac),
@@ -268,11 +268,11 @@ class AuthCommand(object):
class SensitiveCreate(object):
"""TPMS_SENSITIVE_CREATE"""
- def __init__(self, user_auth='', data=''):
+ def __init__(self, user_auth=bytes(), data=bytes()):
self.user_auth = user_auth
self.data = data
- def __str__(self):
+ def __bytes__(self):
fmt = '>H%us H%us' % (len(self.user_auth), len(self.data))
return struct.pack(fmt, len(self.user_auth), self.user_auth,
len(self.data), self.data)
@@ -296,8 +296,9 @@ class Public(object):
return '>HHIH%us%usH%us' % \
(len(self.auth_policy), len(self.parameters), len(self.unique))
- def __init__(self, object_type, name_alg, object_attributes, auth_policy='',
- parameters='', unique=''):
+ def __init__(self, object_type, name_alg, object_attributes,
+ auth_policy=bytes(), parameters=bytes(),
+ unique=bytes()):
self.object_type = object_type
self.name_alg = name_alg
self.object_attributes = object_attributes
@@ -305,7 +306,7 @@ class Public(object):
self.parameters = parameters
self.unique = unique
- def __str__(self):
+ def __bytes__(self):
return struct.pack(self.__fmt(),
self.object_type,
self.name_alg,
@@ -343,7 +344,7 @@ def get_algorithm(name):
def hex_dump(d):
d = [format(ord(x), '02x') for x in d]
- d = [d[i: i + 16] for i in xrange(0, len(d), 16)]
+ d = [d[i: i + 16] for i in range(0, len(d), 16)]
d = [' '.join(x) for x in d]
d = os.linesep.join(d)
@@ -401,7 +402,7 @@ class Client:
pcrsel_len = max((i >> 3) + 1, 3)
pcrsel = [0] * pcrsel_len
pcrsel[i >> 3] = 1 << (i & 7)
- pcrsel = ''.join(map(chr, pcrsel))
+ pcrsel = ''.join(map(chr, pcrsel)).encode()
fmt = '>HII IHB%us' % (pcrsel_len)
cmd = struct.pack(fmt,
@@ -443,7 +444,7 @@ class Client:
TPM2_CC_PCR_EXTEND,
i,
len(auth_cmd),
- str(auth_cmd),
+ bytes(auth_cmd),
1, bank_alg, dig)
self.send_cmd(cmd)
@@ -457,7 +458,7 @@ class Client:
TPM2_RH_NULL,
TPM2_RH_NULL,
16,
- '\0' * 16,
+ ('\0' * 16).encode(),
0,
session_type,
TPM2_ALG_NULL,
@@ -472,7 +473,7 @@ class Client:
for i in pcrs:
pcr = self.read_pcr(i, bank_alg)
- if pcr == None:
+ if pcr is None:
return None
x += pcr
@@ -489,7 +490,7 @@ class Client:
pcrsel = [0] * pcrsel_len
for i in pcrs:
pcrsel[i >> 3] |= 1 << (i & 7)
- pcrsel = ''.join(map(chr, pcrsel))
+ pcrsel = ''.join(map(chr, pcrsel)).encode()
fmt = '>HII IH%usIHB3s' % ds
cmd = struct.pack(fmt,
@@ -497,7 +498,8 @@ class Client:
struct.calcsize(fmt),
TPM2_CC_POLICY_PCR,
handle,
- len(dig), str(dig),
+ len(dig),
+ bytes(dig),
1,
bank_alg,
pcrsel_len, pcrsel)
@@ -534,7 +536,7 @@ class Client:
self.send_cmd(cmd)
- def create_root_key(self, auth_value = ''):
+ def create_root_key(self, auth_value = bytes()):
attributes = \
Public.FIXED_TPM | \
Public.FIXED_PARENT | \
@@ -570,11 +572,11 @@ class Client:
TPM2_CC_CREATE_PRIMARY,
TPM2_RH_OWNER,
len(auth_cmd),
- str(auth_cmd),
+ bytes(auth_cmd),
len(sensitive),
- str(sensitive),
+ bytes(sensitive),
len(public),
- str(public),
+ bytes(public),
0, 0)
return struct.unpack('>I', self.send_cmd(cmd)[10:14])[0]
@@ -587,7 +589,7 @@ class Client:
attributes = 0
if not policy_dig:
attributes |= Public.USER_WITH_AUTH
- policy_dig = ''
+ policy_dig = bytes()
auth_cmd = AuthCommand()
sensitive = SensitiveCreate(user_auth=auth_value, data=data)
@@ -608,11 +610,11 @@ class Client:
TPM2_CC_CREATE,
parent_key,
len(auth_cmd),
- str(auth_cmd),
+ bytes(auth_cmd),
len(sensitive),
- str(sensitive),
+ bytes(sensitive),
len(public),
- str(public),
+ bytes(public),
0, 0)
rsp = self.send_cmd(cmd)
@@ -635,7 +637,7 @@ class Client:
TPM2_CC_LOAD,
parent_key,
len(auth_cmd),
- str(auth_cmd),
+ bytes(auth_cmd),
blob)
data_handle = struct.unpack('>I', self.send_cmd(cmd)[10:14])[0]
@@ -653,7 +655,7 @@ class Client:
TPM2_CC_UNSEAL,
data_handle,
len(auth_cmd),
- str(auth_cmd))
+ bytes(auth_cmd))
try:
rsp = self.send_cmd(cmd)
@@ -675,7 +677,7 @@ class Client:
TPM2_CC_DICTIONARY_ATTACK_LOCK_RESET,
TPM2_RH_LOCKOUT,
len(auth_cmd),
- str(auth_cmd))
+ bytes(auth_cmd))
self.send_cmd(cmd)
@@ -693,7 +695,7 @@ class Client:
more_data, cap, cnt = struct.unpack('>BII', rsp[:9])
rsp = rsp[9:]
- for i in xrange(0, cnt):
+ for i in range(0, cnt):
handle = struct.unpack('>I', rsp[:4])[0]
handles.append(handle)
rsp = rsp[4:]
diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py
index 728be7c69b76..9d764306887b 100644
--- a/tools/testing/selftests/tpm2/tpm2_tests.py
+++ b/tools/testing/selftests/tpm2/tpm2_tests.py
@@ -20,8 +20,8 @@ class SmokeTest(unittest.TestCase):
self.client.close()
def test_seal_with_auth(self):
- data = 'X' * 64
- auth = 'A' * 15
+ data = ('X' * 64).encode()
+ auth = ('A' * 15).encode()
blob = self.client.seal(self.root_key, data, auth, None)
result = self.client.unseal(self.root_key, blob, auth, None)
@@ -30,8 +30,8 @@ class SmokeTest(unittest.TestCase):
def test_seal_with_policy(self):
handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL)
- data = 'X' * 64
- auth = 'A' * 15
+ data = ('X' * 64).encode()
+ auth = ('A' * 15).encode()
pcrs = [16]
try:
@@ -58,14 +58,15 @@ class SmokeTest(unittest.TestCase):
self.assertEqual(data, result)
def test_unseal_with_wrong_auth(self):
- data = 'X' * 64
- auth = 'A' * 20
+ data = ('X' * 64).encode()
+ auth = ('A' * 20).encode()
rc = 0
blob = self.client.seal(self.root_key, data, auth, None)
try:
- result = self.client.unseal(self.root_key, blob, auth[:-1] + 'B', None)
- except ProtocolError, e:
+ result = self.client.unseal(self.root_key, blob,
+ auth[:-1] + 'B'.encode(), None)
+ except ProtocolError as e:
rc = e.rc
self.assertEqual(rc, tpm2.TPM2_RC_AUTH_FAIL)
@@ -73,8 +74,8 @@ class SmokeTest(unittest.TestCase):
def test_unseal_with_wrong_policy(self):
handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL)
- data = 'X' * 64
- auth = 'A' * 17
+ data = ('X' * 64).encode()
+ auth = ('A' * 17).encode()
pcrs = [16]
try:
@@ -91,7 +92,7 @@ class SmokeTest(unittest.TestCase):
# This should succeed.
ds = tpm2.get_digest_size(tpm2.TPM2_ALG_SHA1)
- self.client.extend_pcr(1, 'X' * ds)
+ self.client.extend_pcr(1, ('X' * ds).encode())
handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
@@ -108,7 +109,7 @@ class SmokeTest(unittest.TestCase):
# Then, extend a PCR that is part of the policy and try to unseal.
# This should fail.
- self.client.extend_pcr(16, 'X' * ds)
+ self.client.extend_pcr(16, ('X' * ds).encode())
handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
@@ -119,7 +120,7 @@ class SmokeTest(unittest.TestCase):
self.client.policy_password(handle)
result = self.client.unseal(self.root_key, blob, auth, handle)
- except ProtocolError, e:
+ except ProtocolError as e:
rc = e.rc
self.client.flush_context(handle)
except:
@@ -130,13 +131,13 @@ class SmokeTest(unittest.TestCase):
def test_seal_with_too_long_auth(self):
ds = tpm2.get_digest_size(tpm2.TPM2_ALG_SHA1)
- data = 'X' * 64
- auth = 'A' * (ds + 1)
+ data = ('X' * 64).encode()
+ auth = ('A' * (ds + 1)).encode()
rc = 0
try:
blob = self.client.seal(self.root_key, data, auth, None)
- except ProtocolError, e:
+ except ProtocolError as e:
rc = e.rc
self.assertEqual(rc, tpm2.TPM2_RC_SIZE)
@@ -152,7 +153,7 @@ class SmokeTest(unittest.TestCase):
0xDEADBEEF)
self.client.send_cmd(cmd)
- except IOError, e:
+ except IOError as e:
rejected = True
except:
pass
@@ -212,7 +213,7 @@ class SmokeTest(unittest.TestCase):
self.client.tpm.write(cmd)
rsp = self.client.tpm.read()
- except IOError, e:
+ except IOError as e:
# read the response
rsp = self.client.tpm.read()
rejected = True
@@ -283,7 +284,7 @@ class SpaceTest(unittest.TestCase):
rc = 0
try:
space1.send_cmd(cmd)
- except ProtocolError, e:
+ except ProtocolError as e:
rc = e.rc
self.assertEqual(rc, tpm2.TPM2_RC_COMMAND_CODE |
diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c
index f83391aa42cf..5cebfb356345 100644
--- a/tools/testing/selftests/uevent/uevent_filtering.c
+++ b/tools/testing/selftests/uevent/uevent_filtering.c
@@ -19,7 +19,6 @@
#include <sys/wait.h>
#include <unistd.h>
-#include "../kselftest.h"
#include "../kselftest_harness.h"
#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent"
diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore
index 382cfb39a1a3..5eb64d41e541 100644
--- a/tools/testing/selftests/vDSO/.gitignore
+++ b/tools/testing/selftests/vDSO/.gitignore
@@ -1,3 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
vdso_test
+vdso_test_gettimeofday
+vdso_test_getcpu
vdso_standalone_test_x86
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 9e03d61f52fd..0069f2f83f86 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -4,7 +4,7 @@ include ../lib.mk
uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-TEST_GEN_PROGS := $(OUTPUT)/vdso_test
+TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu
ifeq ($(ARCH),x86)
TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
endif
@@ -17,7 +17,8 @@ LDLIBS += -lgcc_s
endif
all: $(TEST_GEN_PROGS)
-$(OUTPUT)/vdso_test: parse_vdso.c vdso_test.c
+$(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
+$(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
$(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
vdso_standalone_test_x86.c parse_vdso.c \
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
index 1dbb4b87268f..413f75620a35 100644
--- a/tools/testing/selftests/vDSO/parse_vdso.c
+++ b/tools/testing/selftests/vDSO/parse_vdso.c
@@ -21,29 +21,7 @@
#include <limits.h>
#include <elf.h>
-/*
- * To use this vDSO parser, first call one of the vdso_init_* functions.
- * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
- * to vdso_init_from_sysinfo_ehdr. Otherwise pass auxv to vdso_init_from_auxv.
- * Then call vdso_sym for each symbol you want. For example, to look up
- * gettimeofday on x86_64, use:
- *
- * <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
- * or
- * <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
- *
- * vdso_sym will return 0 if the symbol doesn't exist or if the init function
- * failed or was not called. vdso_sym is a little slow, so its return value
- * should be cached.
- *
- * vdso_sym is threadsafe; the init functions are not.
- *
- * These are the prototypes:
- */
-extern void vdso_init_from_auxv(void *auxv);
-extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
-extern void *vdso_sym(const char *version, const char *name);
-
+#include "parse_vdso.h"
/* And here's the code. */
#ifndef ELF_BITS
diff --git a/tools/testing/selftests/vDSO/parse_vdso.h b/tools/testing/selftests/vDSO/parse_vdso.h
new file mode 100644
index 000000000000..de0453067d7c
--- /dev/null
+++ b/tools/testing/selftests/vDSO/parse_vdso.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef PARSE_VDSO_H
+#define PARSE_VDSO_H
+
+#include <stdint.h>
+
+/*
+ * To use this vDSO parser, first call one of the vdso_init_* functions.
+ * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
+ * to vdso_init_from_sysinfo_ehdr. Otherwise pass auxv to vdso_init_from_auxv.
+ * Then call vdso_sym for each symbol you want. For example, to look up
+ * gettimeofday on x86_64, use:
+ *
+ * <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
+ * or
+ * <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+ *
+ * vdso_sym will return 0 if the symbol doesn't exist or if the init function
+ * failed or was not called. vdso_sym is a little slow, so its return value
+ * should be cached.
+ *
+ * vdso_sym is threadsafe; the init functions are not.
+ *
+ * These are the prototypes:
+ */
+void *vdso_sym(const char *version, const char *name);
+void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+void vdso_init_from_auxv(void *auxv);
+
+#endif
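The usage comment above boils down to a very small caller. The following is a minimal sketch of that flow (an illustrative example, not part of the patch), assuming only getauxval()/AT_SYSINFO_EHDR from the C library headers and the __vdso_gettimeofday symbol named in the comment:

	/* Minimal sketch of using parse_vdso.h; error handling reduced to exits. */
	#include <elf.h>
	#include <stdio.h>
	#include <sys/auxv.h>
	#include <sys/time.h>

	#include "parse_vdso.h"

	typedef long (*gtod_t)(struct timeval *, struct timezone *);

	int main(void)
	{
		unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
		struct timeval tv;
		gtod_t gtod;

		if (!sysinfo_ehdr)
			return 1;	/* no vDSO mapped for this process */

		/* Init from the ELF header the kernel put in the auxiliary vector. */
		vdso_init_from_sysinfo_ehdr(sysinfo_ehdr);

		/* Look up the symbol once and cache the returned pointer. */
		gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
		if (!gtod)
			return 1;	/* symbol not exported by this vDSO */

		if (gtod(&tv, NULL) == 0)
			printf("vDSO time: %ld.%06ld\n",
			       (long)tv.tv_sec, (long)tv.tv_usec);
		return 0;
	}

vdso_test_gettimeofday.c later in this patch does essentially the same thing, with kselftest result reporting added.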
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
index 5ac4b00acfbc..8a44ff973ee1 100644
--- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
+++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
@@ -16,9 +16,7 @@
#include <unistd.h>
#include <stdint.h>
-extern void *vdso_sym(const char *version, const char *name);
-extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
-extern void vdso_init_from_auxv(void *auxv);
+#include "parse_vdso.h"
/* We need a few libc functions... */
int strcmp(const char *a, const char *b)
diff --git a/tools/testing/selftests/vDSO/vdso_test_getcpu.c b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
new file mode 100644
index 000000000000..fc25ede131b8
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vdso_test_getcpu.c: Sample code to test parse_vdso.c and vDSO getcpu()
+ *
+ * Copyright (c) 2020 Arm Ltd
+ */
+
+#include <stdint.h>
+#include <elf.h>
+#include <stdio.h>
+#include <sys/auxv.h>
+#include <sys/time.h>
+
+#include "../kselftest.h"
+#include "parse_vdso.h"
+
+const char *version = "LINUX_2.6";
+const char *name = "__vdso_getcpu";
+
+struct getcpu_cache;
+typedef long (*getcpu_t)(unsigned int *, unsigned int *,
+ struct getcpu_cache *);
+
+int main(int argc, char **argv)
+{
+ unsigned long sysinfo_ehdr;
+ unsigned int cpu, node;
+ getcpu_t get_cpu;
+ long ret;
+
+ sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+ if (!sysinfo_ehdr) {
+ printf("AT_SYSINFO_EHDR is not present!\n");
+ return KSFT_SKIP;
+ }
+
+ vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
+
+ get_cpu = (getcpu_t)vdso_sym(version, name);
+ if (!get_cpu) {
+ printf("Could not find %s\n", name);
+ return KSFT_SKIP;
+ }
+
+ ret = get_cpu(&cpu, &node, 0);
+ if (ret == 0) {
+ printf("Running on CPU %u node %u\n", cpu, node);
+ } else {
+ printf("%s failed\n", name);
+ return KSFT_FAIL;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vDSO/vdso_test.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
index 719d5a6bd664..8ccc73ed8240 100644
--- a/tools/testing/selftests/vDSO/vdso_test.c
+++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
@@ -1,10 +1,11 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vdso_test.c: Sample code to test parse_vdso.c
+ * vdso_test_gettimeofday.c: Sample code to test parse_vdso.c and
+ * vDSO gettimeofday()
* Copyright (c) 2014 Andy Lutomirski
*
* Compile with:
- * gcc -std=gnu99 vdso_test.c parse_vdso.c
+ * gcc -std=gnu99 vdso_test_gettimeofday.c parse_vdso.c
*
* Tested on x86, 32-bit and 64-bit. It may work on other architectures, too.
*/
@@ -16,10 +17,7 @@
#include <sys/time.h>
#include "../kselftest.h"
-
-extern void *vdso_sym(const char *version, const char *name);
-extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
-extern void vdso_init_from_auxv(void *auxv);
+#include "parse_vdso.h"
/*
* ARM64's vDSO exports its gettimeofday() implementation with a different
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index ca17fe0c3280..849e8226395a 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
hugepage-mmap
hugepage-shm
+khugepaged
map_hugetlb
map_populate
thuge-gen
@@ -9,6 +10,7 @@ mlock2-tests
mremap_dontunmap
on-fault-limit
transhuge-stress
+protection_keys
userfaultfd
mlock-intersect-test
mlock-random-test
@@ -17,3 +19,4 @@ gup_benchmark
va_128TBswitch
map_fixed_noreplace
write_to_hugetlbfs
+hmm-tests
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 6998877f707e..30873b19d04b 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -3,10 +3,28 @@
uname_M := $(shell uname -m 2>/dev/null || echo not)
MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
+# Without this, failed build products remain, with up-to-date timestamps,
+# thus tricking Make (and you!) into believing that All Is Well, in subsequent
+# make invocations:
+.DELETE_ON_ERROR:
+
+# Avoid accidental wrong builds, due to built-in rules working just a little
+# bit too well--but not quite as well as required for our situation here.
+#
+# In other words, "make userfaultfd" is supposed to fail to build at all,
+# because this Makefile only supports either "make" (all), or "make /full/path".
+# However, the built-in rules, if not suppressed, will pick up CFLAGS and the
+# initial LDLIBS (but not the target-specific LDLIBS, because those are only
+# set for the full path target!). This causes it to get pretty far into building
+# things despite using incorrect values such as an *occasionally* incomplete
+# LDLIBS.
+MAKEFLAGS += --no-builtin-rules
+
CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
LDLIBS = -lrt
TEST_GEN_FILES = compaction_test
TEST_GEN_FILES += gup_benchmark
+TEST_GEN_FILES += hmm-tests
TEST_GEN_FILES += hugepage-mmap
TEST_GEN_FILES += hugepage-shm
TEST_GEN_FILES += map_hugetlb
@@ -19,6 +37,31 @@ TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += userfaultfd
+TEST_GEN_FILES += khugepaged
+
+ifeq ($(ARCH),x86_64)
+CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie)
+
+TARGETS := protection_keys
+BINARIES_32 := $(TARGETS:%=%_32)
+BINARIES_64 := $(TARGETS:%=%_64)
+
+ifeq ($(CAN_BUILD_WITH_NOPIE),1)
+CFLAGS += -no-pie
+endif
+
+ifeq ($(CAN_BUILD_I386),1)
+TEST_GEN_FILES += $(BINARIES_32)
+endif
+
+ifeq ($(CAN_BUILD_X86_64),1)
+TEST_GEN_FILES += $(BINARIES_64)
+endif
+else
+TEST_GEN_FILES += protection_keys
+endif
ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64))
TEST_GEN_FILES += va_128TBswitch
@@ -33,6 +76,57 @@ TEST_FILES := test_vmalloc.sh
KSFT_KHDR_INSTALL := 1
include ../lib.mk
+$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread
+
+ifeq ($(ARCH),x86_64)
+BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
+BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
+
+define gen-target-rule-32
+$(1) $(1)_32: $(OUTPUT)/$(1)_32
+.PHONY: $(1) $(1)_32
+endef
+
+define gen-target-rule-64
+$(1) $(1)_64: $(OUTPUT)/$(1)_64
+.PHONY: $(1) $(1)_64
+endef
+
+ifeq ($(CAN_BUILD_I386),1)
+$(BINARIES_32): CFLAGS += -m32
+$(BINARIES_32): LDLIBS += -lrt -ldl -lm
+$(BINARIES_32): %_32: %.c
+ $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
+$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
+endif
+
+ifeq ($(CAN_BUILD_X86_64),1)
+$(BINARIES_64): CFLAGS += -m64
+$(BINARIES_64): LDLIBS += -lrt -ldl
+$(BINARIES_64): %_64: %.c
+ $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
+$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t))))
+endif
+
+# x86_64 users should be encouraged to install 32-bit libraries
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01)
+all: warn_32bit_failure
+
+warn_32bit_failure:
+ @echo "Warning: you seem to have a broken 32-bit build" 2>&1; \
+ echo "environment. This will reduce test coverage of 64-bit" 2>&1; \
+ echo "kernels. If you are using a Debian-like distribution," 2>&1; \
+ echo "try:"; 2>&1; \
+ echo ""; \
+ echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
+ echo ""; \
+ echo "If you are using a Fedora-like distribution, try:"; \
+ echo ""; \
+ echo " yum install glibc-devel.*i686"; \
+ exit 0;
+endif
+endif
+
$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
$(OUTPUT)/mlock-random-test: LDLIBS += -lcap
diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
index bcec71250873..9b420140ba2b 100644
--- a/tools/testing/selftests/vm/compaction_test.c
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -18,7 +18,8 @@
#include "../kselftest.h"
-#define MAP_SIZE 1048576
+#define MAP_SIZE_MB 100
+#define MAP_SIZE (MAP_SIZE_MB * 1024 * 1024)
struct map_list {
void *map;
@@ -165,7 +166,7 @@ int main(int argc, char **argv)
void *map = NULL;
unsigned long mem_free = 0;
unsigned long hugepage_size = 0;
- unsigned long mem_fragmentable = 0;
+ long mem_fragmentable_MB = 0;
if (prereq() != 0) {
printf("Either the sysctl compact_unevictable_allowed is not\n"
@@ -190,9 +191,9 @@ int main(int argc, char **argv)
return -1;
}
- mem_fragmentable = mem_free * 0.8 / 1024;
+ mem_fragmentable_MB = mem_free * 0.8 / 1024;
- while (mem_fragmentable > 0) {
+ while (mem_fragmentable_MB > 0) {
map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0);
if (map == MAP_FAILED)
@@ -213,7 +214,7 @@ int main(int argc, char **argv)
for (i = 0; i < MAP_SIZE; i += page_size)
*(unsigned long *)(map + i) = (unsigned long)map + i;
- mem_fragmentable--;
+ mem_fragmentable_MB -= MAP_SIZE_MB;
}
for (entry = list; entry != NULL; entry = entry->next) {
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
index 93b90a9b1eeb..69dd0d1aa30b 100644
--- a/tools/testing/selftests/vm/config
+++ b/tools/testing/selftests/vm/config
@@ -1,3 +1,6 @@
CONFIG_SYSVIPC=y
CONFIG_USERFAULTFD=y
CONFIG_TEST_VMALLOC=m
+CONFIG_DEVICE_PRIVATE=y
+CONFIG_TEST_HMM=m
+CONFIG_GUP_BENCHMARK=y
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index 43b4dfe161a2..1d4359341e44 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -15,12 +15,12 @@
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark)
-#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark)
-#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark)
+#define GUP_BENCHMARK _IOWR('g', 2, struct gup_benchmark)
/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
-#define PIN_FAST_BENCHMARK _IOWR('g', 4, struct gup_benchmark)
-#define PIN_BENCHMARK _IOWR('g', 5, struct gup_benchmark)
+#define PIN_FAST_BENCHMARK _IOWR('g', 3, struct gup_benchmark)
+#define PIN_BENCHMARK _IOWR('g', 4, struct gup_benchmark)
+#define PIN_LONGTERM_BENCHMARK _IOWR('g', 5, struct gup_benchmark)
/* Just the flags we need, copied from mm.h: */
#define FOLL_WRITE 0x01 /* check pte is writable */
@@ -52,6 +52,9 @@ int main(int argc, char **argv)
case 'b':
cmd = PIN_BENCHMARK;
break;
+ case 'L':
+ cmd = PIN_LONGTERM_BENCHMARK;
+ break;
case 'm':
size = atoi(optarg) * MB;
break;
@@ -67,9 +70,6 @@ int main(int argc, char **argv)
case 'T':
thp = 0;
break;
- case 'L':
- cmd = GUP_LONGTERM_BENCHMARK;
- break;
case 'U':
cmd = GUP_BENCHMARK;
break;
@@ -105,12 +105,16 @@ int main(int argc, char **argv)
gup.flags |= FOLL_WRITE;
fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR);
- if (fd == -1)
- perror("open"), exit(1);
+ if (fd == -1) {
+ perror("open");
+ exit(1);
+ }
p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
- if (p == MAP_FAILED)
- perror("mmap"), exit(1);
+ if (p == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
gup.addr = (unsigned long)p;
if (thp == 1)
@@ -123,8 +127,10 @@ int main(int argc, char **argv)
for (i = 0; i < repeats; i++) {
gup.size = size;
- if (ioctl(fd, cmd, &gup))
- perror("ioctl"), exit(1);
+ if (ioctl(fd, cmd, &gup)) {
+ perror("ioctl");
+ exit(1);
+ }
printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
gup.put_delta_usec);
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
new file mode 100644
index 000000000000..c9404ef9698e
--- /dev/null
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -0,0 +1,1480 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HMM stands for Heterogeneous Memory Management; it is a helper layer inside
+ * the Linux kernel that helps device drivers mirror a process address space in
+ * the device. This allows the device to use the same address space, which
+ * makes communication and data exchange a lot easier.
+ *
+ * This framework's sole purpose is to exercise various code paths inside
+ * the kernel to make sure that HMM performs as expected and to flush out any
+ * bugs.
+ */
+
+#include "../kselftest_harness.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <strings.h>
+#include <time.h>
+#include <pthread.h>
+#include <hugetlbfs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+
+/*
+ * This is a private UAPI to the kernel test module so it isn't exported
+ * in the usual include/uapi/... directory.
+ */
+#include "../../../../lib/test_hmm_uapi.h"
+
+struct hmm_buffer {
+ void *ptr;
+ void *mirror;
+ unsigned long size;
+ int fd;
+ uint64_t cpages;
+ uint64_t faults;
+};
+
+#define TWOMEG (1 << 21)
+#define HMM_BUFFER_SIZE (1024 << 12)
+#define HMM_PATH_MAX 64
+#define NTIMES 10
+
+#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+
+FIXTURE(hmm)
+{
+ int fd;
+ unsigned int page_size;
+ unsigned int page_shift;
+};
+
+FIXTURE(hmm2)
+{
+ int fd0;
+ int fd1;
+ unsigned int page_size;
+ unsigned int page_shift;
+};
+
+static int hmm_open(int unit)
+{
+ char pathname[HMM_PATH_MAX];
+ int fd;
+
+ snprintf(pathname, sizeof(pathname), "/dev/hmm_dmirror%d", unit);
+ fd = open(pathname, O_RDWR, 0);
+ if (fd < 0)
+ fprintf(stderr, "could not open hmm dmirror driver (%s)\n",
+ pathname);
+ return fd;
+}
+
+FIXTURE_SETUP(hmm)
+{
+ self->page_size = sysconf(_SC_PAGE_SIZE);
+ self->page_shift = ffs(self->page_size) - 1;
+
+ self->fd = hmm_open(0);
+ ASSERT_GE(self->fd, 0);
+}
+
+FIXTURE_SETUP(hmm2)
+{
+ self->page_size = sysconf(_SC_PAGE_SIZE);
+ self->page_shift = ffs(self->page_size) - 1;
+
+ self->fd0 = hmm_open(0);
+ ASSERT_GE(self->fd0, 0);
+ self->fd1 = hmm_open(1);
+ ASSERT_GE(self->fd1, 0);
+}
+
+FIXTURE_TEARDOWN(hmm)
+{
+ int ret = close(self->fd);
+
+ ASSERT_EQ(ret, 0);
+ self->fd = -1;
+}
+
+FIXTURE_TEARDOWN(hmm2)
+{
+ int ret = close(self->fd0);
+
+ ASSERT_EQ(ret, 0);
+ self->fd0 = -1;
+
+ ret = close(self->fd1);
+ ASSERT_EQ(ret, 0);
+ self->fd1 = -1;
+}
+
+static int hmm_dmirror_cmd(int fd,
+ unsigned long request,
+ struct hmm_buffer *buffer,
+ unsigned long npages)
+{
+ struct hmm_dmirror_cmd cmd;
+ int ret;
+
+ /* Simulate a device reading system memory. */
+ cmd.addr = (__u64)buffer->ptr;
+ cmd.ptr = (__u64)buffer->mirror;
+ cmd.npages = npages;
+
+ for (;;) {
+ ret = ioctl(fd, request, &cmd);
+ if (ret == 0)
+ break;
+ if (errno == EINTR)
+ continue;
+ return -errno;
+ }
+ buffer->cpages = cmd.cpages;
+ buffer->faults = cmd.faults;
+
+ return 0;
+}
+
+static void hmm_buffer_free(struct hmm_buffer *buffer)
+{
+ if (buffer == NULL)
+ return;
+
+ if (buffer->ptr)
+ munmap(buffer->ptr, buffer->size);
+ free(buffer->mirror);
+ free(buffer);
+}
+
+/*
+ * Create a temporary file that will be deleted on close.
+ */
+static int hmm_create_file(unsigned long size)
+{
+ char path[HMM_PATH_MAX];
+ int fd;
+
+ strcpy(path, "/tmp");
+ fd = open(path, O_TMPFILE | O_EXCL | O_RDWR, 0600);
+ if (fd >= 0) {
+ int r;
+
+ do {
+ r = ftruncate(fd, size);
+ } while (r == -1 && errno == EINTR);
+ if (!r)
+ return fd;
+ close(fd);
+ }
+ return -1;
+}
+
+/*
+ * Return a random unsigned number.
+ */
+static unsigned int hmm_random(void)
+{
+ static int fd = -1;
+ unsigned int r;
+
+ if (fd < 0) {
+ fd = open("/dev/urandom", O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "%s:%d failed to open /dev/urandom\n",
+ __FILE__, __LINE__);
+ return ~0U;
+ }
+ }
+ read(fd, &r, sizeof(r));
+ return r;
+}
+
+static void hmm_nanosleep(unsigned int n)
+{
+ struct timespec t;
+
+ t.tv_sec = 0;
+ t.tv_nsec = n;
+ nanosleep(&t, NULL);
+}
+
+/*
+ * Simple NULL test of device open/close.
+ */
+TEST_F(hmm, open_close)
+{
+}
+
+/*
+ * Read private anonymous memory.
+ */
+TEST_F(hmm, anon_read)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ int val;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /*
+ * Initialize buffer in system memory but leave the first two pages
+ * zero (pte_none and pfn_zero).
+ */
+ i = 2 * self->page_size / sizeof(*ptr);
+ for (ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Set buffer permission to read-only. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Populate the CPU page table with a special zero page. */
+ val = *(int *)(buffer->ptr + self->page_size);
+ ASSERT_EQ(val, 0);
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ ptr = buffer->mirror;
+ for (i = 0; i < 2 * self->page_size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+ for (; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Read private anonymous memory which has been protected with
+ * mprotect() PROT_NONE.
+ */
+TEST_F(hmm, anon_read_prot)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Initialize mirror buffer so we can verify it isn't written. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = -i;
+
+ /* Protect buffer from reading. */
+ ret = mprotect(buffer->ptr, size, PROT_NONE);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, -EFAULT);
+
+ /* Allow CPU to read the buffer so we can check it. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Write private anonymous memory.
+ */
+TEST_F(hmm, anon_write)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Write private anonymous memory which has been protected with
+ * mprotect() PROT_READ.
+ */
+TEST_F(hmm, anon_write_prot)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device reading a zero page of memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 1);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, -EPERM);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ /* Now allow writing and see that the zero page is replaced. */
+ ret = mprotect(buffer->ptr, size, PROT_WRITE | PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Check that a device writing an anonymous private mapping
+ * will copy-on-write if a child process inherits the mapping.
+ */
+TEST_F(hmm, anon_write_child)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ pid_t pid;
+ int child_fd;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer->ptr so we can tell if it is written. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = -i;
+
+ pid = fork();
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
+ if (pid != 0) {
+ waitpid(pid, &ret, 0);
+ ASSERT_EQ(WIFEXITED(ret), 1);
+
+ /* Check that the parent's buffer did not change. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+ return;
+ }
+
+ /* Check that we see the parent's values. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ /* The child process needs its own mirror to its own mm. */
+ child_fd = hmm_open(0);
+ ASSERT_GE(child_fd, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ close(child_fd);
+ exit(0);
+}
+
+/*
+ * Check that a device writing an anonymous shared mapping
+ * will not copy-on-write if a child process inherits the mapping.
+ */
+TEST_F(hmm, anon_write_child_shared)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ pid_t pid;
+ int child_fd;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer->ptr so we can tell if it is written. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = -i;
+
+ pid = fork();
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
+ if (pid != 0) {
+ waitpid(pid, &ret, 0);
+ ASSERT_EQ(WIFEXITED(ret), 1);
+
+ /* Check that the parent's buffer did change. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+ return;
+ }
+
+ /* Check that we see the parent's values. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ /* The child process needs its own mirror to its own mm. */
+ child_fd = hmm_open(0);
+ ASSERT_GE(child_fd, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ close(child_fd);
+ exit(0);
+}
+
+/*
+ * Write private anonymous huge page.
+ */
+TEST_F(hmm, anon_write_huge)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = 2 * TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ size = TWOMEG;
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Write huge TLBFS page.
+ */
+TEST_F(hmm, anon_write_hugetlbfs)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ long pagesizes[4];
+ int n, idx;
+
+ /* Skip test if we can't allocate a hugetlbfs page. */
+
+ n = gethugepagesizes(pagesizes, 4);
+ if (n <= 0)
+ SKIP(return, "Huge page size could not be determined");
+ for (idx = 0; --n > 0; ) {
+ if (pagesizes[n] < pagesizes[idx])
+ idx = n;
+ }
+ size = ALIGN(TWOMEG, pagesizes[idx]);
+ npages = size >> self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->ptr = get_hugepage_region(size, GHR_STRICT);
+ if (buffer->ptr == NULL) {
+ free(buffer);
+ SKIP(return, "Huge page could not be allocated");
+ }
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ free_hugepage_region(buffer->ptr);
+ buffer->ptr = NULL;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Read mmap'ed file memory.
+ */
+TEST_F(hmm, file_read)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ int fd;
+ ssize_t len;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ fd = hmm_create_file(size);
+ ASSERT_GE(fd, 0);
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = fd;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Write initial contents of the file. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+ len = pwrite(fd, buffer->mirror, size, 0);
+ ASSERT_EQ(len, size);
+ memset(buffer->mirror, 0, size);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ,
+ MAP_SHARED,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Write mmap'ed file memory.
+ */
+TEST_F(hmm, file_write)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ int fd;
+ ssize_t len;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ fd = hmm_create_file(size);
+ ASSERT_GE(fd, 0);
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = fd;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Check that the device also wrote the file. */
+ len = pread(fd, buffer->mirror, size, 0);
+ ASSERT_EQ(len, size);
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous memory to device private memory.
+ */
+TEST_F(hmm, migrate)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous memory to device private memory and fault some of it back
+ * to system memory, then try migrating the resulting mix of system and device
+ * private memory to the device.
+ */
+TEST_F(hmm, migrate_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Fault half the pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Migrate memory to the device again. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous shared memory to device private memory.
+ */
+TEST_F(hmm, migrate_shared)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Migrate memory to device. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ASSERT_EQ(ret, -ENOENT);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Try to migrate various memory types to device private memory.
+ */
+TEST_F(hmm2, migrate_mixed)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ int *ptr;
+ unsigned char *p;
+ int ret;
+ int val;
+
+ npages = 6;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+ p = buffer->ptr;
+
+ /* Migrating a protected area should be an error. */
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ASSERT_EQ(ret, -EINVAL);
+
+ /* Punch a hole after the first page address. */
+ ret = munmap(buffer->ptr + self->page_size, self->page_size);
+ ASSERT_EQ(ret, 0);
+
+ /* We expect an error if the vma doesn't cover the range. */
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 3);
+ ASSERT_EQ(ret, -EINVAL);
+
+ /* Page 2 will be a read-only zero page. */
+ ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 2 * self->page_size);
+ val = *ptr + 3;
+ ASSERT_EQ(val, 3);
+
+ /* Page 3 will be read-only. */
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 3 * self->page_size);
+ *ptr = val;
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Page 4-5 will be read-write. */
+ ret = mprotect(buffer->ptr + 4 * self->page_size, 2 * self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 4 * self->page_size);
+ *ptr = val;
+ ptr = (int *)(buffer->ptr + 5 * self->page_size);
+ *ptr = val;
+
+ /* Now try to migrate pages 2-5 to device 1. */
+ buffer->ptr = p + 2 * self->page_size;
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 4);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 4);
+
+ /* Page 5 won't be migrated to device 0 because it's on device 1. */
+ buffer->ptr = p + 5 * self->page_size;
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ASSERT_EQ(ret, -ENOENT);
+ buffer->ptr = p;
+
+ buffer->ptr = p;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous memory to device private memory and fault it back to system
+ * memory multiple times.
+ */
+TEST_F(hmm, migrate_multiple)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ unsigned long c;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ for (c = 0; c < NTIMES; c++) {
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer,
+ npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+ }
+}
+
+/*
+ * Read anonymous memory multiple times.
+ */
+TEST_F(hmm, anon_read_multiple)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ unsigned long c;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ for (c = 0; c < NTIMES; c++) {
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i + c;
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer,
+ npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i + c);
+
+ hmm_buffer_free(buffer);
+ }
+}
+
+void *unmap_buffer(void *p)
+{
+ struct hmm_buffer *buffer = p;
+
+ /* Delay for a bit and then unmap buffer while it is being read. */
+ hmm_nanosleep(hmm_random() % 32000);
+ munmap(buffer->ptr + buffer->size / 2, buffer->size / 2);
+ buffer->ptr = NULL;
+
+ return NULL;
+}
+
+/*
+ * Try reading anonymous memory while it is being unmapped.
+ */
+TEST_F(hmm, anon_teardown)
+{
+ unsigned long npages;
+ unsigned long size;
+ unsigned long c;
+ void *ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ for (c = 0; c < NTIMES; ++c) {
+ pthread_t thread;
+ struct hmm_buffer *buffer;
+ unsigned long i;
+ int *ptr;
+ int rc;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i + c;
+
+ rc = pthread_create(&thread, NULL, unmap_buffer, buffer);
+ ASSERT_EQ(rc, 0);
+
+ /* Simulate a device reading system memory. */
+ rc = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer,
+ npages);
+ if (rc == 0) {
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror;
+ i < size / sizeof(*ptr);
+ ++i)
+ ASSERT_EQ(ptr[i], i + c);
+ }
+
+ pthread_join(thread, &ret);
+ hmm_buffer_free(buffer);
+ }
+}
+
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
+TEST_F(hmm2, snapshot)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ int *ptr;
+ unsigned char *p;
+ unsigned char *m;
+ int ret;
+ int val;
+
+ npages = 7;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+ p = buffer->ptr;
+
+ /* Punch a hole after the first page address. */
+ ret = munmap(buffer->ptr + self->page_size, self->page_size);
+ ASSERT_EQ(ret, 0);
+
+	/* Page 2 will be a read-only zero page. */
+ ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 2 * self->page_size);
+ val = *ptr + 3;
+ ASSERT_EQ(val, 3);
+
+ /* Page 3 will be read-only. */
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 3 * self->page_size);
+ *ptr = val;
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+	/* Pages 4-6 will be read-write. */
+ ret = mprotect(buffer->ptr + 4 * self->page_size, 3 * self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 4 * self->page_size);
+ *ptr = val;
+
+ /* Page 5 will be migrated to device 0. */
+ buffer->ptr = p + 5 * self->page_size;
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 1);
+
+ /* Page 6 will be migrated to device 1. */
+ buffer->ptr = p + 6 * self->page_size;
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 1);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ buffer->ptr = p;
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_ERROR);
+ ASSERT_EQ(m[1], HMM_DMIRROR_PROT_ERROR);
+ ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ);
+ ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ);
+ ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
+ HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that
+ * should be mapped by a large page table entry.
+ */
+TEST_F(hmm, compound)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ int *ptr;
+ unsigned char *m;
+ int ret;
+ long pagesizes[4];
+ int n, idx;
+ unsigned long i;
+
+ /* Skip test if we can't allocate a hugetlbfs page. */
+ n = gethugepagesizes(pagesizes, 4);
+ if (n <= 0)
+ return;
+ for (idx = 0; --n > 0; ) {
+ if (pagesizes[n] < pagesizes[idx])
+ idx = n;
+ }
+ size = ALIGN(TWOMEG, pagesizes[idx]);
+ npages = size >> self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->ptr = get_hugepage_region(size, GHR_STRICT);
+ if (buffer->ptr == NULL) {
+ free(buffer);
+ return;
+ }
+
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Initialize the pages the device will snapshot in buffer->ptr. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ for (i = 0; i < npages; ++i)
+ ASSERT_EQ(m[i], HMM_DMIRROR_PROT_WRITE |
+ HMM_DMIRROR_PROT_PMD);
+
+ /* Make the region read-only. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ for (i = 0; i < npages; ++i)
+ ASSERT_EQ(m[i], HMM_DMIRROR_PROT_READ |
+ HMM_DMIRROR_PROT_PMD);
+
+ free_hugepage_region(buffer->ptr);
+ buffer->ptr = NULL;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test two devices reading the same memory (double mapped).
+ */
+TEST_F(hmm2, double_map)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = 6;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Make region read-only. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate device 0 reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Simulate device 1 reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Punch a hole after the first page address. */
+ ret = munmap(buffer->ptr + self->page_size, self->page_size);
+ ASSERT_EQ(ret, 0);
+
+ hmm_buffer_free(buffer);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c
new file mode 100644
index 000000000000..8b75821302a7
--- /dev/null
+++ b/tools/testing/selftests/vm/khugepaged.c
@@ -0,0 +1,1035 @@
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#ifndef MADV_PAGEOUT
+#define MADV_PAGEOUT 21
+#endif
+
+#define BASE_ADDR ((void *)(1UL << 30))
+static unsigned long hpage_pmd_size;
+static unsigned long page_size;
+static int hpage_pmd_nr;
+
+#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
+#define PID_SMAPS "/proc/self/smaps"
+
+enum thp_enabled {
+ THP_ALWAYS,
+ THP_MADVISE,
+ THP_NEVER,
+};
+
+static const char *thp_enabled_strings[] = {
+ "always",
+ "madvise",
+ "never",
+ NULL
+};
+
+enum thp_defrag {
+ THP_DEFRAG_ALWAYS,
+ THP_DEFRAG_DEFER,
+ THP_DEFRAG_DEFER_MADVISE,
+ THP_DEFRAG_MADVISE,
+ THP_DEFRAG_NEVER,
+};
+
+static const char *thp_defrag_strings[] = {
+ "always",
+ "defer",
+ "defer+madvise",
+ "madvise",
+ "never",
+ NULL
+};
+
+enum shmem_enabled {
+ SHMEM_ALWAYS,
+ SHMEM_WITHIN_SIZE,
+ SHMEM_ADVISE,
+ SHMEM_NEVER,
+ SHMEM_DENY,
+ SHMEM_FORCE,
+};
+
+static const char *shmem_enabled_strings[] = {
+ "always",
+ "within_size",
+ "advise",
+ "never",
+ "deny",
+ "force",
+ NULL
+};
+
+struct khugepaged_settings {
+ bool defrag;
+ unsigned int alloc_sleep_millisecs;
+ unsigned int scan_sleep_millisecs;
+ unsigned int max_ptes_none;
+ unsigned int max_ptes_swap;
+ unsigned int max_ptes_shared;
+ unsigned long pages_to_scan;
+};
+
+struct settings {
+ enum thp_enabled thp_enabled;
+ enum thp_defrag thp_defrag;
+ enum shmem_enabled shmem_enabled;
+ bool debug_cow;
+ bool use_zero_page;
+ struct khugepaged_settings khugepaged;
+};
+
+static struct settings default_settings = {
+ .thp_enabled = THP_MADVISE,
+ .thp_defrag = THP_DEFRAG_ALWAYS,
+ .shmem_enabled = SHMEM_NEVER,
+ .debug_cow = 0,
+ .use_zero_page = 0,
+ .khugepaged = {
+ .defrag = 1,
+ .alloc_sleep_millisecs = 10,
+ .scan_sleep_millisecs = 10,
+ },
+};
+
+static struct settings saved_settings;
+static bool skip_settings_restore;
+
+static int exit_status;
+
+static void success(const char *msg)
+{
+ printf(" \e[32m%s\e[0m\n", msg);
+}
+
+static void fail(const char *msg)
+{
+ printf(" \e[31m%s\e[0m\n", msg);
+ exit_status++;
+}
+
+static int read_file(const char *path, char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numread;
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return 0;
+
+ numread = read(fd, buf, buflen - 1);
+ if (numread < 1) {
+ close(fd);
+ return 0;
+ }
+
+ buf[numread] = '\0';
+ close(fd);
+
+ return (unsigned int) numread;
+}
+
+static int write_file(const char *path, const char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numwritten;
+
+ fd = open(path, O_WRONLY);
+ if (fd == -1)
+ return 0;
+
+ numwritten = write(fd, buf, buflen - 1);
+ close(fd);
+ if (numwritten < 1)
+ return 0;
+
+ return (unsigned int) numwritten;
+}
+
+static int read_string(const char *name, const char *strings[])
+{
+ char path[PATH_MAX];
+ char buf[256];
+ char *c;
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!read_file(path, buf, sizeof(buf))) {
+ perror(path);
+ exit(EXIT_FAILURE);
+ }
+
+ c = strchr(buf, '[');
+ if (!c) {
+ printf("%s: Parse failure\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ c++;
+ memmove(buf, c, sizeof(buf) - (c - buf));
+
+ c = strchr(buf, ']');
+ if (!c) {
+ printf("%s: Parse failure\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ *c = '\0';
+
+ ret = 0;
+ while (strings[ret]) {
+ if (!strcmp(strings[ret], buf))
+ return ret;
+ ret++;
+ }
+
+ printf("Failed to parse %s\n", name);
+ exit(EXIT_FAILURE);
+}
+
+static void write_string(const char *name, const char *val)
+{
+ char path[PATH_MAX];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!write_file(path, val, strlen(val) + 1)) {
+ perror(path);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static unsigned long read_num(const char *name)
+{
+ char path[PATH_MAX];
+ char buf[21];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ ret = read_file(path, buf, sizeof(buf));
+	if (!ret) {
+ perror("read_file(read_num)");
+ exit(EXIT_FAILURE);
+ }
+
+ return strtoul(buf, NULL, 10);
+}
+
+static void write_num(const char *name, unsigned long num)
+{
+ char path[PATH_MAX];
+ char buf[21];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ sprintf(buf, "%ld", num);
+ if (!write_file(path, buf, strlen(buf) + 1)) {
+ perror(path);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void write_settings(struct settings *settings)
+{
+ struct khugepaged_settings *khugepaged = &settings->khugepaged;
+
+ write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
+ write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
+ write_string("shmem_enabled",
+ shmem_enabled_strings[settings->shmem_enabled]);
+ write_num("debug_cow", settings->debug_cow);
+ write_num("use_zero_page", settings->use_zero_page);
+
+ write_num("khugepaged/defrag", khugepaged->defrag);
+ write_num("khugepaged/alloc_sleep_millisecs",
+ khugepaged->alloc_sleep_millisecs);
+ write_num("khugepaged/scan_sleep_millisecs",
+ khugepaged->scan_sleep_millisecs);
+ write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
+ write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
+ write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
+ write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
+}
+
+static void restore_settings(int sig)
+{
+ if (skip_settings_restore)
+ goto out;
+
+ printf("Restore THP and khugepaged settings...");
+ write_settings(&saved_settings);
+ success("OK");
+ if (sig)
+ exit(EXIT_FAILURE);
+out:
+ exit(exit_status);
+}
+
+static void save_settings(void)
+{
+ printf("Save THP and khugepaged settings...");
+ saved_settings = (struct settings) {
+ .thp_enabled = read_string("enabled", thp_enabled_strings),
+ .thp_defrag = read_string("defrag", thp_defrag_strings),
+ .shmem_enabled =
+ read_string("shmem_enabled", shmem_enabled_strings),
+ .debug_cow = read_num("debug_cow"),
+ .use_zero_page = read_num("use_zero_page"),
+ };
+ saved_settings.khugepaged = (struct khugepaged_settings) {
+ .defrag = read_num("khugepaged/defrag"),
+ .alloc_sleep_millisecs =
+ read_num("khugepaged/alloc_sleep_millisecs"),
+ .scan_sleep_millisecs =
+ read_num("khugepaged/scan_sleep_millisecs"),
+ .max_ptes_none = read_num("khugepaged/max_ptes_none"),
+ .max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
+ .max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
+ .pages_to_scan = read_num("khugepaged/pages_to_scan"),
+ };
+ success("OK");
+
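+	/* From here on, restore the saved settings even if interrupted. */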
+ signal(SIGTERM, restore_settings);
+ signal(SIGINT, restore_settings);
+ signal(SIGHUP, restore_settings);
+ signal(SIGQUIT, restore_settings);
+}
+
+static void adjust_settings(void)
+{
+ printf("Adjust settings...");
+ write_settings(&default_settings);
+ success("OK");
+}
+
+#define MAX_LINE_LENGTH 500
+
+static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
+{
+ while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
+ if (!strncmp(buf, pattern, strlen(pattern)))
+ return true;
+ }
+ return false;
+}
+
+static bool check_huge(void *addr)
+{
+ bool thp = false;
+ int ret;
+ FILE *fp;
+ char buffer[MAX_LINE_LENGTH];
+ char addr_pattern[MAX_LINE_LENGTH];
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
+ (unsigned long) addr);
+ if (ret >= MAX_LINE_LENGTH) {
+ printf("%s: Pattern is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ fp = fopen(PID_SMAPS, "r");
+ if (!fp) {
+ printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
+ exit(EXIT_FAILURE);
+ }
+ if (!check_for_pattern(fp, addr_pattern, buffer))
+ goto err_out;
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB",
+ hpage_pmd_size >> 10);
+ if (ret >= MAX_LINE_LENGTH) {
+ printf("%s: Pattern is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+	/*
+	 * Fetch the AnonHugePages: field in the same block and check
+	 * whether it reports the expected number of huge pages.
+	 */
+ if (!check_for_pattern(fp, "AnonHugePages:", buffer))
+ goto err_out;
+
+ if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
+ goto err_out;
+
+ thp = true;
+err_out:
+ fclose(fp);
+ return thp;
+}
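+
+/*
+ * For reference, check_huge() above matches a pair of smaps lines of
+ * this shape (values illustrative):
+ *
+ *   40000000-40200000 rw-p 00000000 00:00 0
+ *   AnonHugePages:      2048 kB
+ */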
+
+static bool check_swap(void *addr, unsigned long size)
+{
+ bool swap = false;
+ int ret;
+ FILE *fp;
+ char buffer[MAX_LINE_LENGTH];
+ char addr_pattern[MAX_LINE_LENGTH];
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
+ (unsigned long) addr);
+ if (ret >= MAX_LINE_LENGTH) {
+ printf("%s: Pattern is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ fp = fopen(PID_SMAPS, "r");
+ if (!fp) {
+ printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
+ exit(EXIT_FAILURE);
+ }
+ if (!check_for_pattern(fp, addr_pattern, buffer))
+ goto err_out;
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
+ size >> 10);
+ if (ret >= MAX_LINE_LENGTH) {
+ printf("%s: Pattern is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+	/*
+	 * Fetch the Swap: field in the same block and check whether it
+	 * reports the expected amount of swapped-out memory.
+	 */
+ if (!check_for_pattern(fp, "Swap:", buffer))
+ goto err_out;
+
+ if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
+ goto err_out;
+
+ swap = true;
+err_out:
+ fclose(fp);
+ return swap;
+}
+
+static void *alloc_mapping(void)
+{
+ void *p;
+
+ p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (p != BASE_ADDR) {
+ printf("Failed to allocate VMA at %p\n", BASE_ADDR);
+ exit(EXIT_FAILURE);
+ }
+
+ return p;
+}
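+
+/*
+ * Note: the mapping is requested at the fixed hint BASE_ADDR and the
+ * test aborts if it lands anywhere else, so the address range matched
+ * against /proc/self/smaps is deterministic across runs.
+ */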
+
+static void fill_memory(int *p, unsigned long start, unsigned long end)
+{
+ int i;
+
+ for (i = start / page_size; i < end / page_size; i++)
+ p[i * page_size / sizeof(*p)] = i + 0xdead0000;
+}
+
+static void validate_memory(int *p, unsigned long start, unsigned long end)
+{
+ int i;
+
+ for (i = start / page_size; i < end / page_size; i++) {
+ if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
+ printf("Page %d is corrupted: %#x\n",
+ i, p[i * page_size / sizeof(*p)]);
+ exit(EXIT_FAILURE);
+ }
+ }
+}
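+
+/*
+ * Note: fill_memory()/validate_memory() above tag only the first int of
+ * each page with (page index + 0xdead0000), which is enough to detect a
+ * page that is lost or mismapped during collapse.
+ */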
+
+#define TICK 500000
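+/*
+ * Returns true if khugepaged neither collapsed the range nor finished
+ * two more full scans within the ~3 second timeout.
+ */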
+static bool wait_for_scan(const char *msg, char *p)
+{
+ int full_scans;
+ int timeout = 6; /* 3 seconds */
+
+ /* Sanity check */
+ if (check_huge(p)) {
+ printf("Unexpected huge page\n");
+ exit(EXIT_FAILURE);
+ }
+
+ madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+
+	/* Wait until two more full scans have completed. */
+ full_scans = read_num("khugepaged/full_scans") + 2;
+
+ printf("%s...", msg);
+ while (timeout--) {
+ if (check_huge(p))
+ break;
+ if (read_num("khugepaged/full_scans") >= full_scans)
+ break;
+ printf(".");
+ usleep(TICK);
+ }
+
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+
+ return timeout == -1;
+}
+
+static void alloc_at_fault(void)
+{
+ struct settings settings = default_settings;
+ char *p;
+
+ settings.thp_enabled = THP_ALWAYS;
+ write_settings(&settings);
+
+ p = alloc_mapping();
+ *p = 1;
+ printf("Allocate huge page on fault...");
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ write_settings(&default_settings);
+
+ madvise(p, page_size, MADV_DONTNEED);
+ printf("Split huge PMD on MADV_DONTNEED...");
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_full(void)
+{
+ void *p;
+
+ p = alloc_mapping();
+ fill_memory(p, 0, hpage_pmd_size);
+ if (wait_for_scan("Collapse fully populated PTE table", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_empty(void)
+{
+ void *p;
+
+ p = alloc_mapping();
+ if (wait_for_scan("Do not collapse empty PTE table", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ fail("Fail");
+ else
+ success("OK");
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_single_pte_entry(void)
+{
+ void *p;
+
+ p = alloc_mapping();
+ fill_memory(p, 0, page_size);
+ if (wait_for_scan("Collapse PTE table with single PTE entry present", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, page_size);
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_max_ptes_none(void)
+{
+ int max_ptes_none = hpage_pmd_nr / 2;
+ struct settings settings = default_settings;
+ void *p;
+
+ settings.khugepaged.max_ptes_none = max_ptes_none;
+ write_settings(&settings);
+
+ p = alloc_mapping();
+
+ fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
+ if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ fail("Fail");
+ else
+ success("OK");
+ validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
+
+ fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
+ if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
+
+ munmap(p, hpage_pmd_size);
+ write_settings(&default_settings);
+}
+
+static void collapse_swapin_single_pte(void)
+{
+	void *p;
+
+	p = alloc_mapping();
+ fill_memory(p, 0, hpage_pmd_size);
+
+ printf("Swapout one page...");
+ if (madvise(p, page_size, MADV_PAGEOUT)) {
+ perror("madvise(MADV_PAGEOUT)");
+ exit(EXIT_FAILURE);
+ }
+ if (check_swap(p, page_size)) {
+ success("OK");
+ } else {
+ fail("Fail");
+ goto out;
+ }
+
+ if (wait_for_scan("Collapse with swapping in single PTE entry", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+out:
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_max_ptes_swap(void)
+{
+ int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
+ void *p;
+
+ p = alloc_mapping();
+
+ fill_memory(p, 0, hpage_pmd_size);
+ printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
+ if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
+ perror("madvise(MADV_PAGEOUT)");
+ exit(EXIT_FAILURE);
+ }
+ if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
+ success("OK");
+ } else {
+ fail("Fail");
+ goto out;
+ }
+
+ if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ fail("Fail");
+ else
+ success("OK");
+ validate_memory(p, 0, hpage_pmd_size);
+
+ fill_memory(p, 0, hpage_pmd_size);
+ printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr);
+ if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
+ perror("madvise(MADV_PAGEOUT)");
+ exit(EXIT_FAILURE);
+ }
+ if (check_swap(p, max_ptes_swap * page_size)) {
+ success("OK");
+ } else {
+ fail("Fail");
+ goto out;
+ }
+
+ if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+out:
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_single_pte_entry_compound(void)
+{
+ void *p;
+
+ p = alloc_mapping();
+
+ printf("Allocate huge page...");
+ madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+ fill_memory(p, 0, hpage_pmd_size);
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+
+ printf("Split huge page leaving single PTE mapping compound page...");
+ madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, page_size);
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_full_of_compound(void)
+{
+ void *p;
+
+ p = alloc_mapping();
+
+ printf("Allocate huge page...");
+ madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+ fill_memory(p, 0, hpage_pmd_size);
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Split huge page leaving single PTE page table full of compound pages...");
+ madvise(p, page_size, MADV_NOHUGEPAGE);
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ if (wait_for_scan("Collapse PTE table full of compound pages", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_compound_extreme(void)
+{
+ void *p;
+ int i;
+
+ p = alloc_mapping();
+ for (i = 0; i < hpage_pmd_nr; i++) {
+ printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
+ i + 1, hpage_pmd_nr);
+
+ madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
+ fill_memory(BASE_ADDR, 0, hpage_pmd_size);
+ if (!check_huge(BASE_ADDR)) {
+ printf("Failed to allocate huge page\n");
+ exit(EXIT_FAILURE);
+ }
+ madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);
+
+ p = mremap(BASE_ADDR - i * page_size,
+ i * page_size + hpage_pmd_size,
+ (i + 1) * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED,
+ BASE_ADDR + 2 * hpage_pmd_size);
+ if (p == MAP_FAILED) {
+ perror("mremap+unmap");
+ exit(EXIT_FAILURE);
+ }
+
+ p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
+ (i + 1) * page_size,
+ (i + 1) * page_size + hpage_pmd_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED,
+ BASE_ADDR - (i + 1) * page_size);
+ if (p == MAP_FAILED) {
+ perror("mremap+alloc");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ munmap(BASE_ADDR, hpage_pmd_size);
+ fill_memory(p, 0, hpage_pmd_size);
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ if (wait_for_scan("Collapse PTE table full of different compound pages", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ validate_memory(p, 0, hpage_pmd_size);
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_fork(void)
+{
+ int wstatus;
+ void *p;
+
+ p = alloc_mapping();
+
+ printf("Allocate small page...");
+ fill_memory(p, 0, page_size);
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Share small page over fork()...");
+ if (!fork()) {
+ /* Do not touch settings on child exit */
+ skip_settings_restore = true;
+ exit_status = 0;
+
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ fill_memory(p, page_size, 2 * page_size);
+
+ if (wait_for_scan("Collapse PTE table with single page shared with parent process", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ validate_memory(p, 0, page_size);
+ munmap(p, hpage_pmd_size);
+ exit(exit_status);
+ }
+
+ wait(&wstatus);
+ exit_status += WEXITSTATUS(wstatus);
+
+ printf("Check if parent still has small page...");
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, page_size);
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_fork_compound(void)
+{
+ int wstatus;
+ void *p;
+
+ p = alloc_mapping();
+
+ printf("Allocate huge page...");
+ madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+ fill_memory(p, 0, hpage_pmd_size);
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Share huge page over fork()...");
+ if (!fork()) {
+ /* Do not touch settings on child exit */
+ skip_settings_restore = true;
+ exit_status = 0;
+
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Split huge page PMD in child process...");
+ madvise(p, page_size, MADV_NOHUGEPAGE);
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ fill_memory(p, 0, page_size);
+
+ write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
+ if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ write_num("khugepaged/max_ptes_shared",
+ default_settings.khugepaged.max_ptes_shared);
+
+ validate_memory(p, 0, hpage_pmd_size);
+ munmap(p, hpage_pmd_size);
+ exit(exit_status);
+ }
+
+ wait(&wstatus);
+ exit_status += WEXITSTATUS(wstatus);
+
+ printf("Check if parent still has huge page...");
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_max_ptes_shared(void)
+{
+ int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
+ int wstatus;
+ void *p;
+
+ p = alloc_mapping();
+
+ printf("Allocate huge page...");
+ madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+ fill_memory(p, 0, hpage_pmd_size);
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Share huge page over fork()...");
+ if (!fork()) {
+ /* Do not touch settings on child exit */
+ skip_settings_restore = true;
+ exit_status = 0;
+
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Trigger CoW on page %d of %d...",
+ hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
+ fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p))
+ fail("Timeout");
+ else if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Trigger CoW on page %d of %d...",
+ hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
+ fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ validate_memory(p, 0, hpage_pmd_size);
+ munmap(p, hpage_pmd_size);
+ exit(exit_status);
+ }
+
+ wait(&wstatus);
+ exit_status += WEXITSTATUS(wstatus);
+
+ printf("Check if parent still has huge page...");
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+ munmap(p, hpage_pmd_size);
+}
+
+int main(void)
+{
+ setbuf(stdout, NULL);
+
+ page_size = getpagesize();
+ hpage_pmd_size = read_num("hpage_pmd_size");
+ hpage_pmd_nr = hpage_pmd_size / page_size;
+
+ default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
+ default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
+ default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
+ default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
+
+ save_settings();
+ adjust_settings();
+
+ alloc_at_fault();
+ collapse_full();
+ collapse_empty();
+ collapse_single_pte_entry();
+ collapse_max_ptes_none();
+ collapse_swapin_single_pte();
+ collapse_max_ptes_swap();
+ collapse_single_pte_entry_compound();
+ collapse_full_of_compound();
+ collapse_compound_extreme();
+ collapse_fork();
+ collapse_fork_compound();
+ collapse_max_ptes_shared();
+
+ restore_settings(0);
+}
diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
index 6af951900aa3..312889edb84a 100644
--- a/tools/testing/selftests/vm/map_hugetlb.c
+++ b/tools/testing/selftests/vm/map_hugetlb.c
@@ -83,7 +83,7 @@ int main(int argc, char **argv)
}
if (shift)
- printf("%u kB hugepages\n", 1 << shift);
+ printf("%u kB hugepages\n", 1 << (shift - 10));
else
printf("Default size hugepages\n");
printf("Mapping %lu Mbytes\n", (unsigned long)length >> 20);
diff --git a/tools/testing/selftests/vm/mremap_dontunmap.c b/tools/testing/selftests/vm/mremap_dontunmap.c
index ee06cb0b9efb..3a7b5ef0b0c6 100644
--- a/tools/testing/selftests/vm/mremap_dontunmap.c
+++ b/tools/testing/selftests/vm/mremap_dontunmap.c
@@ -11,7 +11,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <stdlib.h>
#include <unistd.h>
#include "../kselftest.h"
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
new file mode 100644
index 000000000000..622a85848f61
--- /dev/null
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PKEYS_HELPER_H
+#define _PKEYS_HELPER_H
+#define _GNU_SOURCE
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+/* Define some kernel-like types */
+#define u8 __u8
+#define u16 __u16
+#define u32 __u32
+#define u64 __u64
+
+#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 0
+#endif
+#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
+extern int dprint_in_signal;
+extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+
+extern int test_nr;
+extern int iteration_nr;
+
+#ifdef __GNUC__
+__attribute__((format(printf, 1, 2)))
+#endif
+static inline void sigsafe_printf(const char *format, ...)
+{
+ va_list ap;
+
+ if (!dprint_in_signal) {
+ va_start(ap, format);
+ vprintf(format, ap);
+ va_end(ap);
+ } else {
+ int ret;
+ /*
+ * No printf() functions are signal-safe.
+ * They deadlock easily. Write the format
+ * string to get some output, even if
+ * incomplete.
+ */
+ ret = write(1, format, strlen(format));
+ if (ret < 0)
+ exit(1);
+ }
+}
+#define dprintf_level(level, args...) do { \
+ if (level <= DEBUG_LEVEL) \
+ sigsafe_printf(args); \
+} while (0)
+#define dprintf0(args...) dprintf_level(0, args)
+#define dprintf1(args...) dprintf_level(1, args)
+#define dprintf2(args...) dprintf_level(2, args)
+#define dprintf3(args...) dprintf_level(3, args)
+#define dprintf4(args...) dprintf_level(4, args)
+
+extern void abort_hooks(void);
+#define pkey_assert(condition) do { \
+ if (!(condition)) { \
+ dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
+ __FILE__, __LINE__, \
+ test_nr, iteration_nr); \
+ dprintf0("errno at assert: %d", errno); \
+ abort_hooks(); \
+ exit(__LINE__); \
+ } \
+} while (0)
+
+__attribute__((noinline)) int read_ptr(int *ptr);
+void expected_pkey_fault(int pkey);
+int sys_pkey_alloc(unsigned long flags, unsigned long init_val);
+int sys_pkey_free(unsigned long pkey);
+int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey);
+void record_pkey_malloc(void *ptr, long size, int prot);
+
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+#include "pkey-x86.h"
+#elif defined(__powerpc64__) /* arch */
+#include "pkey-powerpc.h"
+#else /* arch */
+#error Architecture not supported
+#endif /* arch */
+
+#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
+
+static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
+{
+ u32 shift = pkey_bit_position(pkey);
+ /* mask out bits from pkey in old value */
+ reg &= ~((u64)PKEY_MASK << shift);
+ /* OR in new bits for pkey */
+ reg |= (flags & PKEY_MASK) << shift;
+ return reg;
+}
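+
+/*
+ * Worked example (x86, where PKEY_BITS_PER_PKEY == 2 and pkey 1
+ * occupies bits 2-3): set_pkey_bits(0, 1, PKEY_DISABLE_WRITE)
+ * returns 0x8. powerpc numbers the keys from the top bits instead.
+ */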
+
+static inline u64 get_pkey_bits(u64 reg, int pkey)
+{
+ u32 shift = pkey_bit_position(pkey);
+ /*
+ * shift down the relevant bits to the lowest two, then
+ * mask off all the other higher bits
+ */
+ return ((reg >> shift) & PKEY_MASK);
+}
+
+extern u64 shadow_pkey_reg;
+
+static inline u64 _read_pkey_reg(int line)
+{
+ u64 pkey_reg = __read_pkey_reg();
+
+ dprintf4("read_pkey_reg(line=%d) pkey_reg: %016llx"
+ " shadow: %016llx\n",
+ line, pkey_reg, shadow_pkey_reg);
+ assert(pkey_reg == shadow_pkey_reg);
+
+ return pkey_reg;
+}
+
+#define read_pkey_reg() _read_pkey_reg(__LINE__)
+
+static inline void write_pkey_reg(u64 pkey_reg)
+{
+ dprintf4("%s() changing %016llx to %016llx\n", __func__,
+ __read_pkey_reg(), pkey_reg);
+ /* will do the shadow check for us: */
+ read_pkey_reg();
+ __write_pkey_reg(pkey_reg);
+ shadow_pkey_reg = pkey_reg;
+ dprintf4("%s(%016llx) pkey_reg: %016llx\n", __func__,
+ pkey_reg, __read_pkey_reg());
+}
+
+/*
+ * These are technically racy, since something could change the
+ * pkey register between the read and the write.
+ */
+static inline void __pkey_access_allow(int pkey, int do_allow)
+{
+ u64 pkey_reg = read_pkey_reg();
+ int bit = pkey * 2;
+
+	if (do_allow)
+		pkey_reg &= ~(1ULL << bit);
+	else
+		pkey_reg |= (1ULL << bit);
+
+	write_pkey_reg(pkey_reg);
+	dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
+}
+
+static inline void __pkey_write_allow(int pkey, int do_allow_write)
+{
+ u64 pkey_reg = read_pkey_reg();
+ int bit = pkey * 2 + 1;
+
+	if (do_allow_write)
+		pkey_reg &= ~(1ULL << bit);
+	else
+		pkey_reg |= (1ULL << bit);
+
+ write_pkey_reg(pkey_reg);
+ dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
+}
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
+#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
+#define ALIGN_PTR_UP(p, ptr_align_to) \
+ ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+#define ALIGN_PTR_DOWN(p, ptr_align_to) \
+ ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
+static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si)
+{
+#ifdef si_pkey
+ return &si->si_pkey;
+#else
+ return (u32 *)(((u8 *)si) + si_pkey_offset);
+#endif
+}
+
+static inline int kernel_has_pkeys(void)
+{
+ /* try allocating a key and see if it succeeds */
+ int ret = sys_pkey_alloc(0, 0);
+ if (ret <= 0) {
+ return 0;
+ }
+ sys_pkey_free(ret);
+ return 1;
+}
+
+static inline int is_pkeys_supported(void)
+{
+ /* check if the cpu supports pkeys */
+ if (!cpu_has_pkeys()) {
+ dprintf1("SKIP: %s: no CPU support\n", __func__);
+ return 0;
+ }
+
+ /* check if the kernel supports pkeys */
+ if (!kernel_has_pkeys()) {
+ dprintf1("SKIP: %s: no kernel support\n", __func__);
+ return 0;
+ }
+
+ return 1;
+}
+
+#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/vm/pkey-powerpc.h b/tools/testing/selftests/vm/pkey-powerpc.h
new file mode 100644
index 000000000000..1ebb586b2fbc
--- /dev/null
+++ b/tools/testing/selftests/vm/pkey-powerpc.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _PKEYS_POWERPC_H
+#define _PKEYS_POWERPC_H
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 386
+#endif
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 384
+# define SYS_pkey_free 385
+#endif
+#define REG_IP_IDX PT_NIP
+#define REG_TRAPNO PT_TRAP
+#define gregs gp_regs
+#define fpregs fp_regs
+#define si_pkey_offset 0x20
+
+#undef PKEY_DISABLE_ACCESS
+#define PKEY_DISABLE_ACCESS 0x3 /* disable read and write */
+
+#undef PKEY_DISABLE_WRITE
+#define PKEY_DISABLE_WRITE 0x2
+
+#define NR_PKEYS 32
+#define NR_RESERVED_PKEYS_4K 27 /* pkey-0, pkey-1, exec-only-pkey
+ and 24 other keys that cannot be
+ represented in the PTE */
+#define NR_RESERVED_PKEYS_64K_3KEYS 3 /* PowerNV and KVM: pkey-0,
+ pkey-1 and exec-only key */
+#define NR_RESERVED_PKEYS_64K_4KEYS 4 /* PowerVM: pkey-0, pkey-1,
+ pkey-31 and exec-only key */
+#define PKEY_BITS_PER_PKEY 2
+#define HPAGE_SIZE (1UL << 24)
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+
+static inline u32 pkey_bit_position(int pkey)
+{
+ return (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
+}
+
+static inline u64 __read_pkey_reg(void)
+{
+ u64 pkey_reg;
+
+ asm volatile("mfspr %0, 0xd" : "=r" (pkey_reg));
+
+ return pkey_reg;
+}
+
+static inline void __write_pkey_reg(u64 pkey_reg)
+{
+ u64 amr = pkey_reg;
+
+ dprintf4("%s() changing %016llx to %016llx\n",
+ __func__, __read_pkey_reg(), pkey_reg);
+
+ asm volatile("isync; mtspr 0xd, %0; isync"
+ : : "r" ((unsigned long)(amr)) : "memory");
+
+ dprintf4("%s() pkey register after changing %016llx to %016llx\n",
+ __func__, __read_pkey_reg(), pkey_reg);
+}
+
+static inline int cpu_has_pkeys(void)
+{
+ /* No simple way to determine this */
+ return 1;
+}
+
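+/*
+ * Heuristic: a PowerVM partition exposes ibm,partition-name and
+ * hmc-managed? device-tree nodes but no QEMU-specific nodes.
+ */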
+static inline bool arch_is_powervm(void)
+{
+ struct stat buf;
+
+ if ((stat("/sys/firmware/devicetree/base/ibm,partition-name", &buf) == 0) &&
+ (stat("/sys/firmware/devicetree/base/hmc-managed?", &buf) == 0) &&
+ (stat("/sys/firmware/devicetree/base/chosen/qemu,graphic-width", &buf) == -1) )
+ return true;
+
+ return false;
+}
+
+static inline int get_arch_reserved_keys(void)
+{
+ if (sysconf(_SC_PAGESIZE) == 4096)
+ return NR_RESERVED_PKEYS_4K;
+ else
+ if (arch_is_powervm())
+ return NR_RESERVED_PKEYS_64K_4KEYS;
+ else
+ return NR_RESERVED_PKEYS_64K_3KEYS;
+}
+
+void expect_fault_on_read_execonly_key(void *p1, int pkey)
+{
+	/*
+	 * powerpc does not allow userspace to change permissions of exec-only
+	 * keys since those keys are not allocated by userspace. The signal
+	 * handler won't be able to reset the permissions, so the code would
+	 * segfault here indefinitely.
+	 */
+ return;
+}
+
+/* 4-byte instructions * 16384 = 64K page */
+#define __page_o_noops() asm(".rept 16384 ; nop; .endr")
+
+void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int ret;
+
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ pkey_assert(pkey < NR_PKEYS);
+ ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+
+ ret = syscall(__NR_subpage_prot, ptr, size, NULL);
+ if (ret) {
+ perror("subpage_perm");
+ return PTR_ERR_ENOTSUP;
+ }
+
+ ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
+ pkey_assert(!ret);
+ record_pkey_malloc(ptr, size, prot);
+
+ dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
+ return ptr;
+}
+
+#endif /* _PKEYS_POWERPC_H */
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
new file mode 100644
index 000000000000..3be20f5d5275
--- /dev/null
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _PKEYS_X86_H
+#define _PKEYS_X86_H
+
+#ifdef __i386__
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 380
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 381
+# define SYS_pkey_free 382
+#endif
+
+#define REG_IP_IDX REG_EIP
+#define si_pkey_offset 0x14
+
+#else
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 329
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 330
+# define SYS_pkey_free 331
+#endif
+
+#define REG_IP_IDX REG_RIP
+#define si_pkey_offset 0x20
+
+#endif
+
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS 0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE 0x2
+#endif
+
+#define NR_PKEYS 16
+#define NR_RESERVED_PKEYS 2 /* pkey-0 and exec-only-pkey */
+#define PKEY_BITS_PER_PKEY 2
+#define HPAGE_SIZE (1UL<<21)
+#define PAGE_SIZE 4096
+#define MB (1<<20)
+
+static inline void __page_o_noops(void)
+{
+	/* one 8-byte instruction * 512 = 4096 bytes = one page */
+ asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
+}
+
+static inline u64 __read_pkey_reg(void)
+{
+ unsigned int eax, edx;
+ unsigned int ecx = 0;
+ unsigned pkey_reg;
+
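+	/* 0f 01 ee is RDPKRU: read the pkey rights register into EAX. */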
+ asm volatile(".byte 0x0f,0x01,0xee\n\t"
+ : "=a" (eax), "=d" (edx)
+ : "c" (ecx));
+ pkey_reg = eax;
+ return pkey_reg;
+}
+
+static inline void __write_pkey_reg(u64 pkey_reg)
+{
+ unsigned int eax = pkey_reg;
+ unsigned int ecx = 0;
+ unsigned int edx = 0;
+
+ dprintf4("%s() changing %016llx to %016llx\n", __func__,
+ __read_pkey_reg(), pkey_reg);
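+	/* 0f 01 ef is WRPKRU: write EAX to the pkey rights register. */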
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (eax), "c" (ecx), "d" (edx));
+ assert(pkey_reg == __read_pkey_reg());
+}
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile(
+ "cpuid;"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
+#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
+
+static inline int cpu_has_pkeys(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+
+ eax = 0x7;
+ ecx = 0x0;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (!(ecx & X86_FEATURE_PKU)) {
+ dprintf2("cpu does not have PKU\n");
+ return 0;
+ }
+ if (!(ecx & X86_FEATURE_OSPKE)) {
+ dprintf2("cpu does not have OSPKE\n");
+ return 0;
+ }
+ return 1;
+}
+
+static inline u32 pkey_bit_position(int pkey)
+{
+ return pkey * PKEY_BITS_PER_PKEY;
+}
+
+#define XSTATE_PKEY_BIT (9)
+#define XSTATE_PKEY 0x200
+
+int pkey_reg_xstate_offset(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ int xstate_offset;
+ int xstate_size;
+ unsigned long XSTATE_CPUID = 0xd;
+
+	/* assume that XSTATE_PKEY is set in XCR0 */
+	eax = XSTATE_CPUID;
+	ecx = XSTATE_PKEY_BIT;
+	__cpuid(&eax, &ebx, &ecx, &edx);
+	xstate_offset = ebx;
+	xstate_size = eax;
+
+ if (xstate_size == 0) {
+ printf("could not find size/offset of PKEY in xsave state\n");
+ return 0;
+ }
+
+ return xstate_offset;
+}
+
+static inline int get_arch_reserved_keys(void)
+{
+ return NR_RESERVED_PKEYS;
+}
+
+void expect_fault_on_read_execonly_key(void *p1, int pkey)
+{
+ int ptr_contents;
+
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+ expected_pkey_fault(pkey);
+}
+
+void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+{
+ return PTR_ERR_ENOTSUP;
+}
+
+#endif /* _PKEYS_X86_H */
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 480995bceefa..fdbb602ecf32 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Tests x86 Memory Protection Keys (see Documentation/core-api/protection-keys.rst)
+ * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst)
*
* There are examples in here of:
* * how to set protection keys on memory
- * * how to set/clear bits in PKRU (the rights register)
- * * how to handle SEGV_PKRU signals and extract pkey-relevant
+ * * how to set/clear bits in pkey registers (the rights register)
+ * * how to handle SEGV_PKUERR signals and extract pkey-relevant
* information from the siginfo
*
* Things to add:
@@ -22,8 +22,10 @@
* gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
*/
#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
#include <errno.h>
#include <linux/futex.h>
+#include <time.h>
#include <sys/time.h>
#include <sys/syscall.h>
#include <string.h>
@@ -48,34 +50,10 @@
int iteration_nr = 1;
int test_nr;
-unsigned int shadow_pkru;
-
-#define HPAGE_SIZE (1UL<<21)
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
-#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
-#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
-#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
-#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
-#define __stringify_1(x...) #x
-#define __stringify(x...) __stringify_1(x)
-
-#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
-
+u64 shadow_pkey_reg;
int dprint_in_signal;
char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-extern void abort_hooks(void);
-#define pkey_assert(condition) do { \
- if (!(condition)) { \
- dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
- __FILE__, __LINE__, \
- test_nr, iteration_nr); \
- dprintf0("errno at assert: %d", errno); \
- abort_hooks(); \
- exit(__LINE__); \
- } \
-} while (0)
-
void cat_into_file(char *str, char *file)
{
int fd = open(file, O_RDWR);
@@ -158,12 +136,6 @@ void abort_hooks(void)
#endif
}
-static inline void __page_o_noops(void)
-{
- /* 8-bytes of instruction * 512 bytes = 1 page */
- asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
-}
-
/*
* This attempts to have roughly a page of instructions followed by a few
* instructions that do a write, and another page of instructions. That
@@ -174,7 +146,12 @@ static inline void __page_o_noops(void)
* will then fault, which makes sure that the fault code handles
* execute-only memory properly.
*/
+#ifdef __powerpc64__
+/* This way, both 4K and 64K alignment are maintained */
+__attribute__((__aligned__(65536)))
+#else
__attribute__((__aligned__(PAGE_SIZE)))
+#endif
void lots_o_noops_around_write(int *write_to_me)
{
dprintf3("running %s()\n", __func__);
@@ -186,51 +163,134 @@ void lots_o_noops_around_write(int *write_to_me)
dprintf3("%s() done\n", __func__);
}
-/* Define some kernel-like types */
-#define u8 uint8_t
-#define u16 uint16_t
-#define u32 uint32_t
-#define u64 uint64_t
+void dump_mem(void *dumpme, int len_bytes)
+{
+ char *c = (void *)dumpme;
+ int i;
-#ifdef __i386__
+ for (i = 0; i < len_bytes; i += sizeof(u64)) {
+ u64 *ptr = (u64 *)(c + i);
+ dprintf1("dump[%03d][@%p]: %016llx\n", i, ptr, *ptr);
+ }
+}
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
-#endif
+static u32 hw_pkey_get(int pkey, unsigned long flags)
+{
+ u64 pkey_reg = __read_pkey_reg();
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 381
-# define SYS_pkey_free 382
-#endif
+ dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
+ __func__, pkey, flags, 0, 0);
+ dprintf2("%s() raw pkey_reg: %016llx\n", __func__, pkey_reg);
-#define REG_IP_IDX REG_EIP
-#define si_pkey_offset 0x14
+ return (u32) get_pkey_bits(pkey_reg, pkey);
+}
-#else
+static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
+{
+ u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
+ u64 old_pkey_reg = __read_pkey_reg();
+ u64 new_pkey_reg;
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
-#endif
+ /* make sure that 'rights' only contains the bits we expect: */
+ assert(!(rights & ~mask));
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 330
-# define SYS_pkey_free 331
-#endif
+ /* modify bits accordingly in old pkey_reg and assign it */
+ new_pkey_reg = set_pkey_bits(old_pkey_reg, pkey, rights);
-#define REG_IP_IDX REG_RIP
-#define si_pkey_offset 0x20
+ __write_pkey_reg(new_pkey_reg);
-#endif
+ dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x"
+ " pkey_reg now: %016llx old_pkey_reg: %016llx\n",
+ __func__, pkey, rights, flags, 0, __read_pkey_reg(),
+ old_pkey_reg);
+ return 0;
+}
-void dump_mem(void *dumpme, int len_bytes)
+void pkey_disable_set(int pkey, int flags)
{
- char *c = (void *)dumpme;
- int i;
+ unsigned long syscall_flags = 0;
+ int ret;
+ int pkey_rights;
+ u64 orig_pkey_reg = read_pkey_reg();
- for (i = 0; i < len_bytes; i += sizeof(u64)) {
- u64 *ptr = (u64 *)(c + i);
- dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
- }
+ dprintf1("START->%s(%d, 0x%x)\n", __func__,
+ pkey, flags);
+ pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ pkey_assert(pkey_rights >= 0);
+
+ pkey_rights |= flags;
+
+ ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
+ assert(!ret);
+ /* pkey_reg and flags have the same format */
+ shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights);
+ dprintf1("%s(%d) shadow: 0x%016llx\n",
+ __func__, pkey, shadow_pkey_reg);
+
+ pkey_assert(ret >= 0);
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ dprintf1("%s(%d) pkey_reg: 0x%016llx\n",
+ __func__, pkey, read_pkey_reg());
+ if (flags)
+ pkey_assert(read_pkey_reg() >= orig_pkey_reg);
+ dprintf1("END<---%s(%d, 0x%x)\n", __func__,
+ pkey, flags);
+}
+
+void pkey_disable_clear(int pkey, int flags)
+{
+ unsigned long syscall_flags = 0;
+ int ret;
+ int pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ u64 orig_pkey_reg = read_pkey_reg();
+
+ pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+ pkey_assert(pkey_rights >= 0);
+
+ pkey_rights &= ~flags;
+
+ ret = hw_pkey_set(pkey, pkey_rights, 0);
+ shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights);
+ pkey_assert(ret >= 0);
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__,
+ pkey, read_pkey_reg());
+ if (flags)
+ assert(read_pkey_reg() <= orig_pkey_reg);
+}
+
+void pkey_write_allow(int pkey)
+{
+ pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_write_deny(int pkey)
+{
+ pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_access_allow(int pkey)
+{
+ pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
+}
+void pkey_access_deny(int pkey)
+{
+ pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
}
/* Failed address bound checks: */
@@ -255,7 +315,7 @@ static char *si_code_str(int si_code)
return "UNKNOWN";
}
-int pkru_faults;
+int pkey_faults;
int last_si_pkey = -1;
void signal_handler(int signum, siginfo_t *si, void *vucontext)
{
@@ -263,24 +323,28 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
int trapno;
unsigned long ip;
char *fpregs;
- u32 *pkru_ptr;
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ u32 *pkey_reg_ptr;
+ int pkey_reg_offset;
+#endif /* arch */
u64 siginfo_pkey;
u32 *si_pkey_ptr;
- int pkru_offset;
- fpregset_t fpregset;
dprint_in_signal = 1;
dprintf1(">>>>===============SIGSEGV============================\n");
- dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
- __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
+ __func__, __LINE__,
+ __read_pkey_reg(), shadow_pkey_reg);
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
- fpregset = uctxt->uc_mcontext.fpregs;
- fpregs = (void *)fpregset;
+ fpregs = (char *) uctxt->uc_mcontext.fpregs;
- dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
- trapno, ip, si_code_str(si->si_code), si->si_code);
+ dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n",
+ __func__, trapno, ip, si_code_str(si->si_code),
+ si->si_code);
+
+#if defined(__i386__) || defined(__x86_64__) /* arch */
#ifdef __i386__
/*
* 32-bit has some extra padding so that userspace can tell whether
@@ -288,20 +352,22 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
* state. We just assume that it is here.
*/
fpregs += 0x70;
-#endif
- pkru_offset = pkru_xstate_offset();
- pkru_ptr = (void *)(&fpregs[pkru_offset]);
+#endif /* i386 */
+ pkey_reg_offset = pkey_reg_xstate_offset();
+ pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]);
- dprintf1("siginfo: %p\n", si);
- dprintf1(" fpregs: %p\n", fpregs);
/*
- * If we got a PKRU fault, we *HAVE* to have at least one bit set in
+ * If we got a PKEY fault, we *HAVE* to have at least one bit set in
* here.
*/
- dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
+ dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset());
if (DEBUG_LEVEL > 4)
- dump_mem(pkru_ptr - 128, 256);
- pkey_assert(*pkru_ptr);
+ dump_mem(pkey_reg_ptr - 128, 256);
+ pkey_assert(*pkey_reg_ptr);
+#endif /* arch */
+
+ dprintf1("siginfo: %p\n", si);
+ dprintf1(" fpregs: %p\n", fpregs);
if ((si->si_code == SEGV_MAPERR) ||
(si->si_code == SEGV_ACCERR) ||
@@ -310,20 +376,29 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
exit(4);
}
- si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
+ si_pkey_ptr = siginfo_get_pkey_ptr(si);
dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
dump_mem((u8 *)si_pkey_ptr - 8, 24);
siginfo_pkey = *si_pkey_ptr;
pkey_assert(siginfo_pkey < NR_PKEYS);
last_si_pkey = siginfo_pkey;
- dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
- /* need __rdpkru() version so we do not do shadow_pkru checking */
- dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
- dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
- *(u64 *)pkru_ptr = 0x00000000;
- dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
- pkru_faults++;
+ /*
+ * need __read_pkey_reg() version so we do not do shadow_pkey_reg
+ * checking
+ */
+ dprintf1("signal pkey_reg from pkey_reg: %016llx\n",
+ __read_pkey_reg());
+ dprintf1("pkey from siginfo: %016llx\n", siginfo_pkey);
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr);
+ *(u64 *)pkey_reg_ptr = 0x00000000;
+ dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction to continue\n");
+#elif defined(__powerpc64__) /* arch */
+ /* restore access and let the faulting instruction continue */
+ pkey_access_allow(siginfo_pkey);
+#endif /* arch */
+ pkey_faults++;
dprintf1("<<<<==================================================\n");
dprint_in_signal = 0;
}
@@ -391,143 +466,6 @@ pid_t fork_lazy_child(void)
return forkret;
}
-#ifndef PKEY_DISABLE_ACCESS
-# define PKEY_DISABLE_ACCESS 0x1
-#endif
-
-#ifndef PKEY_DISABLE_WRITE
-# define PKEY_DISABLE_WRITE 0x2
-#endif
-
-static u32 hw_pkey_get(int pkey, unsigned long flags)
-{
- u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 pkru = __rdpkru();
- u32 shifted_pkru;
- u32 masked_pkru;
-
- dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
- __func__, pkey, flags, 0, 0);
- dprintf2("%s() raw pkru: %x\n", __func__, pkru);
-
- shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
- dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
- masked_pkru = shifted_pkru & mask;
- dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru);
- /*
- * shift down the relevant bits to the lowest two, then
- * mask off all the other high bits.
- */
- return masked_pkru;
-}
-
-static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
-{
- u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 old_pkru = __rdpkru();
- u32 new_pkru;
-
- /* make sure that 'rights' only contains the bits we expect: */
- assert(!(rights & ~mask));
-
- /* copy old pkru */
- new_pkru = old_pkru;
- /* mask out bits from pkey in old value: */
- new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
- /* OR in new bits for pkey: */
- new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
-
- __wrpkru(new_pkru);
-
- dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
- __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
- return 0;
-}
-
-void pkey_disable_set(int pkey, int flags)
-{
- unsigned long syscall_flags = 0;
- int ret;
- int pkey_rights;
- u32 orig_pkru = rdpkru();
-
- dprintf1("START->%s(%d, 0x%x)\n", __func__,
- pkey, flags);
- pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
-
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
- pkey_assert(pkey_rights >= 0);
-
- pkey_rights |= flags;
-
- ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
- assert(!ret);
- /*pkru and flags have the same format */
- shadow_pkru |= flags << (pkey * 2);
- dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
-
- pkey_assert(ret >= 0);
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
-
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
- if (flags)
- pkey_assert(rdpkru() > orig_pkru);
- dprintf1("END<---%s(%d, 0x%x)\n", __func__,
- pkey, flags);
-}
-
-void pkey_disable_clear(int pkey, int flags)
-{
- unsigned long syscall_flags = 0;
- int ret;
- int pkey_rights = hw_pkey_get(pkey, syscall_flags);
- u32 orig_pkru = rdpkru();
-
- pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
-
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
- pkey_assert(pkey_rights >= 0);
-
- pkey_rights |= flags;
-
- ret = hw_pkey_set(pkey, pkey_rights, 0);
- /* pkru and flags have the same format */
- shadow_pkru &= ~(flags << (pkey * 2));
- pkey_assert(ret >= 0);
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
-
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
- if (flags)
- assert(rdpkru() > orig_pkru);
-}
-
-void pkey_write_allow(int pkey)
-{
- pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
-}
-void pkey_write_deny(int pkey)
-{
- pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
-}
-void pkey_access_allow(int pkey)
-{
- pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
-}
-void pkey_access_deny(int pkey)
-{
- pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
-}
-
int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
unsigned long pkey)
{
@@ -561,33 +499,44 @@ int alloc_pkey(void)
int ret;
unsigned long init_val = 0x0;
- dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
- __LINE__, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
+ __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg);
ret = sys_pkey_alloc(0, init_val);
/*
- * pkey_alloc() sets PKRU, so we need to reflect it in
- * shadow_pkru:
+ * pkey_alloc() sets PKEY register, so we need to reflect it in
+ * shadow_pkey_reg:
*/
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
if (ret) {
/* clear both the bits: */
- shadow_pkru &= ~(0x3 << (ret * 2));
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret,
+ ~PKEY_MASK);
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n",
+ __func__,
+ __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
/*
* move the new state in from init_val
- * (remember, we cheated and init_val == pkru format)
+ * (remember, we cheated and init_val == pkey_reg format)
*/
- shadow_pkru |= (init_val << (ret * 2));
+ shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret,
+ init_val);
}
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
+ dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno);
/* for shadow checking: */
- rdpkru();
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ read_pkey_reg();
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
return ret;
}
@@ -612,10 +561,10 @@ int alloc_random_pkey(void)
int nr_alloced = 0;
int random_index;
memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
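+	/* seed the random generator so key selection varies between runs */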
+ srand((unsigned int)time(NULL));
/* allocate every possible key and make a note of which ones we got */
max_nr_pkey_allocs = NR_PKEYS;
- max_nr_pkey_allocs = 1;
for (i = 0; i < max_nr_pkey_allocs; i++) {
int new_pkey = alloc_pkey();
if (new_pkey < 0)
@@ -638,8 +587,9 @@ int alloc_random_pkey(void)
free_ret = sys_pkey_free(alloced_pkeys[i]);
pkey_assert(!free_ret);
}
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n", __func__,
+ __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
return ret;
}
@@ -657,11 +607,15 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
if (nr_iterations-- < 0)
break;
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
sys_pkey_free(rpkey);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
}
pkey_assert(pkey < NR_PKEYS);
@@ -669,8 +623,9 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
ptr, size, orig_prot, pkey, ret);
pkey_assert(!ret);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n", __func__,
+ __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
return ret;
}
@@ -752,7 +707,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
void *ptr;
int ret;
- rdpkru();
+ read_pkey_reg();
dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
size, prot, pkey);
pkey_assert(pkey < NR_PKEYS);
@@ -761,7 +716,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
pkey_assert(!ret);
record_pkey_malloc(ptr, size, prot);
- rdpkru();
+ read_pkey_reg();
dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
return ptr;
@@ -798,12 +753,15 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
}
int hugetlb_setup_ok;
+#define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages"
#define GET_NR_HUGE_PAGES 10
void setup_hugetlbfs(void)
{
int err;
int fd;
- char buf[] = "123";
+ char buf[256];
+ long hpagesz_kb;
+ long hpagesz_mb;
if (geteuid() != 0) {
fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
@@ -814,11 +772,16 @@ void setup_hugetlbfs(void)
/*
* Now go make sure that we got the pages and that they
- * are 2M pages. Someone might have made 1G the default.
+ * are PMD-level pages. Someone might have made PUD-level
+ * pages the default.
*/
- fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
+ hpagesz_kb = HPAGE_SIZE / 1024;
+ hpagesz_mb = hpagesz_kb / 1024;
+ sprintf(buf, SYSFS_FMT_NR_HUGE_PAGES, hpagesz_kb);
+ fd = open(buf, O_RDONLY);
if (fd < 0) {
- perror("opening sysfs 2M hugetlb config");
+ fprintf(stderr, "opening sysfs %ldM hugetlb config: %s\n",
+ hpagesz_mb, strerror(errno));
return;
}
@@ -826,13 +789,14 @@ void setup_hugetlbfs(void)
err = read(fd, buf, sizeof(buf)-1);
close(fd);
if (err <= 0) {
- perror("reading sysfs 2M hugetlb config");
+ fprintf(stderr, "reading sysfs %ldM hugetlb config: %s\n",
+ hpagesz_mb, strerror(errno));
return;
}
if (atoi(buf) != GET_NR_HUGE_PAGES) {
- fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
- buf, GET_NR_HUGE_PAGES);
+ fprintf(stderr, "could not confirm %ldM pages, got: '%s' expected %d\n",
+ hpagesz_mb, buf, GET_NR_HUGE_PAGES);
return;
}
@@ -886,6 +850,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
malloc_pkey_with_mprotect,
+ malloc_pkey_with_mprotect_subpage,
malloc_pkey_anon_huge,
malloc_pkey_hugetlb
/* can not do direct with the pkey_mprotect() API:
@@ -924,14 +889,14 @@ void *malloc_pkey(long size, int prot, u16 pkey)
return ret;
}
-int last_pkru_faults;
+int last_pkey_faults;
#define UNKNOWN_PKEY -2
-void expected_pk_fault(int pkey)
+void expected_pkey_fault(int pkey)
{
- dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
- __func__, last_pkru_faults, pkru_faults);
+ dprintf2("%s(): last_pkey_faults: %d pkey_faults: %d\n",
+ __func__, last_pkey_faults, pkey_faults);
dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
- pkey_assert(last_pkru_faults + 1 == pkru_faults);
+ pkey_assert(last_pkey_faults + 1 == pkey_faults);
/*
* For exec-only memory, we do not know the pkey in
@@ -940,24 +905,28 @@ void expected_pk_fault(int pkey)
if (pkey != UNKNOWN_PKEY)
pkey_assert(last_si_pkey == pkey);
+#if defined(__i386__) || defined(__x86_64__) /* arch */
/*
- * The signal handler shold have cleared out PKRU to let the
+	 * The signal handler should have cleared out the PKEY register to let the
* test program continue. We now have to restore it.
*/
- if (__rdpkru() != 0)
+ if (__read_pkey_reg() != 0)
+#else /* arch */
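+	/*
+	 * On non-x86, the signal handler re-enabled access with
+	 * pkey_access_allow(), which also updates shadow_pkey_reg, so
+	 * the hardware register and the shadow must still agree.
+	 */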
+ if (__read_pkey_reg() != shadow_pkey_reg)
+#endif /* arch */
pkey_assert(0);
- __wrpkru(shadow_pkru);
- dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
- __func__, shadow_pkru);
- last_pkru_faults = pkru_faults;
+ __write_pkey_reg(shadow_pkey_reg);
+ dprintf1("%s() set pkey_reg=%016llx to restore state after signal "
+ "nuked it\n", __func__, shadow_pkey_reg);
+ last_pkey_faults = pkey_faults;
last_si_pkey = -1;
}
-#define do_not_expect_pk_fault(msg) do { \
- if (last_pkru_faults != pkru_faults) \
- dprintf0("unexpected PK fault: %s\n", msg); \
- pkey_assert(last_pkru_faults == pkru_faults); \
+#define do_not_expect_pkey_fault(msg) do { \
+ if (last_pkey_faults != pkey_faults) \
+ dprintf0("unexpected PKey fault: %s\n", msg); \
+ pkey_assert(last_pkey_faults == pkey_faults); \
} while (0)
int test_fds[10] = { -1 };
@@ -1000,6 +969,58 @@ __attribute__((noinline)) int read_ptr(int *ptr)
return *ptr;
}
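+
+/*
+ * Exhaust the key space, free it all, and then check that pkey 0
+ * (the default key for every mapping) can still be attached with
+ * each protection combination.
+ */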
+void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
+{
+ int i, err;
+ int max_nr_pkey_allocs;
+ int alloced_pkeys[NR_PKEYS];
+ int nr_alloced = 0;
+ long size;
+
+ pkey_assert(pkey_last_malloc_record);
+ size = pkey_last_malloc_record->size;
+ /*
+ * This is a bit of a hack. But mprotect() requires
+ * huge-page-aligned sizes when operating on hugetlbfs.
+ * So, make sure that we use something that's a multiple
+ * of a huge page when we can.
+ */
+ if (size >= HPAGE_SIZE)
+ size = HPAGE_SIZE;
+
+ /* allocate every possible key and make sure key-0 never got allocated */
+ max_nr_pkey_allocs = NR_PKEYS;
+ for (i = 0; i < max_nr_pkey_allocs; i++) {
+ int new_pkey = alloc_pkey();
+ pkey_assert(new_pkey != 0);
+
+ if (new_pkey < 0)
+ break;
+ alloced_pkeys[nr_alloced++] = new_pkey;
+ }
+ /* free all the allocated keys */
+ for (i = 0; i < nr_alloced; i++) {
+ int free_ret;
+
+ if (!alloced_pkeys[i])
+ continue;
+ free_ret = sys_pkey_free(alloced_pkeys[i]);
+ pkey_assert(!free_ret);
+ }
+
+ /* attach key-0 in various modes */
+ err = sys_mprotect_pkey(ptr, size, PROT_READ, 0);
+ pkey_assert(!err);
+ err = sys_mprotect_pkey(ptr, size, PROT_WRITE, 0);
+ pkey_assert(!err);
+ err = sys_mprotect_pkey(ptr, size, PROT_EXEC, 0);
+ pkey_assert(!err);
+ err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE, 0);
+ pkey_assert(!err);
+ err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE|PROT_EXEC, 0);
+ pkey_assert(!err);
+}
+
void test_read_of_write_disabled_region(int *ptr, u16 pkey)
{
int ptr_contents;
@@ -1015,26 +1036,67 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey)
int ptr_contents;
dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
- rdpkru();
+ read_pkey_reg();
+ pkey_access_deny(pkey);
+ ptr_contents = read_ptr(ptr);
+ dprintf1("*ptr: %d\n", ptr_contents);
+ expected_pkey_fault(pkey);
+}
+
+void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr,
+ u16 pkey)
+{
+ int ptr_contents;
+
+ dprintf1("disabling access to PKEY[%02d], doing read @ %p\n",
+ pkey, ptr);
+ ptr_contents = read_ptr(ptr);
+ dprintf1("reading ptr before disabling the read : %d\n",
+ ptr_contents);
+ read_pkey_reg();
pkey_access_deny(pkey);
ptr_contents = read_ptr(ptr);
dprintf1("*ptr: %d\n", ptr_contents);
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
}
+
+void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr,
+ u16 pkey)
+{
+ *ptr = __LINE__;
+ dprintf1("disabling write access; after accessing the page, "
+ "to PKEY[%02d], doing write\n", pkey);
+ pkey_write_deny(pkey);
+ *ptr = __LINE__;
+ expected_pkey_fault(pkey);
+}
+
void test_write_of_write_disabled_region(int *ptr, u16 pkey)
{
dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
pkey_write_deny(pkey);
*ptr = __LINE__;
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
}
void test_write_of_access_disabled_region(int *ptr, u16 pkey)
{
dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
pkey_access_deny(pkey);
*ptr = __LINE__;
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
}
+
+void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
+ u16 pkey)
+{
+ *ptr = __LINE__;
+ dprintf1("disabling access; after accessing the page, "
+ " to PKEY[%02d], doing write\n", pkey);
+ pkey_access_deny(pkey);
+ *ptr = __LINE__;
+ expected_pkey_fault(pkey);
+}
+
void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
{
int ret;
@@ -1160,9 +1222,11 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
int new_pkey;
dprintf1("%s() alloc loop: %d\n", __func__, i);
new_pkey = alloc_pkey();
- dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, err, __rdpkru(), shadow_pkru);
- rdpkru(); /* for shadow checking */
+ dprintf4("%s()::%d, err: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n",
+ __func__, __LINE__, err, __read_pkey_reg(),
+ shadow_pkey_reg);
+ read_pkey_reg(); /* for shadow checking */
dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
if ((new_pkey == -1) && (errno == ENOSPC)) {
dprintf2("%s() failed to allocate pkey after %d tries\n",
@@ -1188,6 +1252,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
dprintf3("%s()::%d\n", __func__, __LINE__);
/*
+ * On x86:
* There are 16 pkeys supported in hardware. Three are
* allocated by the time we get here:
* 1. The default key (0)
@@ -1195,13 +1260,21 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
* 3. One allocated by the test code and passed in via
* 'pkey' to this function.
* Ensure that we can allocate at least another 13 (16-3).
+ *
+ * On powerpc:
+ * There are either 5, 28, 29 or 32 pkeys supported in
+ * hardware depending on the page size (4K or 64K) and
+ * platform (powernv or powervm). Four are allocated by
+ * the time we get here. These include pkey-0, pkey-1,
+ * exec-only pkey and the one allocated by the test code.
+ * Ensure that we can allocate the remaining.
*/
- pkey_assert(i >= NR_PKEYS-3);
+ pkey_assert(i >= (NR_PKEYS - get_arch_reserved_keys() - 1));
for (i = 0; i < nr_allocated_pkeys; i++) {
err = sys_pkey_free(allocated_pkeys[i]);
pkey_assert(!err);
- rdpkru(); /* for shadow checking */
+ read_pkey_reg(); /* for shadow checking */
}
}
@@ -1287,7 +1360,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
pkey_assert(ret != -1);
/* Now access from the current task, and expect an exception: */
peek_result = read_ptr(ptr);
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
/*
* Try to access the NON-pkey-protected "plain_ptr" via ptrace:
@@ -1297,7 +1370,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
pkey_assert(ret != -1);
/* Now access from the current task, and expect NO exception: */
peek_result = read_ptr(plain_ptr);
- do_not_expect_pk_fault("read plain pointer after ptrace");
+ do_not_expect_pkey_fault("read plain pointer after ptrace");
ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
pkey_assert(ret != -1);
@@ -1347,17 +1420,15 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
pkey_assert(!ret);
pkey_access_deny(pkey);
- dprintf2("pkru: %x\n", rdpkru());
+ dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
/*
* Make sure this is an *instruction* fault
*/
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(pkey);
+ do_not_expect_pkey_fault("executing on PROT_EXEC memory");
+ expect_fault_on_read_execonly_key(p1, pkey);
}
void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
@@ -1378,15 +1449,13 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
pkey_assert(!ret);
- dprintf2("pkru: %x\n", rdpkru());
+ dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
/* Make sure this is an *instruction* fault */
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(UNKNOWN_PKEY);
+ do_not_expect_pkey_fault("executing on PROT_EXEC memory");
+ expect_fault_on_read_execonly_key(p1, UNKNOWN_PKEY);
/*
* Put the memory back to non-PROT_EXEC. Should clear the
@@ -1400,7 +1469,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
pkey_assert(!ret);
ptr_contents = read_ptr(p1);
- do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
+ do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
}
void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
@@ -1408,7 +1477,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
int size = PAGE_SIZE;
int sret;
- if (cpu_has_pku()) {
+ if (cpu_has_pkeys()) {
dprintf1("SKIP: %s: no CPU support\n", __func__);
return;
}
@@ -1420,8 +1489,11 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
void (*pkey_tests[])(int *ptr, u16 pkey) = {
test_read_of_write_disabled_region,
test_read_of_access_disabled_region,
+ test_read_of_access_disabled_region_with_page_already_mapped,
test_write_of_write_disabled_region,
+ test_write_of_write_disabled_region_with_page_already_mapped,
test_write_of_access_disabled_region,
+ test_write_of_access_disabled_region_with_page_already_mapped,
test_kernel_write_of_access_disabled_region,
test_kernel_write_of_write_disabled_region,
test_kernel_gup_of_access_disabled_region,
@@ -1433,6 +1505,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
test_pkey_syscalls_on_non_allocated_pkey,
test_pkey_syscalls_bad_args,
test_pkey_alloc_exhaust,
+ test_pkey_alloc_free_attach_pkey0,
};
void run_tests_once(void)
@@ -1442,7 +1515,7 @@ void run_tests_once(void)
for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
int pkey;
- int orig_pkru_faults = pkru_faults;
+ int orig_pkey_faults = pkey_faults;
dprintf1("======================\n");
dprintf1("test %d preparing...\n", test_nr);
@@ -1457,8 +1530,8 @@ void run_tests_once(void)
free_pkey_malloc(ptr);
sys_pkey_free(pkey);
- dprintf1("pkru_faults: %d\n", pkru_faults);
- dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
+ dprintf1("pkey_faults: %d\n", pkey_faults);
+ dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults);
tracing_off();
close_test_fds();
@@ -1471,18 +1544,19 @@ void run_tests_once(void)
void pkey_setup_shadow(void)
{
- shadow_pkru = __rdpkru();
+ shadow_pkey_reg = __read_pkey_reg();
}
int main(void)
{
int nr_iterations = 22;
+ int pkeys_supported = is_pkeys_supported();
setup_handlers();
- printf("has pku: %d\n", cpu_has_pku());
+ printf("has pkeys: %d\n", pkeys_supported);
- if (!cpu_has_pku()) {
+ if (!pkeys_supported) {
int size = PAGE_SIZE;
int *ptr;
@@ -1495,7 +1569,7 @@ int main(void)
}
pkey_setup_shadow();
- printf("startup pkru: %x\n", rdpkru());
+ printf("startup pkey_reg: %016llx\n", read_pkey_reg());
setup_hugetlbfs();
while (nr_iterations-- > 0)
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index 76ca5e7a3951..a3f4f30f0a2e 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -307,4 +307,20 @@ else
echo "[FAIL]"
exitcode=1
fi
+
+echo "running HMM smoke test"
+echo "------------------------------------"
+./test_hmm.sh smoke
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
exit $exitcode
diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/vm/test_hmm.sh
new file mode 100755
index 000000000000..0647b525a625
--- /dev/null
+++ b/tools/testing/selftests/vm/test_hmm.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
+#
+# This is a test script for the HMM (Heterogeneous Memory Management)
+# kernel test driver. It is just a kernel module loader: it loads the
+# test_hmm module, creates the dmirror device nodes and runs the
+# hmm-tests selftest binary against them.
+
+TEST_NAME="test_hmm"
+DRIVER="test_hmm"
+
+# Exit code used when a test fails.
+exitcode=1
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+check_test_requirements()
+{
+ uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo "$0: Must be run as root"
+ exit $ksft_skip
+ fi
+
+ if ! which modprobe > /dev/null 2>&1; then
+ echo "$0: You need modprobe installed"
+ exit $ksft_skip
+ fi
+
+ if ! modinfo $DRIVER > /dev/null 2>&1; then
+ echo "$0: You must have the following enabled in your kernel:"
+ echo "CONFIG_TEST_HMM=m"
+ exit $ksft_skip
+ fi
+}
+
+load_driver()
+{
+ modprobe $DRIVER > /dev/null 2>&1
+ if [ $? == 0 ]; then
+ major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices)
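+		# the driver registers two dmirror character devices; create
+		# the nodes that the hmm-tests binary opens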
+ mknod /dev/hmm_dmirror0 c $major 0
+ mknod /dev/hmm_dmirror1 c $major 1
+ fi
+}
+
+unload_driver()
+{
+ modprobe -r $DRIVER > /dev/null 2>&1
+ rm -f /dev/hmm_dmirror?
+}
+
+run_smoke()
+{
+ echo "Running smoke test. Note, this test provides basic coverage."
+
+ load_driver
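+	# run the hmm-tests binary that sits next to this script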
+ $(dirname "${BASH_SOURCE[0]}")/hmm-tests
+ unload_driver
+}
+
+usage()
+{
+ echo -n "Usage: $0"
+ echo
+ echo "Example usage:"
+ echo
+ echo "# Shows help message"
+ echo "./${TEST_NAME}.sh"
+ echo
+ echo "# Smoke testing"
+ echo "./${TEST_NAME}.sh smoke"
+ echo
+ exit 0
+}
+
+function run_test()
+{
+ if [ $# -eq 0 ]; then
+ usage
+ else
+ if [ "$1" = "smoke" ]; then
+ run_smoke
+ else
+ usage
+ fi
+ fi
+}
+
+check_test_requirements
+run_test "$@"
+
+exit 0
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 61e5cfeb1350..9b0912a01777 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -227,8 +227,10 @@ static void hugetlb_allocate_area(void **alloc_area)
huge_fd, *alloc_area == area_src ? 0 :
nr_pages * page_size);
if (area_alias == MAP_FAILED) {
- if (munmap(*alloc_area, nr_pages * page_size) < 0)
- perror("hugetlb munmap"), exit(1);
+ if (munmap(*alloc_area, nr_pages * page_size) < 0) {
+ perror("hugetlb munmap");
+ exit(1);
+ }
*alloc_area = NULL;
return;
}
@@ -337,9 +339,10 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
/* Undo write-protect, do wakeup after that */
prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
- if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms))
- fprintf(stderr, "clear WP failed for address 0x%Lx\n",
- start), exit(1);
+ if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) {
+ fprintf(stderr, "clear WP failed for address 0x%Lx\n", start);
+ exit(1);
+ }
}
static void *locking_thread(void *arg)
@@ -359,8 +362,10 @@ static void *locking_thread(void *arg)
seed += cpu;
bzero(&rand, sizeof(rand));
bzero(&randstate, sizeof(randstate));
- if (initstate_r(seed, randstate, sizeof(randstate), &rand))
- fprintf(stderr, "srandom_r error\n"), exit(1);
+ if (initstate_r(seed, randstate, sizeof(randstate), &rand)) {
+ fprintf(stderr, "srandom_r error\n");
+ exit(1);
+ }
} else {
page_nr = -bounces;
if (!(bounces & BOUNCE_RACINGFAULTS))
@@ -369,12 +374,16 @@ static void *locking_thread(void *arg)
while (!finished) {
if (bounces & BOUNCE_RANDOM) {
- if (random_r(&rand, &rand_nr))
- fprintf(stderr, "random_r 1 error\n"), exit(1);
+ if (random_r(&rand, &rand_nr)) {
+ fprintf(stderr, "random_r 1 error\n");
+ exit(1);
+ }
page_nr = rand_nr;
if (sizeof(page_nr) > sizeof(rand_nr)) {
- if (random_r(&rand, &rand_nr))
- fprintf(stderr, "random_r 2 error\n"), exit(1);
+ if (random_r(&rand, &rand_nr)) {
+ fprintf(stderr, "random_r 2 error\n");
+ exit(1);
+ }
page_nr |= (((unsigned long) rand_nr) << 16) <<
16;
}
@@ -385,11 +394,13 @@ static void *locking_thread(void *arg)
start = time(NULL);
if (bounces & BOUNCE_VERIFY) {
count = *area_count(area_dst, page_nr);
- if (!count)
+ if (!count) {
fprintf(stderr,
"page_nr %lu wrong count %Lu %Lu\n",
page_nr, count,
- count_verify[page_nr]), exit(1);
+ count_verify[page_nr]);
+ exit(1);
+ }
/*
@@ -401,11 +412,12 @@ static void *locking_thread(void *arg)
*/
#if 1
if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
- page_size))
+ page_size)) {
fprintf(stderr,
"my_bcmp page_nr %lu wrong count %Lu %Lu\n",
- page_nr, count,
- count_verify[page_nr]), exit(1);
+ page_nr, count, count_verify[page_nr]);
+ exit(1);
+ }
#else
unsigned long loops;
@@ -437,7 +449,7 @@ static void *locking_thread(void *arg)
fprintf(stderr,
"page_nr %lu memory corruption %Lu %Lu\n",
page_nr, count,
- count_verify[page_nr]), exit(1);
+ count_verify[page_nr]); exit(1);
}
count++;
*area_count(area_dst, page_nr) = count_verify[page_nr] = count;
@@ -461,12 +473,14 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
offset);
if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
/* real retval in ufdio_copy.copy */
- if (uffdio_copy->copy != -EEXIST)
+ if (uffdio_copy->copy != -EEXIST) {
fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
- uffdio_copy->copy), exit(1);
+ uffdio_copy->copy);
+ exit(1);
+ }
} else {
fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n",
- uffdio_copy->copy), exit(1);
+ uffdio_copy->copy); exit(1);
}
}
@@ -474,9 +488,10 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
{
struct uffdio_copy uffdio_copy;
- if (offset >= nr_pages * page_size)
- fprintf(stderr, "unexpected offset %lu\n",
- offset), exit(1);
+ if (offset >= nr_pages * page_size) {
+ fprintf(stderr, "unexpected offset %lu\n", offset);
+ exit(1);
+ }
uffdio_copy.dst = (unsigned long) area_dst + offset;
uffdio_copy.src = (unsigned long) area_src + offset;
uffdio_copy.len = page_size;
@@ -487,12 +502,14 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
uffdio_copy.copy = 0;
if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
/* real retval in ufdio_copy.copy */
- if (uffdio_copy.copy != -EEXIST)
+ if (uffdio_copy.copy != -EEXIST) {
fprintf(stderr, "UFFDIO_COPY error %Ld\n",
- uffdio_copy.copy), exit(1);
+ uffdio_copy.copy);
+ exit(1);
+ }
} else if (uffdio_copy.copy != page_size) {
fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
- uffdio_copy.copy), exit(1);
+ uffdio_copy.copy); exit(1);
} else {
if (test_uffdio_copy_eexist && retry) {
test_uffdio_copy_eexist = false;
@@ -521,11 +538,11 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg)
if (ret < 0) {
if (errno == EAGAIN)
return 1;
- else
- perror("blocking read error"), exit(1);
+ perror("blocking read error");
} else {
- fprintf(stderr, "short read\n"), exit(1);
+ fprintf(stderr, "short read\n");
}
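+		/* both error paths above fall through to this common exit */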
+ exit(1);
}
return 0;
@@ -536,9 +553,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
{
unsigned long offset;
- if (msg->event != UFFD_EVENT_PAGEFAULT)
- fprintf(stderr, "unexpected msg event %u\n",
- msg->event), exit(1);
+ if (msg->event != UFFD_EVENT_PAGEFAULT) {
+ fprintf(stderr, "unexpected msg event %u\n", msg->event);
+ exit(1);
+ }
if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
wp_range(uffd, msg->arg.pagefault.address, page_size, false);
@@ -546,8 +564,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
} else {
/* Missing page faults */
if (bounces & BOUNCE_VERIFY &&
- msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
- fprintf(stderr, "unexpected write fault\n"), exit(1);
+ msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) {
+ fprintf(stderr, "unexpected write fault\n");
+ exit(1);
+ }
offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
offset &= ~(page_size-1);
@@ -574,25 +594,32 @@ static void *uffd_poll_thread(void *arg)
for (;;) {
ret = poll(pollfd, 2, -1);
- if (!ret)
- fprintf(stderr, "poll error %d\n", ret), exit(1);
- if (ret < 0)
- perror("poll"), exit(1);
+ if (!ret) {
+ fprintf(stderr, "poll error %d\n", ret);
+ exit(1);
+ }
+ if (ret < 0) {
+ perror("poll");
+ exit(1);
+ }
if (pollfd[1].revents & POLLIN) {
- if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
- fprintf(stderr, "read pipefd error\n"),
- exit(1);
+ if (read(pollfd[1].fd, &tmp_chr, 1) != 1) {
+ fprintf(stderr, "read pipefd error\n");
+ exit(1);
+ }
break;
}
- if (!(pollfd[0].revents & POLLIN))
+ if (!(pollfd[0].revents & POLLIN)) {
fprintf(stderr, "pollfd[0].revents %d\n",
- pollfd[0].revents), exit(1);
+ pollfd[0].revents);
+ exit(1);
+ }
if (uffd_read_msg(uffd, &msg))
continue;
switch (msg.event) {
default:
fprintf(stderr, "unexpected msg event %u\n",
- msg.event), exit(1);
+ msg.event); exit(1);
break;
case UFFD_EVENT_PAGEFAULT:
uffd_handle_page_fault(&msg, stats);
@@ -606,8 +633,10 @@ static void *uffd_poll_thread(void *arg)
uffd_reg.range.start = msg.arg.remove.start;
uffd_reg.range.len = msg.arg.remove.end -
msg.arg.remove.start;
- if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
- fprintf(stderr, "remove failure\n"), exit(1);
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) {
+ fprintf(stderr, "remove failure\n");
+ exit(1);
+ }
break;
case UFFD_EVENT_REMAP:
area_dst = (char *)(unsigned long)msg.arg.remap.to;
@@ -879,8 +908,10 @@ static int faulting_process(int signal_test)
area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
- if (area_dst == MAP_FAILED)
- perror("mremap"), exit(1);
+ if (area_dst == MAP_FAILED) {
+ perror("mremap");
+ exit(1);
+ }
for (; nr < nr_pages; nr++) {
count = *area_count(area_dst, nr);
@@ -888,7 +919,7 @@ static int faulting_process(int signal_test)
fprintf(stderr,
"nr %lu memory corruption %Lu %Lu\n",
nr, count,
- count_verify[nr]), exit(1);
+ count_verify[nr]); exit(1);
}
/*
	 * Trigger write protection if there is by writing
@@ -901,8 +932,10 @@ static int faulting_process(int signal_test)
return 1;
for (nr = 0; nr < nr_pages; nr++) {
- if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
- fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
+ if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) {
+ fprintf(stderr, "nr %lu is not zero\n", nr);
+ exit(1);
+ }
}
return 0;
@@ -916,12 +949,14 @@ static void retry_uffdio_zeropage(int ufd,
uffdio_zeropage->range.len,
offset);
if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
- if (uffdio_zeropage->zeropage != -EEXIST)
+ if (uffdio_zeropage->zeropage != -EEXIST) {
fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
- uffdio_zeropage->zeropage), exit(1);
+ uffdio_zeropage->zeropage);
+ exit(1);
+ }
} else {
fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
- uffdio_zeropage->zeropage), exit(1);
+ uffdio_zeropage->zeropage); exit(1);
}
}
@@ -933,9 +968,10 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
- if (offset >= nr_pages * page_size)
- fprintf(stderr, "unexpected offset %lu\n",
- offset), exit(1);
+ if (offset >= nr_pages * page_size) {
+ fprintf(stderr, "unexpected offset %lu\n", offset);
+ exit(1);
+ }
uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
uffdio_zeropage.range.len = page_size;
uffdio_zeropage.mode = 0;
@@ -943,22 +979,26 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
if (ret) {
/* real retval in ufdio_zeropage.zeropage */
if (has_zeropage) {
- if (uffdio_zeropage.zeropage == -EEXIST)
- fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"),
- exit(1);
- else
+ if (uffdio_zeropage.zeropage == -EEXIST) {
+ fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n");
+ exit(1);
+ } else {
fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
+ uffdio_zeropage.zeropage);
+ exit(1);
+ }
} else {
- if (uffdio_zeropage.zeropage != -EINVAL)
+ if (uffdio_zeropage.zeropage != -EINVAL) {
fprintf(stderr,
"UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
+ uffdio_zeropage.zeropage);
+ exit(1);
+ }
}
} else if (has_zeropage) {
if (uffdio_zeropage.zeropage != page_size) {
fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
+ uffdio_zeropage.zeropage); exit(1);
} else {
if (test_uffdio_zeropage_eexist && retry) {
test_uffdio_zeropage_eexist = false;
@@ -970,7 +1010,7 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
} else {
fprintf(stderr,
"UFFDIO_ZEROPAGE succeeded %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
+ uffdio_zeropage.zeropage); exit(1);
}
return 0;
@@ -1000,19 +1040,24 @@ static int userfaultfd_zeropage_test(void)
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
- fprintf(stderr, "register failure\n"), exit(1);
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure\n");
+ exit(1);
+ }
expected_ioctls = uffd_test_ops->expected_ioctls;
if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls)
+ expected_ioctls) {
fprintf(stderr,
- "unexpected missing ioctl for anon memory\n"),
- exit(1);
+ "unexpected missing ioctl for anon memory\n");
+ exit(1);
+ }
if (uffdio_zeropage(uffd, 0)) {
- if (my_bcmp(area_dst, zeropage, page_size))
- fprintf(stderr, "zeropage is not zero\n"), exit(1);
+ if (my_bcmp(area_dst, zeropage, page_size)) {
+ fprintf(stderr, "zeropage is not zero\n");
+ exit(1);
+ }
}
close(uffd);
@@ -1047,32 +1092,41 @@ static int userfaultfd_events_test(void)
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
- fprintf(stderr, "register failure\n"), exit(1);
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure\n");
+ exit(1);
+ }
expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls)
- fprintf(stderr,
- "unexpected missing ioctl for anon memory\n"),
- exit(1);
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
+ fprintf(stderr, "unexpected missing ioctl for anon memory\n");
+ exit(1);
+ }
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
- perror("uffd_poll_thread create"), exit(1);
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
+ perror("uffd_poll_thread create");
+ exit(1);
+ }
pid = fork();
- if (pid < 0)
- perror("fork"), exit(1);
+ if (pid < 0) {
+ perror("fork");
+ exit(1);
+ }
if (!pid)
return faulting_process(0);
waitpid(pid, &err, 0);
- if (err)
- fprintf(stderr, "faulting process failed\n"), exit(1);
+ if (err) {
+ fprintf(stderr, "faulting process failed\n");
+ exit(1);
+ }
- if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
- perror("pipe write"), exit(1);
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
+ perror("pipe write");
+ exit(1);
+ }
if (pthread_join(uffd_mon, NULL))
return 1;
@@ -1110,38 +1164,49 @@ static int userfaultfd_sig_test(void)
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
- fprintf(stderr, "register failure\n"), exit(1);
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure\n");
+ exit(1);
+ }
expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls)
- fprintf(stderr,
- "unexpected missing ioctl for anon memory\n"),
- exit(1);
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
+ fprintf(stderr, "unexpected missing ioctl for anon memory\n");
+ exit(1);
+ }
- if (faulting_process(1))
- fprintf(stderr, "faulting process failed\n"), exit(1);
+ if (faulting_process(1)) {
+ fprintf(stderr, "faulting process failed\n");
+ exit(1);
+ }
if (uffd_test_ops->release_pages(area_dst))
return 1;
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
- perror("uffd_poll_thread create"), exit(1);
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
+ perror("uffd_poll_thread create");
+ exit(1);
+ }
pid = fork();
- if (pid < 0)
- perror("fork"), exit(1);
+ if (pid < 0) {
+ perror("fork");
+ exit(1);
+ }
if (!pid)
exit(faulting_process(2));
waitpid(pid, &err, 0);
- if (err)
- fprintf(stderr, "faulting process failed\n"), exit(1);
+ if (err) {
+ fprintf(stderr, "faulting process failed\n");
+ exit(1);
+ }
- if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
- perror("pipe write"), exit(1);
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
+ perror("pipe write");
+ exit(1);
+ }
if (pthread_join(uffd_mon, (void **)&userfaults))
return 1;
@@ -1395,7 +1460,7 @@ static void set_test_type(const char *type)
test_type = TEST_SHMEM;
uffd_test_ops = &shmem_uffd_test_ops;
} else {
- fprintf(stderr, "Unknown test type: %s\n", type), exit(1);
+ fprintf(stderr, "Unknown test type: %s\n", type); exit(1);
}
if (test_type == TEST_HUGETLB)
@@ -1403,12 +1468,15 @@ static void set_test_type(const char *type)
else
page_size = sysconf(_SC_PAGE_SIZE);
- if (!page_size)
- fprintf(stderr, "Unable to determine page size\n"),
- exit(2);
+ if (!page_size) {
+ fprintf(stderr, "Unable to determine page size\n");
+ exit(2);
+ }
if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
- > page_size)
- fprintf(stderr, "Impossible to run this test\n"), exit(2);
+ > page_size) {
+ fprintf(stderr, "Impossible to run this test\n");
+ exit(2);
+ }
}
static void sigalrm(int sig)
@@ -1425,8 +1493,10 @@ int main(int argc, char **argv)
if (argc < 4)
usage();
- if (signal(SIGALRM, sigalrm) == SIG_ERR)
- fprintf(stderr, "failed to arm SIGALRM"), exit(1);
+ if (signal(SIGALRM, sigalrm) == SIG_ERR) {
+ fprintf(stderr, "failed to arm SIGALRM");
+ exit(1);
+ }
alarm(ALARM_INTERVAL_SECS);
set_test_type(argv[1]);
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 17a1f53ceba0..d77f4829f1e0 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -587,9 +587,20 @@ ip0 link set wg0 up
kill $ncat_pid
ip0 link del wg0
+# Ensure there aren't circular reference loops
+ip1 link add wg1 type wireguard
+ip2 link add wg2 type wireguard
+ip1 link set wg1 netns $netns2
+ip2 link set wg2 netns $netns1
+pp ip netns delete $netns1
+pp ip netns delete $netns2
+pp ip netns add $netns1
+pp ip netns add $netns2
+
+sleep 2 # Wait for cleanup and grace periods
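+# Scan the kernel log for object create/destroy messages and check that
+# every "created" object was eventually "destroyed".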
declare -A objects
while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do
- [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue
+ [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ ?[0-9]*)\ .*(created|destroyed).* ]] || continue
objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}"
done < /dev/kmsg
alldeleted=1
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
index 9803dbb54181..b50c2085c1ac 100644
--- a/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -57,7 +57,6 @@ CONFIG_RCU_EQS_DEBUG=y
CONFIG_USER_STACKTRACE_SUPPORT=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_DOUBLEFAULT=y
CONFIG_X86_DEBUG_FPU=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_DEBUG_PAGEALLOC=y
diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore
index 022a1f3b64ef..1aaef5bf119a 100644
--- a/tools/testing/selftests/x86/.gitignore
+++ b/tools/testing/selftests/x86/.gitignore
@@ -12,5 +12,4 @@ ldt_gdt
iopl
mpx-mini-test
ioperm
-protection_keys
test_vdso
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 5d49bfec1e9a..6703c7906b71 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -12,8 +12,8 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl ioperm \
- protection_keys test_vdso test_vsyscall mov_ss_trap \
- syscall_arg_fault
+ test_vdso test_vsyscall mov_ss_trap \
+ syscall_arg_fault fsgsbase_restore
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
@@ -70,10 +70,10 @@ all_64: $(BINARIES_64)
EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64)
-$(BINARIES_32): $(OUTPUT)/%_32: %.c
+$(BINARIES_32): $(OUTPUT)/%_32: %.c helpers.h
$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm
-$(BINARIES_64): $(OUTPUT)/%_64: %.c
+$(BINARIES_64): $(OUTPUT)/%_64: %.c helpers.h
$(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
# x86_64 users should be encouraged to install 32-bit libraries
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 15a329da59fa..7161cfc2e60b 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -285,7 +285,8 @@ static unsigned short load_gs(void)
/* 32-bit set_thread_area */
long ret;
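+	/*
+	 * The "+m" (*low_desc) constraint tells the compiler that the
+	 * kernel both reads and rewrites the descriptor through this
+	 * pointer, so accesses to it cannot be cached or reordered
+	 * around the syscall.
+	 */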
asm volatile ("int $0x80"
- : "=a" (ret) : "a" (243), "b" (low_desc)
+ : "=a" (ret), "+m" (*low_desc)
+ : "a" (243), "b" (low_desc)
: "r8", "r9", "r10", "r11");
memcpy(&desc, low_desc, sizeof(desc));
munmap(low_desc, sizeof(desc));
@@ -442,6 +443,68 @@ static void test_unexpected_base(void)
#define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
+static void test_ptrace_write_gs_read_base(void)
+{
+ int status;
+ pid_t child = fork();
+
+ if (child < 0)
+ err(1, "fork");
+
+ if (child == 0) {
+ printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n");
+
+ printf("[RUN]\tARCH_SET_GS to 1\n");
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0)
+ err(1, "ARCH_SET_GS");
+
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ raise(SIGTRAP);
+ _exit(0);
+ }
+
+ wait(&status);
+
+ if (WSTOPSIG(status) == SIGTRAP) {
+ unsigned long base;
+ unsigned long gs_offset = USER_REGS_OFFSET(gs);
+ unsigned long base_offset = USER_REGS_OFFSET(gs_base);
+
+ /* Read the initial base. It should be 1. */
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+ if (base == 1) {
+ printf("[OK]\tGSBASE started at 1\n");
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE started at 0x%lx\n", base);
+ }
+
+ printf("[RUN]\tSet GS = 0x7, read GSBASE\n");
+
+ /* Poke an LDT selector into GS. */
+ if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0)
+ err(1, "PTRACE_POKEUSER");
+
+ /* And read the base. */
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+
+ if (base == 0 || base == 1) {
+ printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base);
+ }
+ }
+
+ ptrace(PTRACE_CONT, child, NULL, NULL);
+
+ wait(&status);
+ if (!WIFEXITED(status))
+ printf("[WARN]\tChild didn't exit cleanly.\n");
+}
+
static void test_ptrace_write_gsbase(void)
{
int status;
@@ -489,16 +552,36 @@ static void test_ptrace_write_gsbase(void)
* selector value is changed or not by the GSBASE write in
* a ptracer.
*/
- if (gs == 0 && base == 0xFF) {
- printf("[OK]\tGS was reset as expected\n");
- } else {
+ if (gs != *shared_scratch) {
nerrs++;
- printf("[FAIL]\tGS=0x%lx, GSBASE=0x%lx (should be 0, 0xFF)\n", gs, base);
+ printf("[FAIL]\tGS changed to %lx\n", gs);
+
+ /*
+ * On older kernels, poking a nonzero value into the
+ * base would zero the selector. On newer kernels,
+ * this behavior has changed -- poking the base
+ * changes only the base and, if FSGSBASE is not
+ * available, this may have no effect once the tracee
+ * is resumed.
+ */
+ if (gs == 0)
+ printf("\tNote: this is expected behavior on older kernels.\n");
+ } else if (have_fsgsbase && (base != 0xFF)) {
+ nerrs++;
+ printf("[FAIL]\tGSBASE changed to %lx\n", base);
+ } else {
+ printf("[OK]\tGS remained 0x%hx", *shared_scratch);
+ if (have_fsgsbase)
+ printf(" and GSBASE changed to 0xFF");
+ printf("\n");
}
}
END:
ptrace(PTRACE_CONT, child, NULL, NULL);
+ wait(&status);
+ if (!WIFEXITED(status))
+ printf("[WARN]\tChild didn't exit cleanly.\n");
}
int main()
@@ -508,6 +591,9 @@ int main()
shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+ /* Do these tests before we have an LDT. */
+ test_ptrace_write_gs_read_base();
+
/* Probe FSGSBASE */
sethandler(SIGILL, sigill, 0);
if (sigsetjmp(jmpbuf, 1) == 0) {
diff --git a/tools/testing/selftests/x86/fsgsbase_restore.c b/tools/testing/selftests/x86/fsgsbase_restore.c
new file mode 100644
index 000000000000..6fffadc51579
--- /dev/null
+++ b/tools/testing/selftests/x86/fsgsbase_restore.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * fsgsbase_restore.c, test ptrace vs fsgsbase
+ * Copyright (c) 2020 Andy Lutomirski
+ *
+ * This test case simulates a tracer redirecting tracee execution to
+ * a function and then restoring tracee state using PTRACE_GETREGS and
+ * PTRACE_SETREGS. This is similar to what gdb does when doing
+ * 'p func()'. The catch is that this test has the called function
+ * modify a segment register. This makes sure that ptrace correctly
+ * restores segment state when using PTRACE_SETREGS.
+ *
+ * This is not part of fsgsbase.c, because that test is 64-bit only.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <err.h>
+#include <sys/user.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <asm/ldt.h>
+#include <sys/mman.h>
+#include <stddef.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+#include <stdint.h>
+
+#define EXPECTED_VALUE 0x1337f00d
+
+#ifdef __x86_64__
+# define SEG "%gs"
+#else
+# define SEG "%fs"
+#endif
+
+static unsigned int dereference_seg_base(void)
+{
+ int ret;
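+	/* load the 32-bit word at offset 0 from the FS/GS segment base */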
+ asm volatile ("mov %" SEG ":(0), %0" : "=rm" (ret));
+ return ret;
+}
+
+static void init_seg(void)
+{
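+	/*
+	 * The descriptor's base_addr field is only 32 bits wide, so
+	 * the target word must live below 4GB; MAP_32BIT guarantees
+	 * that.
+	 */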
+ unsigned int *target = mmap(
+ NULL, sizeof(unsigned int),
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+ if (target == MAP_FAILED)
+ err(1, "mmap");
+
+ *target = EXPECTED_VALUE;
+
+ printf("\tsegment base address = 0x%lx\n", (unsigned long)target);
+
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = (unsigned int)(uintptr_t)target,
+ .limit = sizeof(unsigned int) - 1,
+ .seg_32bit = 1,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
+ printf("\tusing LDT slot 0\n");
+ asm volatile ("mov %0, %" SEG :: "rm" ((unsigned short)0x7));
+ } else {
+ /* No modify_ldt for us (configured out, perhaps) */
+
+ struct user_desc *low_desc = mmap(
+ NULL, sizeof(desc),
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+ memcpy(low_desc, &desc, sizeof(desc));
+
+ low_desc->entry_number = -1;
+
+ /* 32-bit set_thread_area */
+ long ret;
+ asm volatile ("int $0x80"
+ : "=a" (ret), "+m" (*low_desc)
+ : "a" (243), "b" (low_desc)
+#ifdef __x86_64__
+ : "r8", "r9", "r10", "r11"
+#endif
+ );
+ memcpy(&desc, low_desc, sizeof(desc));
+ munmap(low_desc, sizeof(desc));
+
+ if (ret != 0) {
+ printf("[NOTE]\tcould not create a segment -- can't test anything\n");
+ exit(0);
+ }
+ printf("\tusing GDT slot %d\n", desc.entry_number);
+
+ unsigned short sel = (unsigned short)((desc.entry_number << 3) | 0x3);
+ asm volatile ("mov %0, %" SEG :: "rm" (sel));
+ }
+}
+
+static void tracee_zap_segment(void)
+{
+ /*
+ * The tracer will redirect execution here. This is meant to
+ * work like gdb's 'p func()' feature. The tricky bit is that
+ * we modify a segment register in order to make sure that ptrace
+ * can correctly restore segment registers.
+ */
+ printf("\tTracee: in tracee_zap_segment()\n");
+
+ /*
+ * Write a nonzero selector with base zero to the segment register.
+ * Using a null selector would defeat the test on AMD pre-Zen2
+ * CPUs, as such CPUs don't clear the base when loading a null
+ * selector.
+ */
+ unsigned short sel;
+ asm volatile ("mov %%ss, %0\n\t"
+ "mov %0, %" SEG
+ : "=rm" (sel));
+
+ pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+ printf("\tTracee is going back to sleep\n");
+ syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+ /* Should not get here. */
+ while (true) {
+ printf("[FAIL]\tTracee hit unreachable code\n");
+ pause();
+ }
+}
+
+int main()
+{
+ printf("\tSetting up a segment\n");
+ init_seg();
+
+ unsigned int val = dereference_seg_base();
+ if (val != EXPECTED_VALUE) {
+ printf("[FAIL]\tseg[0] == %x; should be %x\n", val, EXPECTED_VALUE);
+ return 1;
+ }
+ printf("[OK]\tThe segment points to the right place.\n");
+
+ pid_t chld = fork();
+ if (chld < 0)
+ err(1, "fork");
+
+ if (chld == 0) {
+ prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0, 0);
+
+ if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+ printf("\tTracee will take a nap until signaled\n");
+ syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+ printf("\tTracee was resumed. Will re-check segment.\n");
+
+ val = dereference_seg_base();
+ if (val != EXPECTED_VALUE) {
+ printf("[FAIL]\tseg[0] == %x; should be %x\n", val, EXPECTED_VALUE);
+ exit(1);
+ }
+
+ printf("[OK]\tThe segment points to the right place.\n");
+ exit(0);
+ }
+
+ int status;
+
+ /* Wait for SIGSTOP. */
+ if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+ err(1, "waitpid");
+
+ struct user_regs_struct regs;
+
+ if (ptrace(PTRACE_GETREGS, chld, NULL, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+#ifdef __x86_64__
+ printf("\tChild GS=0x%lx, GSBASE=0x%lx\n", (unsigned long)regs.gs, (unsigned long)regs.gs_base);
+#else
+ printf("\tChild FS=0x%lx\n", (unsigned long)regs.xfs);
+#endif
+
+ struct user_regs_struct regs2 = regs;
+#ifdef __x86_64__
+ regs2.rip = (unsigned long)tracee_zap_segment;
+ regs2.rsp -= 128; /* Don't clobber the redzone. */
+#else
+ regs2.eip = (unsigned long)tracee_zap_segment;
+#endif
+
+ printf("\tTracer: redirecting tracee to tracee_zap_segment()\n");
+ if (ptrace(PTRACE_SETREGS, chld, NULL, &regs2) != 0)
+ err(1, "PTRACE_GETREGS");
+ if (ptrace(PTRACE_CONT, chld, NULL, NULL) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ /* Wait for SIGSTOP. */
+ if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+ err(1, "waitpid");
+
+ printf("\tTracer: restoring tracee state\n");
+ if (ptrace(PTRACE_SETREGS, chld, NULL, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+ if (ptrace(PTRACE_DETACH, chld, NULL, NULL) != 0)
+ err(1, "PTRACE_GETREGS");
+
+	/* Wait for the child to exit. */
+ if (waitpid(chld, &status, 0) != chld)
+ err(1, "waitpid");
+
+ if (WIFSIGNALED(status)) {
+ printf("[FAIL]\tTracee crashed\n");
+ return 1;
+ }
+
+ if (!WIFEXITED(status)) {
+ printf("[FAIL]\tTracee stopped for an unexpected reason: %d\n", status);
+ return 1;
+ }
+
+ int exitcode = WEXITSTATUS(status);
+ if (exitcode != 0) {
+ printf("[FAIL]\tTracee reported failure\n");
+ return 1;
+ }
+
+ printf("[OK]\tAll is well.\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/helpers.h b/tools/testing/selftests/x86/helpers.h
new file mode 100644
index 000000000000..f5ff2a2615df
--- /dev/null
+++ b/tools/testing/selftests/x86/helpers.h
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef __SELFTESTS_X86_HELPERS_H
+#define __SELFTESTS_X86_HELPERS_H
+
+#include <asm/processor-flags.h>
+
+static inline unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+
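+	/*
+	 * On x86-64, step over the 128-byte red zone first: a leaf
+	 * function may keep live data below %rsp, and pushfq would
+	 * clobber it.
+	 */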
+ asm volatile (
+#ifdef __x86_64__
+ "subq $128, %%rsp\n\t"
+ "pushfq\n\t"
+ "popq %0\n\t"
+ "addq $128, %%rsp"
+#else
+ "pushfl\n\t"
+ "popl %0"
+#endif
+ : "=r" (eflags) :: "memory");
+
+ return eflags;
+}
+
+static inline void set_eflags(unsigned long eflags)
+{
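+	/* same red-zone avoidance as get_eflags, in reverse */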
+ asm volatile (
+#ifdef __x86_64__
+ "subq $128, %%rsp\n\t"
+ "pushq %0\n\t"
+ "popfq\n\t"
+ "addq $128, %%rsp"
+#else
+ "pushl %0\n\t"
+ "popfl"
+#endif
+ :: "r" (eflags) : "flags", "memory");
+}
+
+#endif /* __SELFTESTS_X86_HELPERS_H */
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
deleted file mode 100644
index 254e5436bdd9..000000000000
--- a/tools/testing/selftests/x86/pkey-helpers.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _PKEYS_HELPER_H
-#define _PKEYS_HELPER_H
-#define _GNU_SOURCE
-#include <string.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <ucontext.h>
-#include <sys/mman.h>
-
-#define NR_PKEYS 16
-#define PKRU_BITS_PER_PKEY 2
-
-#ifndef DEBUG_LEVEL
-#define DEBUG_LEVEL 0
-#endif
-#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
-extern int dprint_in_signal;
-extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-static inline void sigsafe_printf(const char *format, ...)
-{
- va_list ap;
-
- if (!dprint_in_signal) {
- va_start(ap, format);
- vprintf(format, ap);
- va_end(ap);
- } else {
- int ret;
- /*
- * No printf() functions are signal-safe.
- * They deadlock easily. Write the format
- * string to get some output, even if
- * incomplete.
- */
- ret = write(1, format, strlen(format));
- if (ret < 0)
- exit(1);
- }
-}
-#define dprintf_level(level, args...) do { \
- if (level <= DEBUG_LEVEL) \
- sigsafe_printf(args); \
-} while (0)
-#define dprintf0(args...) dprintf_level(0, args)
-#define dprintf1(args...) dprintf_level(1, args)
-#define dprintf2(args...) dprintf_level(2, args)
-#define dprintf3(args...) dprintf_level(3, args)
-#define dprintf4(args...) dprintf_level(4, args)
-
-extern unsigned int shadow_pkru;
-static inline unsigned int __rdpkru(void)
-{
- unsigned int eax, edx;
- unsigned int ecx = 0;
- unsigned int pkru;
-
- asm volatile(".byte 0x0f,0x01,0xee\n\t"
- : "=a" (eax), "=d" (edx)
- : "c" (ecx));
- pkru = eax;
- return pkru;
-}
-
-static inline unsigned int _rdpkru(int line)
-{
- unsigned int pkru = __rdpkru();
-
- dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
- line, pkru, shadow_pkru);
- assert(pkru == shadow_pkru);
-
- return pkru;
-}
-
-#define rdpkru() _rdpkru(__LINE__)
-
-static inline void __wrpkru(unsigned int pkru)
-{
- unsigned int eax = pkru;
- unsigned int ecx = 0;
- unsigned int edx = 0;
-
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
- asm volatile(".byte 0x0f,0x01,0xef\n\t"
- : : "a" (eax), "c" (ecx), "d" (edx));
- assert(pkru == __rdpkru());
-}
-
-static inline void wrpkru(unsigned int pkru)
-{
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
- /* will do the shadow check for us: */
- rdpkru();
- __wrpkru(pkru);
- shadow_pkru = pkru;
- dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
-}
-
-/*
- * These are technically racy. since something could
- * change PKRU between the read and the write.
- */
-static inline void __pkey_access_allow(int pkey, int do_allow)
-{
- unsigned int pkru = rdpkru();
- int bit = pkey * 2;
-
- if (do_allow)
- pkru &= (1<<bit);
- else
- pkru |= (1<<bit);
-
- dprintf4("pkru now: %08x\n", rdpkru());
- wrpkru(pkru);
-}
-
-static inline void __pkey_write_allow(int pkey, int do_allow_write)
-{
- long pkru = rdpkru();
- int bit = pkey * 2 + 1;
-
- if (do_allow_write)
- pkru &= (1<<bit);
- else
- pkru |= (1<<bit);
-
- wrpkru(pkru);
- dprintf4("pkru now: %08x\n", rdpkru());
-}
-
-#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
-#define PROT_PKEY1 0x20 /* protection key value (bit 1) */
-#define PROT_PKEY2 0x40 /* protection key value (bit 2) */
-#define PROT_PKEY3 0x80 /* protection key value (bit 3) */
-
-#define PAGE_SIZE 4096
-#define MB (1<<20)
-
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "cpuid;"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
-#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
-
-static inline int cpu_has_pku(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
-
- eax = 0x7;
- ecx = 0x0;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (!(ecx & X86_FEATURE_PKU)) {
- dprintf2("cpu does not have PKU\n");
- return 0;
- }
- if (!(ecx & X86_FEATURE_OSPKE)) {
- dprintf2("cpu does not have OSPKE\n");
- return 0;
- }
- return 1;
-}
-
-#define XSTATE_PKRU_BIT (9)
-#define XSTATE_PKRU 0x200
-
-int pkru_xstate_offset(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- int xstate_offset;
- int xstate_size;
- unsigned long XSTATE_CPUID = 0xd;
- int leaf;
-
- /* assume that XSTATE_PKRU is set in XCR0 */
- leaf = XSTATE_PKRU_BIT;
- {
- eax = XSTATE_CPUID;
- ecx = leaf;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (leaf == XSTATE_PKRU_BIT) {
- xstate_offset = ebx;
- xstate_size = eax;
- }
- }
-
- if (xstate_size == 0) {
- printf("could not find size/offset of PKRU in xsave state\n");
- return 0;
- }
-
- return xstate_offset;
-}
-
-#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 1063328e275c..120ac741fe44 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -31,6 +31,8 @@
#include <sys/ptrace.h>
#include <sys/user.h>
+#include "helpers.h"
+
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -67,21 +69,6 @@ static unsigned char altstack_data[SIGSTKSZ];
# define INT80_CLOBBERS
#endif
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
-#define X86_EFLAGS_TF (1UL << 8)
-
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t*)ctx_void;
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index bc0ecc2e862e..bff474b5efc6 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -15,30 +15,11 @@
#include <setjmp.h>
#include <errno.h>
-#ifdef __x86_64__
-# define WIDTH "q"
-#else
-# define WIDTH "l"
-#endif
+#include "helpers.h"
/* Our sigaltstack scratch space. */
static unsigned char altstack_data[SIGSTKSZ];
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
-#define X86_EFLAGS_TF (1UL << 8)
-
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -72,6 +53,7 @@ static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
if (ax != -EFAULT && ax != -ENOSYS) {
printf("[FAIL]\tAX had the wrong value: 0x%lx\n",
(unsigned long)ax);
+ printf("\tIP = 0x%lx\n", (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
n_errs++;
} else {
printf("[OK]\tSeems okay\n");
@@ -226,5 +208,30 @@ int main()
}
set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+#ifdef __x86_64__
+ printf("[RUN]\tSYSENTER with TF, invalid state, and GSBASE < 0\n");
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ sigtrap_consecutive_syscalls = 0;
+
+ asm volatile ("wrgsbase %%rax\n\t"
+ :: "a" (0xffffffffffff0000UL));
+
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ asm volatile (
+ "movl $-1, %%eax\n\t"
+ "movl $-1, %%ebx\n\t"
+ "movl $-1, %%ecx\n\t"
+ "movl $-1, %%edx\n\t"
+ "movl $-1, %%esi\n\t"
+ "movl $-1, %%edi\n\t"
+ "movl $-1, %%ebp\n\t"
+ "movl $-1, %%esp\n\t"
+ "sysenter"
+ : : : "memory", "flags");
+ }
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+#endif
+
return 0;
}
diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c
index 02309a195041..a108b80dd082 100644
--- a/tools/testing/selftests/x86/syscall_nt.c
+++ b/tools/testing/selftests/x86/syscall_nt.c
@@ -13,29 +13,11 @@
#include <signal.h>
#include <err.h>
#include <sys/syscall.h>
-#include <asm/processor-flags.h>
-#ifdef __x86_64__
-# define WIDTH "q"
-#else
-# define WIDTH "l"
-#endif
+#include "helpers.h"
static unsigned int nerrs;
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -59,6 +41,7 @@ static void do_it(unsigned long extraflags)
set_eflags(get_eflags() | extraflags);
syscall(SYS_getpid);
flags = get_eflags();
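+ /* Restore a sane EFLAGS baseline so a sticky flag (AC, DF, NT, TF) does not leak into the reporting below or into libc. */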
+ set_eflags(X86_EFLAGS_IF | X86_EFLAGS_FIXED);
if ((flags & extraflags) == extraflags) {
printf("[OK]\tThe syscall worked and flags are still set\n");
} else {
@@ -73,6 +56,12 @@ int main(void)
printf("[RUN]\tSet NT and issue a syscall\n");
do_it(X86_EFLAGS_NT);
+ printf("[RUN]\tSet AC and issue a syscall\n");
+ do_it(X86_EFLAGS_AC);
+
+ printf("[RUN]\tSet NT|AC and issue a syscall\n");
+ do_it(X86_EFLAGS_NT | X86_EFLAGS_AC);
+
/*
* Now try it again with TF set -- TF forces returns via IRET in all
* cases except non-ptregs-using 64-bit full fast path syscalls.
@@ -80,8 +69,28 @@ int main(void)
sethandler(SIGTRAP, sigtrap, 0);
+ printf("[RUN]\tSet TF and issue a syscall\n");
+ do_it(X86_EFLAGS_TF);
+
printf("[RUN]\tSet NT|TF and issue a syscall\n");
do_it(X86_EFLAGS_NT | X86_EFLAGS_TF);
+ printf("[RUN]\tSet AC|TF and issue a syscall\n");
+ do_it(X86_EFLAGS_AC | X86_EFLAGS_TF);
+
+ printf("[RUN]\tSet NT|AC|TF and issue a syscall\n");
+ do_it(X86_EFLAGS_NT | X86_EFLAGS_AC | X86_EFLAGS_TF);
+
+ /*
+ * Now try DF. This is evil and it's plausible that we will crash
+ * glibc, but glibc would have to do something rather surprising
+ * for this to happen.
+ */
+ printf("[RUN]\tSet DF and issue a syscall\n");
+ do_it(X86_EFLAGS_DF);
+
+ printf("[RUN]\tSet TF|DF and issue a syscall\n");
+ do_it(X86_EFLAGS_TF | X86_EFLAGS_DF);
+
return nerrs == 0 ? 0 : 1;
}
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index a4f4d4cf22c3..65c141ebfbbd 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -20,6 +20,8 @@
#include <setjmp.h>
#include <sys/uio.h>
+#include "helpers.h"
+
#ifdef __x86_64__
# define VSYS(x) (x)
#else
@@ -460,6 +462,17 @@ static int test_vsys_x(void)
return 0;
}
+/*
+ * Debuggers expect ptrace() to be able to peek at the vsyscall page.
+ * Use process_vm_readv() as a proxy for ptrace() to test this. We
+ * want it to work in the vsyscall=emulate case and to fail in the
+ * vsyscall=xonly case.
+ *
+ * It's worth noting that this ABI is a bit nutty. write(2) can't
+ * read from the vsyscall page on any kernel version or mode. The
+ * fact that ptrace() ever worked was a nice courtesy of old kernels,
+ * but the code to support it is fairly gross.
+ */
static int test_process_vm_readv(void)
{
#ifdef __x86_64__
@@ -475,8 +488,12 @@ static int test_process_vm_readv(void)
remote.iov_len = 4096;
ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0);
if (ret != 4096) {
- printf("[OK]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno);
- return 0;
+ /*
+ * We expect process_vm_readv() to work if and only if the
+ * vsyscall page is readable.
+ */
+ printf("[%s]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", vsyscall_map_r ? "FAIL" : "OK", ret, errno);
+ return vsyscall_map_r ? 1 : 0;
}
if (vsyscall_map_r) {
@@ -486,6 +503,9 @@ static int test_process_vm_readv(void)
printf("[FAIL]\tIt worked but returned incorrect data\n");
return 1;
}
+ } else {
+ printf("[FAIL]\tprocess_rm_readv() succeeded, but it should have failed in this configuration\n");
+ return 1;
}
#endif
@@ -493,21 +513,8 @@ static int test_process_vm_readv(void)
}
#ifdef __x86_64__
-#define X86_EFLAGS_TF (1UL << 8)
static volatile sig_atomic_t num_vsyscall_traps;
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
-}
-
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t *)ctx_void;
diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c
index 0075ccd65407..4c311e1af4c7 100644
--- a/tools/testing/selftests/x86/unwind_vdso.c
+++ b/tools/testing/selftests/x86/unwind_vdso.c
@@ -11,6 +11,8 @@
#include <features.h>
#include <stdio.h>
+#include "helpers.h"
+
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16
int main()
@@ -53,27 +55,6 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
err(1, "sigaction");
}
-#ifdef __x86_64__
-# define WIDTH "q"
-#else
-# define WIDTH "l"
-#endif
-
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
-#define X86_EFLAGS_TF (1UL << 8)
-
static volatile sig_atomic_t nerrs;
static unsigned long sysinfo;
static bool got_sysinfo = false;
diff --git a/tools/usb/Build b/tools/usb/Build
new file mode 100644
index 000000000000..2ad6f9745816
--- /dev/null
+++ b/tools/usb/Build
@@ -0,0 +1,2 @@
+testusb-y += testusb.o
+ffs-test-y += ffs-test.o
diff --git a/tools/usb/Makefile b/tools/usb/Makefile
index 01d758d73b6d..1b128e551b2e 100644
--- a/tools/usb/Makefile
+++ b/tools/usb/Makefile
@@ -1,14 +1,51 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for USB tools
+include ../scripts/Makefile.include
-PTHREAD_LIBS = -lpthread
-WARNINGS = -Wall -Wextra
-CFLAGS = $(WARNINGS) -g -I../include
-LDFLAGS = $(PTHREAD_LIBS)
+bindir ?= /usr/bin
-all: testusb ffs-test
-%: %.c
- $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+override CFLAGS += -O2 -Wall -Wextra -g -D_GNU_SOURCE -I$(OUTPUT)include -I$(srctree)/tools/include
+override LDFLAGS += -lpthread
+
+ALL_TARGETS := testusb ffs-test
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
+
+all: $(ALL_PROGRAMS)
+
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+TESTUSB_IN := $(OUTPUT)testusb-in.o
+$(TESTUSB_IN): FORCE
+ $(Q)$(MAKE) $(build)=testusb
+$(OUTPUT)testusb: $(TESTUSB_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
+
+FFS_TEST_IN := $(OUTPUT)ffs-test-in.o
+$(FFS_TEST_IN): FORCE
+ $(Q)$(MAKE) $(build)=ffs-test
+$(OUTPUT)ffs-test: $(FFS_TEST_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
clean:
- $(RM) testusb ffs-test
+ rm -f $(ALL_PROGRAMS)
+ find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.o.cmd' -delete
+
+install: $(ALL_PROGRAMS)
+ install -d -m 755 $(DESTDIR)$(bindir); \
+ for program in $(ALL_PROGRAMS); do \
+ install $$program $(DESTDIR)$(bindir); \
+ done
+
+FORCE:
+
+.PHONY: all install clean FORCE prepare
diff --git a/tools/usb/usbip/doc/usbip.8 b/tools/usb/usbip/doc/usbip.8
index a6097be25d28..a15d20063b98 100644
--- a/tools/usb/usbip/doc/usbip.8
+++ b/tools/usb/usbip/doc/usbip.8
@@ -83,7 +83,9 @@ List local USB devices.
.SH EXAMPLES
client:# usbip list --remote=server
- - List exportable usb devices on the server.
+ - List devices exported by the remote server.
+
+ client:# modprobe vhci-hcd
client:# usbip attach --remote=server --busid=1-2
- Connect the remote USB device.
diff --git a/tools/usb/usbip/doc/usbipd.8 b/tools/usb/usbip/doc/usbipd.8
index ac4635db3f03..fb62a756893b 100644
--- a/tools/usb/usbip/doc/usbipd.8
+++ b/tools/usb/usbip/doc/usbipd.8
@@ -73,7 +73,7 @@ USB/IP client can connect and use exported devices.
.SH EXAMPLES
- server:# modprobe usbip
+ server:# modprobe usbip-host
server:# usbipd -D
- Start usbip daemon.
diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c
index d1d8ba2a4a40..ca78aa368476 100644
--- a/tools/usb/usbip/libsrc/usbip_host_common.c
+++ b/tools/usb/usbip/libsrc/usbip_host_common.c
@@ -23,7 +23,7 @@
#include "list.h"
#include "sysfs_utils.h"
-struct udev *udev_context;
+extern struct udev *udev_context;
static int32_t read_attr_usbip_status(struct usbip_usb_device *udev)
{
diff --git a/tools/usb/usbip/vudc/vudc_server_example.sh b/tools/usb/usbip/vudc/vudc_server_example.sh
index 2736be64f203..fed53f51ee01 100755
--- a/tools/usb/usbip/vudc/vudc_server_example.sh
+++ b/tools/usb/usbip/vudc/vudc_server_example.sh
@@ -24,7 +24,7 @@
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
-# For more information, please refer to <http://unlicense.org/>
+# For more information, please refer to <https://unlicense.org/>
################################################################################
################################################################################
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 6683b4a70b05..315e85cabeda 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -11,6 +11,7 @@
#include <linux/compiler.h>
#include <linux/types.h>
+#include <linux/list.h>
#include <linux/printk.h>
#include <linux/bug.h>
#include <errno.h>
@@ -109,8 +110,6 @@ static inline void free_page(unsigned long addr)
const typeof( ((type *)0)->member ) *__mptr = (ptr); \
(type *)( (char *)__mptr - offsetof(type,member) );})
-#define uninitialized_var(x) x = x
-
# ifndef likely
# define likely(x) (__builtin_expect(!!(x), 1))
# endif
@@ -135,10 +134,4 @@ static inline void free_page(unsigned long addr)
(void) (&_min1 == &_min2); \
_min1 < _min2 ? _min1 : _min2; })
-/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
-#define list_add_tail(a, b) do {} while (0)
-#define list_del(a) do {} while (0)
-#define list_for_each_entry(a, b, c) while (0)
-/* end of stubs */
-
#endif /* KERNEL_H */
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index b751350d4ce8..5d90254ddae4 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -11,12 +11,11 @@ struct device {
struct virtio_device {
struct device dev;
u64 features;
+ struct list_head vqs;
};
struct virtqueue {
- /* TODO: commented as list macros are empty stubs for now.
- * Broken but enough for virtio_ring.c
- * struct list_head list; */
+ struct list_head list;
void (*callback)(struct virtqueue *vq);
const char *name;
struct virtio_device *vdev;
diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h
index dbf14c1e2188..f2640e505c4e 100644
--- a/tools/virtio/linux/virtio_config.h
+++ b/tools/virtio/linux/virtio_config.h
@@ -42,16 +42,16 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev,
(__virtio_test_bit((dev), feature))
/**
- * virtio_has_iommu_quirk - determine whether this device has the iommu quirk
+ * virtio_has_dma_quirk - determine whether this device has the DMA quirk
* @vdev: the device
*/
-static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev)
+static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev)
{
/*
* Note the reverse polarity of the quirk feature (compared to most
* other features), this is for compatibility with legacy systems.
*/
- return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+ return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM);
}
static inline bool virtio_is_little_endian(struct virtio_device *vdev)
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index b427def67e7e..cb3f29c09aff 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <getopt.h>
+#include <limits.h>
#include <string.h>
#include <poll.h>
#include <sys/eventfd.h>
@@ -18,6 +19,8 @@
#include <linux/virtio_ring.h>
#include "../../drivers/vhost/test.h"
+#define RANDOM_BATCH -1
+
/* Unused */
void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
@@ -43,6 +46,10 @@ struct vdev_info {
struct vhost_memory *mem;
};
+static const struct vhost_vring_file no_backend = { .fd = -1 },
+ backend = { .fd = 1 };
+static const struct vhost_vring_state null_state = {};
+
bool vq_notify(struct virtqueue *vq)
{
struct vq_info *info = vq->priv;
@@ -88,6 +95,19 @@ void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info)
assert(r >= 0);
}
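+/* Free any existing virtqueue and rebuild the vring from a zeroed ring buffer. */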
+static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev)
+{
+ if (info->vq)
+ vring_del_virtqueue(info->vq);
+
+ memset(info->ring, 0, vring_size(num, 4096));
+ vring_init(&info->vring, num, info->ring, 4096);
+ info->vq = __vring_new_virtqueue(info->idx, info->vring, vdev, true,
+ false, vq_notify, vq_callback, "test");
+ assert(info->vq);
+ info->vq->priv = info;
+}
+
static void vq_info_add(struct vdev_info *dev, int num)
{
struct vq_info *info = &dev->vqs[dev->nvqs];
@@ -97,14 +117,7 @@ static void vq_info_add(struct vdev_info *dev, int num)
info->call = eventfd(0, EFD_NONBLOCK);
r = posix_memalign(&info->ring, 4096, vring_size(num, 4096));
assert(r >= 0);
- memset(info->ring, 0, vring_size(num, 4096));
- vring_init(&info->vring, num, info->ring, 4096);
- info->vq = vring_new_virtqueue(info->idx,
- info->vring.num, 4096, &dev->vdev,
- true, false, info->ring,
- vq_notify, vq_callback, "test");
- assert(info->vq);
- info->vq->priv = info;
+ vq_reset(info, num, &dev->vdev);
vhost_vq_setup(dev, info);
dev->fds[info->idx].fd = info->call;
dev->fds[info->idx].events = POLLIN;
@@ -116,6 +129,7 @@ static void vdev_info_init(struct vdev_info* dev, unsigned long long features)
int r;
memset(dev, 0, sizeof *dev);
dev->vdev.features = features;
+ INIT_LIST_HEAD(&dev->vdev.vqs);
dev->buf_size = 1024;
dev->buf = malloc(dev->buf_size);
assert(dev->buf);
@@ -152,41 +166,93 @@ static void wait_for_interrupt(struct vdev_info *dev)
}
static void run_test(struct vdev_info *dev, struct vq_info *vq,
- bool delayed, int bufs)
+ bool delayed, int batch, int reset_n, int bufs)
{
struct scatterlist sl;
- long started = 0, completed = 0;
- long completed_before;
+ long started = 0, completed = 0, next_reset = reset_n;
+ long completed_before, started_before;
int r, test = 1;
unsigned len;
long long spurious = 0;
+ const bool random_batch = batch == RANDOM_BATCH;
+
r = ioctl(dev->control, VHOST_TEST_RUN, &test);
assert(r >= 0);
+ if (!reset_n) {
+ next_reset = INT_MAX;
+ }
+
for (;;) {
virtqueue_disable_cb(vq->vq);
completed_before = completed;
+ started_before = started;
do {
- if (started < bufs) {
+ const bool reset = completed > next_reset;
+ if (random_batch)
+ batch = (random() % vq->vring.num) + 1;
+
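+ /* Keep at most "batch" buffers in flight before draining completions. */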
+ while (started < bufs &&
+ (started - completed) < batch) {
sg_init_one(&sl, dev->buf, dev->buf_size);
r = virtqueue_add_outbuf(vq->vq, &sl, 1,
dev->buf + started,
GFP_ATOMIC);
- if (likely(r == 0)) {
- ++started;
- if (unlikely(!virtqueue_kick(vq->vq)))
+ if (unlikely(r != 0)) {
+ if (r == -ENOSPC &&
+ started > started_before)
+ r = 0;
+ else
r = -1;
+ break;
}
- } else
+
+ ++started;
+
+ if (unlikely(!virtqueue_kick(vq->vq))) {
+ r = -1;
+ break;
+ }
+ }
+
+ if (started >= bufs)
r = -1;
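+ /* On a reset pass, detach the vhost backend (fd == -1) so the device stops using the ring while it is rebuilt. */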
+ if (reset) {
+ r = ioctl(dev->control, VHOST_TEST_SET_BACKEND,
+ &no_backend);
+ assert(!r);
+ }
+
/* Flush out completed bufs if any */
- if (virtqueue_get_buf(vq->vq, &len)) {
+ while (virtqueue_get_buf(vq->vq, &len)) {
++completed;
r = 0;
}
+ if (reset) {
+ struct vhost_vring_state s = { .index = 0 };
+
+ vq_reset(vq, vq->vring.num, &dev->vdev);
+
+ r = ioctl(dev->control, VHOST_GET_VRING_BASE,
+ &s);
+ assert(!r);
+
+ s.num = 0;
+ r = ioctl(dev->control, VHOST_SET_VRING_BASE,
+ &null_state);
+ assert(!r);
+
+ r = ioctl(dev->control, VHOST_TEST_SET_BACKEND,
+ &backend);
+ assert(!r);
+
+ started = completed;
+ while (completed > next_reset)
+ next_reset += completed;
+ }
} while (r == 0);
- if (completed == completed_before)
+ if (completed == completed_before && started == started_before)
++spurious;
assert(completed <= bufs);
assert(started <= bufs);
@@ -203,7 +269,9 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq,
test = 0;
r = ioctl(dev->control, VHOST_TEST_RUN, &test);
assert(r >= 0);
- fprintf(stderr, "spurious wakeups: 0x%llx\n", spurious);
+ fprintf(stderr,
+ "spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n",
+ spurious, started, completed);
}
const char optstring[] = "h";
@@ -245,6 +313,16 @@ const struct option longopts[] = {
.val = 'd',
},
{
+ .name = "batch",
+ .val = 'b',
+ .has_arg = required_argument,
+ },
+ {
+ .name = "reset",
+ .val = 'r',
+ .has_arg = optional_argument,
+ },
+ {
}
};
@@ -255,6 +333,8 @@ static void help(void)
" [--no-event-idx]"
" [--no-virtio-1]"
" [--delayed-interrupt]"
+ " [--batch=random/N]"
+ " [--reset=N]"
"\n");
}
@@ -263,6 +343,7 @@ int main(int argc, char **argv)
struct vdev_info dev;
unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
(1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1);
+ long batch = 1, reset = 0;
int o;
bool delayed = false;
@@ -289,6 +370,24 @@ int main(int argc, char **argv)
case 'D':
delayed = true;
break;
+ case 'b':
+ if (0 == strcmp(optarg, "random")) {
+ batch = RANDOM_BATCH;
+ } else {
+ batch = strtol(optarg, NULL, 10);
+ assert(batch > 0);
+ assert(batch < (long)INT_MAX + 1);
+ }
+ break;
+ case 'r':
+ if (!optarg) {
+ reset = 1;
+ } else {
+ reset = strtol(optarg, NULL, 10);
+ assert(reset > 0);
+ assert(reset < (long)INT_MAX + 1);
+ }
+ break;
default:
assert(0);
break;
@@ -298,6 +397,6 @@ int main(int argc, char **argv)
done:
vdev_info_init(&dev, features);
vq_info_add(&dev, 256);
- run_test(&dev, &dev.vqs[0], delayed, 0x100000);
+ run_test(&dev, &dev.vqs[0], delayed, batch, reset, 0x100000);
return 0;
}
diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c
index 293653463303..fa87b58bd5fa 100644
--- a/tools/virtio/vringh_test.c
+++ b/tools/virtio/vringh_test.c
@@ -307,6 +307,7 @@ static int parallel_test(u64 features,
close(to_host[0]);
gvdev.vdev.features = features;
+ INIT_LIST_HEAD(&gvdev.vdev.vqs);
gvdev.to_host_fd = to_host[1];
gvdev.notifies = 0;
@@ -453,6 +454,7 @@ int main(int argc, char *argv[])
getrange = getrange_iov;
vdev.features = 0;
+ INIT_LIST_HEAD(&vdev.vqs);
while (argv[1]) {
if (strcmp(argv[1], "--indirect") == 0)
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 58c0eab71bca..0517c744b04e 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -78,6 +78,7 @@
#define KPF_ARCH 38
#define KPF_UNCACHED 39
#define KPF_SOFTDIRTY 40
+#define KPF_ARCH_2 41
/* [48-] take some arbitrary free slots for expanding overloaded flags
* not part of kernel API
@@ -135,6 +136,7 @@ static const char * const page_flag_names[] = {
[KPF_ARCH] = "h:arch",
[KPF_UNCACHED] = "c:uncached",
[KPF_SOFTDIRTY] = "f:softdirty",
+ [KPF_ARCH_2] = "H:arch_2",
[KPF_READAHEAD] = "I:readahead",
[KPF_SLOB_FREE] = "P:slob_free",
diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c
index d3a8755c039c..85eb65ea16d3 100644
--- a/tools/vm/page_owner_sort.c
+++ b/tools/vm/page_owner_sort.c
@@ -4,8 +4,7 @@
*
* Example use:
* cat /sys/kernel/debug/page_owner > page_owner_full.txt
- * grep -v ^PFN page_owner_full.txt > page_owner.txt
- * ./page_owner_sort page_owner.txt sorted_page_owner.txt
+ * ./page_owner_sort page_owner_full.txt sorted_page_owner.txt
*
* See Documentation/vm/page_owner.rst
*/
@@ -38,6 +37,8 @@ int read_block(char *buf, int buf_size, FILE *fin)
while (buf_end - curr > 1 && fgets(curr, buf_end - curr, fin)) {
if (*curr == '\n') /* empty line */
return curr - buf;
+ if (!strncmp(curr, "PFN", 3))
+ continue;
curr += strlen(curr);
}