aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Build1
-rw-r--r--tools/perf/Documentation/guest-files.txt16
-rw-r--r--tools/perf/Documentation/guestmount.txt11
-rw-r--r--tools/perf/Documentation/intel-hybrid.txt10
-rw-r--r--tools/perf/Documentation/perf-buildid-list.txt4
-rw-r--r--tools/perf/Documentation/perf-c2c.txt31
-rw-r--r--tools/perf/Documentation/perf-dlfilter.txt22
-rw-r--r--tools/perf/Documentation/perf-inject.txt21
-rw-r--r--tools/perf/Documentation/perf-intel-pt.txt181
-rw-r--r--tools/perf/Documentation/perf-kvm.txt25
-rw-r--r--tools/perf/Documentation/perf-kwork.txt180
-rw-r--r--tools/perf/Documentation/perf-lock.txt55
-rw-r--r--tools/perf/Documentation/perf-record.txt17
-rw-r--r--tools/perf/Documentation/perf-script.txt16
-rw-r--r--tools/perf/Documentation/perf-stat.txt21
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt20
-rw-r--r--tools/perf/Makefile.config46
-rw-r--r--tools/perf/Makefile.perf42
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c2
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c2
-rw-r--r--tools/perf/arch/arm64/util/pmu.c4
-rw-r--r--tools/perf/arch/x86/tests/Build1
-rw-r--r--tools/perf/arch/x86/tests/arch-tests.c2
-rw-r--r--tools/perf/arch/x86/tests/intel-cqm.c2
-rw-r--r--tools/perf/arch/x86/tests/rdpmc.c182
-rw-r--r--tools/perf/arch/x86/util/cpuid.h34
-rw-r--r--tools/perf/arch/x86/util/evlist.c64
-rw-r--r--tools/perf/arch/x86/util/evsel.c72
-rw-r--r--tools/perf/arch/x86/util/header.c27
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c2
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c2
-rw-r--r--tools/perf/arch/x86/util/iostat.c2
-rw-r--r--tools/perf/arch/x86/util/topdown.c51
-rw-r--r--tools/perf/arch/x86/util/topdown.h1
-rw-r--r--tools/perf/arch/x86/util/tsc.c77
-rw-r--r--tools/perf/builtin-annotate.c8
-rw-r--r--tools/perf/builtin-buildid-list.c39
-rw-r--r--tools/perf/builtin-c2c.c466
-rw-r--r--tools/perf/builtin-inject.c1046
-rw-r--r--tools/perf/builtin-kvm.c8
-rw-r--r--tools/perf/builtin-kwork.c1832
-rw-r--r--tools/perf/builtin-list.c2
-rw-r--r--tools/perf/builtin-lock.c957
-rw-r--r--tools/perf/builtin-record.c89
-rw-r--r--tools/perf/builtin-report.c6
-rw-r--r--tools/perf/builtin-sched.c26
-rw-r--r--tools/perf/builtin-script.c32
-rw-r--r--tools/perf/builtin-stat.c86
-rw-r--r--tools/perf/builtin-timechart.c1
-rw-r--r--tools/perf/builtin-trace.c5
-rw-r--r--tools/perf/builtin.h1
-rw-r--r--tools/perf/command-list.txt1
-rw-r--r--tools/perf/dlfilters/dlfilter-show-cycles.c4
-rw-r--r--tools/perf/include/perf/perf_dlfilter.h8
-rw-r--r--tools/perf/perf.c1
-rw-r--r--tools/perf/pmu-events/Build21
-rw-r--r--tools/perf/pmu-events/arch/arm64/mapfile.csv2
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z16/pai.json1101
-rw-r--r--tools/perf/pmu-events/arch/test/test_soc/cpu/metrics.json64
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/cache.json178
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/floating-point.json19
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/frontend.json38
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/memory.json40
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/other.json97
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/pipeline.json507
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json63
-rw-r--r--tools/perf/pmu-events/arch/x86/bonnell/cache.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/bonnell/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/bonnell/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/bonnell/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/bonnell/other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/bonnell/pipeline.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/bonnell/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json130
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/cache.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/pipeline.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/uncore-cache.json152
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/uncore-other.json82
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/uncore.json278
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json142
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/cache.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json3619
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json2867
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/uncore-other.json1233
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json492
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json576
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/cache.json22
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json9
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/memory.json39
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json3433
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json1428
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json2849
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/uncore-other.json3252
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json437
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/cache.json8
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json730
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/other.json63
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json11
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json4401
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json22493
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/uncore-power.json201
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/elkhartlake/cache.json956
-rw-r--r--tools/perf/pmu-events/arch/x86/elkhartlake/floating-point.json19
-rw-r--r--tools/perf/pmu-events/arch/x86/elkhartlake/frontend.json34
-rw-r--r--tools/perf/pmu-events/arch/x86/elkhartlake/memory.json388
-rw-r--r--tools/perf/pmu-events/arch/x86/elkhartlake/other.json527
-rw-r--r--tools/perf/pmu-events/arch/x86/elkhartlake/pipeline.json203
-rw-r--r--tools/perf/pmu-events/arch/x86/elkhartlake/virtual-memory.json151
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmont/cache.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmont/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmont/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmont/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmont/pipeline.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/cache.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/cache.json78
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json85
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/memory.json75
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/pipeline.json9
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/uncore-other.json7
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/cache.json44
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json496
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/memory.json52
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/pipeline.json9
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json3422
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json1428
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json2839
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/uncore-other.json3170
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json477
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/cache.json8
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json126
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/uncore-other.json31
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/cache.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json697
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/memory.json6
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/other.json51
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/pipeline.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/uncore-memory.json1523
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/uncore-other.json37134
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/uncore-power.json225
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/cache.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json94
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/uncore-other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/cache.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json100
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json3301
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json1741
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json1775
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/uncore-other.json2398
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json677
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/cache.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json17
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/pipeline.json16
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json1900
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json824
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json445
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/uncore-other.json1538
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json351
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/knightslanding/cache.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/knightslanding/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/knightslanding/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/knightslanding/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/knightslanding/pipeline.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/knightslanding/uncore-memory.json42
-rw-r--r--tools/perf/pmu-events/arch/x86/knightslanding/uncore-other.json4103
-rw-r--r--tools/perf/pmu-events/arch/x86/knightslanding/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv74
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/cache.json262
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/frontend.json24
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/memory.json185
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/other.json46
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json254
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json46
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemep/cache.json14
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemep/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemep/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemep/memory.json6
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemep/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemex/cache.json2950
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemex/floating-point.json182
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemex/frontend.json20
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemex/memory.json672
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemex/other.json170
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemex/pipeline.json830
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemex/virtual-memory.json92
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/cache.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/frontend.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/pipeline.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json11
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/uncore-other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/cache.json135
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json6
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/frontend.json16
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json23
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/other.json68
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json99
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json572
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-other.json9
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/virtual-memory.json20
-rw-r--r--tools/perf/pmu-events/arch/x86/silvermont/cache.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/silvermont/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/silvermont/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/silvermont/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/silvermont/other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/silvermont/pipeline.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/silvermont/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json178
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/uncore-cache.json142
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/uncore-other.json79
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/uncore.json254
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/other.json66
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/pipeline.json11
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json673
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json3567
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json22316
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-power.json201
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/cache.json (renamed from tools/perf/pmu-events/arch/x86/tremontx/cache.json)60
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/floating-point.json (renamed from tools/perf/pmu-events/arch/x86/tremontx/floating-point.json)9
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/frontend.json (renamed from tools/perf/pmu-events/arch/x86/tremontx/frontend.json)20
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/memory.json (renamed from tools/perf/pmu-events/arch/x86/tremontx/memory.json)4
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/other.json (renamed from tools/perf/pmu-events/arch/x86/tremontx/other.json)18
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/pipeline.json (renamed from tools/perf/pmu-events/arch/x86/tremontx/pipeline.json)98
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/uncore-memory.json619
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/uncore-other.json25192
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/uncore-power.json235
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/virtual-memory.json (renamed from tools/perf/pmu-events/arch/x86/tremontx/virtual-memory.json)69
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/cache.json48
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/other.json1
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json378
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/uncore-other.json65
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/tremontx/uncore-memory.json245
-rw-r--r--tools/perf/pmu-events/arch/x86/tremontx/uncore-other.json2395
-rw-r--r--tools/perf/pmu-events/arch/x86/tremontx/uncore-power.json11
-rw-r--r--tools/perf/pmu-events/arch/x86/westmereep-dp/cache.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/westmereep-dp/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/westmereep-dp/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/westmereep-dp/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/westmereep-dp/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/westmereep-sp/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/westmereep-sp/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/westmereep-sp/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/westmereex/floating-point.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/westmereex/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/westmereex/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/empty-pmu-events.c342
-rw-r--r--tools/perf/pmu-events/jevents.c1342
-rwxr-xr-xtools/perf/pmu-events/jevents.py725
-rw-r--r--tools/perf/pmu-events/jsmn.c352
-rw-r--r--tools/perf/pmu-events/jsmn.h68
-rw-r--r--tools/perf/pmu-events/json.c162
-rw-r--r--tools/perf/pmu-events/json.h39
-rw-r--r--tools/perf/pmu-events/pmu-events.h40
-rw-r--r--tools/perf/scripts/perl/Perf-Trace-Util/Build6
-rwxr-xr-xtools/perf/scripts/python/arm-cs-trace-disasm.py34
-rw-r--r--tools/perf/scripts/python/intel-pt-events.py32
-rw-r--r--tools/perf/tests/Build1
-rw-r--r--tools/perf/tests/bpf-script-example.c35
-rw-r--r--tools/perf/tests/builtin-test-list.c207
-rw-r--r--tools/perf/tests/builtin-test-list.h12
-rw-r--r--tools/perf/tests/builtin-test.c152
-rw-r--r--tools/perf/tests/code-reading.c2
-rw-r--r--tools/perf/tests/cpumap.c19
-rw-r--r--tools/perf/tests/event-times.c2
-rw-r--r--tools/perf/tests/evsel-roundtrip-name.c4
-rw-r--r--tools/perf/tests/expand-cgroup.c25
-rw-r--r--tools/perf/tests/expr.c13
-rw-r--r--tools/perf/tests/hists_cumulate.c2
-rw-r--r--tools/perf/tests/hists_filter.c4
-rw-r--r--tools/perf/tests/hists_link.c4
-rw-r--r--tools/perf/tests/hists_output.c2
-rw-r--r--tools/perf/tests/keep-tracking.c4
-rw-r--r--tools/perf/tests/mmap-basic.c127
-rw-r--r--tools/perf/tests/parse-metric.c77
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c31
-rw-r--r--tools/perf/tests/pmu-events.c474
-rw-r--r--tools/perf/tests/sample-parsing.c14
-rw-r--r--tools/perf/tests/shell/lib/perf_json_output_lint.py96
-rwxr-xr-xtools/perf/tests/shell/record_offcpu.sh57
-rwxr-xr-xtools/perf/tests/shell/stat+csv_output.sh7
-rwxr-xr-xtools/perf/tests/shell/stat+json_output.sh147
-rwxr-xr-xtools/perf/tests/shell/stat.sh19
-rwxr-xr-xtools/perf/tests/shell/stat_all_metrics.sh47
-rwxr-xr-xtools/perf/tests/shell/test_arm_spe.sh30
-rwxr-xr-xtools/perf/tests/shell/test_brstack.sh114
-rw-r--r--tools/perf/tests/switch-tracking.c24
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h16
-rw-r--r--tools/perf/util/Build5
-rw-r--r--tools/perf/util/affinity.c8
-rw-r--r--tools/perf/util/amd-sample-raw.c68
-rw-r--r--tools/perf/util/annotate.c7
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.c1
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.h12
-rw-r--r--tools/perf/util/arm-spe.c130
-rw-r--r--tools/perf/util/auxtrace.c30
-rw-r--r--tools/perf/util/auxtrace.h4
-rw-r--r--tools/perf/util/bpf-loader.c224
-rw-r--r--tools/perf/util/bpf_kwork.c346
-rw-r--r--tools/perf/util/bpf_lock_contention.c189
-rw-r--r--tools/perf/util/bpf_off_cpu.c53
-rw-r--r--tools/perf/util/bpf_skel/kwork_trace.bpf.c383
-rw-r--r--tools/perf/util/bpf_skel/lock_contention.bpf.c175
-rw-r--r--tools/perf/util/bpf_skel/off_cpu.bpf.c38
-rw-r--r--tools/perf/util/build-id.c106
-rw-r--r--tools/perf/util/build-id.h16
-rw-r--r--tools/perf/util/callchain.c18
-rw-r--r--tools/perf/util/cpumap.c80
-rw-r--r--tools/perf/util/cpumap.h4
-rw-r--r--tools/perf/util/cs-etm.c2
-rw-r--r--tools/perf/util/data-convert-json.c5
-rw-r--r--tools/perf/util/data.c43
-rw-r--r--tools/perf/util/data.h2
-rw-r--r--tools/perf/util/dlfilter.c2
-rw-r--r--tools/perf/util/dso.h6
-rw-r--r--tools/perf/util/dsos.c15
-rw-r--r--tools/perf/util/env.c62
-rw-r--r--tools/perf/util/env.h14
-rw-r--r--tools/perf/util/event.c1
-rw-r--r--tools/perf/util/event.h48
-rw-r--r--tools/perf/util/events_stats.h2
-rw-r--r--tools/perf/util/evlist.c53
-rw-r--r--tools/perf/util/evlist.h9
-rw-r--r--tools/perf/util/evsel.c70
-rw-r--r--tools/perf/util/evsel.h4
-rw-r--r--tools/perf/util/expr.c13
-rw-r--r--tools/perf/util/genelf.c18
-rw-r--r--tools/perf/util/header.c192
-rw-r--r--tools/perf/util/header.h2
-rw-r--r--tools/perf/util/intel-pt.c183
-rw-r--r--tools/perf/util/jitdump.c9
-rw-r--r--tools/perf/util/kwork.h257
-rw-r--r--tools/perf/util/llvm-utils.c2
-rw-r--r--tools/perf/util/lock-contention.h147
-rw-r--r--tools/perf/util/machine.c57
-rw-r--r--tools/perf/util/machine.h7
-rw-r--r--tools/perf/util/mem-events.c46
-rw-r--r--tools/perf/util/mem-events.h3
-rw-r--r--tools/perf/util/metricgroup.c278
-rw-r--r--tools/perf/util/metricgroup.h5
-rw-r--r--tools/perf/util/ordered-events.h6
-rw-r--r--tools/perf/util/parse-events.c736
-rw-r--r--tools/perf/util/parse-events.h35
-rw-r--r--tools/perf/util/perf_api_probe.c2
-rw-r--r--tools/perf/util/pmu.c154
-rw-r--r--tools/perf/util/pmu.h12
-rw-r--r--tools/perf/util/print-events.c572
-rw-r--r--tools/perf/util/print-events.h22
-rw-r--r--tools/perf/util/probe-event.c8
-rw-r--r--tools/perf/util/record.c4
-rw-r--r--tools/perf/util/record.h1
-rw-r--r--tools/perf/util/s390-sample-raw.c50
-rw-r--r--tools/perf/util/scripting-engines/Build4
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c36
-rw-r--r--tools/perf/util/session.c180
-rw-r--r--tools/perf/util/session.h4
-rw-r--r--tools/perf/util/setup.py12
-rw-r--r--tools/perf/util/stat-display.c383
-rw-r--r--tools/perf/util/stat-shadow.c24
-rw-r--r--tools/perf/util/stat.c1
-rw-r--r--tools/perf/util/stat.h2
-rw-r--r--tools/perf/util/symbol-elf.c71
-rw-r--r--tools/perf/util/symbol.c6
-rw-r--r--tools/perf/util/synthetic-events.c204
-rw-r--r--tools/perf/util/synthetic-events.h4
-rw-r--r--tools/perf/util/thread.c1
-rw-r--r--tools/perf/util/thread.h1
-rw-r--r--tools/perf/util/tool.h3
-rw-r--r--tools/perf/util/topdown.c7
-rw-r--r--tools/perf/util/topdown.h3
-rw-r--r--tools/perf/util/trace-event-info.c96
-rw-r--r--tools/perf/util/tracepoint.c63
-rw-r--r--tools/perf/util/tracepoint.h25
-rw-r--r--tools/perf/util/tsc.h1
-rw-r--r--tools/perf/util/util.c70
-rw-r--r--tools/perf/util/util.h15
445 files changed, 190594 insertions, 15165 deletions
diff --git a/tools/perf/Build b/tools/perf/Build
index db61dbe2b543..496b096153bb 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -25,6 +25,7 @@ perf-y += builtin-data.o
perf-y += builtin-version.o
perf-y += builtin-c2c.o
perf-y += builtin-daemon.o
+perf-y += builtin-kwork.o
perf-$(CONFIG_TRACE) += builtin-trace.o
perf-$(CONFIG_LIBELF) += builtin-probe.o
diff --git a/tools/perf/Documentation/guest-files.txt b/tools/perf/Documentation/guest-files.txt
new file mode 100644
index 000000000000..8cc0b092f996
--- /dev/null
+++ b/tools/perf/Documentation/guest-files.txt
@@ -0,0 +1,16 @@
+include::guestmount.txt[]
+
+--guestkallsyms=<path>::
+ Guest OS /proc/kallsyms file copy. perf reads it to get guest
+ kernel symbols. Users copy it out from guest OS.
+
+--guestmodules=<path>::
+ Guest OS /proc/modules file copy. perf reads it to get guest
+ kernel module information. Users copy it out from guest OS.
+
+--guestvmlinux=<path>::
+ Guest OS kernel vmlinux.
+
+--guest-code::
+ Indicate that guest code can be found in the hypervisor process,
+ which is a common case for KVM test programs.
diff --git a/tools/perf/Documentation/guestmount.txt b/tools/perf/Documentation/guestmount.txt
new file mode 100644
index 000000000000..6edf12363add
--- /dev/null
+++ b/tools/perf/Documentation/guestmount.txt
@@ -0,0 +1,11 @@
+--guestmount=<path>::
+ Guest OS root file system mount directory. Users mount guest OS
+ root directories under <path> by a specific filesystem access method,
+ typically, sshfs.
+ For example, start 2 guest OS, one's pid is 8888 and the other's is 9999:
+[verse]
+ $ mkdir \~/guestmount
+ $ cd \~/guestmount
+ $ sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/
+ $ sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/
+ $ perf {GMEXAMPLECMD} --guestmount=~/guestmount {GMEXAMPLESUBCMD}
diff --git a/tools/perf/Documentation/intel-hybrid.txt b/tools/perf/Documentation/intel-hybrid.txt
index c9302096dc46..e7a776ad25d7 100644
--- a/tools/perf/Documentation/intel-hybrid.txt
+++ b/tools/perf/Documentation/intel-hybrid.txt
@@ -21,11 +21,6 @@ cat /sys/devices/cpu_atom/cpus
It indicates cpu0-cpu15 are core cpus and cpu16-cpu23 are atom cpus.
-Quickstart
-
-List hybrid event
------------------
-
As before, use perf-list to list the symbolic event.
perf list
@@ -40,7 +35,6 @@ the event is belong to. Same event name but with different pmu can
be supported.
Enable hybrid event with a specific pmu
----------------------------------------
To enable a core only event or atom only event, following syntax is supported:
@@ -53,7 +47,6 @@ For example, count the 'cycles' event on core cpus.
perf stat -e cpu_core/cycles/
Create two events for one hardware event automatically
-------------------------------------------------------
When creating one event and the event is available on both atom and core,
two events are created automatically. One is for atom, the other is for
@@ -132,7 +125,6 @@ For perf-stat result, it displays two events:
The first 'cycles' is core event, the second 'cycles' is atom event.
Thread mode example:
---------------------
perf-stat reports the scaled counts for hybrid event and with a percentage
displayed. The percentage is the event's running time/enabling time.
@@ -176,14 +168,12 @@ perf_event_attr:
604,097,080 cpu_atom/cycles/ (99.57%)
perf-record:
-------------
If there is no '-e' specified in perf record, on hybrid platform,
it creates two default 'cycles' and adds them to event list. One
is for core, the other is for atom.
perf-stat:
-----------
If there is no '-e' specified in perf stat, on hybrid platform,
besides software events, the following events are created and
diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt
index 25c52efcc7f0..e1e8fdbe06b9 100644
--- a/tools/perf/Documentation/perf-buildid-list.txt
+++ b/tools/perf/Documentation/perf-buildid-list.txt
@@ -33,6 +33,10 @@ OPTIONS
-k::
--kernel::
Show running kernel build id.
+-m::
+--kernel-maps::
+ Show buildid, start/end text address, and path of running kernel and
+ its modules.
-v::
--verbose::
Be more verbose.
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index 6f69173731aa..f1f7ae6b08d1 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -109,7 +109,9 @@ REPORT OPTIONS
-d::
--display::
- Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
+ Switch to HITM type (rmt, lcl) or peer snooping type (peer) to display
+ and sort on. Total HITMs (tot) as default, except Arm64 uses peer mode
+ as default.
--stitch-lbr::
Show callgraph with stitched LBRs, which may have more complete
@@ -174,12 +176,18 @@ For each cacheline in the 1) list we display following data:
Cacheline
- cacheline address (hex number)
- Rmt/Lcl Hitm
+ Rmt/Lcl Hitm (Display with HITM types)
- cacheline percentage of all Remote/Local HITM accesses
- LLC Load Hitm - Total, LclHitm, RmtHitm
+ Peer Snoop (Display with peer type)
+ - cacheline percentage of all peer accesses
+
+ LLC Load Hitm - Total, LclHitm, RmtHitm (For display with HITM types)
- count of Total/Local/Remote load HITMs
+ Load Peer - Total, Local, Remote (For display with peer type)
+ - count of Total/Local/Remote loads from peer cache or DRAM
+
Total records
- sum of all cachelines accesses
@@ -201,16 +209,21 @@ For each cacheline in the 1) list we display following data:
- count of LLC load accesses, includes LLC hits and LLC HITMs
RMT Load Hit - RmtHit, RmtHitm
- - count of remote load accesses, includes remote hits and remote HITMs
+ - count of remote load accesses, includes remote hits and remote HITMs;
+ on Arm neoverse cores, RmtHit is used to account remote accesses,
+ includes remote DRAM or any upward cache level in remote node
Load Dram - Lcl, Rmt
- count of local and remote DRAM accesses
For each offset in the 2) list we display following data:
- HITM - Rmt, Lcl
+ HITM - Rmt, Lcl (Display with HITM types)
- % of Remote/Local HITM accesses for given offset within cacheline
+ Peer Snoop - Rmt, Lcl (Display with peer type)
+ - % of Remote/Local peer accesses for given offset within cacheline
+
Store Refs - L1 Hit, L1 Miss, N/A
- % of store accesses that hit L1, missed L1 and N/A (not available) memory
level for given offset within cacheline
@@ -227,9 +240,12 @@ For each offset in the 2) list we display following data:
Code address
- code address responsible for the accesses
- cycles - rmt hitm, lcl hitm, load
+ cycles - rmt hitm, lcl hitm, load (Display with HITM types)
- sum of cycles for given accesses - Remote/Local HITM and generic load
+ cycles - rmt peer, lcl peer, load (Display with peer type)
+ - sum of cycles for given accesses - Remote/Local peer load and generic load
+
cpu cnt
- number of cpus that participated on the access
@@ -251,7 +267,8 @@ The 'Node' field displays nodes that accesses given cacheline
offset. Its output comes in 3 flavors:
- node IDs separated by ','
- node IDs with stats for each ID, in following format:
- Node{cpus %hitms %stores}
+ Node{cpus %hitms %stores} (Display with HITM types)
+ Node{cpus %peers %stores} (Display with peer type)
- node IDs with list of affected CPUs in following format:
Node{cpu list}
diff --git a/tools/perf/Documentation/perf-dlfilter.txt b/tools/perf/Documentation/perf-dlfilter.txt
index 594f5a5a0c9e..fb22e3b31dc5 100644
--- a/tools/perf/Documentation/perf-dlfilter.txt
+++ b/tools/perf/Documentation/perf-dlfilter.txt
@@ -107,9 +107,31 @@ struct perf_dlfilter_sample {
__u64 raw_callchain_nr; /* Number of raw_callchain entries */
const __u64 *raw_callchain; /* Refer <linux/perf_event.h> */
const char *event;
+ __s32 machine_pid;
+ __s32 vcpu;
};
----
+Note: 'machine_pid' and 'vcpu' are not original members, but were added together later.
+'size' can be used to determine their presence at run time.
+PERF_DLFILTER_HAS_MACHINE_PID will be defined if they are present at compile time.
+For example:
+[source,c]
+----
+#include <perf/perf_dlfilter.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+static inline bool have_machine_pid(const struct perf_dlfilter_sample *sample)
+{
+#ifdef PERF_DLFILTER_HAS_MACHINE_PID
+ return sample->size >= offsetof(struct perf_dlfilter_sample, vcpu) + sizeof(sample->vcpu);
+#else
+ return false;
+#endif
+}
+----
+
The perf_dlfilter_fns structure
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index 0570a1ccd344..ffc293fdf61d 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -85,6 +85,27 @@ include::itrace.txt[]
without updating it. Currently this option is supported only by
Intel PT, refer linkperf:perf-intel-pt[1]
+--guest-data=<path>,<pid>[,<time offset>[,<time scale>]]::
+ Insert events from a perf.data file recorded in a virtual machine at
+ the same time as the input perf.data file was recorded on the host.
+ The Process ID (PID) of the QEMU hypervisor process must be provided,
+ and the time offset and time scale (multiplier) will likely be needed
+ to convert guest time stamps into host time stamps. For example, for
+ x86 the TSC Offset and Multiplier could be provided for a virtual machine
+ using Linux command line option no-kvmclock.
+ Currently only mmap, mmap2, comm, task, context_switch, ksymbol,
+ and text_poke events are inserted, as well as build ID information.
+ The QEMU option -name debug-threads=on is needed so that thread names
+ can be used to determine which thread is running which VCPU. Note
+ libvirt seems to use this by default.
+ When using perf record in the guest, option --sample-identifier
+ should be used, and also --buildid-all and --switch-events may be
+ useful.
+
+:GMEXAMPLECMD: inject
+:GMEXAMPLESUBCMD:
+include::guestmount.txt[]
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1],
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index 238ab9d3cb93..3dc3f0ccbd51 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -267,7 +267,7 @@ Note that, as with all events, the event is suffixed with event modifiers:
H host
p precise ip
-'h', 'G' and 'H' are for virtualization which is not supported by Intel PT.
+'h', 'G' and 'H' are for virtualization which are not used by Intel PT.
'p' is also not relevant to Intel PT. So only options 'u' and 'k' are
meaningful for Intel PT.
@@ -1218,10 +1218,10 @@ XED
include::build-xed.txt[]
-Tracing Virtual Machines
-------------------------
+Tracing Virtual Machines (kernel only)
+--------------------------------------
-Currently, only kernel tracing is supported and only with either "timeless" decoding
+Currently, kernel tracing is supported with either "timeless" decoding
(i.e. no TSC timestamps) or VM Time Correlation. VM Time Correlation is an extra step
using 'perf inject' and requires unchanging VMX TSC Offset and no VMX TSC Scaling.
@@ -1400,6 +1400,179 @@ There were none.
:17006 17006 [001] 11500.262869216: ffffffff8220116e error_entry+0xe ([guest.kernel.kallsyms]) pushq %rax
+Tracing Virtual Machines (including user space)
+-----------------------------------------------
+
+It is possible to use perf record to record sideband events within a virtual machine, so that an Intel PT trace on the host can be decoded.
+Sideband events from the guest perf.data file can be injected into the host perf.data file using perf inject.
+
+Here is an example of the steps needed:
+
+On the guest machine:
+
+Check that no-kvmclock kernel command line option was used to boot:
+
+Note, this is essential to enable time correlation between host and guest machines.
+
+ $ cat /proc/cmdline
+ BOOT_IMAGE=/boot/vmlinuz-5.10.0-16-amd64 root=UUID=cb49c910-e573-47e0-bce7-79e293df8e1d ro no-kvmclock
+
+There is no BPF support at present so, if possible, disable JIT compiling:
+
+ $ echo 0 | sudo tee /proc/sys/net/core/bpf_jit_enable
+ 0
+
+Start perf record to collect sideband events:
+
+ $ sudo perf record -o guest-sideband-testing-guest-perf.data --sample-identifier --buildid-all --switch-events --kcore -a -e dummy
+
+On the host machine:
+
+Start perf record to collect Intel PT trace:
+
+Note, the host trace will get very big, very fast, so the steps from starting to stopping the host trace really need to be done so that they happen in the shortest time possible.
+
+ $ sudo perf record -o guest-sideband-testing-host-perf.data -m,64M --kcore -a -e intel_pt/cyc/
+
+On the guest machine:
+
+Run a small test case, just 'uname' in this example:
+
+ $ uname
+ Linux
+
+On the host machine:
+
+Stop the Intel PT trace:
+
+ ^C
+ [ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 76.122 MB guest-sideband-testing-host-perf.data ]
+
+On the guest machine:
+
+Stop perf record collecting sideband events:
+
+ ^C
+ [ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 1.247 MB guest-sideband-testing-guest-perf.data ]
+
+And then copy guest-sideband-testing-guest-perf.data to the host (not shown here).
+
+On the host machine:
+
+With the 2 perf.data recordings, and with their ownership changed to the user.
+
+Identify the TSC Offset:
+
+ $ perf inject -i guest-sideband-testing-host-perf.data --vm-time-correlation=dry-run
+ VMCS: 0x103fc6 TSC Offset 0xfffffa6ae070cb20
+ VMCS: 0x103ff2 TSC Offset 0xfffffa6ae070cb20
+ VMCS: 0x10fdaa TSC Offset 0xfffffa6ae070cb20
+ VMCS: 0x24d57c TSC Offset 0xfffffa6ae070cb20
+
+Correct Intel PT TSC timestamps for the guest machine:
+
+ $ perf inject -i guest-sideband-testing-host-perf.data --vm-time-correlation=0xfffffa6ae070cb20 --force
+
+Identify the guest machine PID:
+
+ $ perf script -i guest-sideband-testing-host-perf.data --no-itrace --show-task-events | grep KVM
+ CPU 0/KVM 0 [000] 0.000000: PERF_RECORD_COMM: CPU 0/KVM:13376/13381
+ CPU 1/KVM 0 [000] 0.000000: PERF_RECORD_COMM: CPU 1/KVM:13376/13382
+ CPU 2/KVM 0 [000] 0.000000: PERF_RECORD_COMM: CPU 2/KVM:13376/13383
+ CPU 3/KVM 0 [000] 0.000000: PERF_RECORD_COMM: CPU 3/KVM:13376/13384
+
+Note, the QEMU option -name debug-threads=on is needed so that thread names
+can be used to determine which thread is running which VCPU as above. libvirt seems to use this by default.
+
+Create a guestmount, assuming the guest machine is 'vm_to_test':
+
+ $ mkdir -p ~/guestmount/13376
+ $ sshfs -o direct_io vm_to_test:/ ~/guestmount/13376
+
+Inject the guest perf.data file into the host perf.data file:
+
+Note, due to the guestmount option, guest object files and debug files will be copied into the build ID cache from the guest machine, with the notable exception of VDSO.
+If needed, VDSO can be copied manually in a fashion similar to that used by the perf-archive script.
+
+ $ perf inject -i guest-sideband-testing-host-perf.data -o inj --guestmount ~/guestmount --guest-data=guest-sideband-testing-guest-perf.data,13376,0xfffffa6ae070cb20
+
+Show an excerpt from the result. In this case the CPU and time range have been chosen to show interaction between guest and host when 'uname' is starting to run on the guest machine:
+
+Notes:
+
+ - the CPU displayed, [002] in this case, is always the host CPU
+ - events happening in the virtual machine start with VM:13376 VCPU:003, which shows the hypervisor PID 13376 and the VCPU number
+ - only calls and errors are displayed i.e. --itrace=ce
+ - branches entering and exiting the virtual machine are split, and show as 2 branches to/from "0 [unknown] ([unknown])"
+
+ $ perf script -i inj --itrace=ce -F+machine_pid,+vcpu,+addr,+pid,+tid,-period --ns --time 7919.408803365,7919.408804631 -C 2
+ CPU 3/KVM 13376/13384 [002] 7919.408803365: branches: ffffffffc0f8ebe0 vmx_vcpu_enter_exit+0xc0 ([kernel.kallsyms]) => ffffffffc0f8edc0 __vmx_vcpu_run+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803365: branches: ffffffffc0f8edd5 __vmx_vcpu_run+0x15 ([kernel.kallsyms]) => ffffffffc0f8eca0 vmx_update_host_rsp+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803365: branches: ffffffffc0f8ee1b __vmx_vcpu_run+0x5b ([kernel.kallsyms]) => ffffffffc0f8ed60 vmx_vmenter+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803461: branches: ffffffffc0f8ed62 vmx_vmenter+0x2 ([kernel.kallsyms]) => 0 [unknown] ([unknown])
+ VM:13376 VCPU:003 uname 3404/3404 [002] 7919.408803461: branches: 0 [unknown] ([unknown]) => 7f851c9b5a5c init_cacheinfo+0x3ac (/usr/lib/x86_64-linux-gnu/libc-2.31.so)
+ VM:13376 VCPU:003 uname 3404/3404 [002] 7919.408803567: branches: 7f851c9b5a5a init_cacheinfo+0x3aa (/usr/lib/x86_64-linux-gnu/libc-2.31.so) => 0 [unknown] ([unknown])
+ CPU 3/KVM 13376/13384 [002] 7919.408803567: branches: 0 [unknown] ([unknown]) => ffffffffc0f8ed80 vmx_vmexit+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803596: branches: ffffffffc0f6619a vmx_vcpu_run+0x26a ([kernel.kallsyms]) => ffffffffb2255c60 x86_virt_spec_ctrl+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803801: branches: ffffffffc0f66445 vmx_vcpu_run+0x515 ([kernel.kallsyms]) => ffffffffb2290b30 native_write_msr+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803850: branches: ffffffffc0f661f8 vmx_vcpu_run+0x2c8 ([kernel.kallsyms]) => ffffffffc1092300 kvm_load_host_xsave_state+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803850: branches: ffffffffc1092327 kvm_load_host_xsave_state+0x27 ([kernel.kallsyms]) => ffffffffc1092220 kvm_load_host_xsave_state.part.0+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803862: branches: ffffffffc0f662cf vmx_vcpu_run+0x39f ([kernel.kallsyms]) => ffffffffc0f63f90 vmx_recover_nmi_blocking+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803862: branches: ffffffffc0f662e9 vmx_vcpu_run+0x3b9 ([kernel.kallsyms]) => ffffffffc0f619a0 __vmx_complete_interrupts+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803872: branches: ffffffffc109cfb2 vcpu_enter_guest+0x752 ([kernel.kallsyms]) => ffffffffc0f5f570 vmx_handle_exit_irqoff+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803881: branches: ffffffffc109d028 vcpu_enter_guest+0x7c8 ([kernel.kallsyms]) => ffffffffb234f900 __srcu_read_lock+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803897: branches: ffffffffc109d06f vcpu_enter_guest+0x80f ([kernel.kallsyms]) => ffffffffc0f72e30 vmx_handle_exit+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803897: branches: ffffffffc0f72e3d vmx_handle_exit+0xd ([kernel.kallsyms]) => ffffffffc0f727c0 __vmx_handle_exit+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803897: branches: ffffffffc0f72b15 __vmx_handle_exit+0x355 ([kernel.kallsyms]) => ffffffffc0f60ae0 vmx_flush_pml_buffer+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803903: branches: ffffffffc0f72994 __vmx_handle_exit+0x1d4 ([kernel.kallsyms]) => ffffffffc10b7090 kvm_emulate_cpuid+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803903: branches: ffffffffc10b70f1 kvm_emulate_cpuid+0x61 ([kernel.kallsyms]) => ffffffffc10b6e10 kvm_cpuid+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803941: branches: ffffffffc10b7125 kvm_emulate_cpuid+0x95 ([kernel.kallsyms]) => ffffffffc1093110 kvm_skip_emulated_instruction+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803941: branches: ffffffffc109311f kvm_skip_emulated_instruction+0xf ([kernel.kallsyms]) => ffffffffc0f5e180 vmx_get_rflags+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803951: branches: ffffffffc109312a kvm_skip_emulated_instruction+0x1a ([kernel.kallsyms]) => ffffffffc0f5fd30 vmx_skip_emulated_instruction+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803951: branches: ffffffffc0f5fd79 vmx_skip_emulated_instruction+0x49 ([kernel.kallsyms]) => ffffffffc0f5fb50 skip_emulated_instruction+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803956: branches: ffffffffc0f5fc68 skip_emulated_instruction+0x118 ([kernel.kallsyms]) => ffffffffc0f6a940 vmx_cache_reg+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803964: branches: ffffffffc0f5fc11 skip_emulated_instruction+0xc1 ([kernel.kallsyms]) => ffffffffc0f5f9e0 vmx_set_interrupt_shadow+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803980: branches: ffffffffc109f8b1 vcpu_run+0x71 ([kernel.kallsyms]) => ffffffffc10ad2f0 kvm_cpu_has_pending_timer+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803980: branches: ffffffffc10ad2fb kvm_cpu_has_pending_timer+0xb ([kernel.kallsyms]) => ffffffffc10b0490 apic_has_pending_timer+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803991: branches: ffffffffc109f899 vcpu_run+0x59 ([kernel.kallsyms]) => ffffffffc109c860 vcpu_enter_guest+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803993: branches: ffffffffc109cd4c vcpu_enter_guest+0x4ec ([kernel.kallsyms]) => ffffffffc0f69140 vmx_prepare_switch_to_guest+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803996: branches: ffffffffc109cd7d vcpu_enter_guest+0x51d ([kernel.kallsyms]) => ffffffffb234f930 __srcu_read_unlock+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803996: branches: ffffffffc109cd9c vcpu_enter_guest+0x53c ([kernel.kallsyms]) => ffffffffc0f609b0 vmx_sync_pir_to_irr+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408803996: branches: ffffffffc0f60a6d vmx_sync_pir_to_irr+0xbd ([kernel.kallsyms]) => ffffffffc10adc20 kvm_lapic_find_highest_irr+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804010: branches: ffffffffc0f60abd vmx_sync_pir_to_irr+0x10d ([kernel.kallsyms]) => ffffffffc0f60820 vmx_set_rvi+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804019: branches: ffffffffc109ceca vcpu_enter_guest+0x66a ([kernel.kallsyms]) => ffffffffb2249840 fpregs_assert_state_consistent+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804021: branches: ffffffffc109cf10 vcpu_enter_guest+0x6b0 ([kernel.kallsyms]) => ffffffffc0f65f30 vmx_vcpu_run+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804024: branches: ffffffffc0f6603b vmx_vcpu_run+0x10b ([kernel.kallsyms]) => ffffffffb229bed0 __get_current_cr3_fast+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804024: branches: ffffffffc0f66055 vmx_vcpu_run+0x125 ([kernel.kallsyms]) => ffffffffb2253050 cr4_read_shadow+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804030: branches: ffffffffc0f6608d vmx_vcpu_run+0x15d ([kernel.kallsyms]) => ffffffffc10921e0 kvm_load_guest_xsave_state+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804030: branches: ffffffffc1092207 kvm_load_guest_xsave_state+0x27 ([kernel.kallsyms]) => ffffffffc1092110 kvm_load_guest_xsave_state.part.0+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804032: branches: ffffffffc0f660c6 vmx_vcpu_run+0x196 ([kernel.kallsyms]) => ffffffffb22061a0 perf_guest_get_msrs+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804032: branches: ffffffffb22061a9 perf_guest_get_msrs+0x9 ([kernel.kallsyms]) => ffffffffb220cda0 intel_guest_get_msrs+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804039: branches: ffffffffc0f66109 vmx_vcpu_run+0x1d9 ([kernel.kallsyms]) => ffffffffc0f652c0 clear_atomic_switch_msr+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804040: branches: ffffffffc0f66119 vmx_vcpu_run+0x1e9 ([kernel.kallsyms]) => ffffffffc0f73f60 intel_pmu_lbr_is_enabled+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804042: branches: ffffffffc0f73f81 intel_pmu_lbr_is_enabled+0x21 ([kernel.kallsyms]) => ffffffffc10b68e0 kvm_find_cpuid_entry+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804045: branches: ffffffffc0f66454 vmx_vcpu_run+0x524 ([kernel.kallsyms]) => ffffffffc0f61ff0 vmx_update_hv_timer+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804057: branches: ffffffffc0f66142 vmx_vcpu_run+0x212 ([kernel.kallsyms]) => ffffffffc10af100 kvm_wait_lapic_expire+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804057: branches: ffffffffc0f66156 vmx_vcpu_run+0x226 ([kernel.kallsyms]) => ffffffffb2255c60 x86_virt_spec_ctrl+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804057: branches: ffffffffc0f66161 vmx_vcpu_run+0x231 ([kernel.kallsyms]) => ffffffffc0f8eb20 vmx_vcpu_enter_exit+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804057: branches: ffffffffc0f8eb44 vmx_vcpu_enter_exit+0x24 ([kernel.kallsyms]) => ffffffffb2353e10 rcu_note_context_switch+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804057: branches: ffffffffb2353e1c rcu_note_context_switch+0xc ([kernel.kallsyms]) => ffffffffb2353db0 rcu_qs+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804066: branches: ffffffffc0f8ebe0 vmx_vcpu_enter_exit+0xc0 ([kernel.kallsyms]) => ffffffffc0f8edc0 __vmx_vcpu_run+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804066: branches: ffffffffc0f8edd5 __vmx_vcpu_run+0x15 ([kernel.kallsyms]) => ffffffffc0f8eca0 vmx_update_host_rsp+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804066: branches: ffffffffc0f8ee1b __vmx_vcpu_run+0x5b ([kernel.kallsyms]) => ffffffffc0f8ed60 vmx_vmenter+0x0 ([kernel.kallsyms])
+ CPU 3/KVM 13376/13384 [002] 7919.408804162: branches: ffffffffc0f8ed62 vmx_vmenter+0x2 ([kernel.kallsyms]) => 0 [unknown] ([unknown])
+ VM:13376 VCPU:003 uname 3404/3404 [002] 7919.408804162: branches: 0 [unknown] ([unknown]) => 7f851c9b5a5c init_cacheinfo+0x3ac (/usr/lib/x86_64-linux-gnu/libc-2.31.so)
+ VM:13376 VCPU:003 uname 3404/3404 [002] 7919.408804273: branches: 7f851cb7c0e4 _dl_init+0x74 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) => 7f851cb7bf50 call_init.part.0+0x0 (/usr/lib/x86_64-linux-gnu/ld-2.31.so)
+ VM:13376 VCPU:003 uname 3404/3404 [002] 7919.408804526: branches: 55e0c00136f0 _start+0x0 (/usr/bin/uname) => ffffffff83200ac0 asm_exc_page_fault+0x0 ([kernel.kallsyms])
+ VM:13376 VCPU:003 uname 3404/3404 [002] 7919.408804526: branches: ffffffff83200ac3 asm_exc_page_fault+0x3 ([kernel.kallsyms]) => ffffffff83201290 error_entry+0x0 ([kernel.kallsyms])
+ VM:13376 VCPU:003 uname 3404/3404 [002] 7919.408804534: branches: ffffffff832012fa error_entry+0x6a ([kernel.kallsyms]) => ffffffff830b59a0 sync_regs+0x0 ([kernel.kallsyms])
+ VM:13376 VCPU:003 uname 3404/3404 [002] 7919.408804631: branches: ffffffff83200ad9 asm_exc_page_fault+0x19 ([kernel.kallsyms]) => ffffffff830b8210 exc_page_fault+0x0 ([kernel.kallsyms])
+ VM:13376 VCPU:003 uname 3404/3404 [002] 7919.408804631: branches: ffffffff830b82a4 exc_page_fault+0x94 ([kernel.kallsyms]) => ffffffff830b80e0 __kvm_handle_async_pf+0x0 ([kernel.kallsyms])
+ VM:13376 VCPU:003 uname 3404/3404 [002] 7919.408804631: branches: ffffffff830b80ed __kvm_handle_async_pf+0xd ([kernel.kallsyms]) => ffffffff830b80c0 kvm_read_and_reset_apf_flags+0x0 ([kernel.kallsyms])
+
+
Tracing Virtual Machines - Guest Code
-------------------------------------
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
index 83c742adf86e..2ad3f5d9f72b 100644
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -77,26 +77,11 @@ OPTIONS
Collect host side performance profile.
--guest::
Collect guest side performance profile.
---guestmount=<path>::
- Guest os root file system mount directory. Users mounts guest os
- root directories under <path> by a specific filesystem access method,
- typically, sshfs. For example, start 2 guest os. The one's pid is 8888
- and the other's is 9999.
- #mkdir ~/guestmount; cd ~/guestmount
- #sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/
- #sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/
- #perf kvm --host --guest --guestmount=~/guestmount top
---guestkallsyms=<path>::
- Guest os /proc/kallsyms file copy. 'perf' kvm' reads it to get guest
- kernel symbols. Users copy it out from guest os.
---guestmodules=<path>::
- Guest os /proc/modules file copy. 'perf' kvm' reads it to get guest
- kernel module information. Users copy it out from guest os.
---guestvmlinux=<path>::
- Guest os kernel vmlinux.
---guest-code::
- Indicate that guest code can be found in the hypervisor process,
- which is a common case for KVM test programs.
+
+:GMEXAMPLECMD: kvm --host --guest
+:GMEXAMPLESUBCMD: top
+include::guest-files.txt[]
+
-v::
--verbose::
Be more verbose (show counter open errors, etc).
diff --git a/tools/perf/Documentation/perf-kwork.txt b/tools/perf/Documentation/perf-kwork.txt
new file mode 100644
index 000000000000..3c36324712b6
--- /dev/null
+++ b/tools/perf/Documentation/perf-kwork.txt
@@ -0,0 +1,180 @@
+perf-kwork(1)
+=============
+
+NAME
+----
+perf-kwork - Tool to trace/measure kernel work properties (latencies)
+
+SYNOPSIS
+--------
+[verse]
+'perf kwork' {record|report|latency|timehist}
+
+DESCRIPTION
+-----------
+There are several variants of 'perf kwork':
+
+ 'perf kwork record <command>' to record the kernel work
+ of an arbitrary workload.
+
+ 'perf kwork report' to report the per kwork runtime.
+
+ 'perf kwork latency' to report the per kwork latencies.
+
+ 'perf kwork timehist' provides an analysis of kernel work events.
+
+ Example usage:
+ perf kwork record -- sleep 1
+ perf kwork report
+ perf kwork report -b
+ perf kwork latency
+ perf kwork latency -b
+ perf kwork timehist
+
+ By default it shows the individual work events such as irq, workqueue,
+ including the run time and delay (time between raise and actual entry):
+
+ Runtime start Runtime end Cpu Kwork name Runtime Delaytime
+ (TYPE)NAME:NUM (msec) (msec)
+ ----------------- ----------------- ------ ------------------------- ---------- ----------
+ 1811186.976062 1811186.976327 [0000] (s)RCU:9 0.266 0.114
+ 1811186.978452 1811186.978547 [0000] (s)SCHED:7 0.095 0.171
+ 1811186.980327 1811186.980490 [0000] (s)SCHED:7 0.162 0.083
+ 1811186.981221 1811186.981271 [0000] (s)SCHED:7 0.050 0.077
+ 1811186.984267 1811186.984318 [0000] (s)SCHED:7 0.051 0.075
+ 1811186.987252 1811186.987315 [0000] (s)SCHED:7 0.063 0.081
+ 1811186.987785 1811186.987843 [0006] (s)RCU:9 0.058 0.645
+ 1811186.988319 1811186.988383 [0000] (s)SCHED:7 0.064 0.143
+ 1811186.989404 1811186.989607 [0002] (s)TIMER:1 0.203 0.111
+ 1811186.989660 1811186.989732 [0002] (s)SCHED:7 0.072 0.310
+ 1811186.991295 1811186.991407 [0002] eth0:10 0.112
+ 1811186.991639 1811186.991734 [0002] (s)NET_RX:3 0.095 0.277
+ 1811186.989860 1811186.991826 [0002] (w)vmstat_shepherd 1.966 0.345
+ ...
+
+ Times are in msec.usec.
+
+OPTIONS
+-------
+-D::
+--dump-raw-trace=::
+ Display verbose dump of the kwork data.
+
+-f::
+--force::
+ Don't complain, do it.
+
+-k::
+--kwork::
+ List of kwork to profile (irq, softirq, workqueue, etc)
+
+-v::
+--verbose::
+ Be more verbose. (show symbol address, etc)
+
+OPTIONS for 'perf kwork report'
+-------------------------------
+
+-b::
+--use-bpf::
+ Use BPF to measure kwork runtime
+
+-C::
+--cpu::
+ Only show events for the given CPU(s) (comma separated list).
+
+-i::
+--input::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-n::
+--name::
+ Only show events for the given name.
+
+-s::
+--sort::
+ Sort by key(s): runtime, max, count
+
+-S::
+--with-summary::
+ Show summary with statistics
+
+--time::
+ Only analyze samples within given time window: <start>,<stop>. Times
+ have the format seconds.microseconds. If start is not given (i.e., time
+ string is ',x.y') then analysis starts at the beginning of the file. If
+ stop time is not given (i.e, time string is 'x.y,') then analysis goes
+ to end of file.
+
+OPTIONS for 'perf kwork latency'
+--------------------------------
+
+-b::
+--use-bpf::
+ Use BPF to measure kwork latency
+
+-C::
+--cpu::
+ Only show events for the given CPU(s) (comma separated list).
+
+-i::
+--input::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-n::
+--name::
+ Only show events for the given name.
+
+-s::
+--sort::
+ Sort by key(s): avg, max, count
+
+--time::
+ Only analyze samples within given time window: <start>,<stop>. Times
+ have the format seconds.microseconds. If start is not given (i.e., time
+ string is ',x.y') then analysis starts at the beginning of the file. If
+ stop time is not given (i.e, time string is 'x.y,') then analysis goes
+ to end of file.
+
+OPTIONS for 'perf kwork timehist'
+---------------------------------
+
+-C::
+--cpu::
+ Only show events for the given CPU(s) (comma separated list).
+
+-g::
+--call-graph::
+ Display call chains if present (default off).
+
+-i::
+--input::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-k::
+--vmlinux=<file>::
+ Vmlinux pathname
+
+-n::
+--name::
+ Only show events for the given name.
+
+--kallsyms=<file>::
+ Kallsyms pathname
+
+--max-stack::
+ Maximum number of functions to display in backtrace, default 5.
+
+--symfs=<directory>::
+ Look for files with symbols relative to this directory.
+
+--time::
+ Only analyze samples within given time window: <start>,<stop>. Times
+ have the format seconds.microseconds. If start is not given (i.e., time
+ string is ',x.y') then analysis starts at the beginning of the file. If
+ stop time is not given (i.e, time string is 'x.y,') then analysis goes
+ to end of file.
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index 656b537b2fba..193c5d8b8db9 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -8,7 +8,7 @@ perf-lock - Analyze lock events
SYNOPSIS
--------
[verse]
-'perf lock' {record|report|script|info}
+'perf lock' {record|report|script|info|contention}
DESCRIPTION
-----------
@@ -27,6 +27,8 @@ and statistics with this 'perf lock' command.
'perf lock info' shows metadata like threads or addresses
of lock instances.
+ 'perf lock contention' shows contention statistics.
+
COMMON OPTIONS
--------------
@@ -46,6 +48,13 @@ COMMON OPTIONS
--force::
Don't complain, do it.
+--vmlinux=<file>::
+ vmlinux pathname
+
+--kallsyms=<file>::
+ kallsyms pathname
+
+
REPORT OPTIONS
--------------
@@ -96,6 +105,50 @@ INFO OPTIONS
--map::
dump map of lock instances (address:name table)
+CONTENTION OPTIONS
+------------------
+
+-k::
+--key=<value>::
+ Sorting key. Possible values: contended, wait_total (default),
+ wait_max, wait_min, avg_wait.
+
+-F::
+--field=<value>::
+ Output fields. By default it shows all but the wait_min fields
+ and users can customize that using this. Possible values:
+ contended, wait_total, wait_max, wait_min, avg_wait.
+
+-t::
+--threads::
+ Show per-thread lock contention stat
+
+-b::
+--use-bpf::
+ Use BPF program to collect lock contention stats instead of
+ using the input data.
+
+-a::
+--all-cpus::
+ System-wide collection from all CPUs.
+
+-C::
+--cpu::
+ Collect samples only on the list of CPUs provided. Multiple CPUs can be
+ provided as a comma-separated list with no space: 0,1. Ranges of CPUs
+ are specified with -: 0-2. Default is to monitor all CPUs.
+
+-p::
+--pid=::
+ Record events on existing process ID (comma separated list).
+
+--tid=::
+ Record events on existing thread ID (comma separated list).
+
+--map-nr-entries::
+ Maximum number of BPF map entries (default: 10240).
+
+
SEE ALSO
--------
linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index cf8ad50f3de1..0228efc96686 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -275,6 +275,11 @@ OPTIONS
User can change the size by passing the size after comma like
"--call-graph dwarf,4096".
+ When "fp" recording is used, perf tries to save stack entries
+ up to the number specified in sysctl.kernel.perf_event_max_stack
+ by default. User can change the number by passing it after comma
+ like "--call-graph fp,32".
+
-q::
--quiet::
Don't print any message, useful for scripting.
@@ -313,6 +318,11 @@ OPTIONS
--sample-cpu::
Record the sample cpu.
+--sample-identifier::
+ Record the sample identifier i.e. PERF_SAMPLE_IDENTIFIER bit set in
+ the sample_type member of the struct perf_event_attr argument to the
+ perf_event_open system call.
+
-n::
--no-samples::
Don't sample.
@@ -387,6 +397,9 @@ following filters are defined:
- abort_tx: only when the target is a hardware transaction abort
- cond: conditional branches
- save_type: save branch type during sampling in case binary is not available later
+ For the platforms with Intel Arch LBR support (12th-Gen+ client or
+ 4th-Gen Xeon+ server), the save branch type is unconditionally enabled
+ when the taken branch stack sampling is enabled.
+
The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
@@ -747,8 +760,6 @@ events in data directory files. Option specified with no or empty value
defaults to CPU layout. Masks defined or provided by the option value are
filtered through the mask provided by -C option.
-include::intel-hybrid.txt[]
-
--debuginfod[=URLs]::
Specify debuginfod URL to be used when cacheing perf.data binaries,
it follows the same syntax as the DEBUGINFOD_URLS variable, like:
@@ -768,6 +779,8 @@ include::intel-hybrid.txt[]
only, as of now. So the applications built without the frame
pointer might see bogus addresses.
+include::intel-hybrid.txt[]
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 1a557ff8f210..68e37de5fae4 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -79,6 +79,9 @@ OPTIONS
--dump-raw-trace=::
Display verbose dump of the trace data.
+--dump-unsorted-raw-trace=::
+ Same as --dump-raw-trace but not sorted in time order.
+
-L::
--Latency=::
Show latency attributes (irqs/preemption disabled, etc).
@@ -130,7 +133,8 @@ OPTIONS
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
brstackinsn, brstackinsnlen, brstackoff, callindent, insn, insnlen, synth,
- phys_addr, metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat.
+ phys_addr, metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat,
+ machine_pid, vcpu.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
@@ -223,6 +227,10 @@ OPTIONS
The ipc (instructions per cycle) field is synthesized and may have a value when
Instruction Trace decoding.
+ The machine_pid and vcpu fields are derived from data resulting from using
+ perf inject to insert a perf.data file recorded inside a virtual machine into
+ a perf.data file recorded on the host at the same time.
+
Finally, a user may not set fields to none for all event types.
i.e., -F "" is not allowed.
@@ -499,9 +507,9 @@ include::itrace.txt[]
The known limitations include exception handing such as
setjmp/longjmp will have calls/returns not match.
---guest-code::
- Indicate that guest code can be found in the hypervisor process,
- which is a common case for KVM test programs.
+:GMEXAMPLECMD: script
+:GMEXAMPLESUBCMD:
+include::guest-files.txt[]
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index d8a33f4a47c5..d7ff1867feda 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -570,6 +570,27 @@ Additional metrics may be printed with all earlier fields being empty.
include::intel-hybrid.txt[]
+JSON FORMAT
+-----------
+
+With -j, perf stat is able to print out JSON-formatted output
+that can be parsed by other tools.
+
+- timestamp : optional usec time stamp in fractions of second (with -I)
+- optional aggregate options:
+ - core : core identifier (with --per-core)
+ - die : die identifier (with --per-die)
+ - socket : socket identifier (with --per-socket)
+ - node : node identifier (with --per-node)
+ - thread : thread identifier (with --per-thread)
+- counter-value : counter value
+- unit : unit of the counter value or empty
+- event : event name
+- variance : optional variance if multiple values are collected (with -r)
+- runtime : run time of counter
+- metric-value : optional metric value
+- metric-unit : optional unit of metric
+
SEE ALSO
--------
linkperf:perf-top[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index f56d0e0fbff6..635ba043fd7d 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -419,18 +419,20 @@ Example:
cpu_core cpu list : 0-15
cpu_atom cpu list : 16-23
- HEADER_HYBRID_CPU_PMU_CAPS = 31,
+ HEADER_PMU_CAPS = 31,
- A list of hybrid CPU PMU capabilities.
+ List of pmu capabilities (except cpu pmu which is already
+ covered by HEADER_CPU_PMU_CAPS). Note that hybrid cpu pmu
+ capabilities are also stored here.
struct {
u32 nr_pmu;
struct {
- u32 nr_cpu_pmu_caps;
+ u32 nr_caps;
{
char name[];
char value[];
- } [nr_cpu_pmu_caps];
+ } [nr_caps];
char pmu_name[];
} [nr_pmu];
};
@@ -607,6 +609,16 @@ struct compressed_event {
char data[];
};
+ PERF_RECORD_FINISHED_INIT = 82,
+
+Marks the end of records for the system, pre-existing threads in system wide
+sessions, etc. Those are the ones prefixed PERF_RECORD_USER_*.
+
+This is used, for instance, to 'perf inject' events after init and before
+regular events, those emitted by the kernel, to support combining guest and
+host records.
+
+
The header is followed by compressed data frame that can be decompressed
into array of perf trace records. The size of the entire compressed event
record including the header is limited by the max value of header.size.
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 73e0762092fe..2171f02daf59 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -241,15 +241,15 @@ endif
# Try different combinations to accommodate systems that only have
# python[2][3]-config in weird combinations in the following order of
# priority from lowest to highest:
-# * python3-config
-# * python-config
# * python2-config as per pep-0394.
+# * python-config
+# * python3-config
# * $(PYTHON)-config (If PYTHON is user supplied but PYTHON_CONFIG isn't)
#
PYTHON_AUTO := python-config
-PYTHON_AUTO := $(if $(call get-executable,python3-config),python3-config,$(PYTHON_AUTO))
-PYTHON_AUTO := $(if $(call get-executable,python-config),python-config,$(PYTHON_AUTO))
PYTHON_AUTO := $(if $(call get-executable,python2-config),python2-config,$(PYTHON_AUTO))
+PYTHON_AUTO := $(if $(call get-executable,python-config),python-config,$(PYTHON_AUTO))
+PYTHON_AUTO := $(if $(call get-executable,python3-config),python3-config,$(PYTHON_AUTO))
# If PYTHON is defined but PYTHON_CONFIG isn't, then take $(PYTHON)-config as if it was the user
# supplied value for PYTHON_CONFIG. Because it's "user supplied", error out if it doesn't exist.
@@ -265,7 +265,7 @@ endif
# defined. get-executable-or-default fails with an error if the first argument is supplied but
# doesn't exist.
override PYTHON_CONFIG := $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO))
-override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_AUTO)))
+override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_CONFIG)))
grep-libs = $(filter -l%,$(1))
strip-libs = $(filter-out -l%,$(1))
@@ -297,8 +297,6 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
FEATURE_CHECK_LDFLAGS-libaio = -lrt
-FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
-
CORE_CFLAGS += -fno-omit-frame-pointer
CORE_CFLAGS += -ggdb3
CORE_CFLAGS += -funwind-tables
@@ -328,8 +326,8 @@ ifneq ($(TCMALLOC),)
endif
ifeq ($(FEATURES_DUMP),)
-# We will display at the end of this Makefile.config, using $(call feature_display_entries)
-# As we may retry some feature detection here, see the disassembler-four-args case, for instance
+# We will display at the end of this Makefile.config, using $(call feature_display_entries),
+# as we may retry some feature detection here.
FEATURE_DISPLAY_DEFERRED := 1
include $(srctree)/tools/build/Makefile.feature
else
@@ -342,7 +340,7 @@ endif
ifeq ($(DEBUG),0)
ifeq ($(feature-fortify-source), 1)
- CORE_CFLAGS += -D_FORTIFY_SOURCE=2
+ CORE_CFLAGS += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2
endif
endif
@@ -889,6 +887,25 @@ else
endif
endif
+ifneq ($(NO_JEVENTS),1)
+ ifeq ($(wildcard pmu-events/arch/$(SRCARCH)/mapfile.csv),)
+ NO_JEVENTS := 1
+ endif
+endif
+ifneq ($(NO_JEVENTS),1)
+ NO_JEVENTS := 0
+ ifndef PYTHON
+ $(warning No python interpreter disabling jevent generation)
+ NO_JEVENTS := 1
+ else
+ # jevents.py uses f-strings present in Python 3.6 released in Dec. 2016.
+ JEVENTS_PYTHON_GOOD := $(shell $(PYTHON) -c 'import sys;print("1" if(sys.version_info.major >= 3 and sys.version_info.minor >= 6) else "0")' 2> /dev/null)
+ ifneq ($(JEVENTS_PYTHON_GOOD), 1)
+ $(warning Python interpreter too old (older than 3.6) disabling jevent generation)
+ NO_JEVENTS := 1
+ endif
+ endif
+endif
ifndef NO_LIBBFD
ifeq ($(feature-libbfd), 1)
@@ -904,14 +921,13 @@ ifndef NO_LIBBFD
ifeq ($(feature-libbfd-liberty), 1)
EXTLIBS += -lbfd -lopcodes -liberty
- FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl
else
ifeq ($(feature-libbfd-liberty-z), 1)
EXTLIBS += -lbfd -lopcodes -liberty -lz
- FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl
endif
endif
$(call feature_check,disassembler-four-args)
+ $(call feature_check,disassembler-init-styled)
endif
ifeq ($(feature-libbfd-buildid), 1)
@@ -1025,6 +1041,10 @@ ifeq ($(feature-disassembler-four-args), 1)
CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
endif
+ifeq ($(feature-disassembler-init-styled), 1)
+ CFLAGS += -DDISASM_INIT_STYLED
+endif
+
ifeq (${IS_64_BIT}, 1)
ifndef NO_PERF_READ_VDSO32
$(call feature_check,compile-32)
@@ -1329,7 +1349,7 @@ endif
# re-generate FEATURE-DUMP as we may have called feature_check, found out
# extra libraries to add to LDFLAGS of some other test and then redo those
-# tests, see the block about libbfd, disassembler-four-args, for instance.
+# tests.
$(shell rm -f $(FEATURE_DUMP_FILENAME))
$(foreach feat,$(FEATURE_TESTS),$(shell echo "$(call feature_assign,$(feat))" >> $(FEATURE_DUMP_FILENAME)))
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 8f738e11356d..bd947885a639 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -651,25 +651,15 @@ strip: $(PROGRAMS) $(OUTPUT)perf
PERF_IN := $(OUTPUT)perf-in.o
-JEVENTS := $(OUTPUT)pmu-events/jevents
-JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o
-
PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o
-
-export JEVENTS
+export NO_JEVENTS
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
$(PERF_IN): prepare FORCE
$(Q)$(MAKE) $(build)=perf
-$(JEVENTS_IN): FORCE
- $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=jevents
-
-$(JEVENTS): $(JEVENTS_IN)
- $(QUIET_LINK)$(HOSTCC) $(JEVENTS_IN) -o $@
-
-$(PMU_EVENTS_IN): $(JEVENTS) FORCE
+$(PMU_EVENTS_IN): FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events
$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
@@ -964,11 +954,11 @@ ifndef NO_LIBBPF
$(call QUIET_INSTALL, bpf-headers) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'; \
- $(INSTALL) include/bpf/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
- $(INSTALL) include/bpf/linux/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'
+ $(INSTALL) include/bpf/*.h -m 644 -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
+ $(INSTALL) include/bpf/linux/*.h -m 644 -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'
$(call QUIET_INSTALL, bpf-examples) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \
- $(INSTALL) examples/bpf/*.c -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
+ $(INSTALL) examples/bpf/*.c -m 644 -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
endif
$(call QUIET_INSTALL, perf-archive) \
$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
@@ -977,13 +967,13 @@ endif
ifndef NO_LIBAUDIT
$(call QUIET_INSTALL, strace/groups) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'; \
- $(INSTALL) trace/strace/groups/* -t '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'
+ $(INSTALL) trace/strace/groups/* -m 644 -t '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'
endif
ifndef NO_LIBPERL
$(call QUIET_INSTALL, perl-scripts) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
- $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
- $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'; \
+ $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -m 644 -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) scripts/perl/*.pl -m 644 -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'; \
$(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
endif
@@ -1000,22 +990,23 @@ endif
$(INSTALL) $(DLFILTERS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/dlfilters';
$(call QUIET_INSTALL, perf_completion-script) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
- $(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
+ $(INSTALL) perf-completion.sh -m 644 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
$(call QUIET_INSTALL, perf-tip) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(tip_instdir_SQ)'; \
- $(INSTALL) Documentation/tips.txt -t '$(DESTDIR_SQ)$(tip_instdir_SQ)'
+ $(INSTALL) Documentation/tips.txt -m 644 -t '$(DESTDIR_SQ)$(tip_instdir_SQ)'
install-tests: all install-gtk
$(call QUIET_INSTALL, tests) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
- $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+ $(INSTALL) tests/attr.py -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
$(INSTALL) tests/pe-file.exe* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
- $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
+ $(INSTALL) tests/attr/* -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
$(INSTALL) tests/shell/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
- $(INSTALL) tests/shell/lib/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'
+ $(INSTALL) tests/shell/lib/*.sh -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
+ $(INSTALL) tests/shell/lib/*.py -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'
install-bin: install-tools install-tests install-traceevent-plugins
@@ -1038,7 +1029,8 @@ SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
-SKELETONS += $(SKEL_OUT)/off_cpu.skel.h
+SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h
+SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h
$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT):
$(Q)$(MKDIR) -p $@
@@ -1089,7 +1081,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS)
$(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected
- $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so
+ $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)$(LIBJVMTI).so
$(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
$(OUTPUT)util/intel-pt-decoder/inat-tables.c \
$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 1b54638d53b0..a346d5f3dafa 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -438,7 +438,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
if (opts->full_auxtrace) {
struct evsel *tracking_evsel;
- err = parse_events(evlist, "dummy:u", NULL);
+ err = parse_event(evlist, "dummy:u");
if (err)
goto out;
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 6f4db2ac5420..d4c234076541 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -257,7 +257,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR);
/* Add dummy event to keep tracking */
- err = parse_events(evlist, "dummy:u", NULL);
+ err = parse_event(evlist, "dummy:u");
if (err)
return err;
diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c
index 79124bba713e..f849b1e88d43 100644
--- a/tools/perf/arch/arm64/util/pmu.c
+++ b/tools/perf/arch/arm64/util/pmu.c
@@ -3,7 +3,7 @@
#include "../../../util/cpumap.h"
#include "../../../util/pmu.h"
-const struct pmu_events_map *pmu_events_map__find(void)
+const struct pmu_events_table *pmu_events_table__find(void)
{
struct perf_pmu *pmu = NULL;
@@ -18,7 +18,7 @@ const struct pmu_events_map *pmu_events_map__find(void)
if (pmu->cpus->nr != cpu__max_cpu().cpu)
return NULL;
- return perf_pmu__find_map(pmu);
+ return perf_pmu__find_table(pmu);
}
return NULL;
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 28d793390198..70b5bcbc15df 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -2,7 +2,6 @@ perf-$(CONFIG_DWARF_UNWIND) += regs_load.o
perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
perf-y += arch-tests.o
-perf-y += rdpmc.o
perf-y += sample-parsing.o
perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o
perf-$(CONFIG_X86_64) += bp-modify.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index 64fb73d14d2f..04018b8aa85b 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -3,7 +3,6 @@
#include "tests/tests.h"
#include "arch-tests.h"
-DEFINE_SUITE("x86 rdpmc", rdpmc);
#ifdef HAVE_AUXTRACE_SUPPORT
DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86);
DEFINE_SUITE("Intel PT packet decoder", intel_pt_pkt_decoder);
@@ -14,7 +13,6 @@ DEFINE_SUITE("x86 bp modify", bp_modify);
DEFINE_SUITE("x86 Sample parsing", x86_sample_parsing);
struct test_suite *arch_tests[] = {
- &suite__rdpmc,
#ifdef HAVE_DWARF_UNWIND_SUPPORT
&suite__dwarf_unwind,
#endif
diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c
index cb5b2c6c3b3b..360a082fc928 100644
--- a/tools/perf/arch/x86/tests/intel-cqm.c
+++ b/tools/perf/arch/x86/tests/intel-cqm.c
@@ -56,7 +56,7 @@ int test__intel_cqm_count_nmi_context(struct test_suite *test __maybe_unused, in
return TEST_FAIL;
}
- ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL);
+ ret = parse_event(evlist, "intel_cqm/llc_occupancy/");
if (ret) {
pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n");
err = TEST_SKIP;
diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c
deleted file mode 100644
index 498413ad9c97..000000000000
--- a/tools/perf/arch/x86/tests/rdpmc.c
+++ /dev/null
@@ -1,182 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <errno.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <signal.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include "perf-sys.h"
-#include "debug.h"
-#include "tests/tests.h"
-#include "cloexec.h"
-#include "event.h"
-#include <internal/lib.h> // page_size
-#include "arch-tests.h"
-
-static u64 rdpmc(unsigned int counter)
-{
- unsigned int low, high;
-
- asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
-
- return low | ((u64)high) << 32;
-}
-
-static u64 rdtsc(void)
-{
- unsigned int low, high;
-
- asm volatile("rdtsc" : "=a" (low), "=d" (high));
-
- return low | ((u64)high) << 32;
-}
-
-static u64 mmap_read_self(void *addr)
-{
- struct perf_event_mmap_page *pc = addr;
- u32 seq, idx, time_mult = 0, time_shift = 0;
- u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
-
- do {
- seq = pc->lock;
- barrier();
-
- enabled = pc->time_enabled;
- running = pc->time_running;
-
- if (enabled != running) {
- cyc = rdtsc();
- time_mult = pc->time_mult;
- time_shift = pc->time_shift;
- time_offset = pc->time_offset;
- }
-
- idx = pc->index;
- count = pc->offset;
- if (idx)
- count += rdpmc(idx - 1);
-
- barrier();
- } while (pc->lock != seq);
-
- if (enabled != running) {
- u64 quot, rem;
-
- quot = (cyc >> time_shift);
- rem = cyc & (((u64)1 << time_shift) - 1);
- delta = time_offset + quot * time_mult +
- ((rem * time_mult) >> time_shift);
-
- enabled += delta;
- if (idx)
- running += delta;
-
- quot = count / running;
- rem = count % running;
- count = quot * enabled + (rem * enabled) / running;
- }
-
- return count;
-}
-
-/*
- * If the RDPMC instruction faults then signal this back to the test parent task:
- */
-static void segfault_handler(int sig __maybe_unused,
- siginfo_t *info __maybe_unused,
- void *uc __maybe_unused)
-{
- exit(-1);
-}
-
-static int __test__rdpmc(void)
-{
- volatile int tmp = 0;
- u64 i, loops = 1000;
- int n;
- int fd;
- void *addr;
- struct perf_event_attr attr = {
- .type = PERF_TYPE_HARDWARE,
- .config = PERF_COUNT_HW_INSTRUCTIONS,
- .exclude_kernel = 1,
- };
- u64 delta_sum = 0;
- struct sigaction sa;
- char sbuf[STRERR_BUFSIZE];
-
- sigfillset(&sa.sa_mask);
- sa.sa_sigaction = segfault_handler;
- sa.sa_flags = 0;
- sigaction(SIGSEGV, &sa, NULL);
-
- fd = sys_perf_event_open(&attr, 0, -1, -1,
- perf_event_open_cloexec_flag());
- if (fd < 0) {
- pr_err("Error: sys_perf_event_open() syscall returned "
- "with %d (%s)\n", fd,
- str_error_r(errno, sbuf, sizeof(sbuf)));
- return -1;
- }
-
- addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
- if (addr == (void *)(-1)) {
- pr_err("Error: mmap() syscall returned with (%s)\n",
- str_error_r(errno, sbuf, sizeof(sbuf)));
- goto out_close;
- }
-
- for (n = 0; n < 6; n++) {
- u64 stamp, now, delta;
-
- stamp = mmap_read_self(addr);
-
- for (i = 0; i < loops; i++)
- tmp++;
-
- now = mmap_read_self(addr);
- loops *= 10;
-
- delta = now - stamp;
- pr_debug("%14d: %14Lu\n", n, (long long)delta);
-
- delta_sum += delta;
- }
-
- munmap(addr, page_size);
- pr_debug(" ");
-out_close:
- close(fd);
-
- if (!delta_sum)
- return -1;
-
- return 0;
-}
-
-int test__rdpmc(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
-{
- int status = 0;
- int wret = 0;
- int ret;
- int pid;
-
- pid = fork();
- if (pid < 0)
- return -1;
-
- if (!pid) {
- ret = __test__rdpmc();
-
- exit(ret);
- }
-
- wret = waitpid(pid, &status, 0);
- if (wret < 0 || status)
- return -1;
-
- return 0;
-}
diff --git a/tools/perf/arch/x86/util/cpuid.h b/tools/perf/arch/x86/util/cpuid.h
new file mode 100644
index 000000000000..0a3ae0ace7e9
--- /dev/null
+++ b/tools/perf/arch/x86/util/cpuid.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_CPUID_H
+#define PERF_CPUID_H 1
+
+
+static inline void
+cpuid(unsigned int op, unsigned int op2, unsigned int *a, unsigned int *b,
+ unsigned int *c, unsigned int *d)
+{
+ /*
+ * Preserve %ebx/%rbx register by either placing it in %rdi or saving it
+ * on the stack - x86-64 needs to avoid the stack red zone. In PIC
+ * compilations %ebx contains the address of the global offset
+ * table. %rbx is occasionally used to address stack variables in
+ * presence of dynamic allocas.
+ */
+ asm(
+#if defined(__x86_64__)
+ "mov %%rbx, %%rdi\n"
+ "cpuid\n"
+ "xchg %%rdi, %%rbx\n"
+#else
+ "pushl %%ebx\n"
+ "cpuid\n"
+ "movl %%ebx, %%edi\n"
+ "popl %%ebx\n"
+#endif
+ : "=a"(*a), "=D"(*b), "=c"(*c), "=d"(*d)
+ : "a"(op), "2"(op2));
+}
+
+void get_cpuid_0(char *vendor, unsigned int *lvl);
+
+#endif
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index 68f681ad54c1..cb59ce9b9638 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -3,20 +3,66 @@
#include "util/pmu.h"
#include "util/evlist.h"
#include "util/parse-events.h"
+#include "util/event.h"
+#include "util/pmu-hybrid.h"
#include "topdown.h"
-#define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}"
-#define TOPDOWN_L2_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound,topdown-heavy-ops,topdown-br-mispredict,topdown-fetch-lat,topdown-mem-bound}"
+static int ___evlist__add_default_attrs(struct evlist *evlist,
+ struct perf_event_attr *attrs,
+ size_t nr_attrs)
+{
+ struct perf_cpu_map *cpus;
+ struct evsel *evsel, *n;
+ struct perf_pmu *pmu;
+ LIST_HEAD(head);
+ size_t i = 0;
+
+ for (i = 0; i < nr_attrs; i++)
+ event_attr_init(attrs + i);
+
+ if (!perf_pmu__has_hybrid())
+ return evlist__add_attrs(evlist, attrs, nr_attrs);
+
+ for (i = 0; i < nr_attrs; i++) {
+ if (attrs[i].type == PERF_TYPE_SOFTWARE) {
+ evsel = evsel__new(attrs + i);
+ if (evsel == NULL)
+ goto out_delete_partial_list;
+ list_add_tail(&evsel->core.node, &head);
+ continue;
+ }
+
+ perf_pmu__for_each_hybrid_pmu(pmu) {
+ evsel = evsel__new(attrs + i);
+ if (evsel == NULL)
+ goto out_delete_partial_list;
+ evsel->core.attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
+ cpus = perf_cpu_map__get(pmu->cpus);
+ evsel->core.cpus = cpus;
+ evsel->core.own_cpus = perf_cpu_map__get(cpus);
+ evsel->pmu_name = strdup(pmu->name);
+ list_add_tail(&evsel->core.node, &head);
+ }
+ }
+
+ evlist__splice_list_tail(evlist, &head);
+
+ return 0;
+
+out_delete_partial_list:
+ __evlist__for_each_entry_safe(&head, n, evsel)
+ evsel__delete(evsel);
+ return -1;
+}
-int arch_evlist__add_default_attrs(struct evlist *evlist)
+int arch_evlist__add_default_attrs(struct evlist *evlist,
+ struct perf_event_attr *attrs,
+ size_t nr_attrs)
{
- if (!pmu_have_event("cpu", "slots"))
- return 0;
+ if (nr_attrs)
+ return ___evlist__add_default_attrs(evlist, attrs, nr_attrs);
- if (pmu_have_event("cpu", "topdown-heavy-ops"))
- return parse_events(evlist, TOPDOWN_L2_EVENTS, NULL);
- else
- return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL);
+ return topdown_parse_events(evlist);
}
struct evsel *arch_evlist__leader(struct list_head *list)
diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
index 3501399cef35..ea3972d785d1 100644
--- a/tools/perf/arch/x86/util/evsel.c
+++ b/tools/perf/arch/x86/util/evsel.c
@@ -6,6 +6,10 @@
#include "util/pmu.h"
#include "linux/string.h"
#include "evsel.h"
+#include "util/debug.h"
+
+#define IBS_FETCH_L3MISSONLY (1ULL << 59)
+#define IBS_OP_L3MISSONLY (1ULL << 16)
void arch_evsel__set_sample_weight(struct evsel *evsel)
{
@@ -61,3 +65,71 @@ bool arch_evsel__must_be_in_group(const struct evsel *evsel)
(strcasestr(evsel->name, "slots") ||
strcasestr(evsel->name, "topdown"));
}
+
+int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
+{
+ u64 event = evsel->core.attr.config & PERF_HW_EVENT_MASK;
+ u64 pmu = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT;
+ const char *event_name;
+
+ if (event < PERF_COUNT_HW_MAX && evsel__hw_names[event])
+ event_name = evsel__hw_names[event];
+ else
+ event_name = "unknown-hardware";
+
+ /* The PMU type is not required for the non-hybrid platform. */
+ if (!pmu)
+ return scnprintf(bf, size, "%s", event_name);
+
+ return scnprintf(bf, size, "%s/%s/",
+ evsel->pmu_name ? evsel->pmu_name : "cpu",
+ event_name);
+}
+
+static void ibs_l3miss_warn(void)
+{
+ pr_warning(
+"WARNING: Hw internally resets sampling period when L3 Miss Filtering is enabled\n"
+"and tagged operation does not cause L3 Miss. This causes sampling period skew.\n");
+}
+
+void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr)
+{
+ struct perf_pmu *evsel_pmu, *ibs_fetch_pmu, *ibs_op_pmu;
+ static int warned_once;
+ /* 0: Uninitialized, 1: Yes, -1: No */
+ static int is_amd;
+
+ if (warned_once || is_amd == -1)
+ return;
+
+ if (!is_amd) {
+ struct perf_env *env = evsel__env(evsel);
+
+ if (!perf_env__cpuid(env) || !env->cpuid ||
+ !strstarts(env->cpuid, "AuthenticAMD")) {
+ is_amd = -1;
+ return;
+ }
+ is_amd = 1;
+ }
+
+ evsel_pmu = evsel__find_pmu(evsel);
+ if (!evsel_pmu)
+ return;
+
+ ibs_fetch_pmu = perf_pmu__find("ibs_fetch");
+ ibs_op_pmu = perf_pmu__find("ibs_op");
+
+ if (ibs_fetch_pmu && ibs_fetch_pmu->type == evsel_pmu->type) {
+ if (attr->config & IBS_FETCH_L3MISSONLY) {
+ ibs_l3miss_warn();
+ warned_once = 1;
+ }
+ } else if (ibs_op_pmu && ibs_op_pmu->type == evsel_pmu->type) {
+ if (attr->config & IBS_OP_L3MISSONLY) {
+ ibs_l3miss_warn();
+ warned_once = 1;
+ }
+ }
+}
diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
index 578c8c568ffd..a51444a77a5f 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -9,18 +9,17 @@
#include "../../../util/debug.h"
#include "../../../util/header.h"
+#include "cpuid.h"
-static inline void
-cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
- unsigned int *d)
+void get_cpuid_0(char *vendor, unsigned int *lvl)
{
- __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\t"
- "movl %%ebx, %%esi\n\t.byte 0x5b"
- : "=a" (*a),
- "=S" (*b),
- "=c" (*c),
- "=d" (*d)
- : "a" (op));
+ unsigned int b, c, d;
+
+ cpuid(0, 0, lvl, &b, &c, &d);
+ strncpy(&vendor[0], (char *)(&b), 4);
+ strncpy(&vendor[4], (char *)(&d), 4);
+ strncpy(&vendor[8], (char *)(&c), 4);
+ vendor[12] = '\0';
}
static int
@@ -31,14 +30,10 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt)
int nb;
char vendor[16];
- cpuid(0, &lvl, &b, &c, &d);
- strncpy(&vendor[0], (char *)(&b), 4);
- strncpy(&vendor[4], (char *)(&d), 4);
- strncpy(&vendor[8], (char *)(&c), 4);
- vendor[12] = '\0';
+ get_cpuid_0(vendor, &lvl);
if (lvl >= 1) {
- cpuid(1, &a, &b, &c, &d);
+ cpuid(1, 0, &a, &b, &c, &d);
family = (a >> 8) & 0xf; /* bits 11 - 8 */
model = (a >> 4) & 0xf; /* Bits 7 - 4 */
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index bcccfbade5c6..439c2956f3e7 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -233,7 +233,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
struct evsel *tracking_evsel;
int err;
- err = parse_events(evlist, "dummy:u", NULL);
+ err = parse_event(evlist, "dummy:u");
if (err)
return err;
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 06c2cdfd8f2f..13933020a79e 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -426,7 +426,7 @@ static int intel_pt_track_switches(struct evlist *evlist)
if (!evlist__can_select_event(evlist, sched_switch))
return -EPERM;
- err = parse_events(evlist, sched_switch, NULL);
+ err = parse_event(evlist, sched_switch);
if (err) {
pr_debug2("%s: failed to parse %s, error %d\n",
__func__, sched_switch, err);
diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c
index 792cd75ade33..404de795ec0b 100644
--- a/tools/perf/arch/x86/util/iostat.c
+++ b/tools/perf/arch/x86/util/iostat.c
@@ -316,7 +316,7 @@ static int iostat_event_group(struct evlist *evl,
sprintf(iostat_cmd, iostat_cmd_template,
list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx,
list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx);
- ret = parse_events(evl, iostat_cmd, NULL);
+ ret = parse_event(evl, iostat_cmd);
if (ret)
goto err;
}
diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
index f81a7cfe4d63..54810f9acd6f 100644
--- a/tools/perf/arch/x86/util/topdown.c
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -3,9 +3,17 @@
#include "api/fs/fs.h"
#include "util/pmu.h"
#include "util/topdown.h"
+#include "util/evlist.h"
+#include "util/debug.h"
+#include "util/pmu-hybrid.h"
#include "topdown.h"
#include "evsel.h"
+#define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}"
+#define TOPDOWN_L1_EVENTS_CORE "{slots,cpu_core/topdown-retiring/,cpu_core/topdown-bad-spec/,cpu_core/topdown-fe-bound/,cpu_core/topdown-be-bound/}"
+#define TOPDOWN_L2_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound,topdown-heavy-ops,topdown-br-mispredict,topdown-fetch-lat,topdown-mem-bound}"
+#define TOPDOWN_L2_EVENTS_CORE "{slots,cpu_core/topdown-retiring/,cpu_core/topdown-bad-spec/,cpu_core/topdown-fe-bound/,cpu_core/topdown-be-bound/,cpu_core/topdown-heavy-ops/,cpu_core/topdown-br-mispredict/,cpu_core/topdown-fetch-lat/,cpu_core/topdown-mem-bound/}"
+
/* Check whether there is a PMU which supports the perf metrics. */
bool topdown_sys_has_perf_metrics(void)
{
@@ -73,3 +81,46 @@ bool arch_topdown_sample_read(struct evsel *leader)
return false;
}
+
+/*
+ * Pick the PMU name whose topdown events should be used for @evlist.
+ *
+ * Systems without hybrid PMUs always get "cpu".  Otherwise the name stored in
+ * evlist->hybrid_pmu_name is returned; when none is stored yet it is filled
+ * in from perf_pmu__hybrid_type_to_pmu("core"), preceded by a warning if
+ * @warn is set.
+ */
+const char *arch_get_topdown_pmu_name(struct evlist *evlist, bool warn)
+{
+	if (!perf_pmu__has_hybrid())
+		return "cpu";
+
+	/* A name recorded earlier wins; nothing to warn about. */
+	if (evlist->hybrid_pmu_name)
+		return evlist->hybrid_pmu_name;
+
+	if (warn)
+		pr_warning("WARNING: default to use cpu_core topdown events\n");
+
+	evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu("core");
+	return evlist->hybrid_pmu_name;
+}
+
+/*
+ * Append the topdown event group to @evlist.
+ *
+ * Does nothing (and returns 0) when topdown_sys_has_perf_metrics() reports no
+ * perf metrics support.  Otherwise the L2 event list is used if the selected
+ * PMU has the "topdown-heavy-ops" event and the L1 list if not; the cpu_core/
+ * prefixed variants are used when the PMU name is "cpu_core".
+ *
+ * Returns the result of parse_event().
+ */
+int topdown_parse_events(struct evlist *evlist)
+{
+	const char *pmu_name;
+	bool has_l2, is_core;
+
+	if (!topdown_sys_has_perf_metrics())
+		return 0;
+
+	pmu_name = arch_get_topdown_pmu_name(evlist, false);
+	has_l2 = pmu_have_event(pmu_name, "topdown-heavy-ops");
+	is_core = strcmp(pmu_name, "cpu_core") == 0;
+
+	if (has_l2)
+		return parse_event(evlist, is_core ? TOPDOWN_L2_EVENTS_CORE
+						   : TOPDOWN_L2_EVENTS);
+
+	return parse_event(evlist, is_core ? TOPDOWN_L1_EVENTS_CORE
+					   : TOPDOWN_L1_EVENTS);
+}
diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h
index 46bf9273e572..7eb81f042838 100644
--- a/tools/perf/arch/x86/util/topdown.h
+++ b/tools/perf/arch/x86/util/topdown.h
@@ -3,5 +3,6 @@
#define _TOPDOWN_H 1
bool topdown_sys_has_perf_metrics(void);
+int topdown_parse_events(struct evlist *evlist);
#endif
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index 559365f8fe52..eb2b5195bd02 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -1,7 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
+#include <math.h>
+#include <string.h>
+#include "../../../util/debug.h"
#include "../../../util/tsc.h"
+#include "cpuid.h"
u64 rdtsc(void)
{
@@ -11,3 +15,76 @@ u64 rdtsc(void)
return low | ((u64)high) << 32;
}
+
+/*
+ * Derive the TSC frequency in Hz from /proc/cpuinfo, for example:
+ * ...
+ * model name : Intel(R) Xeon(R) Gold 6154 CPU @ 3.00GHz
+ * ...
+ * will return 3000000000.
+ *
+ * Returns NAN when /proc/cpuinfo cannot be opened, and 0 when no "model name"
+ * line carries a parsable " @ <x>GHz" suffix.  An error is logged in both
+ * cases.
+ */
+static double cpuinfo_tsc_freq(void)
+{
+	double result = 0;
+	FILE *cpuinfo;
+	char *line = NULL;
+	size_t len = 0;
+
+	cpuinfo = fopen("/proc/cpuinfo", "r");
+	if (!cpuinfo) {
+		pr_err("Failed to read /proc/cpuinfo for TSC frequency\n");
+		return NAN;
+	}
+	while (getline(&line, &len, cpuinfo) > 0) {
+		char *pos;
+
+		if (strncmp(line, "model name", 10))
+			continue;
+
+		/*
+		 * Only the first 10 bytes are known to be present, so search
+		 * from offset 10; offset 11 could lie past the terminating NUL.
+		 */
+		pos = strstr(line + 10, " @ ");
+		if (pos && sscanf(pos, " @ %lfGHz", &result) == 1) {
+			/* GHz -> Hz; the first matching line wins. */
+			result *= 1000000000;
+			break;
+		}
+	}
+
+	if (fpclassify(result) == FP_ZERO)
+		pr_err("Failed to find TSC frequency in /proc/cpuinfo\n");
+
+	free(line);
+	fclose(cpuinfo);
+	return result;
+}
+
+/*
+ * Return the TSC frequency in Hz.  The value is computed on the first call
+ * and the same value is handed back on every later call.
+ *
+ * Returns 0 when the CPUID vendor string does not contain "Intel".  Otherwise
+ * CPUID leaf 0x15 (Time Stamp Counter and Nominal Core Crystal Clock
+ * Information) is used when available and fully populated, with
+ * cpuinfo_tsc_freq() as the fallback.
+ */
+double arch_get_tsc_freq(void)
+{
+	static bool cached;
+	static double tsc;
+	unsigned int a, b, c, d, lvl;
+	char vendor[16];
+
+	if (cached)
+		return tsc;
+
+	/* Set before any work: whatever is decided below is remembered. */
+	cached = true;
+
+	get_cpuid_0(vendor, &lvl);
+	if (strstr(vendor, "Intel") == NULL)
+		return 0;
+
+	/* Leaf 0x15 is only queried when the maximum basic leaf covers it. */
+	if (lvl >= 0x15) {
+		cpuid(0x15, 0, &a, &b, &c, &d);
+		/* Any zero register means the frequency is not enumerated. */
+		if (a && b && c) {
+			tsc = (double)c * (double)b / (double)a;
+			return tsc;
+		}
+	}
+
+	/* Leaf missing or incomplete: parse it out of /proc/cpuinfo. */
+	tsc = cpuinfo_tsc_freq();
+	return tsc;
+}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 2ffe071dbcff..f839e69492e8 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -50,7 +50,9 @@ struct perf_annotate {
bool use_tui;
#endif
bool use_stdio, use_stdio2;
+#ifdef HAVE_GTK2_SUPPORT
bool use_gtk;
+#endif
bool skip_missing;
bool has_br_stack;
bool group_set;
@@ -526,7 +528,9 @@ int cmd_annotate(int argc, const char **argv)
OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
+#ifdef HAVE_GTK2_SUPPORT
OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"),
+#endif
#ifdef HAVE_SLANG_SUPPORT
OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
#endif
@@ -614,10 +618,12 @@ int cmd_annotate(int argc, const char **argv)
if (annotate_check_args(&annotate.opts) < 0)
return -EINVAL;
+#ifdef HAVE_GTK2_SUPPORT
if (symbol_conf.show_nr_samples && annotate.use_gtk) {
pr_err("--show-nr-samples is not available in --gtk mode at this time\n");
return ret;
}
+#endif
ret = symbol__validate_sym_arguments();
if (ret)
@@ -656,8 +662,10 @@ int cmd_annotate(int argc, const char **argv)
else if (annotate.use_tui)
use_browser = 1;
#endif
+#ifdef HAVE_GTK2_SUPPORT
else if (annotate.use_gtk)
use_browser = 2;
+#endif
setup_browser(true);
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index cebadd632234..00bfe89f0b5d 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -12,14 +12,44 @@
#include "util/build-id.h"
#include "util/debug.h"
#include "util/dso.h"
+#include "util/map.h"
#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include "util/session.h"
#include "util/symbol.h"
#include "util/data.h"
#include <errno.h>
+#include <inttypes.h>
#include <linux/err.h>
+/*
+ * Callback for machine__for_each_kernel_map(): print one line for @map with
+ * the DSO's build-id (empty when the DSO has none), the map's start and end
+ * addresses in hex, and the DSO's long name or, failing that, its short name.
+ *
+ * Always returns 0.
+ */
+static int buildid__map_cb(struct map *map, void *arg __maybe_unused)
+{
+	const struct dso *dso = map->dso;
+	const char *name = dso->long_name ? dso->long_name : dso->short_name;
+	char bid_buf[SBUILD_ID_SIZE] = "";
+
+	if (dso->has_build_id)
+		build_id__sprintf(&dso->bid, bid_buf);
+
+	printf("%s %16" PRIx64 " %16" PRIx64, bid_buf, map->start, map->end);
+	if (name != NULL)
+		printf(" %s", name);
+	printf("\n");
+
+	return 0;
+}
+
+/*
+ * Create a host machine, run buildid__map_cb() over each of its kernel maps,
+ * then release the machine again.
+ */
+static void buildid__show_kernel_maps(void)
+{
+	struct machine *machine;
+
+	machine = machine__new_host();
+	/* Do not walk the maps of a machine that could not be created. */
+	if (machine == NULL) {
+		pr_err("Failed to create the host machine\n");
+		return;
+	}
+
+	machine__for_each_kernel_map(machine, buildid__map_cb, NULL);
+	machine__delete(machine);
+}
+
static int sysfs__fprintf_build_id(FILE *fp)
{
char sbuild_id[SBUILD_ID_SIZE];
@@ -99,6 +129,7 @@ out:
int cmd_buildid_list(int argc, const char **argv)
{
bool show_kernel = false;
+ bool show_kernel_maps = false;
bool with_hits = false;
bool force = false;
const struct option options[] = {
@@ -106,6 +137,8 @@ int cmd_buildid_list(int argc, const char **argv)
OPT_STRING('i', "input", &input_name, "file", "input file name"),
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
OPT_BOOLEAN('k', "kernel", &show_kernel, "Show current kernel build id"),
+ OPT_BOOLEAN('m', "kernel-maps", &show_kernel_maps,
+ "Show build id of current kernel + modules"),
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_END()
};
@@ -117,8 +150,12 @@ int cmd_buildid_list(int argc, const char **argv)
argc = parse_options(argc, argv, options, buildid_list_usage, 0);
setup_pager();
- if (show_kernel)
+ if (show_kernel) {
return !(sysfs__fprintf_build_id(stdout) > 0);
+ } else if (show_kernel_maps) {
+ buildid__show_kernel_maps();
+ return 0;
+ }
return perf_session__list_build_ids(force, with_hits);
}
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 4898ee57d156..438fc222e213 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -55,6 +55,8 @@ struct c2c_hists {
struct compute_stats {
struct stats lcl_hitm;
struct stats rmt_hitm;
+ struct stats lcl_peer;
+ struct stats rmt_peer;
struct stats load;
};
@@ -113,16 +115,18 @@ struct perf_c2c {
};
enum {
- DISPLAY_LCL,
- DISPLAY_RMT,
- DISPLAY_TOT,
+ DISPLAY_LCL_HITM,
+ DISPLAY_RMT_HITM,
+ DISPLAY_TOT_HITM,
+ DISPLAY_SNP_PEER,
DISPLAY_MAX,
};
static const char *display_str[DISPLAY_MAX] = {
- [DISPLAY_LCL] = "Local",
- [DISPLAY_RMT] = "Remote",
- [DISPLAY_TOT] = "Total",
+ [DISPLAY_LCL_HITM] = "Local HITMs",
+ [DISPLAY_RMT_HITM] = "Remote HITMs",
+ [DISPLAY_TOT_HITM] = "Total HITMs",
+ [DISPLAY_SNP_PEER] = "Peer Snoop",
};
static const struct option c2c_options[] = {
@@ -142,21 +146,29 @@ static void *c2c_he_zalloc(size_t size)
c2c_he->cpuset = bitmap_zalloc(c2c.cpus_cnt);
if (!c2c_he->cpuset)
- return NULL;
+ goto out_free;
c2c_he->nodeset = bitmap_zalloc(c2c.nodes_cnt);
if (!c2c_he->nodeset)
- return NULL;
+ goto out_free;
c2c_he->node_stats = zalloc(c2c.nodes_cnt * sizeof(*c2c_he->node_stats));
if (!c2c_he->node_stats)
- return NULL;
+ goto out_free;
init_stats(&c2c_he->cstats.lcl_hitm);
init_stats(&c2c_he->cstats.rmt_hitm);
+ init_stats(&c2c_he->cstats.lcl_peer);
+ init_stats(&c2c_he->cstats.rmt_peer);
init_stats(&c2c_he->cstats.load);
return &c2c_he->he;
+
+out_free:
+ free(c2c_he->nodeset);
+ free(c2c_he->cpuset);
+ free(c2c_he);
+ return NULL;
}
static void c2c_he_free(void *he)
@@ -253,6 +265,10 @@ static void compute_stats(struct c2c_hist_entry *c2c_he,
update_stats(&cstats->rmt_hitm, weight);
else if (stats->lcl_hitm)
update_stats(&cstats->lcl_hitm, weight);
+ else if (stats->rmt_peer)
+ update_stats(&cstats->rmt_peer, weight);
+ else if (stats->lcl_peer)
+ update_stats(&cstats->lcl_peer, weight);
else if (stats->load)
update_stats(&cstats->load, weight);
}
@@ -650,6 +666,9 @@ __f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused, \
STAT_FN(rmt_hitm)
STAT_FN(lcl_hitm)
+STAT_FN(rmt_peer)
+STAT_FN(lcl_peer)
+STAT_FN(tot_peer)
STAT_FN(store)
STAT_FN(st_l1hit)
STAT_FN(st_l1miss)
@@ -787,7 +806,7 @@ percent_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, per);
}
-static double percent_hitm(struct c2c_hist_entry *c2c_he)
+static double percent_costly_snoop(struct c2c_hist_entry *c2c_he)
{
struct c2c_hists *hists;
struct c2c_stats *stats;
@@ -800,17 +819,22 @@ static double percent_hitm(struct c2c_hist_entry *c2c_he)
total = &hists->stats;
switch (c2c.display) {
- case DISPLAY_RMT:
+ case DISPLAY_RMT_HITM:
st = stats->rmt_hitm;
tot = total->rmt_hitm;
break;
- case DISPLAY_LCL:
+ case DISPLAY_LCL_HITM:
st = stats->lcl_hitm;
tot = total->lcl_hitm;
break;
- case DISPLAY_TOT:
+ case DISPLAY_TOT_HITM:
st = stats->tot_hitm;
tot = total->tot_hitm;
+ break;
+ case DISPLAY_SNP_PEER:
+ st = stats->tot_peer;
+ tot = total->tot_peer;
+ break;
default:
break;
}
@@ -827,8 +851,8 @@ static double percent_hitm(struct c2c_hist_entry *c2c_he)
})
static int
-percent_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct hist_entry *he)
+percent_costly_snoop_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
{
struct c2c_hist_entry *c2c_he;
int width = c2c_width(fmt, hpp, he->hists);
@@ -836,20 +860,20 @@ percent_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
double per;
c2c_he = container_of(he, struct c2c_hist_entry, he);
- per = percent_hitm(c2c_he);
+ per = percent_costly_snoop(c2c_he);
return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
}
static int
-percent_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct hist_entry *he)
+percent_costly_snoop_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
{
- return percent_color(fmt, hpp, he, percent_hitm);
+ return percent_color(fmt, hpp, he, percent_costly_snoop);
}
static int64_t
-percent_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
- struct hist_entry *left, struct hist_entry *right)
+percent_costly_snoop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
{
struct c2c_hist_entry *c2c_left;
struct c2c_hist_entry *c2c_right;
@@ -859,8 +883,8 @@ percent_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
c2c_left = container_of(left, struct c2c_hist_entry, he);
c2c_right = container_of(right, struct c2c_hist_entry, he);
- per_left = percent_hitm(c2c_left);
- per_right = percent_hitm(c2c_right);
+ per_left = percent_costly_snoop(c2c_left);
+ per_right = percent_costly_snoop(c2c_right);
return per_left - per_right;
}
@@ -899,6 +923,8 @@ static double percent_ ## __f(struct c2c_hist_entry *c2c_he) \
PERCENT_FN(rmt_hitm)
PERCENT_FN(lcl_hitm)
+PERCENT_FN(rmt_peer)
+PERCENT_FN(lcl_peer)
PERCENT_FN(st_l1hit)
PERCENT_FN(st_l1miss)
PERCENT_FN(st_na)
@@ -966,6 +992,68 @@ percent_lcl_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
}
+/*
+ * Write the PERCENT(he, lcl_peer) value of @he into hpp->buf, padded to the
+ * width reported by c2c_width().  Returns what scnprintf() returns.
+ */
static int
+percent_lcl_peer_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		       struct hist_entry *he)
+{
+	char buf[10];
+	int w = c2c_width(fmt, hpp, he->hists);
+	double pct = PERCENT(he, lcl_peer);
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", w, PERC_STR(buf, pct));
+}
+
+/*
+ * Delegates to percent_color(), passing percent_lcl_peer (generated by
+ * PERCENT_FN(lcl_peer)) as the function that yields the percentage.
+ */
+static int
+percent_lcl_peer_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_lcl_peer);
+}
+
+/*
+ * Compare two entries by their PERCENT(..., lcl_peer) values.  The difference
+ * of the two doubles is converted to the int64_t return type.
+ */
+static int64_t
+percent_lcl_peer_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+		     struct hist_entry *left, struct hist_entry *right)
+{
+	double lhs = PERCENT(left, lcl_peer);
+	double rhs = PERCENT(right, lcl_peer);
+
+	return lhs - rhs;
+}
+
+/*
+ * Write the PERCENT(he, rmt_peer) value of @he into hpp->buf, padded to the
+ * width reported by c2c_width().  Returns what scnprintf() returns.
+ */
+static int
+percent_rmt_peer_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		       struct hist_entry *he)
+{
+	char buf[10];
+	int w = c2c_width(fmt, hpp, he->hists);
+	double pct = PERCENT(he, rmt_peer);
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", w, PERC_STR(buf, pct));
+}
+
+/*
+ * Delegates to percent_color(), passing percent_rmt_peer (generated by
+ * PERCENT_FN(rmt_peer)) as the function that yields the percentage.
+ */
+static int
+percent_rmt_peer_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_rmt_peer);
+}
+
+/*
+ * Compare two entries by their PERCENT(..., rmt_peer) values.  The difference
+ * of the two doubles is converted to the int64_t return type.
+ */
+static int64_t
+percent_rmt_peer_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+		     struct hist_entry *left, struct hist_entry *right)
+{
+	double lhs = PERCENT(left, rmt_peer);
+	double rhs = PERCENT(right, rmt_peer);
+
+	return lhs - rhs;
+}
+
+static int
percent_stores_l1hit_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he)
{
@@ -1142,18 +1230,22 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
advance_hpp(hpp, ret);
switch (c2c.display) {
- case DISPLAY_RMT:
+ case DISPLAY_RMT_HITM:
ret = display_metrics(hpp, stats->rmt_hitm,
c2c_he->stats.rmt_hitm);
break;
- case DISPLAY_LCL:
+ case DISPLAY_LCL_HITM:
ret = display_metrics(hpp, stats->lcl_hitm,
c2c_he->stats.lcl_hitm);
break;
- case DISPLAY_TOT:
+ case DISPLAY_TOT_HITM:
ret = display_metrics(hpp, stats->tot_hitm,
c2c_he->stats.tot_hitm);
break;
+ case DISPLAY_SNP_PEER:
+ ret = display_metrics(hpp, stats->tot_peer,
+ c2c_he->stats.tot_peer);
+ break;
default:
break;
}
@@ -1213,6 +1305,8 @@ __func(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) \
MEAN_ENTRY(mean_rmt_entry, rmt_hitm);
MEAN_ENTRY(mean_lcl_entry, lcl_hitm);
MEAN_ENTRY(mean_load_entry, load);
+MEAN_ENTRY(mean_rmt_peer_entry, rmt_peer);
+MEAN_ENTRY(mean_lcl_peer_entry, lcl_peer);
static int
cpucnt_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
@@ -1360,6 +1454,30 @@ static struct c2c_dimension dim_rmt_hitm = {
.width = 7,
};
+static struct c2c_dimension dim_tot_peer = {
+ .header = HEADER_SPAN("------- Load Peer -------", "Total", 2),
+ .name = "tot_peer",
+ .cmp = tot_peer_cmp,
+ .entry = tot_peer_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_lcl_peer = {
+ .header = HEADER_SPAN_LOW("Local"),
+ .name = "lcl_peer",
+ .cmp = lcl_peer_cmp,
+ .entry = lcl_peer_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_rmt_peer = {
+ .header = HEADER_SPAN_LOW("Remote"),
+ .name = "rmt_peer",
+ .cmp = rmt_peer_cmp,
+ .entry = rmt_peer_entry,
+ .width = 7,
+};
+
static struct c2c_dimension dim_cl_rmt_hitm = {
.header = HEADER_SPAN("----- HITM -----", "Rmt", 1),
.name = "cl_rmt_hitm",
@@ -1376,6 +1494,22 @@ static struct c2c_dimension dim_cl_lcl_hitm = {
.width = 7,
};
+static struct c2c_dimension dim_cl_rmt_peer = {
+ .header = HEADER_SPAN("----- Peer -----", "Rmt", 1),
+ .name = "cl_rmt_peer",
+ .cmp = rmt_peer_cmp,
+ .entry = rmt_peer_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_cl_lcl_peer = {
+ .header = HEADER_SPAN_LOW("Lcl"),
+ .name = "cl_lcl_peer",
+ .cmp = lcl_peer_cmp,
+ .entry = lcl_peer_entry,
+ .width = 7,
+};
+
static struct c2c_dimension dim_tot_stores = {
.header = HEADER_BOTH("Total", "Stores"),
.name = "tot_stores",
@@ -1488,17 +1622,18 @@ static struct c2c_dimension dim_tot_loads = {
.width = 7,
};
-static struct c2c_header percent_hitm_header[] = {
- [DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"),
- [DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"),
- [DISPLAY_TOT] = HEADER_BOTH("Tot", "Hitm"),
+static struct c2c_header percent_costly_snoop_header[] = {
+ [DISPLAY_LCL_HITM] = HEADER_BOTH("Lcl", "Hitm"),
+ [DISPLAY_RMT_HITM] = HEADER_BOTH("Rmt", "Hitm"),
+ [DISPLAY_TOT_HITM] = HEADER_BOTH("Tot", "Hitm"),
+ [DISPLAY_SNP_PEER] = HEADER_BOTH("Peer", "Snoop"),
};
-static struct c2c_dimension dim_percent_hitm = {
- .name = "percent_hitm",
- .cmp = percent_hitm_cmp,
- .entry = percent_hitm_entry,
- .color = percent_hitm_color,
+static struct c2c_dimension dim_percent_costly_snoop = {
+ .name = "percent_costly_snoop",
+ .cmp = percent_costly_snoop_cmp,
+ .entry = percent_costly_snoop_entry,
+ .color = percent_costly_snoop_color,
.width = 7,
};
@@ -1520,6 +1655,24 @@ static struct c2c_dimension dim_percent_lcl_hitm = {
.width = 7,
};
+static struct c2c_dimension dim_percent_rmt_peer = {
+ .header = HEADER_SPAN("-- Peer Snoop --", "Rmt", 1),
+ .name = "percent_rmt_peer",
+ .cmp = percent_rmt_peer_cmp,
+ .entry = percent_rmt_peer_entry,
+ .color = percent_rmt_peer_color,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_percent_lcl_peer = {
+ .header = HEADER_SPAN_LOW("Lcl"),
+ .name = "percent_lcl_peer",
+ .cmp = percent_lcl_peer_cmp,
+ .entry = percent_lcl_peer_entry,
+ .color = percent_lcl_peer_color,
+ .width = 7,
+};
+
static struct c2c_dimension dim_percent_stores_l1hit = {
.header = HEADER_SPAN("------- Store Refs ------", "L1 Hit", 2),
.name = "percent_stores_l1hit",
@@ -1588,12 +1741,6 @@ static struct c2c_dimension dim_dso = {
.se = &sort_dso,
};
-static struct c2c_header header_node[3] = {
- HEADER_LOW("Node"),
- HEADER_LOW("Node{cpus %hitms %stores}"),
- HEADER_LOW("Node{cpu list}"),
-};
-
static struct c2c_dimension dim_node = {
.name = "node",
.cmp = empty_cmp,
@@ -1625,6 +1772,22 @@ static struct c2c_dimension dim_mean_load = {
.width = 8,
};
+static struct c2c_dimension dim_mean_rmt_peer = {
+ .header = HEADER_SPAN("---------- cycles ----------", "rmt peer", 2),
+ .name = "mean_rmt_peer",
+ .cmp = empty_cmp,
+ .entry = mean_rmt_peer_entry,
+ .width = 8,
+};
+
+static struct c2c_dimension dim_mean_lcl_peer = {
+ .header = HEADER_SPAN_LOW("lcl peer"),
+ .name = "mean_lcl_peer",
+ .cmp = empty_cmp,
+ .entry = mean_lcl_peer_entry,
+ .width = 8,
+};
+
static struct c2c_dimension dim_cpucnt = {
.header = HEADER_BOTH("cpu", "cnt"),
.name = "cpucnt",
@@ -1672,8 +1835,13 @@ static struct c2c_dimension *dimensions[] = {
&dim_tot_hitm,
&dim_lcl_hitm,
&dim_rmt_hitm,
+ &dim_tot_peer,
+ &dim_lcl_peer,
+ &dim_rmt_peer,
&dim_cl_lcl_hitm,
&dim_cl_rmt_hitm,
+ &dim_cl_lcl_peer,
+ &dim_cl_rmt_peer,
&dim_tot_stores,
&dim_stores_l1hit,
&dim_stores_l1miss,
@@ -1688,9 +1856,11 @@ static struct c2c_dimension *dimensions[] = {
&dim_ld_rmthit,
&dim_tot_recs,
&dim_tot_loads,
- &dim_percent_hitm,
+ &dim_percent_costly_snoop,
&dim_percent_rmt_hitm,
&dim_percent_lcl_hitm,
+ &dim_percent_rmt_peer,
+ &dim_percent_lcl_peer,
&dim_percent_stores_l1hit,
&dim_percent_stores_l1miss,
&dim_percent_stores_na,
@@ -1703,6 +1873,8 @@ static struct c2c_dimension *dimensions[] = {
&dim_node,
&dim_mean_rmt,
&dim_mean_lcl,
+ &dim_mean_rmt_peer,
+ &dim_mean_lcl_peer,
&dim_mean_load,
&dim_cpucnt,
&dim_srcline,
@@ -1941,18 +2113,22 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
c2c_he = container_of(he, struct c2c_hist_entry, he);
switch (c2c.display) {
- case DISPLAY_LCL:
+ case DISPLAY_LCL_HITM:
he->filtered = filter_display(c2c_he->stats.lcl_hitm,
stats->lcl_hitm);
break;
- case DISPLAY_RMT:
+ case DISPLAY_RMT_HITM:
he->filtered = filter_display(c2c_he->stats.rmt_hitm,
stats->rmt_hitm);
break;
- case DISPLAY_TOT:
+ case DISPLAY_TOT_HITM:
he->filtered = filter_display(c2c_he->stats.tot_hitm,
stats->tot_hitm);
break;
+ case DISPLAY_SNP_PEER:
+ he->filtered = filter_display(c2c_he->stats.tot_peer,
+ stats->tot_peer);
+ break;
default:
break;
}
@@ -1972,15 +2148,17 @@ static inline bool is_valid_hist_entry(struct hist_entry *he)
return true;
switch (c2c.display) {
- case DISPLAY_LCL:
+ case DISPLAY_LCL_HITM:
has_record = !!c2c_he->stats.lcl_hitm;
break;
- case DISPLAY_RMT:
+ case DISPLAY_RMT_HITM:
has_record = !!c2c_he->stats.rmt_hitm;
break;
- case DISPLAY_TOT:
+ case DISPLAY_TOT_HITM:
has_record = !!c2c_he->stats.tot_hitm;
break;
+ case DISPLAY_SNP_PEER:
+ has_record = !!c2c_he->stats.tot_peer;
default:
break;
}
@@ -2069,9 +2247,33 @@ static int resort_cl_cb(struct hist_entry *he, void *arg __maybe_unused)
return 0;
}
+static struct c2c_header header_node_0 = HEADER_LOW("Node");
+static struct c2c_header header_node_1_hitms_stores =
+ HEADER_LOW("Node{cpus %hitms %stores}");
+static struct c2c_header header_node_1_peers_stores =
+ HEADER_LOW("Node{cpus %peers %stores}");
+static struct c2c_header header_node_2 = HEADER_LOW("Node{cpu list}");
+
static void setup_nodes_header(void)
{
- dim_node.header = header_node[c2c.node_info];
+ switch (c2c.node_info) {
+ case 0:
+ dim_node.header = header_node_0;
+ break;
+ case 1:
+ if (c2c.display == DISPLAY_SNP_PEER)
+ dim_node.header = header_node_1_peers_stores;
+ else
+ dim_node.header = header_node_1_hitms_stores;
+ break;
+ case 2:
+ dim_node.header = header_node_2;
+ break;
+ default:
+ break;
+ }
+
+ return;
}
static int setup_nodes(struct perf_session *session)
@@ -2136,13 +2338,14 @@ static int setup_nodes(struct perf_session *session)
}
#define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm)
+#define HAS_PEER(__h) ((__h)->stats.lcl_peer || (__h)->stats.rmt_peer)
static int resort_shared_cl_cb(struct hist_entry *he, void *arg __maybe_unused)
{
struct c2c_hist_entry *c2c_he;
c2c_he = container_of(he, struct c2c_hist_entry, he);
- if (HAS_HITMS(c2c_he)) {
+ if (HAS_HITMS(c2c_he) || HAS_PEER(c2c_he)) {
c2c.shared_clines++;
c2c_add_stats(&c2c.shared_clines_stats, &c2c_he->stats);
}
@@ -2202,6 +2405,8 @@ static void print_c2c__display_stats(FILE *out)
fprintf(out, " Load LLC Misses : %10d\n", llc_misses);
fprintf(out, " Load access blocked by data : %10d\n", stats->blk_data);
fprintf(out, " Load access blocked by address : %10d\n", stats->blk_addr);
+ fprintf(out, " Load HIT Local Peer : %10d\n", stats->lcl_peer);
+ fprintf(out, " Load HIT Remote Peer : %10d\n", stats->rmt_peer);
fprintf(out, " LLC Misses to Local DRAM : %10.1f%%\n", ((double)stats->lcl_dram/(double)llc_misses) * 100.);
fprintf(out, " LLC Misses to Remote DRAM : %10.1f%%\n", ((double)stats->rmt_dram/(double)llc_misses) * 100.);
fprintf(out, " LLC Misses to Remote cache (HIT) : %10.1f%%\n", ((double)stats->rmt_hit /(double)llc_misses) * 100.);
@@ -2230,6 +2435,7 @@ static void print_shared_cacheline_info(FILE *out)
fprintf(out, " L1D hits on shared lines : %10d\n", stats->ld_l1hit);
fprintf(out, " L2D hits on shared lines : %10d\n", stats->ld_l2hit);
fprintf(out, " LLC hits on shared lines : %10d\n", stats->ld_llchit + stats->lcl_hitm);
+ fprintf(out, " Load hits on peer cache or nodes : %10d\n", stats->lcl_peer + stats->rmt_peer);
fprintf(out, " Locked Access on shared lines : %10d\n", stats->locks);
fprintf(out, " Blocked Access on shared lines : %10d\n", stats->blk_data + stats->blk_addr);
fprintf(out, " Store HITs on shared lines : %10d\n", stats->store);
@@ -2272,13 +2478,22 @@ static void print_pareto(FILE *out)
int ret;
const char *cl_output;
- cl_output = "cl_num,"
- "cl_rmt_hitm,"
- "cl_lcl_hitm,"
- "cl_stores_l1hit,"
- "cl_stores_l1miss,"
- "cl_stores_na,"
- "dcacheline";
+ if (c2c.display != DISPLAY_SNP_PEER)
+ cl_output = "cl_num,"
+ "cl_rmt_hitm,"
+ "cl_lcl_hitm,"
+ "cl_stores_l1hit,"
+ "cl_stores_l1miss,"
+ "cl_stores_na,"
+ "dcacheline";
+ else
+ cl_output = "cl_num,"
+ "cl_rmt_peer,"
+ "cl_lcl_peer,"
+ "cl_stores_l1hit,"
+ "cl_stores_l1miss,"
+ "cl_stores_na,"
+ "dcacheline";
perf_hpp_list__init(&hpp_list);
ret = hpp_list__parse(&hpp_list, cl_output, NULL);
@@ -2314,7 +2529,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session)
fprintf(out, "%-36s: %s\n", first ? " Events" : "", evsel__name(evsel));
first = false;
}
- fprintf(out, " Cachelines sort on : %s HITMs\n",
+ fprintf(out, " Cachelines sort on : %s\n",
display_str[c2c.display]);
fprintf(out, " Cacheline data grouping : %s\n", c2c.cl_sort);
}
@@ -2471,7 +2686,7 @@ static int perf_c2c_browser__title(struct hist_browser *browser,
{
scnprintf(bf, size,
"Shared Data Cache Line Table "
- "(%lu entries, sorted on %s HITMs)",
+ "(%lu entries, sorted on %s)",
browser->nr_non_filtered_entries,
display_str[c2c.display]);
return 0;
@@ -2585,7 +2800,7 @@ static int ui_quirks(void)
nodestr = "CL";
}
- dim_percent_hitm.header = percent_hitm_header[c2c.display];
+ dim_percent_costly_snoop.header = percent_costly_snoop_header[c2c.display];
/* Fix the zero line for dcacheline column. */
buf = fill_line("Cacheline", dim_dcacheline.width +
@@ -2669,14 +2884,16 @@ static int setup_callchain(struct evlist *evlist)
static int setup_display(const char *str)
{
- const char *display = str ?: "tot";
+ const char *display = str;
if (!strcmp(display, "tot"))
- c2c.display = DISPLAY_TOT;
+ c2c.display = DISPLAY_TOT_HITM;
else if (!strcmp(display, "rmt"))
- c2c.display = DISPLAY_RMT;
+ c2c.display = DISPLAY_RMT_HITM;
else if (!strcmp(display, "lcl"))
- c2c.display = DISPLAY_LCL;
+ c2c.display = DISPLAY_LCL_HITM;
+ else if (!strcmp(display, "peer"))
+ c2c.display = DISPLAY_SNP_PEER;
else {
pr_err("failed: unknown display type: %s\n", str);
return -1;
@@ -2723,10 +2940,12 @@ static int build_cl_output(char *cl_sort, bool no_source)
}
if (asprintf(&c2c.cl_output,
- "%s%s%s%s%s%s%s%s%s%s",
+ "%s%s%s%s%s%s%s%s%s%s%s%s",
c2c.use_stdio ? "cl_num_empty," : "",
- "percent_rmt_hitm,"
- "percent_lcl_hitm,"
+ c2c.display == DISPLAY_SNP_PEER ? "percent_rmt_peer,"
+ "percent_lcl_peer," :
+ "percent_rmt_hitm,"
+ "percent_lcl_hitm,",
"percent_stores_l1hit,"
"percent_stores_l1miss,"
"percent_stores_na,"
@@ -2734,8 +2953,10 @@ static int build_cl_output(char *cl_sort, bool no_source)
add_pid ? "pid," : "",
add_tid ? "tid," : "",
add_iaddr ? "iaddr," : "",
- "mean_rmt,"
- "mean_lcl,"
+ c2c.display == DISPLAY_SNP_PEER ? "mean_rmt_peer,"
+ "mean_lcl_peer," :
+ "mean_rmt,"
+ "mean_lcl,",
"mean_load,"
"tot_recs,"
"cpucnt,",
@@ -2756,6 +2977,7 @@ err:
static int setup_coalesce(const char *coalesce, bool no_source)
{
const char *c = coalesce ?: coalesce_default;
+ const char *sort_str = NULL;
if (asprintf(&c2c.cl_sort, "offset,%s", c) < 0)
return -ENOMEM;
@@ -2763,12 +2985,16 @@ static int setup_coalesce(const char *coalesce, bool no_source)
if (build_cl_output(c2c.cl_sort, no_source))
return -1;
- if (asprintf(&c2c.cl_resort, "offset,%s",
- c2c.display == DISPLAY_TOT ?
- "tot_hitm" :
- c2c.display == DISPLAY_RMT ?
- "rmt_hitm,lcl_hitm" :
- "lcl_hitm,rmt_hitm") < 0)
+ if (c2c.display == DISPLAY_TOT_HITM)
+ sort_str = "tot_hitm";
+ else if (c2c.display == DISPLAY_RMT_HITM)
+ sort_str = "rmt_hitm,lcl_hitm";
+ else if (c2c.display == DISPLAY_LCL_HITM)
+ sort_str = "lcl_hitm,rmt_hitm";
+ else if (c2c.display == DISPLAY_SNP_PEER)
+ sort_str = "tot_peer";
+
+ if (asprintf(&c2c.cl_resort, "offset,%s", sort_str) < 0)
return -ENOMEM;
pr_debug("coalesce sort fields: %s\n", c2c.cl_sort);
@@ -2814,7 +3040,7 @@ static int perf_c2c__report(int argc, const char **argv)
"print_type,threshold[,print_limit],order,sort_key[,branch],value",
callchain_help, &parse_callchain_opt,
callchain_default_opt),
- OPT_STRING('d', "display", &display, "Switch HITM output type", "lcl,rmt"),
+ OPT_STRING('d', "display", &display, "Switch HITM output type", "tot,lcl,rmt,peer"),
OPT_STRING('c', "coalesce", &coalesce, "coalesce fields",
"coalesce fields: pid,tid,iaddr,dso"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
@@ -2848,27 +3074,39 @@ static int perf_c2c__report(int argc, const char **argv)
data.path = input_name;
data.force = symbol_conf.force;
+ session = perf_session__new(&data, &c2c.tool);
+ if (IS_ERR(session)) {
+ err = PTR_ERR(session);
+ pr_debug("Error creating perf session\n");
+ goto out;
+ }
+
+ /*
+ * Use the 'tot' as default display type if user doesn't specify it;
+ * since Arm64 platform doesn't support HITMs flag, use 'peer' as the
+ * default display type.
+ */
+ if (!display) {
+ if (!strcmp(perf_env__arch(&session->header.env), "arm64"))
+ display = "peer";
+ else
+ display = "tot";
+ }
+
err = setup_display(display);
if (err)
- goto out;
+ goto out_session;
err = setup_coalesce(coalesce, no_source);
if (err) {
pr_debug("Failed to initialize hists\n");
- goto out;
+ goto out_session;
}
err = c2c_hists__init(&c2c.hists, "dcacheline", 2);
if (err) {
pr_debug("Failed to initialize hists\n");
- goto out;
- }
-
- session = perf_session__new(&data, &c2c.tool);
- if (IS_ERR(session)) {
- err = PTR_ERR(session);
- pr_debug("Error creating perf session\n");
- goto out;
+ goto out_session;
}
session->itrace_synth_opts = &itrace_synth_opts;
@@ -2876,7 +3114,7 @@ static int perf_c2c__report(int argc, const char **argv)
err = setup_nodes(session);
if (err) {
pr_err("Failed setup nodes\n");
- goto out;
+ goto out_session;
}
err = mem2node__init(&c2c.mem2node, &session->header.env);
@@ -2909,27 +3147,45 @@ static int perf_c2c__report(int argc, const char **argv)
goto out_mem2node;
}
- output_str = "cl_idx,"
- "dcacheline,"
- "dcacheline_node,"
- "dcacheline_count,"
- "percent_hitm,"
- "tot_hitm,lcl_hitm,rmt_hitm,"
- "tot_recs,"
- "tot_loads,"
- "tot_stores,"
- "stores_l1hit,stores_l1miss,stores_na,"
- "ld_fbhit,ld_l1hit,ld_l2hit,"
- "ld_lclhit,lcl_hitm,"
- "ld_rmthit,rmt_hitm,"
- "dram_lcl,dram_rmt";
-
- if (c2c.display == DISPLAY_TOT)
+ if (c2c.display != DISPLAY_SNP_PEER)
+ output_str = "cl_idx,"
+ "dcacheline,"
+ "dcacheline_node,"
+ "dcacheline_count,"
+ "percent_costly_snoop,"
+ "tot_hitm,lcl_hitm,rmt_hitm,"
+ "tot_recs,"
+ "tot_loads,"
+ "tot_stores,"
+ "stores_l1hit,stores_l1miss,stores_na,"
+ "ld_fbhit,ld_l1hit,ld_l2hit,"
+ "ld_lclhit,lcl_hitm,"
+ "ld_rmthit,rmt_hitm,"
+ "dram_lcl,dram_rmt";
+ else
+ output_str = "cl_idx,"
+ "dcacheline,"
+ "dcacheline_node,"
+ "dcacheline_count,"
+ "percent_costly_snoop,"
+ "tot_peer,lcl_peer,rmt_peer,"
+ "tot_recs,"
+ "tot_loads,"
+ "tot_stores,"
+ "stores_l1hit,stores_l1miss,stores_na,"
+ "ld_fbhit,ld_l1hit,ld_l2hit,"
+ "ld_lclhit,lcl_hitm,"
+ "ld_rmthit,rmt_hitm,"
+ "dram_lcl,dram_rmt";
+
+ if (c2c.display == DISPLAY_TOT_HITM)
sort_str = "tot_hitm";
- else if (c2c.display == DISPLAY_RMT)
+ else if (c2c.display == DISPLAY_RMT_HITM)
sort_str = "rmt_hitm";
- else if (c2c.display == DISPLAY_LCL)
+ else if (c2c.display == DISPLAY_LCL_HITM)
sort_str = "lcl_hitm";
+ else if (c2c.display == DISPLAY_SNP_PEER)
+ sort_str = "tot_peer";
c2c_hists__reinit(&c2c.hists, output_str, sort_str);
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 54d4e508a092..2a0f992ca0be 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -26,6 +26,7 @@
#include "util/thread.h"
#include "util/namespaces.h"
#include "util/util.h"
+#include "util/tsc.h"
#include <internal/lib.h>
@@ -35,8 +36,70 @@
#include <linux/list.h>
#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <linux/hash.h>
#include <errno.h>
#include <signal.h>
+#include <inttypes.h>
+
+/*
+ * One guest event read back from the temporary file, plus the sample
+ * parsed from it (see guest_session__fetch()).
+ */
+struct guest_event {
+ /* Sample parsed from the event; its time is converted to host time */
+ struct perf_sample sample;
+ /* Points at event_buf once an event has been fetched */
+ union perf_event *event;
+ /* Backing storage for the raw event */
+ char event_buf[PERF_SAMPLE_MAX_SIZE];
+};
+
+/*
+ * Hash table entry mapping a guest sample ID to the host sample ID that
+ * was allocated for it, and to the VCPU the guest ID belongs to.
+ */
+struct guest_id {
+ /* hlist_node must be first, see free_hlist() */
+ struct hlist_node node;
+ /* Sample ID as found in the guest perf.data file (hash key) */
+ u64 id;
+ /* Replacement sample ID that is unused in the host session */
+ u64 host_id;
+ /* VCPU number, taken from the guest sample ID's CPU */
+ u32 vcpu;
+};
+
+/* Hash table entry mapping a host thread ID to the VCPU it runs */
+struct guest_tid {
+ /* hlist_node must be first, see free_hlist() */
+ struct hlist_node node;
+ /* Thread ID of QEMU thread */
+ u32 tid;
+ /* VCPU number parsed from the thread's comm */
+ u32 vcpu;
+};
+
+/* Per-VCPU state, indexed by VCPU number in guest_session.vcpu[] */
+struct guest_vcpu {
+ /* Current host CPU */
+ u32 cpu;
+ /* Thread ID of QEMU thread */
+ u32 tid;
+};
+
+/*
+ * State for injecting the events of one guest perf.data file into the host
+ * session (--guest-data option).
+ */
+struct guest_session {
+ /* Guest perf.data file name, allocated by parse_guest_data() */
+ char *perf_data_file;
+ /* Guest machine PID given on the command line */
+ u32 machine_pid;
+ /* Offset and scale applied to guest TSC values, see guest_session__convert_time() */
+ u64 time_offset;
+ double time_scale;
+ /* Tool, data and session used to read the guest perf.data file */
+ struct perf_tool tool;
+ struct perf_data data;
+ struct perf_session *session;
+ /* Temporary file that guest events are written to and then read back from */
+ char *tmp_file_name;
+ int tmp_fd;
+ /* Time conversion parameters of the host and guest sessions */
+ struct perf_tsc_conversion host_tc;
+ struct perf_tsc_conversion guest_tc;
+ /* Guest perf.data has a kcore_dir that must be copied to the output */
+ bool copy_kcore_dir;
+ /* host_tc and guest_tc have been filled in */
+ bool have_tc;
+ /* ev holds an event that has not been injected yet */
+ bool fetched;
+ /* Set at the end of host__finished_init(); no events are injected before */
+ bool ready;
+ /* Default ID sample size and ID, used for events with a zero sample ID */
+ u16 dflt_id_hdr_size;
+ u64 dflt_id;
+ /* Most recently allocated host sample ID */
+ u64 highest_id;
+ /* Array of guest_vcpu */
+ struct guest_vcpu *vcpu;
+ size_t vcpu_cnt;
+ /* Hash table for guest_id */
+ struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
+ /* Hash table for guest_tid */
+ struct hlist_head tids[PERF_EVLIST__HLIST_SIZE];
+ /* Place to stash next guest event */
+ struct guest_event ev;
+};
struct perf_inject {
struct perf_tool tool;
@@ -59,6 +122,7 @@ struct perf_inject {
struct itrace_synth_opts itrace_synth_opts;
char event_copy[PERF_SAMPLE_MAX_SIZE];
struct perf_file_section secs[HEADER_FEAT_BITS];
+ struct guest_session guest_session;
};
struct event_entry {
@@ -698,6 +762,841 @@ found:
return perf_event__repipe(tool, event_sw, &sample_sw, machine);
}
+/*
+ * Return the guest_vcpu slot for @vcpu, growing gs->vcpu first via
+ * realloc_array_as_needed(). Returns NULL if growing the array fails.
+ */
+static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
+{
+	int err = realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL);
+
+	return err ? NULL : &gs->vcpu[vcpu];
+}
+
+/*
+ * Write @sz bytes from @buf to the guest session's temporary file.
+ * Returns 0 on success, or the negative value returned by writen().
+ */
+static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz)
+{
+	ssize_t written = writen(gs->tmp_fd, buf, sz);
+
+	if (written < 0)
+		return written;
+
+	return 0;
+}
+
+/* Guest tool callback: copy the raw event unchanged to the temporary file */
+static int guest_session__repipe(struct perf_tool *tool,
+				 union perf_event *event,
+				 struct perf_sample *sample __maybe_unused,
+				 struct machine *machine __maybe_unused)
+{
+	struct guest_session *gs;
+
+	gs = container_of(tool, struct guest_session, tool);
+
+	return guest_session__output_bytes(gs, event, event->header.size);
+}
+
+/*
+ * Add a @tid -> @vcpu entry to the gs->tids hash table.
+ * Returns 0 on success or -ENOMEM.
+ */
+static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu)
+{
+	struct guest_tid *entry;
+	int bucket;
+
+	entry = zalloc(sizeof(*entry));
+	if (!entry)
+		return -ENOMEM;
+
+	entry->tid = tid;
+	entry->vcpu = vcpu;
+
+	bucket = hash_32(entry->tid, PERF_EVLIST__HLIST_BITS);
+	hlist_add_head(&entry->node, &gs->tids[bucket]);
+
+	return 0;
+}
+
+/*
+ * perf_session__peek_events() callback: look for COMM events of the guest
+ * machine's process (gs->machine_pid) whose name identifies a VCPU thread,
+ * and record the thread ID <-> VCPU number association.
+ *
+ * Returns 0 to carry on (including for events that are of no interest), or a
+ * negative error code.
+ */
+static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused,
+				 union perf_event *event,
+				 u64 offset __maybe_unused, void *data)
+{
+	struct guest_session *gs = data;
+	unsigned int vcpu;
+	struct guest_vcpu *guest_vcpu;
+
+	if (event->header.type != PERF_RECORD_COMM ||
+	    event->comm.pid != gs->machine_pid)
+		return 0;
+
+	/*
+	 * QEMU option -name debug-threads=on, causes thread names formatted as
+	 * below, although it is not an ABI. Also libvirt seems to use this by
+	 * default. Here we rely on it to tell us which thread is which VCPU.
+	 *
+	 * Anything other than exactly one conversion means this is not a VCPU
+	 * thread. In particular sscanf() returns EOF (negative) for an empty
+	 * comm, which must be skipped rather than treated as an error.
+	 */
+	if (sscanf(event->comm.comm, "CPU %u/KVM", &vcpu) != 1)
+		return 0;
+	pr_debug("Found VCPU: tid %u comm %s vcpu %u\n",
+		 event->comm.tid, event->comm.comm, vcpu);
+	if (vcpu > INT_MAX) {
+		pr_err("Invalid VCPU %u\n", vcpu);
+		return -EINVAL;
+	}
+	guest_vcpu = guest_session__vcpu(gs, vcpu);
+	if (!guest_vcpu)
+		return -ENOMEM;
+	/* A VCPU may only ever be run by one thread */
+	if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) {
+		pr_err("Fatal error: Two threads found with the same VCPU\n");
+		return -EINVAL;
+	}
+	guest_vcpu->tid = event->comm.tid;
+
+	return guest_session__map_tid(gs, event->comm.tid, vcpu);
+}
+
+/*
+ * Run host_peek_vm_comms_cb() over the data section of the host session
+ * (header.data_offset, header.data_size).
+ */
+static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs)
+{
+	struct perf_header *hdr = &session->header;
+
+	return perf_session__peek_events(session, hdr->data_offset, hdr->data_size,
+					 host_peek_vm_comms_cb, gs);
+}
+
+/* True if @evlist has a sample-id entry for @id */
+static bool evlist__is_id_used(struct evlist *evlist, u64 id)
+{
+	return evlist__id2sid(evlist, id) != NULL;
+}
+
+/*
+ * Advance gs->highest_id to the next value that is non-zero and not used
+ * by @host_evlist, and return it.
+ */
+static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist)
+{
+	for (;;) {
+		gs->highest_id += 1;
+		/* Zero is never handed out, nor is an ID the host already uses */
+		if (gs->highest_id && !evlist__is_id_used(host_evlist, gs->highest_id))
+			break;
+	}
+
+	return gs->highest_id;
+}
+
+/*
+ * Add a guest @id -> (@host_id, @vcpu) entry to the gs->heads hash table.
+ * Returns 0 on success or -ENOMEM.
+ */
+static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu)
+{
+	struct guest_id *entry;
+	int bucket;
+
+	entry = zalloc(sizeof(*entry));
+	if (!entry)
+		return -ENOMEM;
+
+	entry->id = id;
+	entry->host_id = host_id;
+	entry->vcpu = vcpu;
+
+	bucket = hash_64(entry->id, PERF_EVLIST__HLIST_BITS);
+	hlist_add_head(&entry->node, &gs->heads[bucket]);
+
+	return 0;
+}
+
+/* Return the largest sample ID of any evsel in @evlist, but never less than 1 */
+static u64 evlist__find_highest_id(struct evlist *evlist)
+{
+	u64 max_id = 1;
+	struct evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		u32 i;
+
+		for (i = 0; i < pos->core.ids; i++) {
+			if (pos->core.id[i] > max_id)
+				max_id = pos->core.id[i];
+		}
+	}
+
+	return max_id;
+}
+
+/*
+ * For every guest sample ID that has a CPU, allocate a new ID that is not
+ * used by @host_evlist and record the mapping. The sample ID's CPU is
+ * stored as the VCPU number.
+ */
+static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist)
+{
+	struct evlist *guest_evlist = gs->session->evlist;
+	struct evsel *pos;
+
+	evlist__for_each_entry(guest_evlist, pos) {
+		u32 i;
+
+		for (i = 0; i < pos->core.ids; i++) {
+			u64 guest_id = pos->core.id[i];
+			struct perf_sample_id *sid = evlist__id2sid(guest_evlist, guest_id);
+			u64 host_id;
+			int err;
+
+			/* Skip IDs without a sample-id entry or without a CPU */
+			if (!sid || sid->cpu.cpu == -1)
+				continue;
+
+			host_id = guest_session__allocate_new_id(gs, host_evlist);
+			err = guest_session__map_id(gs, guest_id, host_id, sid->cpu.cpu);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/* Find the mapping entry for guest sample @id, or NULL if there is none */
+static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id)
+{
+	int bucket = hash_64(id, PERF_EVLIST__HLIST_BITS);
+	struct hlist_head *chain = &gs->heads[bucket];
+	struct guest_id *pos;
+
+	hlist_for_each_entry(pos, chain, node) {
+		if (pos->id == id)
+			return pos;
+	}
+
+	return NULL;
+}
+
+/* Process a synthesized attr event into the inject (host) session's evlist */
+static int process_attr(struct perf_tool *tool, union perf_event *event,
+			struct perf_sample *sample __maybe_unused,
+			struct machine *machine __maybe_unused)
+{
+	struct perf_inject *inject;
+
+	inject = container_of(tool, struct perf_inject, tool);
+
+	return perf_event__process_attr(tool, event, &inject->session->evlist);
+}
+
+/*
+ * Add a copy of guest @evsel's attribute to the host session, using the host
+ * sample IDs that were mapped for the guest IDs, then fill in the new sample
+ * ID entries so they identify the guest machine, VCPU and QEMU thread.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel)
+{
+	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
+	struct perf_event_attr attr = evsel->core.attr;
+	u64 *id_array;
+	u32 *vcpu_array;
+	int ret = -ENOMEM;
+	u32 i;
+
+	id_array = calloc(evsel->core.ids, sizeof(*id_array));
+	if (!id_array)
+		return -ENOMEM;
+
+	vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array));
+	if (!vcpu_array)
+		goto out;
+
+	/* Translate each guest ID to its host ID and VCPU */
+	for (i = 0; i < evsel->core.ids; i++) {
+		u64 id = evsel->core.id[i];
+		struct guest_id *guest_id = guest_session__lookup_id(gs, id);
+
+		if (!guest_id) {
+			pr_err("Failed to find guest id %"PRIu64"\n", id);
+			ret = -EINVAL;
+			goto out;
+		}
+		id_array[i] = guest_id->host_id;
+		vcpu_array[i] = guest_id->vcpu;
+	}
+
+	attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
+	attr.exclude_host = 1;
+	attr.exclude_guest = 0;
+
+	ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids,
+					  id_array, process_attr);
+	if (ret) {
+		pr_err("Failed to add guest attr.\n");
+		/* The new sample IDs may not exist, so do not touch them */
+		goto out;
+	}
+
+	for (i = 0; i < evsel->core.ids; i++) {
+		struct perf_sample_id *sid;
+		u32 vcpu = vcpu_array[i];
+
+		sid = evlist__id2sid(inject->session->evlist, id_array[i]);
+		if (!sid) {
+			pr_err("Failed to find host id %"PRIu64"\n", id_array[i]);
+			ret = -EINVAL;
+			goto out;
+		}
+		/* gs->vcpu[] only covers VCPUs found on the host */
+		if (vcpu >= gs->vcpu_cnt) {
+			pr_err("Guest event with unknown VCPU %u\n", vcpu);
+			ret = -EINVAL;
+			goto out;
+		}
+		/* Guest event is per-thread from the host point of view */
+		sid->cpu.cpu = -1;
+		sid->tid = gs->vcpu[vcpu].tid;
+		sid->machine_pid = gs->machine_pid;
+		sid->vcpu.cpu = vcpu;
+	}
+out:
+	free(vcpu_array);
+	free(id_array);
+	return ret;
+}
+
+static int guest_session__add_attrs(struct guest_session *gs)
+{
+ struct evlist *evlist = gs->session->evlist;
+ struct evsel *evsel;
+ int ret;
+
+ evlist__for_each_entry(evlist, evsel) {
+ ret = guest_session__add_attr(gs, evsel);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Synthesize an id_index event, passing the index of the first of the last
+ * @new_cnt evlist entries as the starting point.
+ */
+static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt)
+{
+	struct perf_session *session = inject->session;
+	struct evlist *evlist = session->evlist;
+	size_t first_new = evlist->core.nr_entries - new_cnt;
+
+	return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe,
+						 evlist, &session->machines.host,
+						 first_new);
+}
+
+/* Find the VCPU mapping entry for host thread @tid, or NULL if there is none */
+static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid)
+{
+	int bucket = hash_32(tid, PERF_EVLIST__HLIST_BITS);
+	struct hlist_head *chain = &gs->tids[bucket];
+	struct guest_tid *pos;
+
+	hlist_for_each_entry(pos, chain, node) {
+		if (pos->tid == tid)
+			return pos;
+	}
+
+	return NULL;
+}
+
+/*
+ * A dso counts as kernel space if it is kcore, is flagged as kernel, or its
+ * long name is a kernel module; a vdso never does.
+ */
+static bool dso__is_in_kernel_space(struct dso *dso)
+{
+	if (dso__is_vdso(dso))
+		return false;
+
+	if (dso__is_kcore(dso) || dso->kernel)
+		return true;
+
+	return is_kernel_module(dso->long_name, PERF_RECORD_MISC_CPUMODE_UNKNOWN);
+}
+
+/* Return the first sample ID of the first evsel that has any IDs, else 0 */
+static u64 evlist__first_id(struct evlist *evlist)
+{
+	struct evsel *pos;
+	u64 first = 0;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (!pos->core.ids)
+			continue;
+		first = pos->core.id[0];
+		break;
+	}
+
+	return first;
+}
+
+/* Process a synthesized build ID event into the inject (host) session */
+static int process_build_id(struct perf_tool *tool,
+			    union perf_event *event,
+			    struct perf_sample *sample __maybe_unused,
+			    struct machine *machine __maybe_unused)
+{
+	struct perf_inject *inject;
+
+	inject = container_of(tool, struct perf_inject, tool);
+
+	return perf_event__process_build_id(inject->session, event);
+}
+
+/*
+ * Synthesize a build ID event for guest @dso against the host session's
+ * machine for @machine_pid, using a guest kernel or guest user cpumode.
+ * Returns -ENOMEM if the machine cannot be found or created.
+ */
+static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
+{
+	struct machine *machine;
+	u8 cpumode;
+
+	machine = perf_session__findnew_machine(inject->session, machine_pid);
+	if (!machine)
+		return -ENOMEM;
+
+	if (dso__is_in_kernel_space(dso))
+		cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
+	else
+		cpumode = PERF_RECORD_MISC_GUEST_USER;
+
+	dso->hit = 1;
+
+	return perf_event__synthesize_build_id(&inject->tool, dso, cpumode,
+					       process_build_id, machine);
+}
+
+/*
+ * Synthesize a build ID event into the host session for every dso of the
+ * guest session's host machine that has a build ID.
+ */
+static int guest_session__add_build_ids(struct guest_session *gs)
+{
+	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
+	struct machine *guest_machine = &gs->session->machines.host;
+	struct dso *pos;
+
+	/* Build IDs will be put in the Build ID feature section */
+	perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID);
+
+	dsos__for_each_with_build_id(pos, &guest_machine->dsos.head) {
+		int err = synthesize_build_id(inject, pos, gs->machine_pid);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Guest tool callback: copy out-of-line ksymbol events, drop all others */
+static int guest_session__ksymbol_event(struct perf_tool *tool,
+					union perf_event *event,
+					struct perf_sample *sample __maybe_unused,
+					struct machine *machine __maybe_unused)
+{
+	/* Only support out-of-line i.e. no BPF support */
+	if (event->ksymbol.ksym_type == PERF_RECORD_KSYMBOL_TYPE_OOL) {
+		struct guest_session *gs = container_of(tool, struct guest_session, tool);
+
+		return guest_session__output_bytes(gs, event, event->header.size);
+	}
+
+	return 0;
+}
+
+/*
+ * Open the guest perf.data file @name as a separate session, process all of
+ * its events so that the ones of interest end up in a temporary file, and
+ * rewind that file ready for guest_session__fetch().
+ *
+ * On error, whatever was set up so far is left in @gs for
+ * guest_session__exit() to release.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int guest_session__start(struct guest_session *gs, const char *name, bool force)
+{
+	char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX";
+	struct perf_session *session;
+	int ret;
+
+	/* Only these events will be injected */
+	gs->tool.mmap		= guest_session__repipe;
+	gs->tool.mmap2		= guest_session__repipe;
+	gs->tool.comm		= guest_session__repipe;
+	gs->tool.fork		= guest_session__repipe;
+	gs->tool.exit		= guest_session__repipe;
+	gs->tool.lost		= guest_session__repipe;
+	gs->tool.context_switch	= guest_session__repipe;
+	gs->tool.ksymbol	= guest_session__ksymbol_event;
+	gs->tool.text_poke	= guest_session__repipe;
+	/*
+	 * Processing a build ID creates a struct dso with that build ID. Later,
+	 * all guest dsos are iterated and the build IDs processed into the host
+	 * session where they will be output to the Build ID feature section
+	 * when the perf.data file header is written.
+	 */
+	gs->tool.build_id	= perf_event__process_build_id;
+	/* Process the id index to know what VCPU an ID belongs to */
+	gs->tool.id_index	= perf_event__process_id_index;
+
+	gs->tool.ordered_events	= true;
+	gs->tool.ordering_requires_timestamps = true;
+
+	gs->data.path	= name;
+	gs->data.force	= force;
+	gs->data.mode	= PERF_DATA_MODE_READ;
+
+	session = perf_session__new(&gs->data, &gs->tool);
+	if (IS_ERR(session))
+		return PTR_ERR(session);
+	gs->session = session;
+
+	/*
+	 * Initial events have zero'd ID samples. Get default ID sample size
+	 * used for removing them.
+	 */
+	gs->dflt_id_hdr_size = session->machines.host.id_hdr_size;
+	/* And default ID for adding back a host-compatible ID sample */
+	gs->dflt_id = evlist__first_id(session->evlist);
+	if (!gs->dflt_id) {
+		pr_err("Guest data has no sample IDs\n");
+		return -EINVAL;
+	}
+
+	/* Temporary file for guest events */
+	gs->tmp_file_name = strdup(tmp_file_name);
+	if (!gs->tmp_file_name)
+		return -ENOMEM;
+	gs->tmp_fd = mkstemp(gs->tmp_file_name);
+	if (gs->tmp_fd < 0)
+		return -errno;
+
+	/* Failure here is only warned about, processing carries on regardless */
+	if (zstd_init(&gs->session->zstd_data, 0) < 0)
+		pr_warning("Guest session decompression initialization failed.\n");
+
+	/*
+	 * perf does not support processing 2 sessions simultaneously, so output
+	 * guest events to a temporary file.
+	 */
+	ret = perf_session__process_events(gs->session);
+	if (ret)
+		return ret;
+
+	/* Rewind so that the events can be read back from the start */
+	if (lseek(gs->tmp_fd, 0, SEEK_SET))
+		return -errno;
+
+	return 0;
+}
+
+/*
+ * Free every node of the @hlist_sz hash chains in @heads. The node pointer
+ * itself is passed to free(), so hlist_node has to be the first member of
+ * the entries.
+ */
+static void free_hlist(struct hlist_head *heads, size_t hlist_sz)
+{
+	size_t bucket;
+
+	for (bucket = 0; bucket < hlist_sz; bucket++) {
+		struct hlist_node *cur, *next;
+
+		hlist_for_each_safe(cur, next, &heads[bucket]) {
+			hlist_del(cur);
+			free(cur);
+		}
+	}
+}
+
+/*
+ * Release what the guest session holds: the session and both hash tables,
+ * the temporary file, the VCPU array and the perf.data file name.
+ */
+static void guest_session__exit(struct guest_session *gs)
+{
+	char *tmp_name = gs->tmp_file_name;
+
+	if (gs->session) {
+		perf_session__delete(gs->session);
+		free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE);
+		free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE);
+	}
+
+	if (tmp_name) {
+		/* tmp_fd is only looked at once the name has been allocated */
+		if (gs->tmp_fd >= 0)
+			close(gs->tmp_fd);
+		unlink(tmp_name);
+		free(tmp_name);
+	}
+
+	free(gs->vcpu);
+	free(gs->perf_data_file);
+}
+
+/* Copy the time conversion parameters of a TIME_CONV record into @tc */
+static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv)
+{
+	/* Capability flags */
+	tc->cap_user_time_zero	= time_conv->cap_user_time_zero;
+	tc->cap_user_time_short	= time_conv->cap_user_time_short;
+
+	/* Conversion parameters */
+	tc->time_zero		= time_conv->time_zero;
+	tc->time_mult		= time_conv->time_mult;
+	tc->time_shift		= time_conv->time_shift;
+	tc->time_cycles		= time_conv->time_cycles;
+	tc->time_mask		= time_conv->time_mask;
+}
+
+/* Fill gs->host_tc and gs->guest_tc from the host and guest sessions' time_conv */
+static void guest_session__get_tc(struct guest_session *gs)
+{
+	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
+	struct perf_session *host = inject->session;
+	struct perf_session *guest = gs->session;
+
+	get_tsc_conv(&gs->host_tc, &host->time_conv);
+	get_tsc_conv(&gs->guest_tc, &guest->time_conv);
+}
+
+/*
+ * Convert @guest_time to host time in *@host_time: guest perf time to TSC
+ * (when the guest has cap_user_time_zero), then apply the time offset and
+ * scale, then TSC to host perf time (when the host has cap_user_time_zero).
+ * A zero @guest_time stays zero.
+ */
+static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time)
+{
+	u64 ticks = guest_time;
+
+	if (!guest_time) {
+		*host_time = 0;
+		return;
+	}
+
+	if (gs->guest_tc.cap_user_time_zero)
+		ticks = perf_time_to_tsc(guest_time, &gs->guest_tc);
+
+	/*
+	 * This is the correct order of operations for x86 if the TSC Offset and
+	 * Multiplier values are used.
+	 */
+	ticks -= gs->time_offset;
+	ticks /= gs->time_scale;
+
+	if (!gs->host_tc.cap_user_time_zero) {
+		*host_time = ticks;
+		return;
+	}
+
+	*host_time = tsc_to_perf_time(ticks, &gs->host_tc);
+}
+
+/*
+ * Read the next guest event from the temporary file into gs->ev, parse its
+ * sample and convert the sample time to host time.
+ *
+ * At end of file, gs->ev.event is left with a header size of zero, which is
+ * how callers recognise EOF.
+ *
+ * Returns 0 on success (including EOF) or a negative error code.
+ */
+static int guest_session__fetch(struct guest_session *gs)
+{
+	void *buf = gs->ev.event_buf;
+	struct perf_event_header *hdr = buf;
+	size_t hdr_sz = sizeof(*hdr);
+	size_t payload_sz;
+	ssize_t ret;
+
+	/* Set this first so that the EOF marker below is visible to callers */
+	gs->ev.event = (union perf_event *)gs->ev.event_buf;
+
+	ret = readn(gs->tmp_fd, buf, hdr_sz);
+	if (ret < 0)
+		return ret;
+
+	if (!ret) {
+		/* Zero size means EOF */
+		hdr->size = 0;
+		return 0;
+	}
+
+	/* The size includes the header, anything less would underflow below */
+	if (hdr->size < hdr_sz) {
+		pr_err("Bad guest event size %u\n", hdr->size);
+		return -EINVAL;
+	}
+
+	payload_sz = hdr->size - hdr_sz;
+	if (payload_sz) {
+		ret = readn(gs->tmp_fd, buf + hdr_sz, payload_sz);
+		if (ret < 0)
+			return ret;
+		if (!ret) {
+			/* File ended part way through the event */
+			pr_err("Truncated guest event\n");
+			return -EINVAL;
+		}
+	}
+
+	gs->ev.sample.time = 0;
+
+	/* Reported but not treated as fatal: the sample time stays zero */
+	if (hdr->type >= PERF_RECORD_USER_TYPE_START) {
+		pr_err("Unexpected type fetching guest event\n");
+		return 0;
+	}
+
+	ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample);
+	if (ret) {
+		pr_err("Parse failed fetching guest event\n");
+		return ret;
+	}
+
+	/* The time conversion parameters are fetched once, on first use */
+	if (!gs->have_tc) {
+		guest_session__get_tc(gs);
+		gs->have_tc = true;
+	}
+
+	guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time);
+
+	return 0;
+}
+
+/*
+ * Append an ID sample for @sample to the end of event @ev, using the
+ * sample_type of the evsel that owns sample->id, and grow ev->header.size
+ * accordingly. The caller must make sure there is room after the event.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev,
+				    const struct perf_sample *sample)
+{
+	struct evsel *evsel = evlist__id2evsel(evlist, sample->id);
+	void *dest;
+	int sz;
+
+	if (!evsel) {
+		pr_err("No evsel for id %"PRIu64"\n", sample->id);
+		return -EINVAL;
+	}
+
+	/* The ID sample goes directly after the current end of the event */
+	dest = (void *)ev + ev->header.size;
+
+	sz = perf_event__synthesize_id_sample(dest, evsel->core.attr.sample_type, sample);
+	if (sz < 0)
+		return sz;
+
+	if (sz & 7) {
+		pr_err("Bad id sample size %d\n", sz);
+		return -EINVAL;
+	}
+
+	ev->header.size += sz;
+
+	return 0;
+}
+
+/*
+ * Write guest events to the output, in order, up to and including host time
+ * @timestamp. Each event has its cpumode changed to a guest cpumode and its
+ * ID sample replaced by one carrying the mapped host sample ID and the host
+ * CPU that the VCPU was last seen running on.
+ *
+ * Does nothing until the guest session is ready. An event with a later
+ * timestamp stays stashed in gs->ev for the next call.
+ *
+ * Returns 0 on success (including when there is nothing to do) or a negative
+ * error code.
+ */
+static int guest_session__inject_events(struct guest_session *gs, u64 timestamp)
+{
+	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
+	int ret;
+
+	if (!gs->ready)
+		return 0;
+
+	while (1) {
+		struct perf_sample *sample;
+		struct guest_id *guest_id;
+		union perf_event *ev;
+		u16 id_hdr_size;
+		u8 cpumode;
+		u64 id;
+
+		if (!gs->fetched) {
+			ret = guest_session__fetch(gs);
+			if (ret)
+				return ret;
+			gs->fetched = true;
+		}
+
+		ev = gs->ev.event;
+		sample = &gs->ev.sample;
+
+		/* No event at all (empty temporary file) is also EOF */
+		if (!ev || !ev->header.size)
+			return 0; /* EOF */
+
+		if (sample->time > timestamp)
+			return 0;
+
+		/* Change cpumode to guest */
+		cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+		if (cpumode & PERF_RECORD_MISC_USER)
+			cpumode = PERF_RECORD_MISC_GUEST_USER;
+		else
+			cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
+		ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
+		ev->header.misc |= cpumode;
+
+		id = sample->id;
+		if (!id) {
+			id = gs->dflt_id;
+			id_hdr_size = gs->dflt_id_hdr_size;
+		} else {
+			struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id);
+
+			/* An ID with no guest evsel cannot be sized or mapped */
+			if (!evsel) {
+				pr_err("Guest event with unknown id %"PRIu64"\n", id);
+				return -EINVAL;
+			}
+			id_hdr_size = evsel__id_hdr_size(evsel);
+		}
+
+		if (id_hdr_size & 7) {
+			pr_err("Bad id_hdr_size %u\n", id_hdr_size);
+			return -EINVAL;
+		}
+
+		if (ev->header.size & 7) {
+			pr_err("Bad event size %u\n", ev->header.size);
+			return -EINVAL;
+		}
+
+		/* The subtraction below must not wrap around */
+		if (id_hdr_size > ev->header.size) {
+			pr_err("Bad event size %u\n", ev->header.size);
+			return -EINVAL;
+		}
+
+		/* Remove guest id sample */
+		ev->header.size -= id_hdr_size;
+
+		if (ev->header.size & 7) {
+			pr_err("Bad raw event size %u\n", ev->header.size);
+			return -EINVAL;
+		}
+
+		guest_id = guest_session__lookup_id(gs, id);
+		if (!guest_id) {
+			pr_err("Guest event with unknown id %llu\n",
+			       (unsigned long long)id);
+			return -EINVAL;
+		}
+
+		/* Change to host ID to avoid conflicting ID values */
+		sample->id = guest_id->host_id;
+		sample->stream_id = guest_id->host_id;
+
+		if (sample->cpu != (u32)-1) {
+			if (sample->cpu >= gs->vcpu_cnt) {
+				pr_err("Guest event with unknown VCPU %u\n",
+				       sample->cpu);
+				return -EINVAL;
+			}
+			/* Change to host CPU instead of guest VCPU */
+			sample->cpu = gs->vcpu[sample->cpu].cpu;
+		}
+
+		/* New id sample with new ID and CPU */
+		ret = evlist__append_id_sample(inject->session->evlist, ev, sample);
+		if (ret)
+			return ret;
+
+		if (ev->header.size & 7) {
+			pr_err("Bad new event size %u\n", ev->header.size);
+			return -EINVAL;
+		}
+
+		/* The stashed event is consumed; fetch a new one next time round */
+		gs->fetched = false;
+
+		ret = output_bytes(inject, ev, ev->header.size);
+		if (ret)
+			return ret;
+	}
+}
+
+/* Inject all remaining guest events by using the largest possible timestamp */
+static int guest_session__flush_events(struct guest_session *gs)
+{
+	u64 no_limit = -1;
+
+	return guest_session__inject_events(gs, no_limit);
+}
+
+/*
+ * Host tool callback: inject guest events up to the host event's sample
+ * time, then repipe the host event itself.
+ */
+static int host__repipe(struct perf_tool *tool,
+			union perf_event *event,
+			struct perf_sample *sample,
+			struct machine *machine)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+	int err = guest_session__inject_events(&inject->guest_session, sample->time);
+
+	return err ? err : perf_event__repipe(tool, event, sample, machine);
+}
+
+/*
+ * Host tool callback for the FINISHED_INIT event. Sets up everything needed
+ * to inject guest events: the thread -> VCPU mapping, the guest -> host
+ * sample ID mapping, the guest attrs, an id_index and the guest build IDs.
+ * Then it marks the guest session ready, injects guest events with a
+ * converted time of zero, and finally repipes the FINISHED_INIT event.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int host__finished_init(struct perf_session *session, union perf_event *event)
+{
+ struct perf_inject *inject = container_of(session->tool, struct perf_inject, tool);
+ struct guest_session *gs = &inject->guest_session;
+ int ret;
+
+ /*
+ * Peek through host COMM events to find QEMU threads and the VCPU they
+ * are running.
+ */
+ ret = host_peek_vm_comms(session, gs);
+ if (ret)
+ return ret;
+
+ if (!gs->vcpu_cnt) {
+ pr_err("No VCPU threads found for pid %u\n", gs->machine_pid);
+ return -EINVAL;
+ }
+
+ /*
+ * Allocate new (unused) host sample IDs and map them to the guest IDs.
+ */
+ gs->highest_id = evlist__find_highest_id(session->evlist);
+ ret = guest_session__map_ids(gs, session->evlist);
+ if (ret)
+ return ret;
+
+ /* Add the guest attrs to the host session, using the mapped IDs */
+ ret = guest_session__add_attrs(gs);
+ if (ret)
+ return ret;
+
+ /* The new count passed here is the number of guest evlist entries */
+ ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries);
+ if (ret) {
+ pr_err("Failed to synthesize id_index\n");
+ return ret;
+ }
+
+ ret = guest_session__add_build_ids(gs);
+ if (ret) {
+ pr_err("Failed to add guest build IDs\n");
+ return ret;
+ }
+
+ /* From here on guest_session__inject_events() does real work */
+ gs->ready = true;
+
+ /* Timestamp 0: only events whose converted time is zero go out now */
+ ret = guest_session__inject_events(gs, 0);
+ if (ret)
+ return ret;
+
+ return perf_event__repipe_op2_synth(session, event);
+}
+
+/*
+ * Obey finished-round ordering. The FINISHED_ROUND event is processed first,
+ * which flushes host events to file up until the last flush time. Guest
+ * events are then injected up to that same time, and lastly the
+ * FINISHED_ROUND event itself is written out.
+ */
+static int host__finished_round(struct perf_tool *tool,
+				union perf_event *event,
+				struct ordered_events *oe)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+	struct guest_session *gs = &inject->guest_session;
+	u64 flushed_to;
+	int err;
+
+	err = perf_event__process_finished_round(tool, event, oe);
+	flushed_to = ordered_events__last_flush_time(oe);
+	if (err)
+		return err;
+
+	err = guest_session__inject_events(gs, flushed_to);
+	if (err)
+		return err;
+
+	return perf_event__repipe_oe_synth(tool, event, oe);
+}
+
+/*
+ * Host tool callback for context switch events. When a known VCPU thread of
+ * the guest machine switches in, remember which host CPU it is now on. The
+ * event is then handled like any other host event.
+ */
+static int host__context_switch(struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct machine *machine)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+	struct guest_session *gs = &inject->guest_session;
+	bool switch_out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+	u32 pid = event->context_switch.next_prev_pid;
+	u32 tid = event->context_switch.next_prev_tid;
+
+	if (!switch_out && pid == gs->machine_pid) {
+		struct guest_tid *guest_tid = guest_session__lookup_tid(gs, tid);
+
+		if (guest_tid) {
+			u32 vcpu;
+
+			if (sample->cpu == (u32)-1) {
+				pr_err("Switch event does not have CPU\n");
+				return -EINVAL;
+			}
+
+			vcpu = guest_tid->vcpu;
+			if (vcpu >= gs->vcpu_cnt)
+				return -EINVAL;
+
+			/* Guest is switching in, record which CPU the VCPU is now running on */
+			gs->vcpu[vcpu].cpu = sample->cpu;
+		}
+	}
+
+	return host__repipe(tool, event, sample, machine);
+}
+
static void sig_handler(int sig __maybe_unused)
{
session_done = 1;
@@ -767,6 +1666,61 @@ static int parse_vm_time_correlation(const struct option *opt, const char *str,
return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
}
+/*
+ * Parse the --guest-data option argument:
+ *
+ *	<guest perf.data file>,<guest machine PID>[,<time offset>[,<time scale>]]
+ *
+ * The time scale defaults to 1. Returns 0 on success, -ENOMEM if the argument
+ * cannot be copied, or -1 after printing a usage hint for a bad argument.
+ */
+static int parse_guest_data(const struct option *opt, const char *str, int unset)
+{
+	struct perf_inject *inject = opt->value;
+	struct guest_session *gs = &inject->guest_session;
+	char *tok;
+	char *s;
+
+	if (unset)
+		return 0;
+
+	if (!str)
+		goto bad_args;
+
+	s = strdup(str);
+	if (!s)
+		return -ENOMEM;
+
+	/* Do not leak the copy made for an earlier occurrence of the option */
+	free(gs->perf_data_file);
+	/*
+	 * The first token starts at the beginning of the copy, so
+	 * perf_data_file owns the allocation, see guest_session__exit().
+	 */
+	gs->perf_data_file = strsep(&s, ",");
+	/* strsep() yields an empty token, not NULL, for a missing file name */
+	if (!gs->perf_data_file || !*gs->perf_data_file)
+		goto bad_args;
+
+	gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file);
+	if (gs->copy_kcore_dir)
+		inject->output.is_dir = true;
+
+	tok = strsep(&s, ",");
+	if (!tok)
+		goto bad_args;
+	gs->machine_pid = strtoul(tok, NULL, 0);
+	if (!gs->machine_pid)
+		goto bad_args;
+
+	gs->time_scale = 1;
+
+	/* The remaining two fields are optional */
+	tok = strsep(&s, ",");
+	if (!tok)
+		goto out;
+	gs->time_offset = strtoull(tok, NULL, 0);
+
+	tok = strsep(&s, ",");
+	if (!tok)
+		goto out;
+	gs->time_scale = strtod(tok, NULL);
+	/* A zero scale would mean dividing by zero when converting time */
+	if (!gs->time_scale)
+		goto bad_args;
+out:
+	return 0;
+
+bad_args:
+	pr_err("--guest-data option requires guest perf.data file name, "
+	       "guest machine PID, and optionally guest timestamp offset, "
+	       "and guest timestamp scale factor, separated by commas.\n");
+	return -1;
+}
+
static int save_section_info_cb(struct perf_file_section *section,
struct perf_header *ph __maybe_unused,
int feat, int fd __maybe_unused, void *data)
@@ -809,7 +1763,7 @@ static bool keep_feat(int feat)
case HEADER_CPU_PMU_CAPS:
case HEADER_CLOCK_DATA:
case HEADER_HYBRID_TOPOLOGY:
- case HEADER_HYBRID_CPU_PMU_CAPS:
+ case HEADER_PMU_CAPS:
return true;
/* Information that can be updated */
case HEADER_BUILD_ID:
@@ -896,6 +1850,22 @@ static int copy_kcore_dir(struct perf_inject *inject)
return ret;
}
+/*
+ * Copy the guest perf.data kcore_dir to kcore_dir__<machine pid> under the
+ * output path by running cp. Returns the value of system(), or the negative
+ * value from asprintf() if the command could not be built.
+ */
+static int guest_session__copy_kcore_dir(struct guest_session *gs)
+{
+	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
+	char *command;
+	int status;
+
+	status = asprintf(&command, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1",
+			  gs->perf_data_file, inject->output.path, gs->machine_pid);
+	if (status < 0)
+		return status;
+
+	pr_debug("%s\n", command);
+	status = system(command);
+	free(command);
+
+	return status;
+}
+
static int output_fd(struct perf_inject *inject)
{
return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
@@ -904,6 +1874,7 @@ static int output_fd(struct perf_inject *inject)
static int __cmd_inject(struct perf_inject *inject)
{
int ret = -EINVAL;
+ struct guest_session *gs = &inject->guest_session;
struct perf_session *session = inject->session;
int fd = output_fd(inject);
u64 output_data_offset;
@@ -968,6 +1939,47 @@ static int __cmd_inject(struct perf_inject *inject)
output_data_offset = roundup(8192 + session->header.data_offset, 4096);
if (inject->strip)
strip_init(inject);
+ } else if (gs->perf_data_file) {
+ char *name = gs->perf_data_file;
+
+ /*
+ * Not strictly necessary, but keep these events in order wrt
+ * guest events.
+ */
+ inject->tool.mmap = host__repipe;
+ inject->tool.mmap2 = host__repipe;
+ inject->tool.comm = host__repipe;
+ inject->tool.fork = host__repipe;
+ inject->tool.exit = host__repipe;
+ inject->tool.lost = host__repipe;
+ inject->tool.context_switch = host__repipe;
+ inject->tool.ksymbol = host__repipe;
+ inject->tool.text_poke = host__repipe;
+ /*
+ * Once the host session has initialized, set up sample ID
+ * mapping and feed in guest attrs, build IDs and initial
+ * events.
+ */
+ inject->tool.finished_init = host__finished_init;
+ /* Obey finished round ordering */
+ inject->tool.finished_round = host__finished_round,
+ /* Keep track of which CPU a VCPU is running on */
+ inject->tool.context_switch = host__context_switch;
+ /*
+ * Must order events to be able to obey finished round
+ * ordering.
+ */
+ inject->tool.ordered_events = true;
+ inject->tool.ordering_requires_timestamps = true;
+ /* Set up a separate session to process guest perf.data file */
+ ret = guest_session__start(gs, name, session->data->force);
+ if (ret) {
+ pr_err("Failed to process %s, error %d\n", name, ret);
+ return ret;
+ }
+ /* Allow space in the header for guest attributes */
+ output_data_offset += gs->session->header.data_offset;
+ output_data_offset = roundup(output_data_offset, 4096);
}
if (!inject->itrace_synth_opts.set)
@@ -980,6 +1992,18 @@ static int __cmd_inject(struct perf_inject *inject)
if (ret)
return ret;
+ if (gs->session) {
+ /*
+ * Remaining guest events have later timestamps. Flush them
+ * out to file.
+ */
+ ret = guest_session__flush_events(gs);
+ if (ret) {
+ pr_err("Failed to flush guest events\n");
+ return ret;
+ }
+ }
+
if (!inject->is_pipe && !inject->in_place_update) {
struct inject_fc inj_fc = {
.fc.copy = feat_copy_cb,
@@ -1014,8 +2038,17 @@ static int __cmd_inject(struct perf_inject *inject)
if (inject->copy_kcore_dir) {
ret = copy_kcore_dir(inject);
- if (ret)
+ if (ret) {
+ pr_err("Failed to copy kcore\n");
return ret;
+ }
+ }
+ if (gs->copy_kcore_dir) {
+ ret = guest_session__copy_kcore_dir(gs);
+ if (ret) {
+ pr_err("Failed to copy guest kcore\n");
+ return ret;
+ }
}
}
@@ -1061,6 +2094,7 @@ int cmd_inject(int argc, const char **argv)
.stat = perf_event__repipe_op2_synth,
.stat_round = perf_event__repipe_op2_synth,
.feature = perf_event__repipe_op2_synth,
+ .finished_init = perf_event__repipe_op2_synth,
.compressed = perf_event__repipe_op4_synth,
.auxtrace = perf_event__repipe_auxtrace,
},
@@ -1112,6 +2146,12 @@ int cmd_inject(int argc, const char **argv)
OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts",
"correlate time between VM guests and the host",
parse_vm_time_correlation),
+ OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts",
+ "inject events from a guest perf.data file",
+ parse_guest_data),
+ OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
+ "guest mount directory under which every guest os"
+ " instance has a subdir"),
OPT_END()
};
const char * const inject_usage[] = {
@@ -1242,6 +2282,8 @@ int cmd_inject(int argc, const char **argv)
ret = __cmd_inject(&inject);
+ guest_session__exit(&inject.guest_session);
+
out_delete:
zstd_fini(&(inject.session->zstd_data));
perf_session__delete(inject.session);
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 3696ae97f149..7d9ec1bac1a2 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1638,14 +1638,14 @@ int cmd_kvm(int argc, const char **argv)
return __cmd_record(file_name, argc, argv);
else if (strlen(argv[0]) > 2 && strstarts("report", argv[0]))
return __cmd_report(file_name, argc, argv);
- else if (!strncmp(argv[0], "diff", 4))
+ else if (strlen(argv[0]) > 2 && strstarts("diff", argv[0]))
return cmd_diff(argc, argv);
- else if (!strncmp(argv[0], "top", 3))
+ else if (!strcmp(argv[0], "top"))
return cmd_top(argc, argv);
- else if (!strncmp(argv[0], "buildid-list", 12))
+ else if (strlen(argv[0]) > 2 && strstarts("buildid-list", argv[0]))
return __cmd_buildid_list(file_name, argc, argv);
#ifdef HAVE_KVM_STAT_SUPPORT
- else if (!strncmp(argv[0], "stat", 4))
+ else if (strlen(argv[0]) > 2 && strstarts("stat", argv[0]))
return kvm_cmd_stat(file_name, argc, argv);
#endif
else
diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c
new file mode 100644
index 000000000000..fb8c63656ad8
--- /dev/null
+++ b/tools/perf/builtin-kwork.c
@@ -0,0 +1,1832 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-kwork.c
+ *
+ * Copyright (c) 2022 Huawei Inc, Yang Jihong <yangjihong1@huawei.com>
+ */
+
+#include "builtin.h"
+
+#include "util/data.h"
+#include "util/kwork.h"
+#include "util/debug.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/string2.h"
+#include "util/callchain.h"
+#include "util/evsel_fprintf.h"
+
+#include <subcmd/pager.h>
+#include <subcmd/parse-options.h>
+
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/err.h>
+#include <linux/time64.h>
+#include <linux/zalloc.h>
+
+/*
+ * report header elements width
+ *
+ * Column widths, in characters, shared by the report and timehist printers
+ * in this file. The *_HEADER_WIDTH values are the data width plus the room
+ * taken by the unit suffix (" ms" or " s") that data rows print after the
+ * number, so header cells and data cells line up. PRINT_TIMEHIST_CPU_WIDTH
+ * adds the two bracket characters printed around the cpu number.
+ *
+ * NOTE(review): "RPINT" and "MESC" look like misspellings of "PRINT" and
+ * "MSEC"; the names are left alone because other code refers to them.
+ */
+#define PRINT_CPU_WIDTH 4
+#define PRINT_COUNT_WIDTH 9
+#define PRINT_RUNTIME_WIDTH 10
+#define PRINT_LATENCY_WIDTH 10
+#define PRINT_TIMESTAMP_WIDTH 17
+#define PRINT_KWORK_NAME_WIDTH 30
+#define RPINT_DECIMAL_WIDTH 3
+#define PRINT_BRACKETPAIR_WIDTH 2
+#define PRINT_TIME_UNIT_SEC_WIDTH 2
+#define PRINT_TIME_UNIT_MESC_WIDTH 3
+#define PRINT_RUNTIME_HEADER_WIDTH (PRINT_RUNTIME_WIDTH + PRINT_TIME_UNIT_MESC_WIDTH)
+#define PRINT_LATENCY_HEADER_WIDTH (PRINT_LATENCY_WIDTH + PRINT_TIME_UNIT_MESC_WIDTH)
+#define PRINT_TIMEHIST_CPU_WIDTH (PRINT_CPU_WIDTH + PRINT_BRACKETPAIR_WIDTH)
+#define PRINT_TIMESTAMP_HEADER_WIDTH (PRINT_TIMESTAMP_WIDTH + PRINT_TIME_UNIT_SEC_WIDTH)
+
+/*
+ * One selectable sort key: the name matched against the user's sort string
+ * and the comparator used for it. Instances are chained through @list to
+ * form an ordered list of comparators.
+ */
+struct sort_dimension {
+ const char *name;
+ int (*cmp)(struct kwork_work *l, struct kwork_work *r);
+ struct list_head list;
+};
+
+/*
+ * Three-way comparison of two works: cpu is the primary key, id the
+ * secondary one. Returns 1 if @l is greater, -1 if smaller, 0 if equal.
+ */
+static int id_cmp(struct kwork_work *l, struct kwork_work *r)
+{
+	if (l->cpu != r->cpu)
+		return l->cpu > r->cpu ? 1 : -1;
+
+	if (l->id != r->id)
+		return l->id > r->id ? 1 : -1;
+
+	return 0;
+}
+
+/*
+ * Three-way comparison of two works by their number of recorded atoms.
+ */
+static int count_cmp(struct kwork_work *l, struct kwork_work *r)
+{
+	if (l->nr_atoms == r->nr_atoms)
+		return 0;
+
+	return l->nr_atoms > r->nr_atoms ? 1 : -1;
+}
+
+/*
+ * Three-way comparison of two works by accumulated runtime.
+ */
+static int runtime_cmp(struct kwork_work *l, struct kwork_work *r)
+{
+	if (l->total_runtime == r->total_runtime)
+		return 0;
+
+	return l->total_runtime > r->total_runtime ? 1 : -1;
+}
+
+/*
+ * Three-way comparison of two works by their longest single runtime.
+ */
+static int max_runtime_cmp(struct kwork_work *l, struct kwork_work *r)
+{
+	if (l->max_runtime == r->max_runtime)
+		return 0;
+
+	return l->max_runtime > r->max_runtime ? 1 : -1;
+}
+
+/*
+ * Three-way comparison of two works by average latency
+ * (total_latency / nr_atoms).
+ *
+ * A work without atoms has no average: if @r has none the result is 1,
+ * otherwise if @l has none the result is -1. The check on @r comes first,
+ * so two empty works compare as 1.
+ */
+static int avg_latency_cmp(struct kwork_work *l, struct kwork_work *r)
+{
+	u64 lhs_avg, rhs_avg;
+
+	if (r->nr_atoms == 0)
+		return 1;
+	if (l->nr_atoms == 0)
+		return -1;
+
+	lhs_avg = l->total_latency / l->nr_atoms;
+	rhs_avg = r->total_latency / r->nr_atoms;
+
+	if (lhs_avg == rhs_avg)
+		return 0;
+
+	return lhs_avg > rhs_avg ? 1 : -1;
+}
+
+/*
+ * Three-way comparison of two works by their longest single latency.
+ */
+static int max_latency_cmp(struct kwork_work *l, struct kwork_work *r)
+{
+	if (l->max_latency == r->max_latency)
+		return 0;
+
+	return l->max_latency > r->max_latency ? 1 : -1;
+}
+
+/*
+ * Append the sort dimension named @tok to @list.
+ *
+ * The dimensions are function-local statics, so the list nodes live for the
+ * whole program. In latency report mode the "max" key compares maximum
+ * latency instead of maximum runtime.
+ *
+ * Returns 0 when @tok names a known dimension, -1 otherwise.
+ */
+static int sort_dimension__add(struct perf_kwork *kwork __maybe_unused,
+			       const char *tok, struct list_head *list)
+{
+	static struct sort_dimension id_sort_dimension = {
+		.name = "id",
+		.cmp = id_cmp,
+	};
+	static struct sort_dimension max_sort_dimension = {
+		.name = "max",
+		.cmp = max_runtime_cmp,
+	};
+	static struct sort_dimension count_sort_dimension = {
+		.name = "count",
+		.cmp = count_cmp,
+	};
+	static struct sort_dimension runtime_sort_dimension = {
+		.name = "runtime",
+		.cmp = runtime_cmp,
+	};
+	static struct sort_dimension avg_sort_dimension = {
+		.name = "avg",
+		.cmp = avg_latency_cmp,
+	};
+	struct sort_dimension *available_sorts[] = {
+		&id_sort_dimension,
+		&max_sort_dimension,
+		&count_sort_dimension,
+		&runtime_sort_dimension,
+		&avg_sort_dimension,
+	};
+	struct sort_dimension **dim = available_sorts;
+	struct sort_dimension **end = available_sorts + ARRAY_SIZE(available_sorts);
+
+	if (kwork->report == KWORK_REPORT_LATENCY)
+		max_sort_dimension.cmp = max_latency_cmp;
+
+	for (; dim != end; dim++) {
+		if (strcmp((*dim)->name, tok) != 0)
+			continue;
+
+		list_add_tail(&(*dim)->list, list);
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Split kwork->sort_order on commas and spaces and register every key on
+ * kwork->sort_list. An unknown key is reported through
+ * usage_with_options_msg().
+ */
+static void setup_sorting(struct perf_kwork *kwork,
+			  const struct option *options,
+			  const char * const usage_msg[])
+{
+	char *saveptr;
+	char *copy = strdup(kwork->sort_order);
+	char *key = strtok_r(copy, ", ", &saveptr);
+
+	while (key != NULL) {
+		if (sort_dimension__add(kwork, key, &kwork->sort_list) < 0)
+			usage_with_options_msg(usage_msg, options,
+					       "Unknown --sort key: `%s'", key);
+
+		key = strtok_r(NULL, ", ", &saveptr);
+	}
+
+	pr_debug("Sort order: %s\n", kwork->sort_order);
+	free(copy);
+}
+
+/*
+ * Hand out one atom slot from the pool of atom pages owned by @kwork.
+ *
+ * Every page tracks its used slots in a bitmap. The first page on
+ * atom_page_list that still has a clear bit supplies the slot; when no page
+ * has room a new zeroed page is allocated and appended to the list.
+ *
+ * The returned atom is stamped with sample->time, has no predecessor, and
+ * records its page and bit index so that atom_free() can release the slot.
+ * Returns NULL when a new page cannot be allocated.
+ */
+static struct kwork_atom *atom_new(struct perf_kwork *kwork,
+ struct perf_sample *sample)
+{
+ unsigned long i;
+ struct kwork_atom_page *page;
+ struct kwork_atom *atom = NULL;
+
+ /* look for a free slot in the pages we already have */
+ list_for_each_entry(page, &kwork->atom_page_list, list) {
+ if (!bitmap_full(page->bitmap, NR_ATOM_PER_PAGE)) {
+ i = find_first_zero_bit(page->bitmap, NR_ATOM_PER_PAGE);
+ BUG_ON(i >= NR_ATOM_PER_PAGE);
+ atom = &page->atoms[i];
+ goto found_atom;
+ }
+ }
+
+ /*
+ * new page
+ */
+ page = zalloc(sizeof(*page));
+ if (page == NULL) {
+ pr_err("Failed to zalloc kwork atom page\n");
+ return NULL;
+ }
+
+ /* a fresh page is empty, so slot 0 is free */
+ i = 0;
+ atom = &page->atoms[0];
+ list_add_tail(&page->list, &kwork->atom_page_list);
+
+found_atom:
+ /* mark the slot used and initialise the atom */
+ set_bit(i, page->bitmap);
+ atom->time = sample->time;
+ atom->prev = NULL;
+ atom->page_addr = page;
+ atom->bit_inpage = i;
+ return atom;
+}
+
+/*
+ * Release @atom and, recursively, every atom reachable through ->prev by
+ * clearing their slots in the owning pages' bitmaps.
+ */
+static void atom_free(struct kwork_atom *atom)
+{
+	struct kwork_atom_page *page;
+
+	if (atom->prev != NULL)
+		atom_free(atom->prev);
+
+	page = (struct kwork_atom_page *)atom->page_addr;
+	clear_bit(atom->bit_inpage, page->bitmap);
+}
+
+/*
+ * Unlink @atom from the list it is queued on and release its slot
+ * (together with any chained ->prev atoms) via atom_free().
+ */
+static void atom_del(struct kwork_atom *atom)
+{
+ list_del(&atom->list);
+ atom_free(atom);
+}
+
+/*
+ * Compare @l and @r with each sort dimension on @list in order; the first
+ * dimension that reports a difference decides the result. Returns 0 when
+ * every dimension considers the two works equal. @list must not be empty.
+ */
+static int work_cmp(struct list_head *list,
+		    struct kwork_work *l, struct kwork_work *r)
+{
+	struct sort_dimension *dim;
+	int res = 0;
+
+	BUG_ON(list_empty(list));
+
+	list_for_each_entry(dim, list, list) {
+		res = dim->cmp(l, r);
+		if (res != 0)
+			break;
+	}
+
+	return res;
+}
+
+/*
+ * Look up the work equal to @key (according to @sort_list) in @root.
+ *
+ * Keys comparing greater than a node are searched for in its left subtree.
+ * If the matching work has no name yet, it adopts the key's name.
+ * Returns the work, or NULL if there is no match.
+ */
+static struct kwork_work *work_search(struct rb_root_cached *root,
+				      struct kwork_work *key,
+				      struct list_head *sort_list)
+{
+	struct rb_node *cur = root->rb_root.rb_node;
+
+	while (cur != NULL) {
+		struct kwork_work *work = container_of(cur, struct kwork_work, node);
+		int cmp = work_cmp(sort_list, key, work);
+
+		if (cmp == 0) {
+			if (work->name == NULL)
+				work->name = key->name;
+			return work;
+		}
+
+		cur = (cmp > 0) ? cur->rb_left : cur->rb_right;
+	}
+
+	return NULL;
+}
+
+/*
+ * Link @key into the cached rb-tree @root, ordered by the comparators on
+ * @sort_list.
+ *
+ * A key that compares greater than the current node descends to the left,
+ * so the cached leftmost node is the greatest entry. Keys that compare equal
+ * or smaller descend to the right, which means equal keys are inserted
+ * rather than rejected.
+ */
+static void work_insert(struct rb_root_cached *root,
+ struct kwork_work *key, struct list_head *sort_list)
+{
+ int cmp;
+ bool leftmost = true;
+ struct kwork_work *cur;
+ struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL;
+
+ while (*new) {
+ cur = container_of(*new, struct kwork_work, node);
+ parent = *new;
+ cmp = work_cmp(sort_list, key, cur);
+
+ if (cmp > 0)
+ new = &((*new)->rb_left);
+ else {
+ new = &((*new)->rb_right);
+ /* went right at least once: cannot be the leftmost node */
+ leftmost = false;
+ }
+ }
+
+ rb_link_node(&key->node, parent, new);
+ rb_insert_color_cached(&key->node, root, leftmost);
+}
+
+/*
+ * Allocate a zeroed work and copy the identifying fields (id, cpu, name,
+ * class) from @key. All per-trace-type atom lists start out empty.
+ * Returns NULL on allocation failure.
+ */
+static struct kwork_work *work_new(struct kwork_work *key)
+{
+	struct kwork_work *work;
+	int type;
+
+	work = zalloc(sizeof(*work));
+	if (!work) {
+		pr_err("Failed to zalloc kwork work\n");
+		return NULL;
+	}
+
+	work->class = key->class;
+	work->name = key->name;
+	work->cpu = key->cpu;
+	work->id = key->id;
+
+	for (type = 0; type < KWORK_TRACE_MAX; type++)
+		INIT_LIST_HEAD(&work->atom_list[type]);
+
+	return work;
+}
+
+/*
+ * Return the work matching @key in @root, creating and inserting a new one
+ * when none exists. Returns NULL only if a new work cannot be allocated.
+ */
+static struct kwork_work *work_findnew(struct rb_root_cached *root,
+				       struct kwork_work *key,
+				       struct list_head *sort_list)
+{
+	struct kwork_work *work;
+
+	work = work_search(root, key, sort_list);
+	if (work == NULL) {
+		work = work_new(key);
+		if (work != NULL)
+			work_insert(root, work, sort_list);
+	}
+
+	return work;
+}
+
+/*
+ * When a summary was requested, widen the [timestart, timeend] span kept in
+ * @kwork so that it covers @sample's timestamp. A timestart of 0 means the
+ * span has not been started yet.
+ */
+static void profile_update_timespan(struct perf_kwork *kwork,
+				    struct perf_sample *sample)
+{
+	if (kwork->summary) {
+		if (kwork->timestart == 0 || sample->time < kwork->timestart)
+			kwork->timestart = sample->time;
+
+		if (sample->time > kwork->timeend)
+			kwork->timeend = sample->time;
+	}
+}
+
+/*
+ * Decide whether an event for @work at @sample's time passes the user's
+ * filters: the cpu list, the time interval (a bound of 0 is unbounded) and
+ * the work name. A work without a name is never rejected by the name filter.
+ *
+ * An accepted event also updates the summary time span.
+ */
+static bool profile_event_match(struct perf_kwork *kwork,
+				struct kwork_work *work,
+				struct perf_sample *sample)
+{
+	struct perf_time_interval *window = &kwork->ptime;
+	u64 when = sample->time;
+	int cpu = work->cpu;
+
+	if (kwork->cpu_list != NULL && !test_bit(cpu, kwork->cpu_bitmap))
+		return false;
+
+	if (window->start != 0 && window->start > when)
+		return false;
+
+	if (window->end != 0 && window->end < when)
+		return false;
+
+	if (kwork->profile_name != NULL && work->name != NULL &&
+	    strcmp(work->name, kwork->profile_name) != 0)
+		return false;
+
+	profile_update_timespan(kwork, sample);
+	return true;
+}
+
+/*
+ * Record a new atom of type @src_type for the work described by @sample.
+ *
+ * The work is looked up (or created) in @class's tree using the cmp_id
+ * ordering. If @dst_type is a real trace type, the most recent atom queued
+ * under it is unlinked and chained behind the new atom through ->prev.
+ * On success the work is reported through @ret_work when that is non-NULL.
+ *
+ * Returns 0 on success or when the event is filtered out, -1 when the atom
+ * or the work cannot be allocated.
+ */
+static int work_push_atom(struct perf_kwork *kwork,
+			  struct kwork_class *class,
+			  enum kwork_trace_type src_type,
+			  enum kwork_trace_type dst_type,
+			  struct evsel *evsel,
+			  struct perf_sample *sample,
+			  struct machine *machine,
+			  struct kwork_work **ret_work)
+{
+	struct kwork_atom *atom, *dst_atom;
+	struct kwork_work *work, key;
+
+	BUG_ON(class->work_init == NULL);
+	class->work_init(class, &key, evsel, sample, machine);
+
+	atom = atom_new(kwork, sample);
+	if (atom == NULL)
+		return -1;
+
+	work = work_findnew(&class->work_root, &key, &kwork->cmp_id);
+	if (work == NULL) {
+		/*
+		 * Atoms are slots inside a kwork_atom_page, not separate heap
+		 * objects, so they must go back through atom_free(), never
+		 * free().
+		 */
+		atom_free(atom);
+		return -1;
+	}
+
+	if (!profile_event_match(kwork, work, sample)) {
+		/* filtered out: give the unused slot back to its page */
+		atom_free(atom);
+		return 0;
+	}
+
+	if (dst_type < KWORK_TRACE_MAX) {
+		dst_atom = list_last_entry_or_null(&work->atom_list[dst_type],
+						   struct kwork_atom, list);
+		if (dst_atom != NULL) {
+			atom->prev = dst_atom;
+			list_del(&dst_atom->list);
+		}
+	}
+
+	if (ret_work != NULL)
+		*ret_work = work;
+
+	list_add_tail(&atom->list, &work->atom_list[src_type]);
+
+	return 0;
+}
+
+/*
+ * Find the atom of type @dst_type that the current @src_type event pairs
+ * with.
+ *
+ * The work is looked up (or created) and stored in @ret_work when that is
+ * non-NULL. If the event passes the filters and an atom is queued under
+ * @dst_type, the most recent one is returned; it stays on its list. When no
+ * such atom exists, a new atom for the current event is queued under
+ * @src_type instead and NULL is returned. If that allocation fails,
+ * @ret_work is reset to NULL.
+ */
+static struct kwork_atom *work_pop_atom(struct perf_kwork *kwork,
+					struct kwork_class *class,
+					enum kwork_trace_type src_type,
+					enum kwork_trace_type dst_type,
+					struct evsel *evsel,
+					struct perf_sample *sample,
+					struct machine *machine,
+					struct kwork_work **ret_work)
+{
+	struct kwork_work *work, key;
+	struct kwork_atom *pending, *unpaired;
+
+	BUG_ON(class->work_init == NULL);
+	class->work_init(class, &key, evsel, sample, machine);
+
+	work = work_findnew(&class->work_root, &key, &kwork->cmp_id);
+	if (ret_work != NULL)
+		*ret_work = work;
+
+	if (work == NULL || !profile_event_match(kwork, work, sample))
+		return NULL;
+
+	pending = list_last_entry_or_null(&work->atom_list[dst_type],
+					  struct kwork_atom, list);
+	if (pending != NULL)
+		return pending;
+
+	unpaired = atom_new(kwork, sample);
+	if (unpaired == NULL) {
+		if (ret_work != NULL)
+			*ret_work = NULL;
+		return NULL;
+	}
+
+	list_add_tail(&unpaired->list, &work->atom_list[src_type]);
+	return NULL;
+}
+
+/*
+ * Account one completed run of @work: the time between the entry stored in
+ * @atom and the exit in @sample. Updates the maximum runtime (with its start
+ * and end timestamps), the total runtime and the atom count. Pairs with a
+ * zero entry time or an exit earlier than the entry are ignored.
+ */
+static void report_update_exit_event(struct kwork_work *work,
+				     struct kwork_atom *atom,
+				     struct perf_sample *sample)
+{
+	u64 started = atom->time;
+	u64 finished = sample->time;
+	u64 delta;
+
+	if (started == 0 || finished < started)
+		return;
+
+	delta = finished - started;
+	if (work->max_runtime == 0 || delta > work->max_runtime) {
+		work->max_runtime = delta;
+		work->max_runtime_start = started;
+		work->max_runtime_end = finished;
+	}
+
+	work->total_runtime += delta;
+	work->nr_atoms++;
+}
+
+/*
+ * Runtime report: queue an ENTRY atom for the work. KWORK_TRACE_MAX is
+ * passed as the destination type, so no earlier atom is chained behind it.
+ */
+static int report_entry_event(struct perf_kwork *kwork,
+ struct kwork_class *class,
+ struct evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return work_push_atom(kwork, class, KWORK_TRACE_ENTRY,
+ KWORK_TRACE_MAX, evsel, sample,
+ machine, NULL);
+}
+
+/*
+ * Runtime report: pair an EXIT event with the pending ENTRY atom, account
+ * the runtime and release the atom. Returns -1 when no work is available,
+ * 0 otherwise (including when there was no ENTRY atom to pair with).
+ */
+static int report_exit_event(struct perf_kwork *kwork,
+			     struct kwork_class *class,
+			     struct evsel *evsel,
+			     struct perf_sample *sample,
+			     struct machine *machine)
+{
+	struct kwork_work *work = NULL;
+	struct kwork_atom *entry;
+
+	entry = work_pop_atom(kwork, class, KWORK_TRACE_EXIT,
+			      KWORK_TRACE_ENTRY, evsel, sample,
+			      machine, &work);
+	if (work == NULL)
+		return -1;
+
+	if (entry == NULL)
+		return 0;
+
+	report_update_exit_event(work, entry, sample);
+	atom_del(entry);
+	return 0;
+}
+
+/*
+ * Account one latency sample for @work: the time between the raise stored
+ * in @atom and the entry in @sample. Updates the maximum latency (with its
+ * start and end timestamps), the total latency and the atom count. Pairs
+ * with a zero raise time or an entry earlier than the raise are ignored.
+ */
+static void latency_update_entry_event(struct kwork_work *work,
+				       struct kwork_atom *atom,
+				       struct perf_sample *sample)
+{
+	u64 raised = atom->time;
+	u64 entered = sample->time;
+	u64 delta;
+
+	if (raised == 0 || entered < raised)
+		return;
+
+	delta = entered - raised;
+	if (work->max_latency == 0 || delta > work->max_latency) {
+		work->max_latency = delta;
+		work->max_latency_start = raised;
+		work->max_latency_end = entered;
+	}
+
+	work->total_latency += delta;
+	work->nr_atoms++;
+}
+
+/*
+ * Latency report: queue a RAISE atom for the work. KWORK_TRACE_MAX is
+ * passed as the destination type, so no earlier atom is chained behind it.
+ */
+static int latency_raise_event(struct perf_kwork *kwork,
+ struct kwork_class *class,
+ struct evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return work_push_atom(kwork, class, KWORK_TRACE_RAISE,
+ KWORK_TRACE_MAX, evsel, sample,
+ machine, NULL);
+}
+
+/*
+ * Latency report: pair an ENTRY event with the pending RAISE atom, account
+ * the latency and release the atom. Returns -1 when no work is available,
+ * 0 otherwise (including when there was no RAISE atom to pair with).
+ */
+static int latency_entry_event(struct perf_kwork *kwork,
+			       struct kwork_class *class,
+			       struct evsel *evsel,
+			       struct perf_sample *sample,
+			       struct machine *machine)
+{
+	struct kwork_work *work = NULL;
+	struct kwork_atom *raise;
+
+	raise = work_pop_atom(kwork, class, KWORK_TRACE_ENTRY,
+			      KWORK_TRACE_RAISE, evsel, sample,
+			      machine, &work);
+	if (work == NULL)
+		return -1;
+
+	if (raise == NULL)
+		return 0;
+
+	latency_update_entry_event(work, raise, sample);
+	atom_del(raise);
+	return 0;
+}
+
+/*
+ * Resolve @sample's callchain into the global callchain_cursor and mark the
+ * softirq entry symbols as ignored.
+ *
+ * Does nothing when callchains are not being shown or the sample carries
+ * none. Failures to find the thread or to resolve the chain are only logged
+ * at debug level.
+ */
+static void timehist_save_callchain(struct perf_kwork *kwork,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct machine *machine)
+{
+ struct symbol *sym;
+ struct thread *thread;
+ struct callchain_cursor_node *node;
+ struct callchain_cursor *cursor = &callchain_cursor;
+
+ if (!kwork->show_callchain || sample->callchain == NULL)
+ return;
+
+ /* want main thread for process - has maps */
+ thread = machine__findnew_thread(machine, sample->pid, sample->pid);
+ if (thread == NULL) {
+ pr_debug("Failed to get thread for pid %d\n", sample->pid);
+ return;
+ }
+
+ /* NOTE(review): the reason for the "+ 2" on max_stack is not visible here */
+ if (thread__resolve_callchain(thread, cursor, evsel, sample,
+ NULL, NULL, kwork->max_stack + 2) != 0) {
+ pr_debug("Failed to resolve callchain, skipping\n");
+ goto out_put;
+ }
+
+ callchain_cursor_commit(cursor);
+
+ /* walk every node of the resolved chain */
+ while (true) {
+ node = callchain_cursor_current(cursor);
+ if (node == NULL)
+ break;
+
+ sym = node->ms.sym;
+ if (sym) {
+ if (!strcmp(sym->name, "__softirqentry_text_start") ||
+ !strcmp(sym->name, "__do_softirq"))
+ sym->ignore = 1;
+ }
+
+ callchain_cursor_advance(cursor);
+ }
+
+out_put:
+ /* drop the reference taken by machine__findnew_thread() */
+ thread__put(thread);
+}
+
+/*
+ * Print one timehist row for a completed run of @work.
+ *
+ * Columns, in order: runtime start (@atom's time), runtime end (@sample's
+ * time), cpu, work name, runtime in msec, delay in msec (only when @atom
+ * has a chained ->prev atom) and, if enabled, the callchain on one line.
+ */
+static void timehist_print_event(struct perf_kwork *kwork,
+ struct kwork_work *work,
+ struct kwork_atom *atom,
+ struct perf_sample *sample,
+ struct addr_location *al)
+{
+ char entrytime[32], exittime[32];
+ char kwork_name[PRINT_KWORK_NAME_WIDTH];
+
+ /*
+ * runtime start
+ */
+ timestamp__scnprintf_usec(atom->time,
+ entrytime, sizeof(entrytime));
+ printf(" %*s ", PRINT_TIMESTAMP_WIDTH, entrytime);
+
+ /*
+ * runtime end
+ */
+ timestamp__scnprintf_usec(sample->time,
+ exittime, sizeof(exittime));
+ printf(" %*s ", PRINT_TIMESTAMP_WIDTH, exittime);
+
+ /*
+ * cpu
+ */
+ printf(" [%0*d] ", PRINT_CPU_WIDTH, work->cpu);
+
+ /*
+ * kwork name
+ */
+ if (work->class && work->class->work_name) {
+ work->class->work_name(work, kwork_name,
+ PRINT_KWORK_NAME_WIDTH);
+ printf(" %-*s ", PRINT_KWORK_NAME_WIDTH, kwork_name);
+ } else
+ printf(" %-*s ", PRINT_KWORK_NAME_WIDTH, "");
+
+ /*
+ * runtime
+ */
+ printf(" %*.*f ",
+ PRINT_RUNTIME_WIDTH, RPINT_DECIMAL_WIDTH,
+ (double)(sample->time - atom->time) / NSEC_PER_MSEC);
+
+ /*
+ * delaytime
+ */
+ if (atom->prev != NULL)
+ printf(" %*.*f ", PRINT_LATENCY_WIDTH, RPINT_DECIMAL_WIDTH,
+ (double)(atom->time - atom->prev->time) / NSEC_PER_MSEC);
+ else
+ printf(" %*s ", PRINT_LATENCY_WIDTH, " ");
+
+ /*
+ * callchain
+ */
+ if (kwork->show_callchain) {
+ printf(" ");
+ sample__fprintf_sym(sample, al, 0,
+ EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE |
+ EVSEL__PRINT_CALLCHAIN_ARROW |
+ EVSEL__PRINT_SKIP_IGNORED,
+ &callchain_cursor, symbol_conf.bt_stop_list,
+ stdout);
+ }
+
+ printf("\n");
+}
+
+/*
+ * Timehist: queue a RAISE atom for the work. KWORK_TRACE_MAX is passed as
+ * the destination type, so no earlier atom is chained behind it.
+ */
+static int timehist_raise_event(struct perf_kwork *kwork,
+ struct kwork_class *class,
+ struct evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return work_push_atom(kwork, class, KWORK_TRACE_RAISE,
+ KWORK_TRACE_MAX, evsel, sample,
+ machine, NULL);
+}
+
+/*
+ * Timehist: queue an ENTRY atom, chaining the pending RAISE atom (if any)
+ * behind it, and resolve the callchain when the event was accepted.
+ * Returns the error from work_push_atom(), or 0.
+ */
+static int timehist_entry_event(struct perf_kwork *kwork,
+				struct kwork_class *class,
+				struct evsel *evsel,
+				struct perf_sample *sample,
+				struct machine *machine)
+{
+	struct kwork_work *work = NULL;
+	int err;
+
+	err = work_push_atom(kwork, class, KWORK_TRACE_ENTRY,
+			     KWORK_TRACE_RAISE, evsel, sample,
+			     machine, &work);
+	if (err != 0)
+		return err;
+
+	/* work stays NULL when the event was filtered out */
+	if (work == NULL)
+		return 0;
+
+	timehist_save_callchain(kwork, sample, evsel, machine);
+	return 0;
+}
+
+/*
+ * Timehist: pair an EXIT event with the pending ENTRY atom, print the row
+ * for the completed run and release the atom.
+ *
+ * Returns -1 when the sample cannot be resolved or no work is available,
+ * 0 otherwise (including when there was no ENTRY atom to pair with).
+ */
+static int timehist_exit_event(struct perf_kwork *kwork,
+			       struct kwork_class *class,
+			       struct evsel *evsel,
+			       struct perf_sample *sample,
+			       struct machine *machine)
+{
+	struct kwork_atom *atom = NULL;
+	struct kwork_work *work = NULL;
+	struct addr_location al;
+	int ret = -1;
+
+	if (machine__resolve(machine, &al, sample) < 0) {
+		pr_debug("Problem processing event, skipping it\n");
+		return -1;
+	}
+
+	atom = work_pop_atom(kwork, class, KWORK_TRACE_EXIT,
+			     KWORK_TRACE_ENTRY, evsel, sample,
+			     machine, &work);
+	if (work == NULL)
+		goto out_put;
+
+	if (atom != NULL) {
+		work->nr_atoms++;
+		timehist_print_event(kwork, work, atom, sample, &al);
+		atom_del(atom);
+	}
+
+	ret = 0;
+
+out_put:
+	/*
+	 * A successful machine__resolve() leaves a thread reference in al;
+	 * drop it on every path so it is not leaked once per exit event.
+	 */
+	addr_location__put(&al);
+	return ret;
+}
+
+/* forward declaration: the handlers below need the class defined later */
+static struct kwork_class kwork_irq;
+
+/*
+ * Callback for irq:irq_handler_entry: forward to the active report's
+ * entry handler for the irq class, if it has one.
+ */
+static int process_irq_handler_entry_event(struct perf_tool *tool,
+					    struct evsel *evsel,
+					    struct perf_sample *sample,
+					    struct machine *machine)
+{
+	struct perf_kwork *kwork = container_of(tool, struct perf_kwork, tool);
+
+	if (kwork->tp_handler->entry_event == NULL)
+		return 0;
+
+	return kwork->tp_handler->entry_event(kwork, &kwork_irq,
+					      evsel, sample, machine);
+}
+
+/*
+ * Callback for irq:irq_handler_exit: forward to the active report's exit
+ * handler for the irq class, if it has one.
+ */
+static int process_irq_handler_exit_event(struct perf_tool *tool,
+					   struct evsel *evsel,
+					   struct perf_sample *sample,
+					   struct machine *machine)
+{
+	struct perf_kwork *kwork = container_of(tool, struct perf_kwork, tool);
+
+	if (kwork->tp_handler->exit_event == NULL)
+		return 0;
+
+	return kwork->tp_handler->exit_event(kwork, &kwork_irq,
+					     evsel, sample, machine);
+}
+
+/* tracepoint name -> callback table for the irq class */
+const struct evsel_str_handler irq_tp_handlers[] = {
+ { "irq:irq_handler_entry", process_irq_handler_entry_event, },
+ { "irq:irq_handler_exit", process_irq_handler_exit_event, },
+};
+
+/*
+ * Install the irq tracepoint callbacks on @session and start the class
+ * with an empty work tree. Returns 0 on success, -1 on failure.
+ */
+static int irq_class_init(struct kwork_class *class,
+			  struct perf_session *session)
+{
+	int err = perf_session__set_tracepoints_handlers(session, irq_tp_handlers);
+
+	if (err) {
+		pr_err("Failed to set irq tracepoints handlers\n");
+		return -1;
+	}
+
+	class->work_root = RB_ROOT_CACHED;
+	return 0;
+}
+
+/*
+ * Fill @work from an irq tracepoint sample: the "irq" field becomes the id
+ * and the "name" field the name; the cpu comes from the sample itself.
+ */
+static void irq_work_init(struct kwork_class *class,
+			  struct kwork_work *work,
+			  struct evsel *evsel,
+			  struct perf_sample *sample,
+			  struct machine *machine __maybe_unused)
+{
+	work->id = evsel__intval(evsel, sample, "irq");
+	work->name = evsel__strval(evsel, sample, "name");
+	work->cpu = sample->cpu;
+	work->class = class;
+}
+
+/*
+ * Format the display name of an irq work into @buf (at most @len bytes)
+ * as "<name>:<id>".
+ */
+static void irq_work_name(struct kwork_work *work, char *buf, int len)
+{
+ snprintf(buf, len, "%s:%" PRIu64 "", work->name, work->id);
+}
+
+/* class descriptor for hard irqs; nr_tracepoints matches irq_tp_handlers */
+static struct kwork_class kwork_irq = {
+ .name = "irq",
+ .type = KWORK_CLASS_IRQ,
+ .nr_tracepoints = 2,
+ .tp_handlers = irq_tp_handlers,
+ .class_init = irq_class_init,
+ .work_init = irq_work_init,
+ .work_name = irq_work_name,
+};
+
+/* forward declaration: the handlers below need the class defined later */
+static struct kwork_class kwork_softirq;
+
+/*
+ * Callback for irq:softirq_raise: forward to the active report's raise
+ * handler for the softirq class, if it has one.
+ */
+static int process_softirq_raise_event(struct perf_tool *tool,
+					struct evsel *evsel,
+					struct perf_sample *sample,
+					struct machine *machine)
+{
+	struct perf_kwork *kwork = container_of(tool, struct perf_kwork, tool);
+
+	if (kwork->tp_handler->raise_event == NULL)
+		return 0;
+
+	return kwork->tp_handler->raise_event(kwork, &kwork_softirq,
+					      evsel, sample, machine);
+}
+
+/*
+ * Callback for irq:softirq_entry: forward to the active report's entry
+ * handler for the softirq class, if it has one.
+ */
+static int process_softirq_entry_event(struct perf_tool *tool,
+					struct evsel *evsel,
+					struct perf_sample *sample,
+					struct machine *machine)
+{
+	struct perf_kwork *kwork = container_of(tool, struct perf_kwork, tool);
+
+	if (kwork->tp_handler->entry_event == NULL)
+		return 0;
+
+	return kwork->tp_handler->entry_event(kwork, &kwork_softirq,
+					      evsel, sample, machine);
+}
+
+/*
+ * Callback for irq:softirq_exit: forward to the active report's exit
+ * handler for the softirq class, if it has one.
+ */
+static int process_softirq_exit_event(struct perf_tool *tool,
+				       struct evsel *evsel,
+				       struct perf_sample *sample,
+				       struct machine *machine)
+{
+	struct perf_kwork *kwork = container_of(tool, struct perf_kwork, tool);
+
+	if (kwork->tp_handler->exit_event == NULL)
+		return 0;
+
+	return kwork->tp_handler->exit_event(kwork, &kwork_softirq,
+					     evsel, sample, machine);
+}
+
+/* tracepoint name -> callback table for the softirq class */
+const struct evsel_str_handler softirq_tp_handlers[] = {
+ { "irq:softirq_raise", process_softirq_raise_event, },
+ { "irq:softirq_entry", process_softirq_entry_event, },
+ { "irq:softirq_exit", process_softirq_exit_event, },
+};
+
+/*
+ * Install the softirq tracepoint callbacks on @session and start the class
+ * with an empty work tree. Returns 0 on success, -1 on failure.
+ */
+static int softirq_class_init(struct kwork_class *class,
+			      struct perf_session *session)
+{
+	int err = perf_session__set_tracepoints_handlers(session,
+							 softirq_tp_handlers);
+
+	if (err) {
+		pr_err("Failed to set softirq tracepoints handlers\n");
+		return -1;
+	}
+
+	class->work_root = RB_ROOT_CACHED;
+	return 0;
+}
+
+/*
+ * Translate softirq vector @num into its symbolic name, using the symbol
+ * table attached to the second print argument of @evsel's tracepoint
+ * format.
+ *
+ * Returns a newly allocated copy of the name, or NULL when the format has
+ * fewer than two print arguments, no non-empty symbol matches @num, or the
+ * copy cannot be allocated.
+ */
+static char *evsel__softirq_name(struct evsel *evsel, u64 num)
+{
+	struct tep_print_arg *args = evsel->tp_format->print_fmt.args;
+	struct tep_print_flag_sym *sym;
+	char *name;
+
+	if (args == NULL || args->next == NULL)
+		return NULL;
+
+	/* skip softirq field: "REC->vec" */
+	sym = args->next->symbol.symbols;
+	while (sym != NULL) {
+		if ((eval_flag(sym->value) == (unsigned long long)num) &&
+		    (strlen(sym->str) != 0))
+			break;
+		sym = sym->next;
+	}
+
+	/* ran off the end of the table: no match */
+	if (sym == NULL)
+		return NULL;
+
+	name = strdup(sym->str);
+	if (name == NULL)
+		pr_err("Failed to copy symbol name\n");
+
+	return name;
+}
+
+/*
+ * Fill @work from a softirq tracepoint sample: the "vec" field is the id
+ * and is also translated into the work's name; the cpu comes from the
+ * sample itself.
+ */
+static void softirq_work_init(struct kwork_class *class,
+			      struct kwork_work *work,
+			      struct evsel *evsel,
+			      struct perf_sample *sample,
+			      struct machine *machine __maybe_unused)
+{
+	u64 vec = evsel__intval(evsel, sample, "vec");
+
+	work->class = class;
+	work->cpu = sample->cpu;
+	work->id = vec;
+	work->name = evsel__softirq_name(evsel, vec);
+}
+
+/*
+ * Format the display name of a softirq work into @buf (at most @len
+ * bytes) as "(s)<name>:<vec>".
+ *
+ * The name is NULL when the vector could not be translated; in that case
+ * only the vector number is printed, so NULL is never passed to "%s".
+ */
+static void softirq_work_name(struct kwork_work *work, char *buf, int len)
+{
+	if (work->name != NULL)
+		snprintf(buf, len, "(s)%s:%" PRIu64 "", work->name, work->id);
+	else
+		snprintf(buf, len, "(s)%" PRIu64, work->id);
+}
+
+/* class descriptor for softirqs; nr_tracepoints matches softirq_tp_handlers */
+static struct kwork_class kwork_softirq = {
+ .name = "softirq",
+ .type = KWORK_CLASS_SOFTIRQ,
+ .nr_tracepoints = 3,
+ .tp_handlers = softirq_tp_handlers,
+ .class_init = softirq_class_init,
+ .work_init = softirq_work_init,
+ .work_name = softirq_work_name,
+};
+
+/* forward declaration: the handlers below need the class defined later */
+static struct kwork_class kwork_workqueue;
+
+/*
+ * Callback for workqueue:workqueue_activate_work: forward to the active
+ * report's raise handler for the workqueue class, if it has one.
+ */
+static int process_workqueue_activate_work_event(struct perf_tool *tool,
+						  struct evsel *evsel,
+						  struct perf_sample *sample,
+						  struct machine *machine)
+{
+	struct perf_kwork *kwork = container_of(tool, struct perf_kwork, tool);
+
+	if (kwork->tp_handler->raise_event == NULL)
+		return 0;
+
+	return kwork->tp_handler->raise_event(kwork, &kwork_workqueue,
+					      evsel, sample, machine);
+}
+
+/*
+ * Callback for workqueue:workqueue_execute_start: forward to the active
+ * report's entry handler for the workqueue class, if it has one.
+ */
+static int process_workqueue_execute_start_event(struct perf_tool *tool,
+						  struct evsel *evsel,
+						  struct perf_sample *sample,
+						  struct machine *machine)
+{
+	struct perf_kwork *kwork = container_of(tool, struct perf_kwork, tool);
+
+	if (kwork->tp_handler->entry_event == NULL)
+		return 0;
+
+	return kwork->tp_handler->entry_event(kwork, &kwork_workqueue,
+					      evsel, sample, machine);
+}
+
+/*
+ * Callback for workqueue:workqueue_execute_end: forward to the active
+ * report's exit handler for the workqueue class, if it has one.
+ */
+static int process_workqueue_execute_end_event(struct perf_tool *tool,
+						struct evsel *evsel,
+						struct perf_sample *sample,
+						struct machine *machine)
+{
+	struct perf_kwork *kwork = container_of(tool, struct perf_kwork, tool);
+
+	if (kwork->tp_handler->exit_event == NULL)
+		return 0;
+
+	return kwork->tp_handler->exit_event(kwork, &kwork_workqueue,
+					     evsel, sample, machine);
+}
+
+/* tracepoint name -> callback table for the workqueue class */
+const struct evsel_str_handler workqueue_tp_handlers[] = {
+ { "workqueue:workqueue_activate_work", process_workqueue_activate_work_event, },
+ { "workqueue:workqueue_execute_start", process_workqueue_execute_start_event, },
+ { "workqueue:workqueue_execute_end", process_workqueue_execute_end_event, },
+};
+
+/*
+ * Install the workqueue tracepoint callbacks on @session and start the
+ * class with an empty work tree. Returns 0 on success, -1 on failure.
+ */
+static int workqueue_class_init(struct kwork_class *class,
+				struct perf_session *session)
+{
+	int err = perf_session__set_tracepoints_handlers(session,
+							 workqueue_tp_handlers);
+
+	if (err) {
+		pr_err("Failed to set workqueue tracepoints handlers\n");
+		return -1;
+	}
+
+	class->work_root = RB_ROOT_CACHED;
+	return 0;
+}
+
+/*
+ * Fill @work from a workqueue tracepoint sample: the "work" field is the
+ * id, and the "function" address is resolved to a kernel symbol name. A
+ * zero function address leaves the name NULL.
+ */
+static void workqueue_work_init(struct kwork_class *class,
+				struct kwork_work *work,
+				struct evsel *evsel,
+				struct perf_sample *sample,
+				struct machine *machine)
+{
+	char *modp = NULL;
+	unsigned long long function_addr;
+
+	function_addr = evsel__intval(evsel, sample, "function");
+
+	work->class = class;
+	work->cpu = sample->cpu;
+	work->id = evsel__intval(evsel, sample, "work");
+
+	if (function_addr == 0)
+		work->name = NULL;
+	else
+		work->name = machine__resolve_kernel_addr(machine,
+							   &function_addr, &modp);
+}
+
+/*
+ * Format the display name of a workqueue work into @buf (at most @len
+ * bytes): "(w)<name>" when the name is known, otherwise "(w)0x<id>" in hex.
+ */
+static void workqueue_work_name(struct kwork_work *work, char *buf, int len)
+{
+	if (work->name == NULL) {
+		snprintf(buf, len, "(w)0x%" PRIx64, work->id);
+		return;
+	}
+
+	snprintf(buf, len, "(w)%s", work->name);
+}
+
+/* class descriptor for workqueues; nr_tracepoints matches workqueue_tp_handlers */
+static struct kwork_class kwork_workqueue = {
+ .name = "workqueue",
+ .type = KWORK_CLASS_WORKQUEUE,
+ .nr_tracepoints = 3,
+ .tp_handlers = workqueue_tp_handlers,
+ .class_init = workqueue_class_init,
+ .work_init = workqueue_work_init,
+ .work_name = workqueue_work_name,
+};
+
+/* every supported class descriptor, indexed by its KWORK_CLASS_* type */
+static struct kwork_class *kwork_class_supported_list[KWORK_CLASS_MAX] = {
+ [KWORK_CLASS_IRQ] = &kwork_irq,
+ [KWORK_CLASS_SOFTIRQ] = &kwork_softirq,
+ [KWORK_CLASS_WORKQUEUE] = &kwork_workqueue,
+};
+
+/* Print a separator row made of the first @len characters of graph_dotted_line. */
+static void print_separator(int len)
+{
+ printf(" %.*s\n", len, graph_dotted_line);
+}
+
+/*
+ * Print one table row for @work in the runtime or latency report.
+ *
+ * Columns: work name, cpu, total runtime (runtime report) or average delay
+ * (latency report), count, then the maximum value with its start and end
+ * timestamps.
+ *
+ * Returns the number of characters printed for the columns; the leading
+ * indent and the trailing newline are not counted.
+ */
+static int report_print_work(struct perf_kwork *kwork, struct kwork_work *work)
+{
+ int ret = 0;
+ char kwork_name[PRINT_KWORK_NAME_WIDTH];
+ char max_runtime_start[32], max_runtime_end[32];
+ char max_latency_start[32], max_latency_end[32];
+
+ printf(" ");
+
+ /*
+ * kwork name
+ */
+ if (work->class && work->class->work_name) {
+ work->class->work_name(work, kwork_name,
+ PRINT_KWORK_NAME_WIDTH);
+ ret += printf(" %-*s |", PRINT_KWORK_NAME_WIDTH, kwork_name);
+ } else {
+ ret += printf(" %-*s |", PRINT_KWORK_NAME_WIDTH, "");
+ }
+
+ /*
+ * cpu
+ */
+ ret += printf(" %0*d |", PRINT_CPU_WIDTH, work->cpu);
+
+ /*
+ * total runtime
+ */
+ if (kwork->report == KWORK_REPORT_RUNTIME) {
+ ret += printf(" %*.*f ms |",
+ PRINT_RUNTIME_WIDTH, RPINT_DECIMAL_WIDTH,
+ (double)work->total_runtime / NSEC_PER_MSEC);
+ } else if (kwork->report == KWORK_REPORT_LATENCY) { // avg delay
+ /*
+ * NOTE(review): nr_atoms == 0 gives a non-finite value here;
+ * presumably callers skip such works - confirm.
+ */
+ ret += printf(" %*.*f ms |",
+ PRINT_LATENCY_WIDTH, RPINT_DECIMAL_WIDTH,
+ (double)work->total_latency /
+ work->nr_atoms / NSEC_PER_MSEC);
+ }
+
+ /*
+ * count
+ */
+ ret += printf(" %*" PRIu64 " |", PRINT_COUNT_WIDTH, work->nr_atoms);
+
+ /*
+ * max runtime, max runtime start, max runtime end
+ */
+ if (kwork->report == KWORK_REPORT_RUNTIME) {
+ timestamp__scnprintf_usec(work->max_runtime_start,
+ max_runtime_start,
+ sizeof(max_runtime_start));
+ timestamp__scnprintf_usec(work->max_runtime_end,
+ max_runtime_end,
+ sizeof(max_runtime_end));
+ ret += printf(" %*.*f ms | %*s s | %*s s |",
+ PRINT_RUNTIME_WIDTH, RPINT_DECIMAL_WIDTH,
+ (double)work->max_runtime / NSEC_PER_MSEC,
+ PRINT_TIMESTAMP_WIDTH, max_runtime_start,
+ PRINT_TIMESTAMP_WIDTH, max_runtime_end);
+ }
+ /*
+ * max delay, max delay start, max delay end
+ */
+ else if (kwork->report == KWORK_REPORT_LATENCY) {
+ timestamp__scnprintf_usec(work->max_latency_start,
+ max_latency_start,
+ sizeof(max_latency_start));
+ timestamp__scnprintf_usec(work->max_latency_end,
+ max_latency_end,
+ sizeof(max_latency_end));
+ ret += printf(" %*.*f ms | %*s s | %*s s |",
+ PRINT_LATENCY_WIDTH, RPINT_DECIMAL_WIDTH,
+ (double)work->max_latency / NSEC_PER_MSEC,
+ PRINT_TIMESTAMP_WIDTH, max_latency_start,
+ PRINT_TIMESTAMP_WIDTH, max_latency_end);
+ }
+
+ printf("\n");
+ return ret;
+}
+
+/*
+ * Print the column header of the runtime or latency report, followed by a
+ * separator row of the same width.
+ *
+ * Returns the number of characters printed for the header columns; the
+ * leading indent and the newline are not counted.
+ */
+static int report_print_header(struct perf_kwork *kwork)
+{
+ int ret;
+
+ printf("\n ");
+ ret = printf(" %-*s | %-*s |",
+ PRINT_KWORK_NAME_WIDTH, "Kwork Name",
+ PRINT_CPU_WIDTH, "Cpu");
+
+ if (kwork->report == KWORK_REPORT_RUNTIME) {
+ ret += printf(" %-*s |",
+ PRINT_RUNTIME_HEADER_WIDTH, "Total Runtime");
+ } else if (kwork->report == KWORK_REPORT_LATENCY) {
+ ret += printf(" %-*s |",
+ PRINT_LATENCY_HEADER_WIDTH, "Avg delay");
+ }
+
+ ret += printf(" %-*s |", PRINT_COUNT_WIDTH, "Count");
+
+ if (kwork->report == KWORK_REPORT_RUNTIME) {
+ ret += printf(" %-*s | %-*s | %-*s |",
+ PRINT_RUNTIME_HEADER_WIDTH, "Max runtime",
+ PRINT_TIMESTAMP_HEADER_WIDTH, "Max runtime start",
+ PRINT_TIMESTAMP_HEADER_WIDTH, "Max runtime end");
+ } else if (kwork->report == KWORK_REPORT_LATENCY) {
+ ret += printf(" %-*s | %-*s | %-*s |",
+ PRINT_LATENCY_HEADER_WIDTH, "Max delay",
+ PRINT_TIMESTAMP_HEADER_WIDTH, "Max delay start",
+ PRINT_TIMESTAMP_HEADER_WIDTH, "Max delay end");
+ }
+
+ printf("\n");
+ /* the separator is as wide as the header columns just printed */
+ print_separator(ret);
+ return ret;
+}
+
+/*
+ * Print the three-line timehist header: column titles, a units/format row
+ * and a dotted separator, using the same widths for every column.
+ */
+static void timehist_print_header(void)
+{
+ /*
+ * header row
+ */
+ printf(" %-*s %-*s %-*s %-*s %-*s %-*s\n",
+ PRINT_TIMESTAMP_WIDTH, "Runtime start",
+ PRINT_TIMESTAMP_WIDTH, "Runtime end",
+ PRINT_TIMEHIST_CPU_WIDTH, "Cpu",
+ PRINT_KWORK_NAME_WIDTH, "Kwork name",
+ PRINT_RUNTIME_WIDTH, "Runtime",
+ PRINT_RUNTIME_WIDTH, "Delaytime");
+
+ /*
+ * units row
+ */
+ printf(" %-*s %-*s %-*s %-*s %-*s %-*s\n",
+ PRINT_TIMESTAMP_WIDTH, "",
+ PRINT_TIMESTAMP_WIDTH, "",
+ PRINT_TIMEHIST_CPU_WIDTH, "",
+ PRINT_KWORK_NAME_WIDTH, "(TYPE)NAME:NUM",
+ PRINT_RUNTIME_WIDTH, "(msec)",
+ PRINT_RUNTIME_WIDTH, "(msec)");
+
+ /*
+ * separator
+ */
+ printf(" %.*s %.*s %.*s %.*s %.*s %.*s\n",
+ PRINT_TIMESTAMP_WIDTH, graph_dotted_line,
+ PRINT_TIMESTAMP_WIDTH, graph_dotted_line,
+ PRINT_TIMEHIST_CPU_WIDTH, graph_dotted_line,
+ PRINT_KWORK_NAME_WIDTH, graph_dotted_line,
+ PRINT_RUNTIME_WIDTH, graph_dotted_line,
+ PRINT_RUNTIME_WIDTH, graph_dotted_line);
+}
+
+/*
+ * Print the summary block: total count, total runtime in msec together
+ * with the runtime-to-time-span ratio, and the time span in msec.
+ *
+ * NOTE(review): the ratio is printed with a '%' sign but is not multiplied
+ * by 100 - confirm the intended unit.
+ */
+static void print_summary(struct perf_kwork *kwork)
+{
+ u64 time = kwork->timeend - kwork->timestart;
+
+ printf(" Total count : %9" PRIu64 "\n", kwork->all_count);
+ /* a zero time span prints a ratio of 0 instead of dividing by zero */
+ printf(" Total runtime (msec) : %9.3f (%.3f%% load average)\n",
+ (double)kwork->all_runtime / NSEC_PER_MSEC,
+ time == 0 ? 0 : (double)kwork->all_runtime / time);
+ printf(" Total time span (msec) : %9.3f\n",
+ (double)time / NSEC_PER_MSEC);
+}
+
+/* Count the entries on the circular list @head (the head is not counted). */
+static unsigned long long nr_list_entry(struct list_head *head)
+{
+	unsigned long long count = 0;
+	struct list_head *node;
+
+	for (node = head->next; node != head; node = node->next)
+		count++;
+
+	return count;
+}
+
+/*
+ * Report skipped events: the counter at index KWORK_TRACE_MAX is printed as
+ * a percentage of all events, followed by the per-type counters (raise,
+ * entry, exit). With verbose output enabled, the number of atom pages in
+ * use is printed as well.
+ */
+static void print_skipped_events(struct perf_kwork *kwork)
+{
+ int i;
+ const char *const kwork_event_str[] = {
+ [KWORK_TRACE_RAISE] = "raise",
+ [KWORK_TRACE_ENTRY] = "entry",
+ [KWORK_TRACE_EXIT] = "exit",
+ };
+
+ /* NOTE(review): slot KWORK_TRACE_MAX presumably holds the overall total - confirm */
+ if ((kwork->nr_skipped_events[KWORK_TRACE_MAX] != 0) &&
+ (kwork->nr_events != 0)) {
+ printf(" INFO: %.3f%% skipped events (%" PRIu64 " including ",
+ (double)kwork->nr_skipped_events[KWORK_TRACE_MAX] /
+ (double)kwork->nr_events * 100.0,
+ kwork->nr_skipped_events[KWORK_TRACE_MAX]);
+
+ /* the last per-type counter closes the parenthesis and the line */
+ for (i = 0; i < KWORK_TRACE_MAX; i++) {
+ printf("%" PRIu64 " %s%s",
+ kwork->nr_skipped_events[i],
+ kwork_event_str[i],
+ (i == KWORK_TRACE_MAX - 1) ? ")\n" : ", ");
+ }
+ }
+
+ if (verbose > 0)
+ printf(" INFO: use %lld atom pages\n",
+ nr_list_entry(&kwork->atom_page_list));
+}
+
+/*
+ * Report lost events as a percentage of all events, with the raw counts
+ * and the number of lost chunks. Prints nothing when no events were lost
+ * or no events were seen at all.
+ */
+static void print_bad_events(struct perf_kwork *kwork)
+{
+ if ((kwork->nr_lost_events != 0) && (kwork->nr_events != 0)) {
+ printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
+ (double)kwork->nr_lost_events /
+ (double)kwork->nr_events * 100.0,
+ kwork->nr_lost_events, kwork->nr_events,
+ kwork->nr_lost_chunks);
+ }
+}
+
+/*
+ * Move every work of @class out of the class tree and into
+ * kwork->sorted_work_root, ordered by the user's sort list. The class tree
+ * is empty afterwards.
+ */
+static void work_sort(struct perf_kwork *kwork, struct kwork_class *class)
+{
+	struct rb_root_cached *root = &class->work_root;
+	struct rb_node *node;
+
+	pr_debug("Sorting %s ...\n", class->name);
+
+	while ((node = rb_first_cached(root)) != NULL) {
+		rb_erase_cached(node, root);
+		work_insert(&kwork->sorted_work_root,
+			    rb_entry(node, struct kwork_work, node),
+			    &kwork->sort_list);
+	}
+}
+
+/* Sort the works of every class on kwork->class_list into the sorted tree. */
+static void perf_kwork__sort(struct perf_kwork *kwork)
+{
+ struct kwork_class *class;
+
+ list_for_each_entry(class, &kwork->class_list, list)
+ work_sort(kwork, class);
+}
+
+/*
+ * Validate and apply the report configuration before events are processed:
+ *  - pick the tracepoint handler set matching kwork->report,
+ *  - run each selected class' class_init() hook (if it has one),
+ *  - parse the optional cpu list and time span,
+ *  - turn callchain display off if any evsel lacks callchains.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+static int perf_kwork__check_config(struct perf_kwork *kwork,
+				    struct perf_session *session)
+{
+	static struct trace_kwork_handler report_ops = {
+		.entry_event = report_entry_event,
+		.exit_event  = report_exit_event,
+	};
+	static struct trace_kwork_handler latency_ops = {
+		.raise_event = latency_raise_event,
+		.entry_event = latency_entry_event,
+	};
+	static struct trace_kwork_handler timehist_ops = {
+		.raise_event = timehist_raise_event,
+		.entry_event = timehist_entry_event,
+		.exit_event  = timehist_exit_event,
+	};
+	struct kwork_class *cls;
+	struct evsel *pos;
+
+	if (kwork->report == KWORK_REPORT_RUNTIME) {
+		kwork->tp_handler = &report_ops;
+	} else if (kwork->report == KWORK_REPORT_LATENCY) {
+		kwork->tp_handler = &latency_ops;
+	} else if (kwork->report == KWORK_REPORT_TIMEHIST) {
+		kwork->tp_handler = &timehist_ops;
+	} else {
+		pr_debug("Invalid report type %d\n", kwork->report);
+		return -1;
+	}
+
+	/* class_init is optional; a non-zero result aborts the setup */
+	list_for_each_entry(cls, &kwork->class_list, list) {
+		if (cls->class_init == NULL)
+			continue;
+		if (cls->class_init(cls, session) != 0)
+			return -1;
+	}
+
+	if (kwork->cpu_list != NULL &&
+	    perf_session__cpu_bitmap(session, kwork->cpu_list,
+				     kwork->cpu_bitmap) < 0) {
+		pr_err("Invalid cpu bitmap\n");
+		return -1;
+	}
+
+	if (kwork->time_str != NULL &&
+	    perf_time__parse_str(&kwork->ptime, kwork->time_str) != 0) {
+		pr_err("Invalid time span\n");
+		return -1;
+	}
+
+	list_for_each_entry(pos, &session->evlist->core.entries, core.node) {
+		if (!kwork->show_callchain || evsel__has_callchain(pos))
+			continue;
+
+		pr_debug("Samples do not have callchains\n");
+		kwork->show_callchain = 0;
+		symbol_conf.use_callchain = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Open the input perf data file, validate the configuration, process all
+ * events and copy the event/lost counters into @kwork.
+ *
+ * Returns 0 on success.  Session creation failure returns
+ * PTR_ERR(session); a failed config check or resolver setup returns -1;
+ * a processing failure returns the error from
+ * perf_session__process_events().
+ */
+static int perf_kwork__read_events(struct perf_kwork *kwork)
+{
+	struct perf_data data = {
+		.path  = input_name,
+		.mode  = PERF_DATA_MODE_READ,
+		.force = kwork->force,
+	};
+	struct perf_session *session;
+	int err = -1;
+
+	session = perf_session__new(&data, &kwork->tool);
+	if (IS_ERR(session)) {
+		pr_debug("Error creating perf session\n");
+		return PTR_ERR(session);
+	}
+
+	symbol__init(&session->header.env);
+
+	if (perf_kwork__check_config(kwork, session) != 0)
+		goto delete_session;
+
+	/* the resolver is only installed when a pevent handle exists */
+	if (session->tevent.pevent) {
+		int rc = tep_set_function_resolver(session->tevent.pevent,
+						   machine__resolve_kernel_addr,
+						   &session->machines.host);
+
+		if (rc < 0) {
+			pr_err("Failed to set libtraceevent function resolver\n");
+			goto delete_session;
+		}
+	}
+
+	if (kwork->report == KWORK_REPORT_TIMEHIST)
+		timehist_print_header();
+
+	err = perf_session__process_events(session);
+	if (err != 0) {
+		pr_debug("Failed to process events, error %d\n", err);
+		goto delete_session;
+	}
+
+	kwork->nr_events      = session->evlist->stats.nr_events[0];
+	kwork->nr_lost_events = session->evlist->stats.total_lost;
+	kwork->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST];
+
+delete_session:
+	perf_session__delete(session);
+	return err;
+}
+
+/*
+ * Add the number of atoms still queued on each of @work's per-type atom
+ * lists to the matching kwork->nr_skipped_events[] slot, and to the extra
+ * slot at index KWORK_TRACE_MAX.
+ */
+static void process_skipped_events(struct perf_kwork *kwork,
+				   struct kwork_work *work)
+{
+	int type;
+
+	for (type = 0; type < KWORK_TRACE_MAX; type++) {
+		unsigned long long nr = nr_list_entry(&work->atom_list[type]);
+
+		kwork->nr_skipped_events[type] += nr;
+		kwork->nr_skipped_events[KWORK_TRACE_MAX] += nr;
+	}
+}
+
+/*
+ * Create a new work item from @key via work_new() and insert it into
+ * @class->work_root using kwork->cmp_id.
+ *
+ * Returns the new work item, or NULL when work_new() fails.
+ */
+struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork,
+				       struct kwork_class *class,
+				       struct kwork_work *key)
+{
+	struct kwork_work *work = work_new(key);
+
+	if (work != NULL)
+		work_insert(&class->work_root, work, &kwork->cmp_id);
+
+	return work;
+}
+
+/*
+ * Signal handler installed for SIGINT/SIGTERM by perf_kwork__report_bpf().
+ * It only logs the signal number.
+ */
+static void sig_handler(int sig)
+{
+	/*
+	 * Simply capture termination signal so that
+	 * the program can continue after pause returns
+	 */
+	pr_debug("Capture signal %d\n", sig);
+}
+
+/*
+ * BPF-backed collection: install the signal handlers, prepare and start
+ * the BPF trace, block in pause() until a signal arrives, then stop the
+ * trace, read the results into @kwork and clean up.
+ *
+ * Returns 0 on success, -1 when perf_kwork__trace_prepare_bpf() fails.
+ */
+static int perf_kwork__report_bpf(struct perf_kwork *kwork)
+{
+	signal(SIGINT, sig_handler);
+	signal(SIGTERM, sig_handler);
+
+	if (perf_kwork__trace_prepare_bpf(kwork) != 0)
+		return -1;
+
+	printf("Starting trace, Hit <Ctrl+C> to stop and report\n");
+
+	perf_kwork__trace_start();
+
+	/* sleep here until a signal handler has run */
+	pause();
+
+	perf_kwork__trace_finish();
+	perf_kwork__report_read_bpf(kwork);
+	perf_kwork__report_cleanup_bpf();
+
+	return 0;
+}
+
+/*
+ * Collect events (via BPF or from a perf data file), sort the work items
+ * and print the report: header, one line per work item with at least one
+ * atom, optional summary, then lost/skipped event statistics.
+ *
+ * Returns 0 on success, -1 when event collection fails.
+ */
+static int perf_kwork__report(struct perf_kwork *kwork)
+{
+	struct rb_node *node;
+	int header_ret;
+	int err;
+
+	err = kwork->use_bpf ? perf_kwork__report_bpf(kwork)
+			     : perf_kwork__read_events(kwork);
+	if (err != 0)
+		return -1;
+
+	perf_kwork__sort(kwork);
+
+	setup_pager();
+
+	/* the header's return value is passed on to print_separator() */
+	header_ret = report_print_header(kwork);
+
+	for (node = rb_first_cached(&kwork->sorted_work_root);
+	     node != NULL; node = rb_next(node)) {
+		struct kwork_work *work = rb_entry(node, struct kwork_work, node);
+
+		process_skipped_events(kwork, work);
+
+		if (work->nr_atoms == 0)
+			continue;
+
+		report_print_work(kwork, work);
+		if (kwork->summary) {
+			kwork->all_runtime += work->total_runtime;
+			kwork->all_count += work->nr_atoms;
+		}
+	}
+	print_separator(header_ret);
+
+	if (kwork->summary) {
+		print_summary(kwork);
+		print_separator(header_ret);
+	}
+
+	print_bad_events(kwork);
+	print_skipped_events(kwork);
+	printf("\n");
+
+	return 0;
+}
+
+/* Signature of the per-evsel callback stored in evsel->handler. */
+typedef int (*tracepoint_handler)(struct perf_tool *tool,
+				  struct evsel *evsel,
+				  struct perf_sample *sample,
+				  struct machine *machine);
+
+/*
+ * Sample callback: forward the sample to the handler attached to @evsel.
+ * Returns the handler's result, or 0 when the evsel has no handler.
+ */
+static int perf_kwork__process_tracepoint_sample(struct perf_tool *tool,
+						 union perf_event *event __maybe_unused,
+						 struct perf_sample *sample,
+						 struct evsel *evsel,
+						 struct machine *machine)
+{
+	tracepoint_handler handler = evsel->handler;
+
+	if (handler == NULL)
+		return 0;
+
+	return handler(tool, evsel, sample, machine);
+}
+
+/*
+ * Entry point for "perf kwork timehist": register the extra event
+ * callbacks the timehist output uses, request ordered events, validate
+ * the symbol arguments and then process the input file.
+ *
+ * Returns the result of perf_kwork__read_events(), or -1 when the symbol
+ * arguments do not validate.
+ */
+static int perf_kwork__timehist(struct perf_kwork *kwork)
+{
+	struct perf_tool *tool = &kwork->tool;
+
+	/* event handlers for timehist option */
+	tool->comm         = perf_event__process_comm;
+	tool->exit         = perf_event__process_exit;
+	tool->fork         = perf_event__process_fork;
+	tool->attr         = perf_event__process_attr;
+	tool->tracing_data = perf_event__process_tracing_data;
+	tool->build_id     = perf_event__process_build_id;
+
+	tool->ordered_events               = true;
+	tool->ordering_requires_timestamps = true;
+
+	symbol_conf.use_callchain = kwork->show_callchain;
+
+	if (symbol__validate_sym_arguments() != 0) {
+		pr_err("Failed to validate sym arguments\n");
+		return -1;
+	}
+
+	setup_pager();
+
+	return perf_kwork__read_events(kwork);
+}
+
+/* Return true when @class is already linked on kwork->class_list. */
+static bool kwork_class_is_selected(struct perf_kwork *kwork,
+				    struct kwork_class *class)
+{
+	struct kwork_class *pos;
+
+	list_for_each_entry(pos, &kwork->class_list, list) {
+		if (pos == class)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Build kwork->class_list from the comma/space separated names in
+ * kwork->event_list_str.  When no list was given (or it selects nothing),
+ * every entry of kwork_class_supported_list[] is selected.
+ *
+ * @options and @usage_msg are only used to print usage when a name is not
+ * recognised or the list cannot be copied.
+ */
+static void setup_event_list(struct perf_kwork *kwork,
+			     const struct option *options,
+			     const char * const usage_msg[])
+{
+	int i;
+	struct kwork_class *class;
+	char *tmp, *tok, *str;
+
+	if (kwork->event_list_str == NULL)
+		goto null_event_list_str;
+
+	/* strtok_r() modifies its input, so work on a private copy */
+	str = strdup(kwork->event_list_str);
+	if (str == NULL) {
+		usage_with_options_msg(usage_msg, options,
+				       "Failed to copy --kwork list: out of memory");
+		return;
+	}
+
+	for (tok = strtok_r(str, ", ", &tmp);
+	     tok; tok = strtok_r(NULL, ", ", &tmp)) {
+		for (i = 0; i < KWORK_CLASS_MAX; i++) {
+			class = kwork_class_supported_list[i];
+			if (strcmp(tok, class->name) == 0) {
+				/*
+				 * A name given twice must not be linked
+				 * twice: re-adding a node that is already
+				 * on the list corrupts it.
+				 */
+				if (!kwork_class_is_selected(kwork, class))
+					list_add_tail(&class->list,
+						      &kwork->class_list);
+				break;
+			}
+		}
+		if (i == KWORK_CLASS_MAX) {
+			usage_with_options_msg(usage_msg, options,
+					       "Unknown --kwork key: `%s'", tok);
+		}
+	}
+	free(str);
+
+null_event_list_str:
+	/*
+	 * config all kwork events if not specified
+	 */
+	if (list_empty(&kwork->class_list)) {
+		for (i = 0; i < KWORK_CLASS_MAX; i++) {
+			list_add_tail(&kwork_class_supported_list[i]->list,
+				      &kwork->class_list);
+		}
+	}
+
+	pr_debug("Config event list:");
+	list_for_each_entry(class, &kwork->class_list, list)
+		pr_debug(" %s", class->name);
+	pr_debug("\n");
+}
+
+/*
+ * Implement "perf kwork record": build an argv for cmd_record() made of
+ * the fixed record arguments, one "-e <tracepoint>" pair per tracepoint of
+ * every selected class, and the user's remaining arguments (argv[1..]).
+ *
+ * Returns the result of cmd_record(), or -ENOMEM when the argument vector
+ * cannot be allocated.
+ */
+static int perf_kwork__record(struct perf_kwork *kwork,
+			      int argc, const char **argv)
+{
+	const char **rec_argv;
+	unsigned int rec_argc, i, j;
+	struct kwork_class *class;
+
+	const char *const record_args[] = {
+		"record",
+		"-a",
+		"-R",
+		"-m", "1024",
+		"-c", "1",
+	};
+
+	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+
+	list_for_each_entry(class, &kwork->class_list, list)
+		rec_argc += 2 * class->nr_tracepoints;
+
+	/* one extra zeroed slot keeps the vector NULL-terminated */
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+	if (rec_argv == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(record_args); i++) {
+		rec_argv[i] = strdup(record_args[i]);
+		if (rec_argv[i] == NULL)
+			goto out_nomem;
+	}
+
+	list_for_each_entry(class, &kwork->class_list, list) {
+		for (j = 0; j < class->nr_tracepoints; j++) {
+			rec_argv[i] = strdup("-e");
+			if (rec_argv[i] == NULL)
+				goto out_nomem;
+			i++;
+
+			rec_argv[i] = strdup(class->tp_handlers[j].name);
+			if (rec_argv[i] == NULL)
+				goto out_nomem;
+			i++;
+		}
+	}
+
+	/* the user's own arguments are borrowed, not copied */
+	for (j = 1; j < (unsigned int)argc; j++, i++)
+		rec_argv[i] = argv[j];
+
+	BUG_ON(i != rec_argc);
+
+	pr_debug("record comm: ");
+	for (j = 0; j < rec_argc; j++)
+		pr_debug("%s ", rec_argv[j]);
+	pr_debug("\n");
+
+	return cmd_record(i, rec_argv);
+
+out_nomem:
+	/* every slot in [0, i) was filled by a successful strdup() above */
+	while (i > 0)
+		free((char *)rec_argv[--i]);
+	free(rec_argv);
+	return -ENOMEM;
+}
+
+/*
+ * Entry point of "perf kwork".
+ *
+ * Parses the common options, builds the list of kwork classes to handle,
+ * then dispatches on the subcommand in argv[0]: record, report, latency
+ * or timehist.  report/latency/timehist parse their own option table
+ * (each chained to kwork_options through OPT_PARENT) before running.
+ *
+ * Returns the result of the selected subcommand; prints usage when no
+ * subcommand is given or it is not recognised.
+ */
+int cmd_kwork(int argc, const char **argv)
+{
+ /* static so the LIST_HEAD_INIT() initializers can refer to kwork itself */
+ static struct perf_kwork kwork = {
+ .class_list = LIST_HEAD_INIT(kwork.class_list),
+ .tool = {
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .sample = perf_kwork__process_tracepoint_sample,
+ },
+ .atom_page_list = LIST_HEAD_INIT(kwork.atom_page_list),
+ .sort_list = LIST_HEAD_INIT(kwork.sort_list),
+ .cmp_id = LIST_HEAD_INIT(kwork.cmp_id),
+ .sorted_work_root = RB_ROOT_CACHED,
+ .tp_handler = NULL,
+ .profile_name = NULL,
+ .cpu_list = NULL,
+ .time_str = NULL,
+ .force = false,
+ .event_list_str = NULL,
+ .summary = false,
+ .sort_order = NULL,
+ .show_callchain = false,
+ .max_stack = 5,
+ .timestart = 0,
+ .timeend = 0,
+ .nr_events = 0,
+ .nr_lost_chunks = 0,
+ .nr_lost_events = 0,
+ .all_runtime = 0,
+ .all_count = 0,
+ .nr_skipped_events = { 0 },
+ };
+ /* sort keys used when the user does not pass -s/--sort */
+ static const char default_report_sort_order[] = "runtime, max, count";
+ static const char default_latency_sort_order[] = "avg, max, count";
+ /* options shared by every subcommand */
+ const struct option kwork_options[] = {
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show symbol address, etc)"),
+ OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+ "dump raw trace in ASCII"),
+ OPT_STRING('k', "kwork", &kwork.event_list_str, "kwork",
+ "list of kwork to profile (irq, softirq, workqueue, etc)"),
+ OPT_BOOLEAN('f', "force", &kwork.force, "don't complain, do it"),
+ OPT_END()
+ };
+ /* options of "perf kwork report" */
+ const struct option report_options[] = {
+ OPT_STRING('s', "sort", &kwork.sort_order, "key[,key2...]",
+ "sort by key(s): runtime, max, count"),
+ OPT_STRING('C', "cpu", &kwork.cpu_list, "cpu",
+ "list of cpus to profile"),
+ OPT_STRING('n', "name", &kwork.profile_name, "name",
+ "event name to profile"),
+ OPT_STRING(0, "time", &kwork.time_str, "str",
+ "Time span for analysis (start,stop)"),
+ OPT_STRING('i', "input", &input_name, "file",
+ "input file name"),
+ OPT_BOOLEAN('S', "with-summary", &kwork.summary,
+ "Show summary with statistics"),
+#ifdef HAVE_BPF_SKEL
+ OPT_BOOLEAN('b', "use-bpf", &kwork.use_bpf,
+ "Use BPF to measure kwork runtime"),
+#endif
+ OPT_PARENT(kwork_options)
+ };
+ /* options of "perf kwork latency" */
+ const struct option latency_options[] = {
+ OPT_STRING('s', "sort", &kwork.sort_order, "key[,key2...]",
+ "sort by key(s): avg, max, count"),
+ OPT_STRING('C', "cpu", &kwork.cpu_list, "cpu",
+ "list of cpus to profile"),
+ OPT_STRING('n', "name", &kwork.profile_name, "name",
+ "event name to profile"),
+ OPT_STRING(0, "time", &kwork.time_str, "str",
+ "Time span for analysis (start,stop)"),
+ OPT_STRING('i', "input", &input_name, "file",
+ "input file name"),
+#ifdef HAVE_BPF_SKEL
+ OPT_BOOLEAN('b', "use-bpf", &kwork.use_bpf,
+ "Use BPF to measure kwork latency"),
+#endif
+ OPT_PARENT(kwork_options)
+ };
+ /*
+ * options of "perf kwork timehist"
+ * NOTE(review): short option 'k' is used here for "vmlinux" and also by
+ * the parent kwork_options for "kwork" - confirm which one wins.
+ */
+ const struct option timehist_options[] = {
+ OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
+ OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
+ "file", "kallsyms pathname"),
+ OPT_BOOLEAN('g', "call-graph", &kwork.show_callchain,
+ "Display call chains if present"),
+ OPT_UINTEGER(0, "max-stack", &kwork.max_stack,
+ "Maximum number of functions to display backtrace."),
+ OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+ "Look for files with symbols relative to this directory"),
+ OPT_STRING(0, "time", &kwork.time_str, "str",
+ "Time span for analysis (start,stop)"),
+ OPT_STRING('C', "cpu", &kwork.cpu_list, "cpu",
+ "list of cpus to profile"),
+ OPT_STRING('n', "name", &kwork.profile_name, "name",
+ "event name to profile"),
+ OPT_STRING('i', "input", &input_name, "file",
+ "input file name"),
+ OPT_PARENT(kwork_options)
+ };
+ /* left empty here; presumably filled in by parse_options_subcommand() - TODO confirm */
+ const char *kwork_usage[] = {
+ NULL,
+ NULL
+ };
+ const char * const report_usage[] = {
+ "perf kwork report [<options>]",
+ NULL
+ };
+ const char * const latency_usage[] = {
+ "perf kwork latency [<options>]",
+ NULL
+ };
+ const char * const timehist_usage[] = {
+ "perf kwork timehist [<options>]",
+ NULL
+ };
+ const char *const kwork_subcommands[] = {
+ "record", "report", "latency", "timehist", NULL
+ };
+
+ /* parse the common options only; parsing stops at the subcommand name */
+ argc = parse_options_subcommand(argc, argv, kwork_options,
+ kwork_subcommands, kwork_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ if (!argc)
+ usage_with_options(kwork_usage, kwork_options);
+
+ setup_event_list(&kwork, kwork_options, kwork_usage);
+ sort_dimension__add(&kwork, "id", &kwork.cmp_id);
+
+ /*
+ * Each branch requires argv[0] to be longer than two characters and
+ * passes the subcommand name and argv[0] to strstarts(); presumably this
+ * accepts abbreviations such as "rec" - verify against strstarts().
+ */
+ if (strlen(argv[0]) > 2 && strstarts("record", argv[0]))
+ return perf_kwork__record(&kwork, argc, argv);
+ else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
+ kwork.sort_order = default_report_sort_order;
+ if (argc > 1) {
+ argc = parse_options(argc, argv, report_options, report_usage, 0);
+ /* anything left over after option parsing is a usage error */
+ if (argc)
+ usage_with_options(report_usage, report_options);
+ }
+ kwork.report = KWORK_REPORT_RUNTIME;
+ setup_sorting(&kwork, report_options, report_usage);
+ return perf_kwork__report(&kwork);
+ } else if (strlen(argv[0]) > 2 && strstarts("latency", argv[0])) {
+ kwork.sort_order = default_latency_sort_order;
+ if (argc > 1) {
+ argc = parse_options(argc, argv, latency_options, latency_usage, 0);
+ if (argc)
+ usage_with_options(latency_usage, latency_options);
+ }
+ kwork.report = KWORK_REPORT_LATENCY;
+ setup_sorting(&kwork, latency_options, latency_usage);
+ return perf_kwork__report(&kwork);
+ } else if (strlen(argv[0]) > 2 && strstarts("timehist", argv[0])) {
+ if (argc > 1) {
+ argc = parse_options(argc, argv, timehist_options, timehist_usage, 0);
+ if (argc)
+ usage_with_options(timehist_usage, timehist_options);
+ }
+ kwork.report = KWORK_REPORT_TIMEHIST;
+ return perf_kwork__timehist(&kwork);
+ } else
+ usage_with_options(kwork_usage, kwork_options);
+
+ return 0;
+}
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 468958154ed9..744dd3520584 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -10,7 +10,7 @@
*/
#include "builtin.h"
-#include "util/parse-events.h"
+#include "util/print-events.h"
#include "util/pmu.h"
#include "util/pmu-hybrid.h"
#include "util/debug.h"
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 23a33ac15e68..ea40ae52cd2c 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -9,16 +9,21 @@
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
+#include "util/target.h"
+#include "util/callchain.h"
+#include "util/lock-contention.h"
#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include "util/trace-event.h"
+#include "util/tracepoint.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/data.h"
#include "util/string2.h"
+#include "util/map.h"
#include <sys/types.h>
#include <sys/prctl.h>
@@ -32,8 +37,10 @@
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <linux/err.h>
+#include <linux/stringify.h>
static struct perf_session *session;
+static struct target target;
/* based on kernel/lockdep.c */
#define LOCKHASH_BITS 12
@@ -44,81 +51,23 @@ static struct hlist_head lockhash_table[LOCKHASH_SIZE];
#define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS)
#define lockhashentry(key) (lockhash_table + __lockhashfn((key)))
-struct lock_stat {
- struct hlist_node hash_entry;
- struct rb_node rb; /* used for sorting */
-
- u64 addr; /* address of lockdep_map, used as ID */
- char *name; /* for strcpy(), we cannot use const */
-
- unsigned int nr_acquire;
- unsigned int nr_acquired;
- unsigned int nr_contended;
- unsigned int nr_release;
-
- unsigned int nr_readlock;
- unsigned int nr_trylock;
-
- /* these times are in nano sec. */
- u64 avg_wait_time;
- u64 wait_time_total;
- u64 wait_time_min;
- u64 wait_time_max;
-
- int broken; /* flag of blacklist */
- int combined;
-};
-
-/*
- * States of lock_seq_stat
- *
- * UNINITIALIZED is required for detecting first event of acquire.
- * As the nature of lock events, there is no guarantee
- * that the first event for the locks are acquire,
- * it can be acquired, contended or release.
- */
-#define SEQ_STATE_UNINITIALIZED 0 /* initial state */
-#define SEQ_STATE_RELEASED 1
-#define SEQ_STATE_ACQUIRING 2
-#define SEQ_STATE_ACQUIRED 3
-#define SEQ_STATE_READ_ACQUIRED 4
-#define SEQ_STATE_CONTENDED 5
-
-/*
- * MAX_LOCK_DEPTH
- * Imported from include/linux/sched.h.
- * Should this be synchronized?
- */
-#define MAX_LOCK_DEPTH 48
-
-/*
- * struct lock_seq_stat:
- * Place to put on state of one lock sequence
- * 1) acquire -> acquired -> release
- * 2) acquire -> contended -> acquired -> release
- * 3) acquire (with read or try) -> release
- * 4) Are there other patterns?
- */
-struct lock_seq_stat {
- struct list_head list;
- int state;
- u64 prev_event_time;
- u64 addr;
-
- int read_count;
-};
-
-struct thread_stat {
- struct rb_node rb;
-
- u32 tid;
- struct list_head seq_list;
-};
-
static struct rb_root thread_stats;
static bool combine_locks;
static bool show_thread_stats;
+static bool use_bpf;
+static unsigned long bpf_map_entries = 10240;
+
+static enum {
+ LOCK_AGGR_ADDR,
+ LOCK_AGGR_TASK,
+ LOCK_AGGR_CALLER,
+} aggr_mode = LOCK_AGGR_ADDR;
+
+static u64 sched_text_start;
+static u64 sched_text_end;
+static u64 lock_text_start;
+static u64 lock_text_end;
static struct thread_stat *thread_stat_find(u32 tid)
{
@@ -251,6 +200,31 @@ struct lock_key {
struct list_head list;
};
+/*
+ * Print @nsec scaled to the largest unit (h, m, s, ms, us) it reaches,
+ * with two decimals and a two-character unit suffix; values below 1us are
+ * printed as an integer number of ns.  The number is padded to
+ * @len - 3 columns.
+ */
+static void lock_stat_key_print_time(unsigned long long nsec, int len)
+{
+	static const struct {
+		float base;
+		const char *unit;
+	} table[] = {
+		{ 1e9 * 3600, "h " },
+		{ 1e9 * 60, "m " },
+		{ 1e9, "s " },
+		{ 1e6, "ms" },
+		{ 1e3, "us" },
+		{ 0, NULL },
+	};
+	int idx;
+
+	/* entries are ordered from largest to smallest; first match wins */
+	for (idx = 0; table[idx].unit != NULL; idx++) {
+		if (nsec >= table[idx].base) {
+			pr_info("%*.2f %s", len - 3, nsec / table[idx].base,
+				table[idx].unit);
+			return;
+		}
+	}
+
+	pr_info("%*llu %s", len - 3, nsec, "ns");
+}
+
#define PRINT_KEY(member) \
static void lock_stat_key_print_ ## member(struct lock_key *key, \
struct lock_stat *ls) \
@@ -258,11 +232,18 @@ static void lock_stat_key_print_ ## member(struct lock_key *key, \
pr_info("%*llu", key->len, (unsigned long long)ls->member); \
}
+#define PRINT_TIME(member) \
+static void lock_stat_key_print_ ## member(struct lock_key *key, \
+ struct lock_stat *ls) \
+{ \
+ lock_stat_key_print_time((unsigned long long)ls->member, key->len); \
+}
+
PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)
-PRINT_KEY(avg_wait_time)
-PRINT_KEY(wait_time_total)
-PRINT_KEY(wait_time_max)
+PRINT_TIME(avg_wait_time)
+PRINT_TIME(wait_time_total)
+PRINT_TIME(wait_time_max)
static void lock_stat_key_print_wait_time_min(struct lock_key *key,
struct lock_stat *ls)
@@ -272,7 +253,7 @@ static void lock_stat_key_print_wait_time_min(struct lock_key *key,
if (wait_time == ULLONG_MAX)
wait_time = 0;
- pr_info("%*"PRIu64, key->len, wait_time);
+ lock_stat_key_print_time(wait_time, key->len);
}
@@ -288,21 +269,36 @@ static const char *output_fields;
#define DEF_KEY_LOCK(name, header, fn_suffix, len) \
{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
-struct lock_key keys[] = {
+static struct lock_key report_keys[] = {
DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
- DEF_KEY_LOCK(avg_wait, "avg wait (ns)", avg_wait_time, 15),
- DEF_KEY_LOCK(wait_total, "total wait (ns)", wait_time_total, 15),
- DEF_KEY_LOCK(wait_max, "max wait (ns)", wait_time_max, 15),
- DEF_KEY_LOCK(wait_min, "min wait (ns)", wait_time_min, 15),
+ DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
+ DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
+ DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
+ DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
/* extra comparisons much complicated should be here */
{ }
};
-static int select_key(void)
+static struct lock_key contention_keys[] = {
+ DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
+ DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
+ DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
+ DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
+ DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
+
+ /* extra comparisons much complicated should be here */
+ { }
+};
+
+static int select_key(bool contention)
{
int i;
+ struct lock_key *keys = report_keys;
+
+ if (contention)
+ keys = contention_keys;
for (i = 0; keys[i].name; i++) {
if (!strcmp(keys[i].name, sort_key)) {
@@ -320,9 +316,13 @@ static int select_key(void)
return -1;
}
-static int add_output_field(struct list_head *head, char *name)
+static int add_output_field(bool contention, char *name)
{
int i;
+ struct lock_key *keys = report_keys;
+
+ if (contention)
+ keys = contention_keys;
for (i = 0; keys[i].name; i++) {
if (strcmp(keys[i].name, name))
@@ -330,7 +330,7 @@ static int add_output_field(struct list_head *head, char *name)
/* prevent double link */
if (list_empty(&keys[i].list))
- list_add_tail(&keys[i].list, head);
+ list_add_tail(&keys[i].list, &lock_keys);
return 0;
}
@@ -339,10 +339,14 @@ static int add_output_field(struct list_head *head, char *name)
return -1;
}
-static int setup_output_field(const char *str)
+static int setup_output_field(bool contention, const char *str)
{
char *tok, *tmp, *orig;
int i, ret = 0;
+ struct lock_key *keys = report_keys;
+
+ if (contention)
+ keys = contention_keys;
/* no output field given: use all of them */
if (str == NULL) {
@@ -359,7 +363,7 @@ static int setup_output_field(const char *str)
return -ENOMEM;
while ((tok = strsep(&tmp, ",")) != NULL){
- ret = add_output_field(&lock_keys, tok);
+ ret = add_output_field(contention, tok);
if (ret < 0)
break;
}
@@ -451,7 +455,19 @@ static struct lock_stat *pop_from_result(void)
return container_of(node, struct lock_stat, rb);
}
-static struct lock_stat *lock_stat_findnew(u64 addr, const char *name)
+/*
+ * Look up the lock_stat whose addr equals @addr in the lock hash table.
+ * Returns NULL when there is no such entry; never allocates.
+ */
+static struct lock_stat *lock_stat_find(u64 addr)
+{
+	struct hlist_head *bucket = lockhashentry(addr);
+	struct lock_stat *ls;
+
+	hlist_for_each_entry(ls, bucket, hash_entry) {
+		if (ls->addr != addr)
+			continue;
+		return ls;
+	}
+
+	return NULL;
+}
+
+static struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
{
struct hlist_head *entry = lockhashentry(addr);
struct lock_stat *ret, *new;
@@ -466,13 +482,13 @@ static struct lock_stat *lock_stat_findnew(u64 addr, const char *name)
goto alloc_failed;
new->addr = addr;
- new->name = zalloc(sizeof(char) * strlen(name) + 1);
+ new->name = strdup(name);
if (!new->name) {
free(new);
goto alloc_failed;
}
- strcpy(new->name, name);
+ new->flags = flags;
new->wait_time_min = ULLONG_MAX;
hlist_add_head(&new->hash_entry, entry);
@@ -484,17 +500,29 @@ alloc_failed:
}
+/*
+ * Set of per-event callbacks.  Each callback takes the evsel and the
+ * sample of one lock tracepoint event and returns an int status.
+ */
struct trace_lock_handler {
+ /* it's used on CONFIG_LOCKDEP */
int (*acquire_event)(struct evsel *evsel,
struct perf_sample *sample);
+ /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
int (*acquired_event)(struct evsel *evsel,
struct perf_sample *sample);
+ /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
int (*contended_event)(struct evsel *evsel,
struct perf_sample *sample);
+ /* it's used on CONFIG_LOCKDEP */
int (*release_event)(struct evsel *evsel,
struct perf_sample *sample);
+
+ /* it's used when CONFIG_LOCKDEP is off */
+ int (*contention_begin_event)(struct evsel *evsel,
+ struct perf_sample *sample);
+
+ /* it's used when CONFIG_LOCKDEP is off */
+ int (*contention_end_event)(struct evsel *evsel,
+ struct perf_sample *sample);
};
static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
@@ -542,12 +570,22 @@ static int report_lock_acquire_event(struct evsel *evsel,
const char *name = evsel__strval(evsel, sample, "name");
u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
int flag = evsel__intval(evsel, sample, "flags");
+ u64 key;
- /* abuse ls->addr for tid */
- if (show_thread_stats)
- addr = sample->tid;
+ switch (aggr_mode) {
+ case LOCK_AGGR_ADDR:
+ key = addr;
+ break;
+ case LOCK_AGGR_TASK:
+ key = sample->tid;
+ break;
+ case LOCK_AGGR_CALLER:
+ default:
+ pr_err("Invalid aggregation mode: %d\n", aggr_mode);
+ return -EINVAL;
+ }
- ls = lock_stat_findnew(addr, name);
+ ls = lock_stat_findnew(key, name, 0);
if (!ls)
return -ENOMEM;
@@ -615,11 +653,22 @@ static int report_lock_acquired_event(struct evsel *evsel,
u64 contended_term;
const char *name = evsel__strval(evsel, sample, "name");
u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
+ u64 key;
- if (show_thread_stats)
- addr = sample->tid;
+ switch (aggr_mode) {
+ case LOCK_AGGR_ADDR:
+ key = addr;
+ break;
+ case LOCK_AGGR_TASK:
+ key = sample->tid;
+ break;
+ case LOCK_AGGR_CALLER:
+ default:
+ pr_err("Invalid aggregation mode: %d\n", aggr_mode);
+ return -EINVAL;
+ }
- ls = lock_stat_findnew(addr, name);
+ ls = lock_stat_findnew(key, name, 0);
if (!ls)
return -ENOMEM;
@@ -677,11 +726,22 @@ static int report_lock_contended_event(struct evsel *evsel,
struct lock_seq_stat *seq;
const char *name = evsel__strval(evsel, sample, "name");
u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
+ u64 key;
- if (show_thread_stats)
- addr = sample->tid;
+ switch (aggr_mode) {
+ case LOCK_AGGR_ADDR:
+ key = addr;
+ break;
+ case LOCK_AGGR_TASK:
+ key = sample->tid;
+ break;
+ case LOCK_AGGR_CALLER:
+ default:
+ pr_err("Invalid aggregation mode: %d\n", aggr_mode);
+ return -EINVAL;
+ }
- ls = lock_stat_findnew(addr, name);
+ ls = lock_stat_findnew(key, name, 0);
if (!ls)
return -ENOMEM;
@@ -732,11 +792,22 @@ static int report_lock_release_event(struct evsel *evsel,
struct lock_seq_stat *seq;
const char *name = evsel__strval(evsel, sample, "name");
u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
+ u64 key;
- if (show_thread_stats)
- addr = sample->tid;
+ switch (aggr_mode) {
+ case LOCK_AGGR_ADDR:
+ key = addr;
+ break;
+ case LOCK_AGGR_TASK:
+ key = sample->tid;
+ break;
+ case LOCK_AGGR_CALLER:
+ default:
+ pr_err("Invalid aggregation mode: %d\n", aggr_mode);
+ return -EINVAL;
+ }
- ls = lock_stat_findnew(addr, name);
+ ls = lock_stat_findnew(key, name, 0);
if (!ls)
return -ENOMEM;
@@ -783,6 +854,314 @@ end:
return 0;
}
+/*
+ * Look up kernel symbol @name and store its unmapped start address in
+ * @addr.  Returns false (leaving @addr untouched) when the symbol is not
+ * found.
+ */
+static bool lock_text_boundary(struct machine *machine, const char *name,
+			       u64 *addr)
+{
+	struct map *kmap;
+	struct symbol *sym;
+
+	sym = machine__find_kernel_symbol_by_name(machine, name, &kmap);
+	if (sym == NULL)
+		return false;
+
+	*addr = kmap->unmap_ip(kmap, sym->start);
+	return true;
+}
+
+/*
+ * Tell whether @addr lies inside the kernel's sched text or lock text
+ * section.  The section boundaries are resolved on first use and cached
+ * in file-scope variables; if any of the four boundary symbols is
+ * missing, sched_text_start is set to 1 so the lookup is not retried and
+ * every call returns false.
+ */
+bool is_lock_function(struct machine *machine, u64 addr)
+{
+	if (!sched_text_start) {
+		if (!lock_text_boundary(machine, "__sched_text_start",
+					&sched_text_start) ||
+		    !lock_text_boundary(machine, "__sched_text_end",
+					&sched_text_end) ||
+		    !lock_text_boundary(machine, "__lock_text_start",
+					&lock_text_start) ||
+		    !lock_text_boundary(machine, "__lock_text_end",
+					&lock_text_end)) {
+			/* to avoid retry */
+			sched_text_start = 1;
+			return false;
+		}
+	}
+
+	/* failed to get kernel symbols */
+	if (sched_text_start == 1)
+		return false;
+
+	/* mutex and rwsem functions are in sched text section */
+	if (sched_text_start <= addr && addr < sched_text_end)
+		return true;
+
+	/* spinlock functions are in lock text section */
+	if (lock_text_start <= addr && addr < lock_text_end)
+		return true;
+
+	return false;
+}
+
+/*
+ * Write a name for the code that ran into the contended lock into @buf
+ * (at most @size bytes): the first callchain entry, after skipping
+ * CONTENTION_STACK_SKIP entries, that has a symbol and is not a lock
+ * function.  The name is "symbol" or "symbol+0xoffset".
+ *
+ * Returns 0 when a caller was written to @buf, -1 otherwise (per-thread
+ * stats requested, thread lookup or callchain resolution failed, or no
+ * suitable entry found).
+ */
+static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
+				  char *buf, int size)
+{
+	struct thread *thread;
+	struct callchain_cursor *cursor = &callchain_cursor;
+	struct machine *machine = &session->machines.host;
+	struct symbol *sym;
+	int skip = 0;
+	int ret;
+
+	/* lock names will be replaced to task name later */
+	if (show_thread_stats)
+		return -1;
+
+	thread = machine__findnew_thread(machine, -1, sample->pid);
+	if (thread == NULL)
+		return -1;
+
+	/* use caller function name from the callchain */
+	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
+					NULL, NULL, CONTENTION_STACK_DEPTH);
+	if (ret != 0) {
+		thread__put(thread);
+		return -1;
+	}
+
+	callchain_cursor_commit(cursor);
+	thread__put(thread);
+
+	while (true) {
+		struct callchain_cursor_node *node;
+
+		node = callchain_cursor_current(cursor);
+		if (node == NULL)
+			break;
+
+		/* skip first few entries - for lock functions */
+		if (++skip <= CONTENTION_STACK_SKIP)
+			goto next;
+
+		sym = node->ms.sym;
+		if (sym && !is_lock_function(machine, node->ip)) {
+			struct map *map = node->ms.map;
+			u64 offset;
+
+			offset = map->map_ip(map, node->ip) - sym->start;
+
+			/* offset is a u64, so it needs the 64-bit format macro */
+			if (offset)
+				scnprintf(buf, size, "%s+%#" PRIx64, sym->name, offset);
+			else
+				strlcpy(buf, sym->name, size);
+			return 0;
+		}
+
+next:
+		callchain_cursor_advance(cursor);
+	}
+	return -1;
+}
+
+/*
+ * Compute an id for the sample's callchain: the XOR of hash_long() over
+ * the ip of every entry that is past the first CONTENTION_STACK_SKIP
+ * entries and is not a resolved lock function.
+ *
+ * Returns (u64)-1 when the thread cannot be found or the callchain cannot
+ * be resolved.
+ */
+static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
+{
+	struct machine *machine = &session->machines.host;
+	struct callchain_cursor *cursor = &callchain_cursor;
+	struct callchain_cursor_node *node;
+	struct thread *thread;
+	u64 id = 0;
+	int depth = 0;
+	int err;
+
+	thread = machine__findnew_thread(machine, -1, sample->pid);
+	if (!thread)
+		return -1;
+
+	/* use caller function name from the callchain */
+	err = thread__resolve_callchain(thread, cursor, evsel, sample,
+					NULL, NULL, CONTENTION_STACK_DEPTH);
+	thread__put(thread);
+
+	if (err)
+		return -1;
+
+	callchain_cursor_commit(cursor);
+
+	for (node = callchain_cursor_current(cursor);
+	     node != NULL;
+	     callchain_cursor_advance(cursor),
+	     node = callchain_cursor_current(cursor)) {
+		/* skip first few entries - for lock functions */
+		depth++;
+		if (depth <= CONTENTION_STACK_SKIP)
+			continue;
+
+		if (node->ms.sym && is_lock_function(machine, node->ip))
+			continue;
+
+		id ^= hash_long((unsigned long)node->ip, 64);
+	}
+
+	return id;
+}
+
+/*
+ * Handle a contention-begin sample.
+ *
+ * Finds (or creates) the lock_stat for the aggregation key selected by
+ * aggr_mode, then advances the per-thread sequence state for the lock
+ * address to SEQ_STATE_CONTENDED, recording the sample time and bumping
+ * nr_contended.
+ *
+ * Returns 0 on success (including ignored or broken sequences), -EINVAL
+ * for an unknown aggregation mode and -ENOMEM when a lookup-or-create
+ * helper returns NULL.
+ */
+static int report_lock_contention_begin_event(struct evsel *evsel,
+ struct perf_sample *sample)
+{
+ struct lock_stat *ls;
+ struct thread_stat *ts;
+ struct lock_seq_stat *seq;
+ u64 addr = evsel__intval(evsel, sample, "lock_addr");
+ u64 key;
+
+ /* the stat entry is keyed by lock address, tid or callchain id */
+ switch (aggr_mode) {
+ case LOCK_AGGR_ADDR:
+ key = addr;
+ break;
+ case LOCK_AGGR_TASK:
+ key = sample->tid;
+ break;
+ case LOCK_AGGR_CALLER:
+ key = callchain_id(evsel, sample);
+ break;
+ default:
+ pr_err("Invalid aggregation mode: %d\n", aggr_mode);
+ return -EINVAL;
+ }
+
+ ls = lock_stat_find(key);
+ if (!ls) {
+ char buf[128];
+ const char *caller = buf;
+ unsigned int flags = evsel__intval(evsel, sample, "flags");
+
+ /* a new entry is named after the caller, or "Unknown" if that fails */
+ if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
+ caller = "Unknown";
+
+ ls = lock_stat_findnew(key, caller, flags);
+ if (!ls)
+ return -ENOMEM;
+ }
+
+ ts = thread_stat_findnew(sample->tid);
+ if (!ts)
+ return -ENOMEM;
+
+ /* the sequence is always looked up by lock address, not by key */
+ seq = get_seq(ts, addr);
+ if (!seq)
+ return -ENOMEM;
+
+ switch (seq->state) {
+ case SEQ_STATE_UNINITIALIZED:
+ case SEQ_STATE_ACQUIRED:
+ break;
+ case SEQ_STATE_CONTENDED:
+ /*
+ * It can have nested contention begin with mutex spinning,
+ * then we would use the original contention begin event and
+ * ignore the second one.
+ */
+ goto end;
+ case SEQ_STATE_ACQUIRING:
+ case SEQ_STATE_READ_ACQUIRED:
+ case SEQ_STATE_RELEASED:
+ /* broken lock sequence */
+ if (!ls->broken) {
+ ls->broken = 1;
+ bad_hist[BROKEN_CONTENDED]++;
+ }
+ /* drop the sequence; seq must not be touched after this */
+ list_del_init(&seq->list);
+ free(seq);
+ goto end;
+ default:
+ /* NOTE(review): BUG_ON() is given a string literal, which is always non-zero */
+ BUG_ON("Unknown state of lock sequence found!\n");
+ break;
+ }
+
+ if (seq->state != SEQ_STATE_CONTENDED) {
+ seq->state = SEQ_STATE_CONTENDED;
+ seq->prev_event_time = sample->time;
+ ls->nr_contended++;
+ }
+end:
+ return 0;
+}
+
+/*
+ * Handle a contention-end sample.
+ *
+ * When the per-thread sequence for the lock address is in
+ * SEQ_STATE_CONTENDED, the time since the recorded begin event is added
+ * to the lock_stat's wait totals (total, min, max, avg) and the sequence
+ * moves to SEQ_STATE_ACQUIRED.
+ *
+ * Returns 0 on success and when the sample is ignored (no matching
+ * lock_stat or thread_stat, uninitialized or broken sequence), -EINVAL
+ * for an unknown aggregation mode and -ENOMEM when get_seq() returns NULL.
+ */
+static int report_lock_contention_end_event(struct evsel *evsel,
+ struct perf_sample *sample)
+{
+ struct lock_stat *ls;
+ struct thread_stat *ts;
+ struct lock_seq_stat *seq;
+ u64 contended_term;
+ u64 addr = evsel__intval(evsel, sample, "lock_addr");
+ u64 key;
+
+ switch (aggr_mode) {
+ case LOCK_AGGR_ADDR:
+ key = addr;
+ break;
+ case LOCK_AGGR_TASK:
+ key = sample->tid;
+ break;
+ case LOCK_AGGR_CALLER:
+ key = callchain_id(evsel, sample);
+ break;
+ default:
+ pr_err("Invalid aggregation mode: %d\n", aggr_mode);
+ return -EINVAL;
+ }
+
+ /* unlike the begin handler, nothing is created here: unknown keys are ignored */
+ ls = lock_stat_find(key);
+ if (!ls)
+ return 0;
+
+ ts = thread_stat_find(sample->tid);
+ if (!ts)
+ return 0;
+
+ seq = get_seq(ts, addr);
+ if (!seq)
+ return -ENOMEM;
+
+ switch (seq->state) {
+ case SEQ_STATE_UNINITIALIZED:
+ goto end;
+ case SEQ_STATE_CONTENDED:
+ /* wait time is measured from the recorded begin event */
+ contended_term = sample->time - seq->prev_event_time;
+ ls->wait_time_total += contended_term;
+ if (contended_term < ls->wait_time_min)
+ ls->wait_time_min = contended_term;
+ if (ls->wait_time_max < contended_term)
+ ls->wait_time_max = contended_term;
+ break;
+ case SEQ_STATE_ACQUIRING:
+ case SEQ_STATE_ACQUIRED:
+ case SEQ_STATE_READ_ACQUIRED:
+ case SEQ_STATE_RELEASED:
+ /* broken lock sequence */
+ if (!ls->broken) {
+ ls->broken = 1;
+ bad_hist[BROKEN_ACQUIRED]++;
+ }
+ /* drop the sequence; seq must not be touched after this */
+ list_del_init(&seq->list);
+ free(seq);
+ goto end;
+ default:
+ /* NOTE(review): BUG_ON() is given a string literal, which is always non-zero */
+ BUG_ON("Unknown state of lock sequence found!\n");
+ break;
+ }
+
+ seq->state = SEQ_STATE_ACQUIRED;
+ ls->nr_acquired++;
+ /* nr_acquired was just incremented, so the divisor is at least 1 */
+ ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
+end:
+ return 0;
+}
+
/* lock oriented handlers */
/* TODO: handlers for CPU oriented, thread oriented */
static struct trace_lock_handler report_lock_ops = {
@@ -790,8 +1169,16 @@ static struct trace_lock_handler report_lock_ops = {
.acquired_event = report_lock_acquired_event,
.contended_event = report_lock_contended_event,
.release_event = report_lock_release_event,
+ .contention_begin_event = report_lock_contention_begin_event,
+ .contention_end_event = report_lock_contention_end_event,
};
+static struct trace_lock_handler contention_lock_ops = { /* handler table installed for the "contention" subcommand: only contention begin/end are handled */
+ .contention_begin_event = report_lock_contention_begin_event,
+ .contention_end_event = report_lock_contention_end_event,
+};
+
+
static struct trace_lock_handler *trace_handler;
static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
@@ -822,13 +1209,34 @@ static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *
return 0;
}
+/*
+ * Forward a contention-begin sample to the active trace handler.
+ * Returns the handler's result, or 0 when the handler table has no
+ * contention_begin_event callback.
+ */
+static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
+{
+	if (!trace_handler->contention_begin_event)
+		return 0;
+
+	return trace_handler->contention_begin_event(evsel, sample);
+}
+
+/*
+ * Forward a contention-end sample to the active trace handler.
+ * Returns the handler's result, or 0 when the handler table has no
+ * contention_end_event callback.
+ */
+static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
+{
+	if (!trace_handler->contention_end_event)
+		return 0;
+
+	return trace_handler->contention_end_event(evsel, sample);
+}
+
static void print_bad_events(int bad, int total)
{
/* Output for debug, this have to be removed */
int i;
+ int broken = 0;
const char *name[4] =
{ "acquire", "acquired", "contended", "release" };
+ for (i = 0; i < BROKEN_MAX; i++)
+ broken += bad_hist[i];
+
+ if (broken == 0 && !verbose)
+ return;
+
pr_info("\n=== output for debug===\n\n");
pr_info("bad: %d, total: %d\n", bad, total);
pr_info("bad rate: %.2f %%\n", (double)bad / (double)total * 100);
@@ -1016,6 +1424,83 @@ static void sort_result(void)
}
}
+/*
+ * Translate the flags of @st into a printable lock type name.
+ *
+ * The flags value must match a table entry exactly; any other combination
+ * yields "unknown".  The returned string is a constant and must not be freed.
+ */
+static const char *get_type_str(struct lock_stat *st)
+{
+	static const struct {
+		unsigned int flags;
+		const char *name;
+	} table[] = {
+		{ 0, "semaphore" },
+		{ LCB_F_SPIN, "spinlock" },
+		{ LCB_F_SPIN | LCB_F_READ, "rwlock:R" },
+		{ LCB_F_SPIN | LCB_F_WRITE, "rwlock:W"},
+		{ LCB_F_READ, "rwsem:R" },
+		{ LCB_F_WRITE, "rwsem:W" },
+		{ LCB_F_RT, "rtmutex" },
+		{ LCB_F_RT | LCB_F_READ, "rwlock-rt:R" },
+		{ LCB_F_RT | LCB_F_WRITE, "rwlock-rt:W"},
+		{ LCB_F_PERCPU | LCB_F_READ, "pcpu-sem:R" },
+		{ LCB_F_PERCPU | LCB_F_WRITE, "pcpu-sem:W" },
+		{ LCB_F_MUTEX, "mutex" },
+		{ LCB_F_MUTEX | LCB_F_SPIN, "mutex" },
+	};
+	unsigned int idx = 0;
+
+	/* First exact match wins. */
+	while (idx < ARRAY_SIZE(table)) {
+		if (st->flags == table[idx].flags)
+			return table[idx].name;
+		idx++;
+	}
+
+	return "unknown";
+}
+
+static void sort_contention_result(void)
+{
+ sort_result(); /* contention output reuses the generic result sorting unchanged */
+}
+
+/*
+ * Print the contention table: one header row built from the selected
+ * lock_keys, then one row per sorted result entry, followed by the
+ * bad-event summary.
+ *
+ * With show_thread_stats each row ends with the tid and comm of the thread
+ * (st->addr holds the tid in that mode); otherwise it ends with the lock
+ * type and st->name.
+ */
+static void print_contention_result(void)
+{
+	struct lock_stat *st;
+	struct lock_key *key;
+	int bad, total;
+
+	list_for_each_entry(key, &lock_keys, list)
+		pr_info("%*s ", key->len, key->header);
+
+	if (show_thread_stats)
+		pr_info(" %10s %s\n\n", "pid", "comm");
+	else
+		pr_info(" %10s %s\n\n", "type", "caller");
+
+	bad = total = 0;
+	/* in BPF mode this slot carries the number of lost entries */
+	if (use_bpf)
+		bad = bad_hist[BROKEN_CONTENDED];
+
+	while ((st = pop_from_result())) {
+		total += use_bpf ? st->nr_contended : 1;
+		if (st->broken)
+			bad++;
+
+		list_for_each_entry(key, &lock_keys, list) {
+			key->print(key, st);
+			pr_info(" ");
+		}
+
+		if (show_thread_stats) {
+			struct thread *t;
+			int pid = st->addr;
+
+			/* st->addr contains tid of thread */
+			t = perf_session__findnew(session, pid);
+			/*
+			 * The lookup can fail, and on success it hands back a
+			 * reference that has to be dropped again.
+			 */
+			pr_info(" %10d %s\n", pid,
+				t ? thread__comm_str(t) : "<unknown>");
+			thread__put(t);
+			continue;
+		}
+
+		pr_info(" %10s %s\n", get_type_str(st), st->name);
+	}
+
+	print_bad_events(bad, total);
+}
+
static const struct evsel_str_handler lock_tracepoints[] = {
{ "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
{ "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
@@ -1023,6 +1508,11 @@ static const struct evsel_str_handler lock_tracepoints[] = {
{ "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
};
+static const struct evsel_str_handler contention_tracepoints[] = { /* tracepoint name -> sample handler; checked/recorded by __cmd_record when the lock_stat tracepoints are unavailable */
+ { "lock:contention_begin", evsel__process_contention_begin, },
+ { "lock:contention_end", evsel__process_contention_end, },
+};
+
static bool force;
static int __cmd_report(bool display_info)
@@ -1031,6 +1521,7 @@ static int __cmd_report(bool display_info)
struct perf_tool eops = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
+ .mmap = perf_event__process_mmap,
.namespaces = perf_event__process_namespaces,
.ordered_events = true,
};
@@ -1046,6 +1537,8 @@ static int __cmd_report(bool display_info)
return PTR_ERR(session);
}
+ /* for lock function check */
+ symbol_conf.sort_by_name = true;
symbol__init(&session->header.env);
if (!perf_session__has_traces(session, "lock record"))
@@ -1056,12 +1549,20 @@ static int __cmd_report(bool display_info)
goto out_delete;
}
- if (setup_output_field(output_fields))
+ if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
+ pr_err("Initializing perf session tracepoint handlers failed\n");
+ goto out_delete;
+ }
+
+ if (setup_output_field(false, output_fields))
goto out_delete;
- if (select_key())
+ if (select_key(false))
goto out_delete;
+ if (show_thread_stats)
+ aggr_mode = LOCK_AGGR_TASK;
+
err = perf_session__process_events(session);
if (err)
goto out_delete;
@@ -1080,26 +1581,184 @@ out_delete:
return err;
}
+static void sighandler(int sig __maybe_unused) /* intentionally empty: installed for SIGINT/SIGCHLD/SIGTERM before __cmd_contention() blocks in pause() */
+{
+}
+
+/*
+ * Implementation of the "contention" subcommand.
+ *
+ * With use_bpf set, a BPF based collector is prepared for the configured
+ * target (optionally launching the workload given in @argv), the function
+ * waits in pause() for a signal and then reads the collected data back.
+ * Otherwise the samples of the input file are replayed through the
+ * contention tracepoint handlers.  In both cases the result is sorted and
+ * printed.
+ *
+ * Returns 0 on success and a non-zero error code on any failure.
+ */
+static int __cmd_contention(int argc, const char **argv)
+{
+	int err = -EINVAL;
+	struct perf_tool eops = {
+		.sample = process_sample_event,
+		.comm = perf_event__process_comm,
+		.mmap = perf_event__process_mmap,
+		.ordered_events = true,
+	};
+	struct perf_data data = {
+		.path = input_name,
+		.mode = PERF_DATA_MODE_READ,
+		.force = force,
+	};
+	struct lock_contention con = {
+		.target = &target,
+		.result = &lockhash_table[0],
+		.map_nr_entries = bpf_map_entries,
+	};
+
+	session = perf_session__new(use_bpf ? NULL : &data, &eops);
+	if (IS_ERR(session)) {
+		pr_err("Initializing perf session failed\n");
+		return PTR_ERR(session);
+	}
+
+	/* for lock function check */
+	symbol_conf.sort_by_name = true;
+	symbol__init(&session->header.env);
+
+	if (use_bpf) {
+		err = target__validate(&target);
+		if (err) {
+			char errbuf[512];
+
+			target__strerror(&target, err, errbuf, 512);
+			pr_err("%s\n", errbuf);
+			goto out_delete;
+		}
+
+		signal(SIGINT, sighandler);
+		signal(SIGCHLD, sighandler);
+		signal(SIGTERM, sighandler);
+
+		con.machine = &session->machines.host;
+
+		con.evlist = evlist__new();
+		if (con.evlist == NULL) {
+			err = -ENOMEM;
+			goto out_delete;
+		}
+
+		err = evlist__create_maps(con.evlist, &target);
+		if (err < 0)
+			goto out_delete;
+
+		if (argc) {
+			err = evlist__prepare_workload(con.evlist, &target,
+						       argv, false, NULL);
+			if (err < 0)
+				goto out_delete;
+		}
+
+		if (lock_contention_prepare(&con) < 0) {
+			pr_err("lock contention BPF setup failed\n");
+			/* err is 0 from the successful calls above */
+			err = -EINVAL;
+			goto out_delete;
+		}
+	} else {
+		if (!perf_session__has_traces(session, "lock record"))
+			goto out_delete;
+
+		if (!evlist__find_evsel_by_str(session->evlist,
+					       "lock:contention_begin")) {
+			pr_err("lock contention evsel not found\n");
+			goto out_delete;
+		}
+
+		if (perf_session__set_tracepoints_handlers(session,
+						contention_tracepoints)) {
+			pr_err("Initializing perf session tracepoint handlers failed\n");
+			goto out_delete;
+		}
+	}
+
+	/*
+	 * The BPF setup above leaves err at 0; re-arm it so that the early
+	 * exits below are reported as failures in both modes.
+	 */
+	err = -EINVAL;
+
+	if (setup_output_field(true, output_fields))
+		goto out_delete;
+
+	if (select_key(true))
+		goto out_delete;
+
+	if (show_thread_stats)
+		aggr_mode = LOCK_AGGR_TASK;
+	else
+		aggr_mode = LOCK_AGGR_CALLER;
+
+	if (use_bpf) {
+		lock_contention_start();
+		if (argc)
+			evlist__start_workload(con.evlist);
+
+		/* wait for signal */
+		pause();
+
+		lock_contention_stop();
+		lock_contention_read(&con);
+
+		/* abuse bad hist stats for lost entries */
+		bad_hist[BROKEN_CONTENDED] = con.lost;
+
+		err = 0;
+	} else {
+		err = perf_session__process_events(session);
+		if (err)
+			goto out_delete;
+	}
+
+	setup_pager();
+
+	sort_contention_result();
+	print_contention_result();
+
+out_delete:
+	evlist__delete(con.evlist);
+	lock_contention_finish();
+	perf_session__delete(session);
+	return err;
+}
+
+
static int __cmd_record(int argc, const char **argv)
{
const char *record_args[] = {
"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
};
+ const char *callgraph_args[] = {
+ "--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
+ };
unsigned int rec_argc, i, j, ret;
+ unsigned int nr_tracepoints;
+ unsigned int nr_callgraph_args = 0;
const char **rec_argv;
+ bool has_lock_stat = true;
for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
- pr_err("tracepoint %s is not enabled. "
- "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
- lock_tracepoints[i].name);
- return 1;
+ pr_debug("tracepoint %s is not enabled. "
+ "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
+ lock_tracepoints[i].name);
+ has_lock_stat = false;
+ break;
+ }
+ }
+
+ if (has_lock_stat)
+ goto setup_args;
+
+ for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
+ if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
+ pr_err("tracepoint %s is not enabled.\n",
+ contention_tracepoints[i].name);
+ return 1;
}
}
- rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+ nr_callgraph_args = ARRAY_SIZE(callgraph_args);
+
+setup_args:
+ rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;
+
+ if (has_lock_stat)
+ nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
+ else
+ nr_tracepoints = ARRAY_SIZE(contention_tracepoints);
+
/* factor of 2 is for -e in front of each tracepoint */
- rec_argc += 2 * ARRAY_SIZE(lock_tracepoints);
+ rec_argc += 2 * nr_tracepoints;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
if (!rec_argv)
@@ -1108,11 +1767,24 @@ static int __cmd_record(int argc, const char **argv)
for (i = 0; i < ARRAY_SIZE(record_args); i++)
rec_argv[i] = strdup(record_args[i]);
- for (j = 0; j < ARRAY_SIZE(lock_tracepoints); j++) {
+ for (j = 0; j < nr_tracepoints; j++) {
+ const char *ev_name;
+
+ if (has_lock_stat)
+ ev_name = strdup(lock_tracepoints[j].name);
+ else
+ ev_name = strdup(contention_tracepoints[j].name);
+
+ if (!ev_name)
+ return -ENOMEM;
+
rec_argv[i++] = "-e";
- rec_argv[i++] = strdup(lock_tracepoints[j].name);
+ rec_argv[i++] = ev_name;
}
+ for (j = 0; j < nr_callgraph_args; j++, i++)
+ rec_argv[i] = callgraph_args[j];
+
for (j = 1; j < (unsigned int)argc; j++, i++)
rec_argv[i] = argv[j];
@@ -1123,6 +1795,24 @@ static int __cmd_record(int argc, const char **argv)
return ret;
}
+/*
+ * Option callback: parse @str as an unsigned long (base auto-detected by
+ * strtoul) and store it through opt->value.
+ *
+ * Returns 0 on success.  Returns -1 after logging an error when @str is
+ * empty, has trailing characters, or strtoul reports an error via errno.
+ */
+static int parse_map_entry(const struct option *opt, const char *str,
+			   int unset __maybe_unused)
+{
+	unsigned long *len = (unsigned long *)opt->value;
+	unsigned long val;
+	char *endptr;
+
+	errno = 0;
+	val = strtoul(str, &endptr, 0);
+	/* endptr == str means no digits were consumed, e.g. an empty argument */
+	if (endptr == str || *endptr != '\0' || errno != 0) {
+		pr_err("invalid BPF map length: %s\n", str);
+		return -1;
+	}
+
+	*len = val;
+	return 0;
+}
+
int cmd_lock(int argc, const char **argv)
{
const struct option lock_options[] = {
@@ -1130,6 +1820,10 @@ int cmd_lock(int argc, const char **argv)
OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+ OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
+ OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
+ "file", "kallsyms pathname"),
OPT_END()
};
@@ -1154,12 +1848,33 @@ int cmd_lock(int argc, const char **argv)
OPT_PARENT(lock_options)
};
+ struct option contention_options[] = {
+ OPT_STRING('k', "key", &sort_key, "wait_total",
+ "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
+ OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
+ "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
+ OPT_BOOLEAN('t', "threads", &show_thread_stats,
+ "show per-thread lock stats"),
+ OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
+ OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
+ "System-wide collection from all CPUs"),
+ OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
+ "List of cpus to monitor"),
+ OPT_STRING('p', "pid", &target.pid, "pid",
+ "Trace on existing process id"),
+ OPT_STRING(0, "tid", &target.tid, "tid",
+ "Trace on existing thread id (exclusive to --pid)"),
+ OPT_CALLBACK(0, "map-nr-entries", &bpf_map_entries, "num",
+ "Max number of BPF map entries", parse_map_entry),
+ OPT_PARENT(lock_options)
+ };
+
const char * const info_usage[] = {
"perf lock info [<options>]",
NULL
};
const char *const lock_subcommands[] = { "record", "report", "script",
- "info", NULL };
+ "info", "contention", NULL };
const char *lock_usage[] = {
NULL,
NULL
@@ -1168,6 +1883,10 @@ int cmd_lock(int argc, const char **argv)
"perf lock report [<options>]",
NULL
};
+ const char * const contention_usage[] = {
+ "perf lock contention [<options>]",
+ NULL
+ };
unsigned int i;
int rc = 0;
@@ -1203,6 +1922,20 @@ int cmd_lock(int argc, const char **argv)
/* recycling report_lock_ops */
trace_handler = &report_lock_ops;
rc = __cmd_report(true);
+ } else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
+ trace_handler = &contention_lock_ops;
+ sort_key = "wait_total";
+ output_fields = "contended,wait_total,wait_max,avg_wait";
+
+#ifndef HAVE_BPF_SKEL
+ set_option_nobuild(contention_options, 'b', "use-bpf",
+ "no BUILD_BPF_SKEL=1", false);
+#endif
+ if (argc) {
+ argc = parse_options(argc, argv, contention_options,
+ contention_usage, 0);
+ }
+ rc = __cmd_contention(argc, argv);
} else {
usage_with_options(lock_usage, lock_options);
}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 9a71f0330137..f87ef43eb820 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1388,6 +1388,11 @@ static struct perf_event_header finished_round_event = {
.type = PERF_RECORD_FINISHED_ROUND,
};
+static struct perf_event_header finished_init_event = { /* header-only PERF_RECORD_FINISHED_INIT record, written by write_finished_init() */
+ .size = sizeof(struct perf_event_header),
+ .type = PERF_RECORD_FINISHED_INIT,
+};
+
static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
if (rec->opts.affinity != PERF_AFFINITY_SYS &&
@@ -1696,6 +1701,14 @@ static int record__synthesize_workload(struct record *rec, bool tail)
return err;
}
+/*
+ * Write the FINISHED_INIT record, but only from the call site whose @tail
+ * value matches rec->opts.tail_synthesize; the other call site is a no-op.
+ * Returns 0 when nothing is written, otherwise the result of record__write().
+ */
+static int write_finished_init(struct record *rec, bool tail)
+{
+	if (rec->opts.tail_synthesize == tail)
+		return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
+
+	return 0;
+}
+
static int record__synthesize(struct record *rec, bool tail);
static int
@@ -1710,6 +1723,8 @@ record__switch_output(struct record *rec, bool at_exit)
record__aio_mmap_read_sync(rec);
+ write_finished_init(rec, true);
+
record__synthesize(rec, true);
if (target__none(&rec->opts.target))
record__synthesize_workload(rec, true);
@@ -1764,6 +1779,7 @@ record__switch_output(struct record *rec, bool at_exit)
*/
if (target__none(&rec->opts.target))
record__synthesize_workload(rec, false);
+ write_finished_init(rec, false);
}
return fd;
}
@@ -1834,13 +1850,11 @@ static int record__synthesize(struct record *rec, bool tail)
goto out;
/* Synthesize id_index before auxtrace_info */
- if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) {
- err = perf_event__synthesize_id_index(tool,
- process_synthesized_event,
- session->evlist, machine);
- if (err)
- goto out;
- }
+ err = perf_event__synthesize_id_index(tool,
+ process_synthesized_event,
+ session->evlist, machine);
+ if (err)
+ goto out;
if (rec->opts.full_auxtrace) {
err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
@@ -1892,14 +1906,18 @@ static int record__synthesize(struct record *rec, bool tail)
err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
machine, opts);
- if (err < 0)
+ if (err < 0) {
pr_warning("Couldn't synthesize bpf events.\n");
+ err = 0;
+ }
if (rec->opts.synth & PERF_SYNTH_CGROUP) {
err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
machine);
- if (err < 0)
+ if (err < 0) {
pr_warning("Couldn't synthesize cgroup events.\n");
+ err = 0;
+ }
}
if (rec->opts.nr_threads_synthesize > 1) {
@@ -2421,6 +2439,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
trigger_ready(&auxtrace_snapshot_trigger);
trigger_ready(&switch_output_trigger);
perf_hooks__invoke_record_start();
+
+ /*
+ * Must write FINISHED_INIT so it will be seen after all other
+ * synthesized user events, but before any regular events.
+ */
+ err = write_finished_init(rec, false);
+ if (err < 0)
+ goto out_child;
+
for (;;) {
unsigned long long hits = thread->samples;
@@ -2565,6 +2592,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
record__waking(rec));
+ write_finished_init(rec, true);
+
if (target__none(&rec->opts.target))
record__synthesize_workload(rec, true);
@@ -3193,6 +3222,8 @@ static struct option __record_options[] = {
OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
"Record the sampled code address (ip) page size"),
OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
+ OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
+ "Record the sample identifier"),
OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
&record.opts.sample_time_set,
"Record the sample timestamps"),
@@ -3331,16 +3362,22 @@ static struct option __record_options[] = {
struct option *record_options = __record_options;
-static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
+/*
+ * Set the bit of every CPU in @cpus in @mask; a dummy map leaves the mask untouched.
+ * Returns 0 on success, -ENODEV when a CPU number does not fit into the mask.
+ */
+static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
{
struct perf_cpu cpu;
int idx;
if (cpu_map__is_dummy(cpus))
- return;
+ return 0;
- perf_cpu_map__for_each_cpu(cpu, idx, cpus)
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ /* Valid bit indexes are 0..nbits-1, so a cpu equal to nbits is already out of range */
+ if ((unsigned long)cpu.cpu >= mask->nbits)
+ return -ENODEV;
set_bit(cpu.cpu, mask->bits);
+ }
+
+ return 0;
}
static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
@@ -3352,7 +3389,9 @@ static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const cha
return -ENOMEM;
bitmap_zero(mask->bits, mask->nbits);
- record__mmap_cpu_mask_init(mask, cpus);
+ if (record__mmap_cpu_mask_init(mask, cpus))
+ return -ENODEV;
+
perf_cpu_map__put(cpus);
return 0;
@@ -3434,7 +3473,12 @@ static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_ma
pr_err("Failed to allocate CPUs mask\n");
return ret;
}
- record__mmap_cpu_mask_init(&cpus_mask, cpus);
+
+ ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
+ if (ret) {
+ pr_err("Failed to init cpu mask\n");
+ goto out_free_cpu_mask;
+ }
ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
if (ret) {
@@ -3675,7 +3719,8 @@ static int record__init_thread_default_masks(struct record *rec, struct perf_cpu
if (ret)
return ret;
- record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus);
+ if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
+ return -ENODEV;
rec->nr_threads = 1;
@@ -3805,6 +3850,9 @@ int cmd_record(int argc, const char **argv)
goto out_opts;
}
+ if (rec->opts.kcore)
+ rec->opts.text_poke = true;
+
if (rec->opts.kcore || record__threads_enabled(rec))
rec->data.is_dir = true;
@@ -3966,8 +4014,15 @@ int cmd_record(int argc, const char **argv)
arch__add_leaf_frame_record_opts(&rec->opts);
err = -ENOMEM;
- if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
- usage_with_options(record_usage, record_options);
+ if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
+ if (rec->opts.target.pid != NULL) {
+ pr_err("Couldn't create thread/CPU maps: %s\n",
+ errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
+ goto out;
+ }
+ else
+ usage_with_options(record_usage, record_options);
+ }
err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
if (err)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index afe4a5539ecc..91ed41cc7d88 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -74,7 +74,9 @@ struct report {
#ifdef HAVE_SLANG_SUPPORT
bool use_tui;
#endif
+#ifdef HAVE_GTK2_SUPPORT
bool use_gtk;
+#endif
bool use_stdio;
bool show_full_info;
bool show_threads;
@@ -1227,7 +1229,9 @@ int cmd_report(int argc, const char **argv)
#ifdef HAVE_SLANG_SUPPORT
OPT_BOOLEAN(0, "tui", &report.use_tui, "Use the TUI interface"),
#endif
+#ifdef HAVE_GTK2_SUPPORT
OPT_BOOLEAN(0, "gtk", &report.use_gtk, "Use the GTK2 interface"),
+#endif
OPT_BOOLEAN(0, "stdio", &report.use_stdio,
"Use the stdio interface"),
OPT_BOOLEAN(0, "header", &report.header, "Show data header."),
@@ -1516,8 +1520,10 @@ repeat:
else if (report.use_tui)
use_browser = 1;
#endif
+#ifdef HAVE_GTK2_SUPPORT
else if (report.use_gtk)
use_browser = 2;
+#endif
/* Force tty output for header output and per-thread stat. */
if (report.header || report.header_only || report.show_threads)
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 646bd938927a..a5cf243c337f 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -3355,7 +3355,8 @@ static bool schedstat_events_exposed(void)
static int __cmd_record(int argc, const char **argv)
{
unsigned int rec_argc, i, j;
- const char **rec_argv;
+ char **rec_argv;
+ const char **rec_argv_copy;
const char * const record_args[] = {
"record",
"-a",
@@ -3384,6 +3385,7 @@ static int __cmd_record(int argc, const char **argv)
ARRAY_SIZE(schedstat_args) : 0;
struct tep_event *waking_event;
+ int ret;
/*
* +2 for either "-e", "sched:sched_wakeup" or
@@ -3391,14 +3393,18 @@ static int __cmd_record(int argc, const char **argv)
*/
rec_argc = ARRAY_SIZE(record_args) + 2 + schedstat_argc + argc - 1;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
-
if (rec_argv == NULL)
return -ENOMEM;
+ rec_argv_copy = calloc(rec_argc + 1, sizeof(char *));
+ if (rec_argv_copy == NULL) {
+ free(rec_argv);
+ return -ENOMEM;
+ }
for (i = 0; i < ARRAY_SIZE(record_args); i++)
rec_argv[i] = strdup(record_args[i]);
- rec_argv[i++] = "-e";
+ rec_argv[i++] = strdup("-e");
waking_event = trace_event__tp_format("sched", "sched_waking");
if (!IS_ERR(waking_event))
rec_argv[i++] = strdup("sched:sched_waking");
@@ -3409,11 +3415,19 @@ static int __cmd_record(int argc, const char **argv)
rec_argv[i++] = strdup(schedstat_args[j]);
for (j = 1; j < (unsigned int)argc; j++, i++)
- rec_argv[i] = argv[j];
+ rec_argv[i] = strdup(argv[j]);
BUG_ON(i != rec_argc);
- return cmd_record(i, rec_argv);
+ memcpy(rec_argv_copy, rec_argv, sizeof(char *) * rec_argc);
+ ret = cmd_record(rec_argc, rec_argv_copy);
+
+ for (i = 0; i < rec_argc; i++)
+ free(rec_argv[i]);
+ free(rec_argv);
+ free(rec_argv_copy);
+
+ return ret;
}
int cmd_sched(int argc, const char **argv)
@@ -3563,7 +3577,7 @@ int cmd_sched(int argc, const char **argv)
if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
return __cmd_record(argc, argv);
- } else if (!strncmp(argv[0], "lat", 3)) {
+ } else if (strlen(argv[0]) > 2 && strstarts("latency", argv[0])) {
sched.tp_handler = &lat_ops;
if (argc > 1) {
argc = parse_options(argc, argv, latency_options, latency_usage, 0);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c689054002cc..029b4330e59b 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -125,6 +125,8 @@ enum perf_output_field {
PERF_OUTPUT_CODE_PAGE_SIZE = 1ULL << 34,
PERF_OUTPUT_INS_LAT = 1ULL << 35,
PERF_OUTPUT_BRSTACKINSNLEN = 1ULL << 36,
+ PERF_OUTPUT_MACHINE_PID = 1ULL << 37,
+ PERF_OUTPUT_VCPU = 1ULL << 38,
};
struct perf_script {
@@ -193,6 +195,8 @@ struct output_option {
{.str = "code_page_size", .field = PERF_OUTPUT_CODE_PAGE_SIZE},
{.str = "ins_lat", .field = PERF_OUTPUT_INS_LAT},
{.str = "brstackinsnlen", .field = PERF_OUTPUT_BRSTACKINSNLEN},
+ {.str = "machine_pid", .field = PERF_OUTPUT_MACHINE_PID},
+ {.str = "vcpu", .field = PERF_OUTPUT_VCPU},
};
enum {
@@ -441,6 +445,9 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
struct perf_event_attr *attr = &evsel->core.attr;
bool allow_user_set;
+ if (evsel__is_dummy_event(evsel))
+ return 0;
+
if (perf_header__has_feat(&session->header, HEADER_STAT))
return 0;
@@ -562,6 +569,8 @@ static struct evsel *find_first_output_type(struct evlist *evlist,
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_dummy_event(evsel))
+ continue;
if (output_type(evsel->core.attr.type) == (int)type)
return evsel;
}
@@ -746,6 +755,13 @@ static int perf_sample__fprintf_start(struct perf_script *script,
int printed = 0;
char tstr[128];
+ if (PRINT_FIELD(MACHINE_PID) && sample->machine_pid)
+ printed += fprintf(fp, "VM:%5d ", sample->machine_pid);
+
+ /* Print VCPU only for guest events i.e. with machine_pid */
+ if (PRINT_FIELD(VCPU) && sample->machine_pid)
+ printed += fprintf(fp, "VCPU:%03d ", sample->vcpu);
+
if (PRINT_FIELD(COMM)) {
const char *comm = thread ? thread__comm_str(thread) : ":-1";
@@ -3633,6 +3649,9 @@ int process_thread_map_event(struct perf_session *session,
struct perf_tool *tool = session->tool;
struct perf_script *script = container_of(tool, struct perf_script, tool);
+ if (dump_trace)
+ perf_event__fprintf_thread_map(event, stdout);
+
if (script->threads) {
pr_warning("Extra thread map event, ignoring.\n");
return 0;
@@ -3652,6 +3671,9 @@ int process_cpu_map_event(struct perf_session *session,
struct perf_tool *tool = session->tool;
struct perf_script *script = container_of(tool, struct perf_script, tool);
+ if (dump_trace)
+ perf_event__fprintf_cpu_map(event, stdout);
+
if (script->cpus) {
pr_warning("Extra cpu map event, ignoring.\n");
return 0;
@@ -3740,6 +3762,7 @@ int cmd_script(int argc, const char **argv)
bool header = false;
bool header_only = false;
bool script_started = false;
+ bool unsorted_dump = false;
char *rec_script_path = NULL;
char *rep_script_path = NULL;
struct perf_session *session;
@@ -3788,6 +3811,8 @@ int cmd_script(int argc, const char **argv)
const struct option options[] = {
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
+ OPT_BOOLEAN(0, "dump-unsorted-raw-trace", &unsorted_dump,
+ "dump unsorted raw trace in ASCII"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('L', "Latency", &latency_format,
@@ -3841,7 +3866,7 @@ int cmd_script(int argc, const char **argv)
OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL,
"Run xed disassembler on output", parse_xed),
OPT_CALLBACK_OPTARG(0, "call-trace", &itrace_synth_opts, NULL, NULL,
- "Decode calls from from itrace", parse_call_trace),
+ "Decode calls from itrace", parse_call_trace),
OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL,
"Decode calls and returns from itrace", parse_callret_trace),
OPT_STRING(0, "graph-function", &symbol_conf.graph_function, "symbol[,symbol...]",
@@ -3950,6 +3975,11 @@ int cmd_script(int argc, const char **argv)
data.path = input_name;
data.force = symbol_conf.force;
+ if (unsorted_dump) {
+ dump_trace = true;
+ script.tool.ordered_events = false;
+ }
+
if (symbol__validate_sym_arguments())
return -1;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d2ecd4d29624..0b4a62e4ff67 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -826,6 +826,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
}
evlist__for_each_entry(evsel_list, counter) {
+ counter->reset_group = false;
if (bpf_counter__load(counter, &target))
return -1;
if (!evsel__is_bpf(counter))
@@ -966,18 +967,18 @@ try_again_reset:
return err;
}
- /*
- * Enable counters and exec the command:
- */
- if (forks) {
- err = enable_counters();
- if (err)
- return -1;
+ err = enable_counters();
+ if (err)
+ return -1;
+
+ /* Exec the command, if any */
+ if (forks)
evlist__start_workload(evsel_list);
- t0 = rdclock();
- clock_gettime(CLOCK_MONOTONIC, &ref_time);
+ t0 = rdclock();
+ clock_gettime(CLOCK_MONOTONIC, &ref_time);
+ if (forks) {
if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
status = dispatch_events(forks, timeout, interval, &times);
if (child_pid != -1) {
@@ -995,13 +996,6 @@ try_again_reset:
if (WIFSIGNALED(status))
psignal(WTERMSIG(status), argv[0]);
} else {
- err = enable_counters();
- if (err)
- return -1;
-
- t0 = rdclock();
- clock_gettime(CLOCK_MONOTONIC, &ref_time);
-
status = dispatch_events(forks, timeout, interval, &times);
}
@@ -1256,6 +1250,8 @@ static struct option stat_options[] = {
"Merge identical named hybrid events"),
OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
"print counts with custom separator"),
+ OPT_BOOLEAN('j', "json-output", &stat_config.json_output,
+ "print counts in JSON format"),
OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
"monitor event in cgroup name only", parse_stat_cgroups),
OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name",
@@ -1442,6 +1438,7 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
+ case AGGR_MAX:
default:
return NULL;
}
@@ -1466,6 +1463,7 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
+ case AGGR_MAX:
default:
return NULL;
}
@@ -1616,6 +1614,7 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
+ case AGGR_MAX:
default:
return NULL;
}
@@ -1636,6 +1635,7 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
+ case AGGR_MAX:
default:
return NULL;
}
@@ -1686,12 +1686,6 @@ static int add_default_attributes(void)
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
};
- struct perf_event_attr default_sw_attrs[] = {
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
-};
/*
* Detailed stats (-d), covering the L1 and last level data caches:
@@ -1783,6 +1777,9 @@ static int add_default_attributes(void)
(PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
};
+
+ struct perf_event_attr default_null_attrs[] = {};
+
/* Set attrs if no event is selected and !null_run: */
if (stat_config.null_run)
return 0;
@@ -1861,22 +1858,11 @@ static int add_default_attributes(void)
unsigned int max_level = 1;
char *str = NULL;
bool warn = false;
- const char *pmu_name = "cpu";
+ const char *pmu_name = arch_get_topdown_pmu_name(evsel_list, true);
if (!force_metric_only)
stat_config.metric_only = true;
- if (perf_pmu__has_hybrid()) {
- if (!evsel_list->hybrid_pmu_name) {
- pr_warning("WARNING: default to use cpu_core topdown events\n");
- evsel_list->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu("core");
- }
-
- pmu_name = evsel_list->hybrid_pmu_name;
- if (!pmu_name)
- return -1;
- }
-
if (pmu_have_event(pmu_name, topdown_metric_L2_attrs[5])) {
metric_attrs = topdown_metric_L2_attrs;
max_level = 2;
@@ -1946,31 +1932,10 @@ setup_metrics:
free(str);
}
- if (!evsel_list->core.nr_entries) {
- if (perf_pmu__has_hybrid()) {
- struct parse_events_error errinfo;
- const char *hybrid_str = "cycles,instructions,branches,branch-misses";
-
- if (target__has_cpu(&target))
- default_sw_attrs[0].config = PERF_COUNT_SW_CPU_CLOCK;
-
- if (evlist__add_default_attrs(evsel_list,
- default_sw_attrs) < 0) {
- return -1;
- }
-
- parse_events_error__init(&errinfo);
- err = parse_events(evsel_list, hybrid_str, &errinfo);
- if (err) {
- fprintf(stderr,
- "Cannot set up hybrid events %s: %d\n",
- hybrid_str, err);
- parse_events_error__print(&errinfo, hybrid_str);
- }
- parse_events_error__exit(&errinfo);
- return err ? -1 : 0;
- }
+ if (!stat_config.topdown_level)
+ stat_config.topdown_level = TOPDOWN_MAX_LEVEL;
+ if (!evsel_list->core.nr_entries) {
if (target__has_cpu(&target))
default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
@@ -1986,9 +1951,8 @@ setup_metrics:
}
if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
return -1;
-
- stat_config.topdown_level = TOPDOWN_MAX_LEVEL;
- if (arch_evlist__add_default_attrs(evsel_list) < 0)
+ /* Platform specific attrs */
+ if (evlist__add_default_attrs(evsel_list, default_null_attrs) < 0)
return -1;
}
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index afce731cec16..e2e9ad929baf 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -36,6 +36,7 @@
#include "util/data.h"
#include "util/debug.h"
#include "util/string2.h"
+#include "util/tracepoint.h"
#include <linux/err.h>
#ifdef LACKS_OPEN_MEMSTREAM_PROTOTYPE
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 897fc504918b..0bd9d01c0df9 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -53,6 +53,7 @@
#include "trace-event.h"
#include "util/parse-events.h"
#include "util/bpf-loader.h"
+#include "util/tracepoint.h"
#include "callchain.h"
#include "print_binary.h"
#include "string2.h"
@@ -2748,7 +2749,7 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
/*
* Suppress this argument if its value is zero and
- * and we don't have a string associated in an
+ * we don't have a string associated in an
* strarray for it.
*/
if (val == 0 &&
@@ -4280,6 +4281,7 @@ static int trace__replay(struct trace *trace)
goto out;
evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter");
+ trace->syscalls.events.sys_enter = evsel;
/* older kernels have syscalls tp versus raw_syscalls */
if (evsel == NULL)
evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter");
@@ -4292,6 +4294,7 @@ static int trace__replay(struct trace *trace)
}
evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit");
+ trace->syscalls.events.sys_exit = evsel;
if (evsel == NULL)
evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit");
if (evsel &&
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 7303e80a639c..d03afea86217 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -38,6 +38,7 @@ int cmd_mem(int argc, const char **argv);
int cmd_data(int argc, const char **argv);
int cmd_ftrace(int argc, const char **argv);
int cmd_daemon(int argc, const char **argv);
+int cmd_kwork(int argc, const char **argv);
int find_scripts(char **scripts_array, char **scripts_path_array, int num,
int pathlen);
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 4aa034aefa33..8fcab5ad00c5 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -18,6 +18,7 @@ perf-iostat mainporcelain common
perf-kallsyms mainporcelain common
perf-kmem mainporcelain common
perf-kvm mainporcelain common
+perf-kwork mainporcelain common
perf-list mainporcelain common
perf-lock mainporcelain common
perf-mem mainporcelain common
diff --git a/tools/perf/dlfilters/dlfilter-show-cycles.c b/tools/perf/dlfilters/dlfilter-show-cycles.c
index 9eccc97bff82..6d47298ebe9f 100644
--- a/tools/perf/dlfilters/dlfilter-show-cycles.c
+++ b/tools/perf/dlfilters/dlfilter-show-cycles.c
@@ -98,9 +98,9 @@ int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, vo
static void print_vals(__u64 cycles, __u64 delta)
{
if (delta)
- printf("%10llu %10llu ", cycles, delta);
+ printf("%10llu %10llu ", (unsigned long long)cycles, (unsigned long long)delta);
else
- printf("%10llu %10s ", cycles, "");
+ printf("%10llu %10s ", (unsigned long long)cycles, "");
}
int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
diff --git a/tools/perf/include/perf/perf_dlfilter.h b/tools/perf/include/perf/perf_dlfilter.h
index 3eef03d661b4..a26e2f129f83 100644
--- a/tools/perf/include/perf/perf_dlfilter.h
+++ b/tools/perf/include/perf/perf_dlfilter.h
@@ -9,6 +9,12 @@
#include <linux/perf_event.h>
#include <linux/types.h>
+/*
+ * The following macro can be used to determine if this header defines
+ * perf_dlfilter_sample machine_pid and vcpu.
+ */
+#define PERF_DLFILTER_HAS_MACHINE_PID
+
/* Definitions for perf_dlfilter_sample flags */
enum {
PERF_DLFILTER_FLAG_BRANCH = 1ULL << 0,
@@ -62,6 +68,8 @@ struct perf_dlfilter_sample {
__u64 raw_callchain_nr; /* Number of raw_callchain entries */
const __u64 *raw_callchain; /* Refer <linux/perf_event.h> */
const char *event;
+ __s32 machine_pid;
+ __s32 vcpu;
};
/*
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 0170cb0819d6..c21b3973641a 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -91,6 +91,7 @@ static struct cmd_struct commands[] = {
{ "data", cmd_data, 0 },
{ "ftrace", cmd_ftrace, 0 },
{ "daemon", cmd_daemon, 0 },
+ { "kwork", cmd_kwork, 0 },
};
struct pager_config {
diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
index a055dee6a46a..04ef95174660 100644
--- a/tools/perf/pmu-events/Build
+++ b/tools/perf/pmu-events/Build
@@ -1,7 +1,3 @@
-hostprogs := jevents
-
-jevents-y += json.o jsmn.o jevents.o
-HOSTCFLAGS_jevents.o = -I$(srctree)/tools/include
pmu-events-y += pmu-events.o
JDIR = pmu-events/arch/$(SRCARCH)
JSON = $(shell [ -d $(JDIR) ] && \
@@ -9,10 +5,23 @@ JSON = $(shell [ -d $(JDIR) ] && \
JDIR_TEST = pmu-events/arch/test
JSON_TEST = $(shell [ -d $(JDIR_TEST) ] && \
find $(JDIR_TEST) -name '*.json')
+JEVENTS_PY = pmu-events/jevents.py
+
+ifeq ($(JEVENTS_ARCH),)
+JEVENTS_ARCH=$(SRCARCH)
+endif
#
# Locate/process JSON files in pmu-events/arch/
# directory and create tables in pmu-events.c.
#
-$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JSON_TEST) $(JEVENTS)
- $(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
+
+ifeq ($(NO_JEVENTS),1)
+$(OUTPUT)pmu-events/pmu-events.c: pmu-events/empty-pmu-events.c
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,gen)cp $< $@
+else
+$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JSON_TEST) $(JEVENTS_PY)
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) pmu-events/arch $@
+endif
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index ed29e4433c67..406f6edd4e12 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -27,7 +27,9 @@
0x00000000410fd0d0,v1,arm/cortex-a77,core
0x00000000410fd400,v1,arm/neoverse-v1,core
0x00000000410fd410,v1,arm/cortex-a78,core
+0x00000000410fd4b0,v1,arm/cortex-a78,core
0x00000000410fd440,v1,arm/cortex-x1,core
+0x00000000410fd4c0,v1,arm/cortex-x1,core
0x00000000410fd460,v1,arm/cortex-a510,core
0x00000000410fd470,v1,arm/cortex-a710,core
0x00000000410fd480,v1,arm/cortex-x2,core
diff --git a/tools/perf/pmu-events/arch/s390/cf_z16/pai.json b/tools/perf/pmu-events/arch/s390/cf_z16/pai.json
new file mode 100644
index 000000000000..cf8563d059b9
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z16/pai.json
@@ -0,0 +1,1101 @@
+[
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4096",
+ "EventName": "CRYPTO_ALL",
+ "BriefDescription": "CRYPTO ALL",
+ "PublicDescription": "Sum of all non-zero cryptography counters"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4097",
+ "EventName": "KM_DEA",
+ "BriefDescription": "KM DEA",
+ "PublicDescription": "KM-DEA function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4098",
+ "EventName": "KM_TDEA_128",
+ "BriefDescription": "KM TDEA 128",
+ "PublicDescription": "KM-TDEA-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4099",
+ "EventName": "KM_TDEA_192",
+ "BriefDescription": "KM TDEA 192",
+ "PublicDescription": "KM-TDEA-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4100",
+ "EventName": "KM_ENCRYPTED_DEA",
+ "BriefDescription": "KM ENCRYPTED DEA",
+ "PublicDescription": "KM-Encrypted-DEA function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4101",
+ "EventName": "KM_ENCRYPTED_TDEA_128",
+ "BriefDescription": "KM ENCRYPTED TDEA 128",
+ "PublicDescription": "KM-Encrypted-TDEA-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4102",
+ "EventName": "KM_ENCRYPTED_TDEA_192",
+ "BriefDescription": "KM ENCRYPTED TDEA 192",
+ "PublicDescription": "KM-Encrypted-TDEA-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4103",
+ "EventName": "KM_AES_128",
+ "BriefDescription": "KM AES 128",
+ "PublicDescription": "KM-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4104",
+ "EventName": "KM_AES_192",
+ "BriefDescription": "KM AES 192",
+ "PublicDescription": "KM-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4105",
+ "EventName": "KM_AES_256",
+ "BriefDescription": "KM AES 256",
+ "PublicDescription": "KM-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4106",
+ "EventName": "KM_ENCRYPTED_AES_128",
+ "BriefDescription": "KM ENCRYPTED AES 128",
+ "PublicDescription": "KM-Encrypted-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4107",
+ "EventName": "KM_ENCRYPTED_AES_192",
+ "BriefDescription": "KM ENCRYPTED AES 192",
+ "PublicDescription": "KM-Encrypted-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4108",
+ "EventName": "KM_ENCRYPTED_AES_256",
+ "BriefDescription": "KM ENCRYPTED AES 256",
+ "PublicDescription": "KM-Encrypted-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4109",
+ "EventName": "KM_XTS_AES_128",
+ "BriefDescription": "KM XTS AES 128",
+ "PublicDescription": "KM-XTS-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4110",
+ "EventName": "KM_XTS_AES_256",
+ "BriefDescription": "KM XTS AES 256",
+ "PublicDescription": "KM-XTS-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4111",
+ "EventName": "KM_XTS_ENCRYPTED_AES_128",
+ "BriefDescription": "KM XTS ENCRYPTED AES 128",
+ "PublicDescription": "KM-XTS-Encrypted-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4112",
+ "EventName": "KM_XTS_ENCRYPTED_AES_256",
+ "BriefDescription": "KM XTS ENCRYPTED AES 256",
+ "PublicDescription": "KM-XTS-Encrypted-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4113",
+ "EventName": "KMC_DEA",
+ "BriefDescription": "KMC DEA",
+ "PublicDescription": "KMC-DEA function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4114",
+ "EventName": "KMC_TDEA_128",
+ "BriefDescription": "KMC TDEA 128",
+ "PublicDescription": "KMC-TDEA-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4115",
+ "EventName": "KMC_TDEA_192",
+ "BriefDescription": "KMC TDEA 192",
+ "PublicDescription": "KMC-TDEA-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4116",
+ "EventName": "KMC_ENCRYPTED_DEA",
+ "BriefDescription": "KMC ENCRYPTED DEA",
+ "PublicDescription": "KMC-Encrypted-DEA function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4117",
+ "EventName": "KMC_ENCRYPTED_TDEA_128",
+ "BriefDescription": "KMC ENCRYPTED TDEA 128",
+ "PublicDescription": "KMC-Encrypted-TDEA-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4118",
+ "EventName": "KMC_ENCRYPTED_TDEA_192",
+ "BriefDescription": "KMC ENCRYPTED TDEA 192",
+ "PublicDescription": "KMC-Encrypted-TDEA-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4119",
+ "EventName": "KMC_AES_128",
+ "BriefDescription": "KMC AES 128",
+ "PublicDescription": "KMC-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4120",
+ "EventName": "KMC_AES_192",
+ "BriefDescription": "KMC AES 192",
+ "PublicDescription": "KMC-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4121",
+ "EventName": "KMC_AES_256",
+ "BriefDescription": "KMC AES 256",
+ "PublicDescription": "KMC-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4122",
+ "EventName": "KMC_ENCRYPTED_AES_128",
+ "BriefDescription": "KMC ENCRYPTED AES 128",
+ "PublicDescription": "KMC-Encrypted-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4123",
+ "EventName": "KMC_ENCRYPTED_AES_192",
+ "BriefDescription": "KMC ENCRYPTED AES 192",
+ "PublicDescription": "KMC-Encrypted-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4124",
+ "EventName": "KMC_ENCRYPTED_AES_256",
+ "BriefDescription": "KMC ENCRYPTED AES 256",
+ "PublicDescription": "KMC-Encrypted-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4125",
+ "EventName": "KMC_PRNG",
+ "BriefDescription": "KMC PRNG",
+ "PublicDescription": "KMC-PRNG function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4126",
+ "EventName": "KMA_GCM_AES_128",
+ "BriefDescription": "KMA GCM AES 128",
+ "PublicDescription": "KMA-GCM-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4127",
+ "EventName": "KMA_GCM_AES_192",
+ "BriefDescription": "KMA GCM AES 192",
+ "PublicDescription": "KMA-GCM-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4128",
+ "EventName": "KMA_GCM_AES_256",
+ "BriefDescription": "KMA GCM AES 256",
+ "PublicDescription": "KMA-GCM-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4129",
+ "EventName": "KMA_GCM_ENCRYPTED_AES_128",
+ "BriefDescription": "KMA GCM ENCRYPTED AES 128",
+ "PublicDescription": "KMA-GCM-Encrypted-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4130",
+ "EventName": "KMA_GCM_ENCRYPTED_AES_192",
+ "BriefDescription": "KMA GCM ENCRYPTED AES 192",
+ "PublicDescription": "KMA-GCM-Encrypted-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4131",
+ "EventName": "KMA_GCM_ENCRYPTED_AES_256",
+ "BriefDescription": "KMA GCM ENCRYPTED AES 256",
+ "PublicDescription": "KMA-GCM-Encrypted-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4132",
+ "EventName": "KMF_DEA",
+ "BriefDescription": "KMF DEA",
+ "PublicDescription": "KMF-DEA function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4133",
+ "EventName": "KMF_TDEA_128",
+ "BriefDescription": "KMF TDEA 128",
+ "PublicDescription": "KMF-TDEA-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4134",
+ "EventName": "KMF_TDEA_192",
+ "BriefDescription": "KMF TDEA 192",
+ "PublicDescription": "KMF-TDEA-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4135",
+ "EventName": "KMF_ENCRYPTED_DEA",
+ "BriefDescription": "KMF ENCRYPTED DEA",
+ "PublicDescription": "KMF-Encrypted-DEA function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4136",
+ "EventName": "KMF_ENCRYPTED_TDEA_128",
+ "BriefDescription": "KMF ENCRYPTED TDEA 128",
+ "PublicDescription": "KMF-Encrypted-TDEA-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4137",
+ "EventName": "KMF_ENCRYPTED_TDEA_192",
+ "BriefDescription": "KMF ENCRYPTED TDEA 192",
+ "PublicDescription": "KMF-Encrypted-TDEA-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4138",
+ "EventName": "KMF_AES_128",
+ "BriefDescription": "KMF AES 128",
+ "PublicDescription": "KMF-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4139",
+ "EventName": "KMF_AES_192",
+ "BriefDescription": "KMF AES 192",
+ "PublicDescription": "KMF-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4140",
+ "EventName": "KMF_AES_256",
+ "BriefDescription": "KMF AES 256",
+ "PublicDescription": "KMF-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4141",
+ "EventName": "KMF_ENCRYPTED_AES_128",
+ "BriefDescription": "KMF ENCRYPTED AES 128",
+ "PublicDescription": "KMF-Encrypted-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4142",
+ "EventName": "KMF_ENCRYPTED_AES_192",
+ "BriefDescription": "KMF ENCRYPTED AES 192",
+ "PublicDescription": "KMF-Encrypted-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4143",
+ "EventName": "KMF_ENCRYPTED_AES_256",
+ "BriefDescription": "KMF ENCRYPTED AES 256",
+ "PublicDescription": "KMF-Encrypted-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4144",
+ "EventName": "KMCTR_DEA",
+ "BriefDescription": "KMCTR DEA",
+ "PublicDescription": "KMCTR-DEA function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4145",
+ "EventName": "KMCTR_TDEA_128",
+ "BriefDescription": "KMCTR TDEA 128",
+ "PublicDescription": "KMCTR-TDEA-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4146",
+ "EventName": "KMCTR_TDEA_192",
+ "BriefDescription": "KMCTR TDEA 192",
+ "PublicDescription": "KMCTR-TDEA-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4147",
+ "EventName": "KMCTR_ENCRYPTED_DEA",
+ "BriefDescription": "KMCTR ENCRYPTED DEA",
+ "PublicDescription": "KMCTR-Encrypted-DEA function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4148",
+ "EventName": "KMCTR_ENCRYPTED_TDEA_128",
+ "BriefDescription": "KMCTR ENCRYPTED TDEA 128",
+ "PublicDescription": "KMCTR-Encrypted-TDEA-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4149",
+ "EventName": "KMCTR_ENCRYPTED_TDEA_192",
+ "BriefDescription": "KMCTR ENCRYPTED TDEA 192",
+ "PublicDescription": "KMCTR-Encrypted-TDEA-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4150",
+ "EventName": "KMCTR_AES_128",
+ "BriefDescription": "KMCTR AES 128",
+ "PublicDescription": "KMCTR-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4151",
+ "EventName": "KMCTR_AES_192",
+ "BriefDescription": "KMCTR AES 192",
+ "PublicDescription": "KMCTR-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4152",
+ "EventName": "KMCTR_AES_256",
+ "BriefDescription": "KMCTR AES 256",
+ "PublicDescription": "KMCTR-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4153",
+ "EventName": "KMCTR_ENCRYPTED_AES_128",
+ "BriefDescription": "KMCTR ENCRYPTED AES 128",
+ "PublicDescription": "KMCTR-Encrypted-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4154",
+ "EventName": "KMCTR_ENCRYPTED_AES_192",
+ "BriefDescription": "KMCTR ENCRYPTED AES 192",
+ "PublicDescription": "KMCTR-Encrypted-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4155",
+ "EventName": "KMCTR_ENCRYPTED_AES_256",
+ "BriefDescription": "KMCTR ENCRYPTED AES 256",
+ "PublicDescription": "KMCTR-Encrypted-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4156",
+ "EventName": "KMO_DEA",
+ "BriefDescription": "KMO DEA",
+ "PublicDescription": "KMO-DEA function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4157",
+ "EventName": "KMO_TDEA_128",
+ "BriefDescription": "KMO TDEA 128",
+ "PublicDescription": "KMO-TDEA-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4158",
+ "EventName": "KMO_TDEA_192",
+ "BriefDescription": "KMO TDEA 192",
+ "PublicDescription": "KMO-TDEA-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4159",
+ "EventName": "KMO_ENCRYPTED_DEA",
+ "BriefDescription": "KMO ENCRYPTED DEA",
+ "PublicDescription": "KMO-Encrypted-DEA function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4160",
+ "EventName": "KMO_ENCRYPTED_TDEA_128",
+ "BriefDescription": "KMO ENCRYPTED TDEA 128",
+ "PublicDescription": "KMO-Encrypted-TDEA-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4161",
+ "EventName": "KMO_ENCRYPTED_TDEA_192",
+ "BriefDescription": "KMO ENCRYPTED TDEA 192",
+ "PublicDescription": "KMO-Encrypted-TDEA-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4162",
+ "EventName": "KMO_AES_128",
+ "BriefDescription": "KMO AES 128",
+ "PublicDescription": "KMO-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4163",
+ "EventName": "KMO_AES_192",
+ "BriefDescription": "KMO AES 192",
+ "PublicDescription": "KMO-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4164",
+ "EventName": "KMO_AES_256",
+ "BriefDescription": "KMO AES 256",
+ "PublicDescription": "KMO-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4165",
+ "EventName": "KMO_ENCRYPTED_AES_128",
+ "BriefDescription": "KMO ENCRYPTED AES 128",
+ "PublicDescription": "KMO-Encrypted-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4166",
+ "EventName": "KMO_ENCRYPTED_AES_192",
+ "BriefDescription": "KMO ENCRYPTED AES 192",
+ "PublicDescription": "KMO-Encrypted-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4167",
+ "EventName": "KMO_ENCRYPTED_AES_256",
+ "BriefDescription": "KMO ENCRYPTED AES 256",
+ "PublicDescription": "KMO-Encrypted-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4168",
+ "EventName": "KIMD_SHA_1",
+ "BriefDescription": "KIMD SHA 1",
+ "PublicDescription": "KIMD-SHA-1 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4169",
+ "EventName": "KIMD_SHA_256",
+ "BriefDescription": "KIMD SHA 256",
+ "PublicDescription": "KIMD-SHA-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4170",
+ "EventName": "KIMD_SHA_512",
+ "BriefDescription": "KIMD SHA 512",
+ "PublicDescription": "KIMD-SHA-512 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4171",
+ "EventName": "KIMD_SHA3_224",
+ "BriefDescription": "KIMD SHA3 224",
+ "PublicDescription": "KIMD-SHA3-224 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4172",
+ "EventName": "KIMD_SHA3_256",
+ "BriefDescription": "KIMD SHA3 256",
+ "PublicDescription": "KIMD-SHA3-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4173",
+ "EventName": "KIMD_SHA3_384",
+ "BriefDescription": "KIMD SHA3 384",
+ "PublicDescription": "KIMD-SHA3-384 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4174",
+ "EventName": "KIMD_SHA3_512",
+ "BriefDescription": "KIMD SHA3 512",
+ "PublicDescription": "KIMD-SHA3-512 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4175",
+ "EventName": "KIMD_SHAKE_128",
+ "BriefDescription": "KIMD SHAKE 128",
+ "PublicDescription": "KIMD-SHAKE-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4176",
+ "EventName": "KIMD_SHAKE_256",
+ "BriefDescription": "KIMD SHAKE 256",
+ "PublicDescription": "KIMD-SHAKE-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4177",
+ "EventName": "KIMD_GHASH",
+ "BriefDescription": "KIMD GHASH",
+ "PublicDescription": "KIMD-GHASH function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4178",
+ "EventName": "KLMD_SHA_1",
+ "BriefDescription": "KLMD SHA 1",
+ "PublicDescription": "KLMD-SHA-1 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4179",
+ "EventName": "KLMD_SHA_256",
+ "BriefDescription": "KLMD SHA 256",
+ "PublicDescription": "KLMD-SHA-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4180",
+ "EventName": "KLMD_SHA_512",
+ "BriefDescription": "KLMD SHA 512",
+ "PublicDescription": "KLMD-SHA-512 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4181",
+ "EventName": "KLMD_SHA3_224",
+ "BriefDescription": "KLMD SHA3 224",
+ "PublicDescription": "KLMD-SHA3-224 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4182",
+ "EventName": "KLMD_SHA3_256",
+ "BriefDescription": "KLMD SHA3 256",
+ "PublicDescription": "KLMD-SHA3-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4183",
+ "EventName": "KLMD_SHA3_384",
+ "BriefDescription": "KLMD SHA3 384",
+ "PublicDescription": "KLMD-SHA3-384 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4184",
+ "EventName": "KLMD_SHA3_512",
+ "BriefDescription": "KLMD SHA3 512",
+ "PublicDescription": "KLMD-SHA3-512 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4185",
+ "EventName": "KLMD_SHAKE_128",
+ "BriefDescription": "KLMD SHAKE 128",
+ "PublicDescription": "KLMD-SHAKE-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4186",
+ "EventName": "KLMD_SHAKE_256",
+ "BriefDescription": "KLMD SHAKE 256",
+ "PublicDescription": "KLMD-SHAKE-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4187",
+ "EventName": "KMAC_DEA",
+ "BriefDescription": "KMAC DEA",
+ "PublicDescription": "KMAC-DEA function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4188",
+ "EventName": "KMAC_TDEA_128",
+ "BriefDescription": "KMAC TDEA 128",
+ "PublicDescription": "KMAC-TDEA-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4189",
+ "EventName": "KMAC_TDEA_192",
+ "BriefDescription": "KMAC TDEA 192",
+ "PublicDescription": "KMAC-TDEA-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4190",
+ "EventName": "KMAC_ENCRYPTED_DEA",
+ "BriefDescription": "KMAC ENCRYPTED DEA",
+ "PublicDescription": "KMAC-Encrypted-DEA function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4191",
+ "EventName": "KMAC_ENCRYPTED_TDEA_128",
+ "BriefDescription": "KMAC ENCRYPTED TDEA 128",
+ "PublicDescription": "KMAC-Encrypted-TDEA-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4192",
+ "EventName": "KMAC_ENCRYPTED_TDEA_192",
+ "BriefDescription": "KMAC ENCRYPTED TDEA 192",
+ "PublicDescription": "KMAC-Encrypted-TDEA-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4193",
+ "EventName": "KMAC_AES_128",
+ "BriefDescription": "KMAC AES 128",
+ "PublicDescription": "KMAC-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4194",
+ "EventName": "KMAC_AES_192",
+ "BriefDescription": "KMAC AES 192",
+ "PublicDescription": "KMAC-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4195",
+ "EventName": "KMAC_AES_256",
+ "BriefDescription": "KMAC AES 256",
+ "PublicDescription": "KMAC-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4196",
+ "EventName": "KMAC_ENCRYPTED_AES_128",
+ "BriefDescription": "KMAC ENCRYPTED AES 128",
+ "PublicDescription": "KMAC-Encrypted-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4197",
+ "EventName": "KMAC_ENCRYPTED_AES_192",
+ "BriefDescription": "KMAC ENCRYPTED AES 192",
+ "PublicDescription": "KMAC-Encrypted-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4198",
+ "EventName": "KMAC_ENCRYPTED_AES_256",
+ "BriefDescription": "KMAC ENCRYPTED AES 256",
+ "PublicDescription": "KMAC-Encrypted-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4199",
+ "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_DEA",
+ "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING DEA",
+ "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-DEA function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4200",
+ "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_128",
+ "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING TDEA 128",
+ "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-TDEA-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4201",
+ "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_192",
+ "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING TDEA 192",
+ "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-TDEA-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4202",
+ "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_DEA",
+ "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED DEA",
+ "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-DEA function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4203",
+ "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128",
+ "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED TDEA 128",
+ "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-TDEA-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4204",
+ "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192",
+ "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED TDEA 192",
+ "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-TDEA-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4205",
+ "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_128",
+ "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING AES 128",
+ "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4206",
+ "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_192",
+ "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING AES 192",
+ "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4207",
+ "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256",
+ "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING AES 256",
+ "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4208",
+ "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128",
+ "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED AES 128",
+ "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4209",
+ "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192",
+ "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED AES 192",
+ "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-AES-192 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4210",
+ "EventName": "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256A",
+ "BriefDescription": "PCC COMPUTE LAST BLOCK CMAC USING ENCRYPTED AES 256A",
+ "PublicDescription": "PCC-Compute-Last-Block-CMAC-Using-Encrypted-AES-256A function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4211",
+ "EventName": "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128",
+ "BriefDescription": "PCC COMPUTE XTS PARAMETER USING AES 128",
+ "PublicDescription": "PCC-Compute-XTS-Parameter-Using-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4212",
+ "EventName": "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256",
+ "BriefDescription": "PCC COMPUTE XTS PARAMETER USING AES 256",
+ "PublicDescription": "PCC-Compute-XTS-Parameter-Using-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4213",
+ "EventName": "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128",
+ "BriefDescription": "PCC COMPUTE XTS PARAMETER USING ENCRYPTED AES 128",
+ "PublicDescription": "PCC-Compute-XTS-Parameter-Using-Encrypted-AES-128 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4214",
+ "EventName": "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_256",
+ "BriefDescription": "PCC COMPUTE XTS PARAMETER USING ENCRYPTED AES 256",
+ "PublicDescription": "PCC-Compute-XTS-Parameter-Using-Encrypted-AES-256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4215",
+ "EventName": "PCC_SCALAR_MULTIPLY_P256",
+ "BriefDescription": "PCC SCALAR MULTIPLY P256",
+ "PublicDescription": "PCC-Scalar-Multiply-P256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4216",
+ "EventName": "PCC_SCALAR_MULTIPLY_P384",
+ "BriefDescription": "PCC SCALAR MULTIPLY P384",
+ "PublicDescription": "PCC-Scalar-Multiply-P384 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4217",
+ "EventName": "PCC_SCALAR_MULTIPLY_P521",
+ "BriefDescription": "PCC SCALAR MULTIPLY P521",
+ "PublicDescription": "PCC-Scalar-Multiply-P521 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4218",
+ "EventName": "PCC_SCALAR_MULTIPLY_ED25519",
+ "BriefDescription": "PCC SCALAR MULTIPLY ED25519",
+ "PublicDescription": "PCC-Scalar-Multiply-Ed25519 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4219",
+ "EventName": "PCC_SCALAR_MULTIPLY_ED448",
+ "BriefDescription": "PCC SCALAR MULTIPLY ED448",
+ "PublicDescription": "PCC-Scalar-Multiply-Ed448 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4220",
+ "EventName": "PCC_SCALAR_MULTIPLY_X25519",
+ "BriefDescription": "PCC SCALAR MULTIPLY X25519",
+ "PublicDescription": "PCC-Scalar-Multiply-X25519 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4221",
+ "EventName": "PCC_SCALAR_MULTIPLY_X448",
+ "BriefDescription": "PCC SCALAR MULTIPLY X448",
+ "PublicDescription": "PCC-Scalar-Multiply-X448 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4222",
+ "EventName": "PRNO_SHA_512_DRNG",
+ "BriefDescription": "PRNO SHA 512 DRNG",
+ "PublicDescription": "PRNO-SHA-512-DRNG function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4223",
+ "EventName": "PRNO_TRNG_QUERY_RAW_TO_CONDITIONED_RATIO",
+ "BriefDescription": "PRNO TRNG QUERY RAW TO CONDITIONED RATIO",
+ "PublicDescription": "PRNO-TRNG-Query-Raw-to-Conditioned-Ratio function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4224",
+ "EventName": "PRNO_TRNG",
+ "BriefDescription": "PRNO TRNG",
+ "PublicDescription": "PRNO-TRNG function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4225",
+ "EventName": "KDSA_ECDSA_VERIFY_P256",
+ "BriefDescription": "KDSA ECDSA VERIFY P256",
+ "PublicDescription": "KDSA-ECDSA-Verify-P256 function ending with CC=0 or CC=2"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4226",
+ "EventName": "KDSA_ECDSA_VERIFY_P384",
+ "BriefDescription": "KDSA ECDSA VERIFY P384",
+ "PublicDescription": "KDSA-ECDSA-Verify-P384 function ending with CC=0 or CC=2"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4227",
+ "EventName": "KDSA_ECDSA_VERIFY_P521",
+ "BriefDescription": "KDSA ECDSA VERIFY P521",
+ "PublicDescription": "KDSA-ECDSA-Verify-P521 function ending with CC=0 or CC=2"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4228",
+ "EventName": "KDSA_ECDSA_SIGN_P256",
+ "BriefDescription": "KDSA ECDSA SIGN P256",
+ "PublicDescription": "KDSA-ECDSA-Sign-P256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4229",
+ "EventName": "KDSA_ECDSA_SIGN_P384",
+ "BriefDescription": "KDSA ECDSA SIGN P384",
+ "PublicDescription": "KDSA-ECDSA-Sign-P384 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4230",
+ "EventName": "KDSA_ECDSA_SIGN_P521",
+ "BriefDescription": "KDSA ECDSA SIGN P521",
+ "PublicDescription": "KDSA-ECDSA-Sign-P521 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4231",
+ "EventName": "KDSA_ENCRYPTED_ECDSA_SIGN_P256",
+ "BriefDescription": "KDSA ENCRYPTED ECDSA SIGN P256",
+ "PublicDescription": "KDSA-Encrypted-ECDSA-Sign-P256 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4232",
+ "EventName": "KDSA_ENCRYPTED_ECDSA_SIGN_P384",
+ "BriefDescription": "KDSA ENCRYPTED ECDSA SIGN P384",
+ "PublicDescription": "KDSA-Encrypted-ECDSA-Sign-P384 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4233",
+ "EventName": "KDSA_ENCRYPTED_ECDSA_SIGN_P521",
+ "BriefDescription": "KDSA ENCRYPTED ECDSA SIGN P521",
+ "PublicDescription": "KDSA-Encrypted-ECDSA-Sign-P521 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4234",
+ "EventName": "KDSA_EDDSA_VERIFY_ED25519",
+ "BriefDescription": "KDSA EDDSA VERIFY ED25519",
+ "PublicDescription": "KDSA-EdDSA-Verify-Ed25519 function ending with CC=0 or CC=2"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4235",
+ "EventName": "KDSA_EDDSA_VERIFY_ED448",
+ "BriefDescription": "KDSA EDDSA VERIFY ED448",
+ "PublicDescription": "KDSA-EdDSA-Verify-Ed448 function ending with CC=0 or CC=2"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4236",
+ "EventName": "KDSA_EDDSA_SIGN_ED25519",
+ "BriefDescription": "KDSA EDDSA SIGN ED25519",
+ "PublicDescription": "KDSA-EdDSA-Sign-Ed25519 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4237",
+ "EventName": "KDSA_EDDSA_SIGN_ED448",
+ "BriefDescription": "KDSA EDDSA SIGN ED448",
+ "PublicDescription": "KDSA-EdDSA-Sign-Ed448 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4238",
+ "EventName": "KDSA_ENCRYPTED_EDDSA_SIGN_ED25519",
+ "BriefDescription": "KDSA ENCRYPTED EDDSA SIGN ED25519",
+ "PublicDescription": "KDSA-Encrypted-EdDSA-Sign-Ed25519 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4239",
+ "EventName": "KDSA_ENCRYPTED_EDDSA_SIGN_ED448",
+ "BriefDescription": "KDSA ENCRYPTED EDDSA SIGN ED448",
+ "PublicDescription": "KDSA-Encrypted-EdDSA-Sign-Ed448 function ending with CC=0"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4240",
+ "EventName": "PCKMO_ENCRYPT_DEA_KEY",
+ "BriefDescription": "PCKMO ENCRYPT DEA KEY",
+ "PublicDescription": "PCKMO-Encrypt-DEA-key function"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4241",
+ "EventName": "PCKMO_ENCRYPT_TDEA_128_KEY",
+ "BriefDescription": "PCKMO ENCRYPT TDEA 128 KEY",
+ "PublicDescription": "PCKMO-Encrypt-TDEA-128-key function"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4242",
+ "EventName": "PCKMO_ENCRYPT_TDEA_192_KEY",
+ "BriefDescription": "PCKMO ENCRYPT TDEA 192 KEY",
+ "PublicDescription": "PCKMO-Encrypt-TDEA-192-key function"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4243",
+ "EventName": "PCKMO_ENCRYPT_AES_128_KEY",
+ "BriefDescription": "PCKMO ENCRYPT AES 128 KEY",
+ "PublicDescription": "PCKMO-Encrypt-AES-128-key function"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4244",
+ "EventName": "PCKMO_ENCRYPT_AES_192_KEY",
+ "BriefDescription": "PCKMO ENCRYPT AES 192 KEY",
+ "PublicDescription": "PCKMO-Encrypt-AES-192-key function"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4245",
+ "EventName": "PCKMO_ENCRYPT_AES_256_KEY",
+ "BriefDescription": "PCKMO ENCRYPT AES 256 KEY",
+ "PublicDescription": "PCKMO-Encrypt-AES-256-key function"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4246",
+ "EventName": "PCKMO_ENCRYPT_ECC_P256_KEY",
+ "BriefDescription": "PCKMO ENCRYPT ECC P256 KEY",
+ "PublicDescription": "PCKMO-Encrypt-ECC-P256-key function"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4247",
+ "EventName": "PCKMO_ENCRYPT_ECC_P384_KEY",
+ "BriefDescription": "PCKMO ENCRYPT ECC P384 KEY",
+ "PublicDescription": "PCKMO-Encrypt-ECC-P384-key function"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4248",
+ "EventName": "PCKMO_ENCRYPT_ECC_P521_KEY",
+ "BriefDescription": "PCKMO ENCRYPT ECC P521 KEY",
+ "PublicDescription": "PCKMO-Encrypt-ECC-P521-key function"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4249",
+ "EventName": "PCKMO_ENCRYPT_ECC_ED25519_KEY",
+ "BriefDescription": "PCKMO ENCRYPT ECC ED25519 KEY",
+ "PublicDescription": "PCKMO-Encrypt-ECC-Ed25519-key function"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4250",
+ "EventName": "PCKMO_ENCRYPT_ECC_ED448_KEY",
+ "BriefDescription": "PCKMO ENCRYPT ECC ED448 KEY",
+ "PublicDescription": "PCKMO-Encrypt-ECC-Ed448-key function"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4251",
+ "EventName": "IBM_RESERVED_155",
+ "BriefDescription": "IBM RESERVED_155",
+ "PublicDescription": "Reserved for IBM use"
+ },
+ {
+ "Unit": "PAI-CRYPTO",
+ "EventCode": "4252",
+ "EventName": "IBM_RESERVED_156",
+ "BriefDescription": "IBM RESERVED_156",
+ "PublicDescription": "Reserved for IBM use"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/test/test_soc/cpu/metrics.json b/tools/perf/pmu-events/arch/test/test_soc/cpu/metrics.json
new file mode 100644
index 000000000000..42d9b5242fd7
--- /dev/null
+++ b/tools/perf/pmu-events/arch/test/test_soc/cpu/metrics.json
@@ -0,0 +1,64 @@
+[
+ {
+ "MetricExpr": "1 / IPC",
+ "MetricName": "CPI"
+ },
+ {
+ "MetricExpr": "inst_retired.any / cpu_clk_unhalted.thread",
+ "MetricName": "IPC",
+ "MetricGroup": "group1"
+ },
+ {
+ "MetricExpr": "idq_uops_not_delivered.core / (4 * (( ( cpu_clk_unhalted.thread / 2 ) * ( 1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk ) )))",
+ "MetricName": "Frontend_Bound_SMT"
+ },
+ {
+ "MetricExpr": "l1d\\-loads\\-misses / inst_retired.any",
+ "MetricName": "dcache_miss_cpi"
+ },
+ {
+ "MetricExpr": "l1i\\-loads\\-misses / inst_retired.any",
+ "MetricName": "icache_miss_cycles"
+ },
+ {
+ "MetricExpr": "(dcache_miss_cpi + icache_miss_cycles)",
+ "MetricName": "cache_miss_cycles",
+ "MetricGroup": "group1"
+ },
+ {
+ "MetricExpr": "l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit",
+ "MetricName": "DCache_L2_All_Hits"
+ },
+ {
+ "MetricExpr": "max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss",
+ "MetricName": "DCache_L2_All_Miss"
+ },
+ {
+ "MetricExpr": "dcache_l2_all_hits + dcache_l2_all_miss",
+ "MetricName": "DCache_L2_All"
+ },
+ {
+ "MetricExpr": "d_ratio(dcache_l2_all_hits, dcache_l2_all)",
+ "MetricName": "DCache_L2_Hits"
+ },
+ {
+ "MetricExpr": "d_ratio(dcache_l2_all_miss, dcache_l2_all)",
+ "MetricName": "DCache_L2_Misses"
+ },
+ {
+ "MetricExpr": "ipc + M2",
+ "MetricName": "M1"
+ },
+ {
+ "MetricExpr": "ipc + M1",
+ "MetricName": "M2"
+ },
+ {
+ "MetricExpr": "1/M3",
+ "MetricName": "M3"
+ },
+ {
+ "MetricExpr": "64 * l1d.replacement / 1000000000 / duration_time",
+ "MetricName": "L1D_Cache_Fill_BW"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
index f8bdf7812b51..095dd8c7f161 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
@@ -592,13 +592,13 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
+ "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricName": "IpBranch",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)",
+ "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL",
"MetricName": "IpCall",
"Unit": "cpu_atom"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/cache.json b/tools/perf/pmu-events/arch/x86/alderlake/cache.json
index b83ed129c454..887dce4dfeba 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/cache.json
@@ -1,45 +1,49 @@
[
{
- "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or tlb miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
+ "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
"EventCode": "0x34",
"EventName": "MEM_BOUND_STALLS.IFETCH",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x38",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or tlb miss which hit in DRAM or MMIO (Non-DRAM).",
+ "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in DRAM or MMIO (Non-DRAM).",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
"EventCode": "0x34",
"EventName": "MEM_BOUND_STALLS.IFETCH_DRAM_HIT",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or tlb miss which hit in the L2 cache.",
+ "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
"EventCode": "0x34",
"EventName": "MEM_BOUND_STALLS.IFETCH_L2_HIT",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or tlb miss which hit in the last level cache or other core with HITE/F/M.",
+ "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the LLC or other core with HITE/F/M.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
"EventCode": "0x34",
"EventName": "MEM_BOUND_STALLS.IFETCH_LLC_HIT",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_atom"
},
@@ -51,6 +55,7 @@
"EventName": "MEM_BOUND_STALLS.LOAD",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x7",
"Unit": "cpu_atom"
},
@@ -62,6 +67,7 @@
"EventName": "MEM_BOUND_STALLS.LOAD_DRAM_HIT",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_atom"
},
@@ -73,6 +79,7 @@
"EventName": "MEM_BOUND_STALLS.LOAD_L2_HIT",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_atom"
},
@@ -84,11 +91,12 @@
"EventName": "MEM_BOUND_STALLS.LOAD_LLC_HIT",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of load ops retired that hit in DRAM.",
+ "BriefDescription": "Counts the number of load uops retired that hit in DRAM.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
"Data_LA": "1",
@@ -101,7 +109,7 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of load ops retired that hit in the L2 cache.",
+ "BriefDescription": "Counts the number of load uops retired that hit in the L2 cache.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
"Data_LA": "1",
@@ -114,9 +122,10 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of load ops retired that hit in the L3 cache.",
+ "BriefDescription": "Counts the number of load uops retired that hit in the L3 cache.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
+ "Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
"PEBS": "1",
@@ -133,6 +142,7 @@
"EventName": "MEM_SCHEDULER_BLOCK.ALL",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "20003",
+ "Speculative": "1",
"UMask": "0x7",
"Unit": "cpu_atom"
},
@@ -144,6 +154,7 @@
"EventName": "MEM_SCHEDULER_BLOCK.LD_BUF",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "20003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_atom"
},
@@ -155,6 +166,7 @@
"EventName": "MEM_SCHEDULER_BLOCK.RSV",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "20003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_atom"
},
@@ -166,6 +178,7 @@
"EventName": "MEM_SCHEDULER_BLOCK.ST_BUF",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "20003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_atom"
},
@@ -202,6 +215,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
+ "L1_Hit_Indication": "1",
"MSRIndex": "0x3F6",
"MSRValue": "0x80",
"PEBS": "2",
@@ -218,6 +232,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
+ "L1_Hit_Indication": "1",
"MSRIndex": "0x3F6",
"MSRValue": "0x10",
"PEBS": "2",
@@ -234,6 +249,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
+ "L1_Hit_Indication": "1",
"MSRIndex": "0x3F6",
"MSRValue": "0x100",
"PEBS": "2",
@@ -250,6 +266,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
+ "L1_Hit_Indication": "1",
"MSRIndex": "0x3F6",
"MSRValue": "0x20",
"PEBS": "2",
@@ -266,6 +283,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
+ "L1_Hit_Indication": "1",
"MSRIndex": "0x3F6",
"MSRValue": "0x4",
"PEBS": "2",
@@ -282,6 +300,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
+ "L1_Hit_Indication": "1",
"MSRIndex": "0x3F6",
"MSRValue": "0x200",
"PEBS": "2",
@@ -298,6 +317,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
+ "L1_Hit_Indication": "1",
"MSRIndex": "0x3F6",
"MSRValue": "0x40",
"PEBS": "2",
@@ -314,6 +334,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
+ "L1_Hit_Indication": "1",
"MSRIndex": "0x3F6",
"MSRValue": "0x8",
"PEBS": "2",
@@ -324,7 +345,7 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts all the retired split loads.",
+ "BriefDescription": "Counts the number of retired split load uops.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
"Data_LA": "1",
@@ -338,11 +359,13 @@
},
{
"BriefDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled.",
- "CollectPEBSRecord": "2",
+ "CollectPEBSRecord": "3",
"Counter": "0,1,2,3,4,5",
+ "Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.STORE_LATENCY",
- "PEBS": "1",
+ "L1_Hit_Indication": "1",
+ "PEBS": "2",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
"UMask": "0x6",
@@ -350,7 +373,7 @@
},
{
"BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5",
"EventCode": "0xB7",
"EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
@@ -367,10 +390,23 @@
"EventName": "TOPDOWN_FE_BOUND.ICACHE",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "L1D.HWPF_MISS",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x51",
+ "EventName": "L1D.HWPF_MISS",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of cache lines replaced in L1 data cache.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
@@ -378,6 +414,7 @@
"EventName": "L1D.REPLACEMENT",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -389,6 +426,7 @@
"EventName": "L1D_PEND_MISS.FB_FULL",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -402,6 +440,7 @@
"EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -413,6 +452,7 @@
"EventName": "L1D_PEND_MISS.L2_STALL",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -424,6 +464,7 @@
"EventName": "L1D_PEND_MISS.L2_STALLS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -435,6 +476,7 @@
"EventName": "L1D_PEND_MISS.PENDING",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -447,6 +489,7 @@
"EventName": "L1D_PEND_MISS.PENDING_CYCLES",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -458,17 +501,31 @@
"EventName": "L2_LINES_IN.ALL",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x1f",
"Unit": "cpu_core"
},
{
- "BriefDescription": "All L2 requests.[This event is alias to L2_RQSTS.REFERENCES]",
+ "BriefDescription": "Cache lines that have been L2 hardware prefetched but not used by demand accesses",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x26",
+ "EventName": "L2_LINES_OUT.USELESS_HWPF",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "All accesses to L2 cache[This event is alias to L2_RQSTS.REFERENCES]",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.ALL",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0xff",
"Unit": "cpu_core"
},
@@ -480,6 +537,7 @@
"EventName": "L2_REQUEST.MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x3f",
"Unit": "cpu_core"
},
@@ -491,17 +549,19 @@
"EventName": "L2_RQSTS.ALL_CODE_RD",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0xe4",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Demand Data Read requests",
+ "BriefDescription": "Demand Data Read access L2 cache",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0xe1",
"Unit": "cpu_core"
},
@@ -513,10 +573,23 @@
"EventName": "L2_RQSTS.ALL_DEMAND_MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x27",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "L2_RQSTS.ALL_HWPF",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.ALL_HWPF",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0xf0",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "RFO requests to L2 cache.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
@@ -524,6 +597,7 @@
"EventName": "L2_RQSTS.ALL_RFO",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0xe2",
"Unit": "cpu_core"
},
@@ -535,6 +609,7 @@
"EventName": "L2_RQSTS.CODE_RD_HIT",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0xc4",
"Unit": "cpu_core"
},
@@ -546,6 +621,7 @@
"EventName": "L2_RQSTS.CODE_RD_MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x24",
"Unit": "cpu_core"
},
@@ -557,21 +633,35 @@
"EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0xc1",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Demand Data Read miss L2, no rejects",
+ "BriefDescription": "Demand Data Read miss L2 cache",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x21",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "L2_RQSTS.HWPF_MISS",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x24",
+ "EventName": "L2_RQSTS.HWPF_MISS",
+ "PEBScounters": "0,1,2,3",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0x30",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Read requests with true-miss in L2 cache.[This event is alias to L2_REQUEST.MISS]",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
@@ -579,17 +669,19 @@
"EventName": "L2_RQSTS.MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x3f",
"Unit": "cpu_core"
},
{
- "BriefDescription": "All L2 requests.[This event is alias to L2_REQUEST.ALL]",
+ "BriefDescription": "All accesses to L2 cache[This event is alias to L2_REQUEST.ALL]",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.REFERENCES",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0xff",
"Unit": "cpu_core"
},
@@ -601,6 +693,7 @@
"EventName": "L2_RQSTS.RFO_HIT",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0xc2",
"Unit": "cpu_core"
},
@@ -612,6 +705,7 @@
"EventName": "L2_RQSTS.RFO_MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x22",
"Unit": "cpu_core"
},
@@ -623,6 +717,7 @@
"EventName": "L2_RQSTS.SWPF_HIT",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0xc8",
"Unit": "cpu_core"
},
@@ -634,22 +729,36 @@
"EventName": "L2_RQSTS.SWPF_MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x28",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "Core-originated cacheable requests that missed L3 (Except hardware prefetches to the L3)",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.MISS",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x41",
"Unit": "cpu_core"
},
{
- "BriefDescription": "All retired load instructions.",
+ "BriefDescription": "Core-originated cacheable requests that refer to L3 (Except hardware prefetches to the L3)",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x2e",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x4f",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired load instructions.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"Data_LA": "1",
@@ -662,7 +771,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "All retired store instructions.",
+ "BriefDescription": "Retired store instructions.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"Data_LA": "1",
@@ -764,6 +873,7 @@
"EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0xfd",
"Unit": "cpu_core"
},
@@ -961,7 +1071,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "MEM_STORE_RETIRED.L2_HIT",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x44",
@@ -983,7 +1093,7 @@
},
{
"BriefDescription": "Counts demand data reads that resulted in a snoop hit in another cores caches, data forwarding is required as the data is modified.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
@@ -993,8 +1103,8 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
- "Counter": "0,1,2,3",
+ "BriefDescription": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
@@ -1005,7 +1115,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that resulted in a snoop hit in another cores caches, data forwarding is required as the data is modified.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
@@ -1015,13 +1125,14 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "OFFCORE_REQUESTS.ALL_REQUESTS",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x80",
"Unit": "cpu_core"
},
@@ -1033,6 +1144,7 @@
"EventName": "OFFCORE_REQUESTS.DATA_RD",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
@@ -1044,6 +1156,7 @@
"EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1051,22 +1164,26 @@
"BriefDescription": "This event is deprecated. Refer to new event OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
+ "Errata": "ADL038",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"CounterMask": "1",
+ "Errata": "ADL038",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
@@ -1079,17 +1196,20 @@
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
+ "Errata": "ADL038",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
@@ -1101,6 +1221,7 @@
"EventName": "SW_PREFETCH_ACCESS.NTA",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1112,6 +1233,7 @@
"EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
@@ -1123,6 +1245,7 @@
"EventName": "SW_PREFETCH_ACCESS.T0",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -1134,7 +1257,8 @@
"EventName": "SW_PREFETCH_ACCESS.T1_T2",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
index 310c2a8f3e6b..48a4605fc057 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
@@ -7,6 +7,7 @@
"EventName": "MACHINE_CLEARS.FP_ASSIST",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "20003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_atom"
},
@@ -23,7 +24,7 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "ARITH.FPDIV_ACTIVE",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
@@ -31,6 +32,7 @@
"EventName": "ARITH.FPDIV_ACTIVE",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -42,50 +44,55 @@
"EventName": "ASSISTS.FP",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "ASSISTS.SSE_AVX_MIX",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.SSE_AVX_MIX",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_0",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_1",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_5",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -155,4 +162,4 @@
"UMask": "0x2",
"Unit": "cpu_core"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/frontend.json b/tools/perf/pmu-events/arch/x86/alderlake/frontend.json
index 908588f63314..2cfa70b2d5e1 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/frontend.json
@@ -7,6 +7,7 @@
"EventName": "BACLEARS.ANY",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_atom"
},
@@ -18,6 +19,7 @@
"EventName": "ICACHE.ACCESSES",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x3",
"Unit": "cpu_atom"
},
@@ -29,6 +31,7 @@
"EventName": "ICACHE.MISSES",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_atom"
},
@@ -40,6 +43,7 @@
"EventName": "DECODE.LCP",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "500009",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -51,6 +55,7 @@
"EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -295,6 +300,21 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "FRONTEND_RETIRED.MS_FLOWS",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc6",
+ "EventName": "FRONTEND_RETIRED.MS_FLOWS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x8",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
@@ -310,7 +330,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
@@ -332,6 +352,7 @@
"EventName": "ICACHE_DATA.STALLS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "500009",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -343,6 +364,7 @@
"EventName": "ICACHE_TAG.STALLS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -355,6 +377,7 @@
"EventName": "IDQ.DSB_CYCLES_ANY",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
@@ -367,6 +390,7 @@
"EventName": "IDQ.DSB_CYCLES_OK",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
@@ -378,6 +402,7 @@
"EventName": "IDQ.DSB_UOPS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
@@ -390,6 +415,7 @@
"EventName": "IDQ.MITE_CYCLES_ANY",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -402,6 +428,7 @@
"EventName": "IDQ.MITE_CYCLES_OK",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -413,6 +440,7 @@
"EventName": "IDQ.MITE_UOPS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -425,6 +453,7 @@
"EventName": "IDQ.MS_CYCLES_ANY",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
@@ -438,6 +467,7 @@
"EventName": "IDQ.MS_SWITCHES",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
@@ -449,6 +479,7 @@
"EventName": "IDQ.MS_UOPS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
@@ -460,6 +491,7 @@
"EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -472,6 +504,7 @@
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -485,7 +518,8 @@
"Invert": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/memory.json b/tools/perf/pmu-events/arch/x86/alderlake/memory.json
index 1d4d1ebe2a74..586fb961e46d 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/memory.json
@@ -1,52 +1,61 @@
[
{
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.",
+ "CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
"EventCode": "0x05",
"EventName": "LD_HEAD.ANY_AT_RET",
+ "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0xff",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a core bound stall including a store address match, a DTLB miss or a page walk that detains the load from retiring.",
- "Counter": "0,1,2,3",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
"EventCode": "0x05",
"EventName": "LD_HEAD.L1_BOUND_AT_RET",
+ "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0xf4",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to other block cases when load subsequently retires when load subsequently retires.",
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
"EventCode": "0x05",
"EventName": "LD_HEAD.OTHER_AT_RET",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0xc0",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a pagewalk when load subsequently retires.",
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a pagewalk.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
"EventCode": "0x05",
"EventName": "LD_HEAD.PGWALK_AT_RET",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0xa0",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a store address match when load subsequently retires.",
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
"EventCode": "0x05",
"EventName": "LD_HEAD.ST_ADDR_AT_RET",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x84",
"Unit": "cpu_atom"
},
@@ -58,12 +67,13 @@
"EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "20003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5",
"EventCode": "0xB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -74,7 +84,7 @@
},
{
"BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5",
"EventCode": "0xB7",
"EventName": "OCR.DEMAND_RFO.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -92,6 +102,7 @@
"EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x6",
"Unit": "cpu_core"
},
@@ -103,6 +114,7 @@
"EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -115,6 +127,7 @@
"EventName": "MEMORY_ACTIVITY.CYCLES_L1D_MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -127,11 +140,12 @@
"EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x3",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "MEMORY_ACTIVITY.STALLS_L2_MISS",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"CounterMask": "5",
@@ -139,11 +153,12 @@
"EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x5",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "MEMORY_ACTIVITY.STALLS_L3_MISS",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"CounterMask": "9",
@@ -151,6 +166,7 @@
"EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x9",
"Unit": "cpu_core"
},
@@ -283,7 +299,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "Retired instructions with at least 1 store uop. This PEBS event is the trigger for stores sampled by the PEBS Store Facility.",
+ "BriefDescription": "Retired memory store access operations. A PDist event for PEBS Store Latency Facility.",
"CollectPEBSRecord": "2",
"Data_LA": "1",
"EventCode": "0xcd",
@@ -295,7 +311,7 @@
},
{
"BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -306,7 +322,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -315,4 +331,4 @@
"UMask": "0x1",
"Unit": "cpu_core"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/other.json b/tools/perf/pmu-events/arch/x86/alderlake/other.json
index dc810f093fb0..67a9c13cc71d 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/other.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/other.json
@@ -1,7 +1,7 @@
[
{
"BriefDescription": "Counts demand data reads that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5",
"EventCode": "0xB7",
"EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -12,7 +12,7 @@
},
{
"BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5",
"EventCode": "0xB7",
"EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -23,7 +23,7 @@
},
{
"BriefDescription": "Counts streaming stores that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5",
"EventCode": "0xB7",
"EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -33,74 +33,68 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc1",
- "EventName": "ASSISTS.ANY",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "100003",
- "UMask": "0x1f",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Count all other microcode assist beyond FP, AVX_TILE_MIX and A/D assists (counted by their own sub-events). This includes assists at uop writeback like AVX* load/store (non-FP) assists, Null Assist in SNC (due to lack of FP precision format convert with FMA3x3 uarch) or assists generated by ROB (like assists to due to Missprediction for FSW register - fixed in SNC)",
+ "BriefDescription": "ASSISTS.HARDWARE",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.HARDWARE",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "ASSISTS.PAGE_FAULT",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.PAGE_FAULT",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "CORE_POWER.LICENSE_1",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "CORE_POWER.LICENSE_1",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "CORE_POWER.LICENSE_2",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "CORE_POWER.LICENSE_2",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "CORE_POWER.LICENSE_3",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "CORE_POWER.LICENSE_3",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts demand data reads that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -111,7 +105,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -122,7 +116,7 @@
},
{
"BriefDescription": "Counts streaming stores that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -132,7 +126,61 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa5",
+ "EventName": "RS.EMPTY",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x7",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xa5",
+ "EventName": "RS.EMPTY_COUNT",
+ "Invert": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x7",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event RS.EMPTY_COUNT",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EdgeDetect": "1",
+ "EventCode": "0xa5",
+ "EventName": "RS_EMPTY.COUNT",
+ "Invert": "1",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x7",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event RS.EMPTY",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa5",
+ "EventName": "RS_EMPTY.CYCLES",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x7",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "XQ.FULL_CYCLES",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"CounterMask": "1",
@@ -140,7 +188,8 @@
"EventName": "XQ.FULL_CYCLES",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
index de2c6e0ef654..d02e078a90c9 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
@@ -23,7 +23,31 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and Interrupt call and return.",
+ "BriefDescription": "Counts the number of JCC (Jump on Conditional Code) branch instructions retired, including both taken and not taken branches.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7e",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_TAKEN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfe",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
"EventCode": "0xc4",
@@ -35,6 +59,54 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.INDIRECT",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xeb",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.INDIRECT_CALL",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfb",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.IND_CALL",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfb",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.JCC",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7e",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of near CALL branch instructions retired.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
@@ -47,6 +119,66 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of near RET branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xf7",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NON_RETURN_IND",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xeb",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of near relative CALL branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.REL_CALL",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfd",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.RETURN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xf7",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND_TAKEN",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.TAKEN_JCC",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfe",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
@@ -58,12 +190,121 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7e",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfe",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xeb",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfb",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.IND_CALL",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfb",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.JCC",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7e",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NON_RETURN_IND",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xeb",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.RETURN",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xf7",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.TAKEN_JCC",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfe",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)",
"CollectPEBSRecord": "2",
- "Counter": "33",
+ "Counter": "Fixed counter 1",
"EventName": "CPU_CLK_UNHALTED.CORE",
"PEBScounters": "33",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_atom"
},
@@ -75,25 +316,28 @@
"EventName": "CPU_CLK_UNHALTED.CORE_P",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event)",
"CollectPEBSRecord": "2",
- "Counter": "34",
+ "Counter": "Fixed counter 2",
"EventName": "CPU_CLK_UNHALTED.REF_TSC",
"PEBScounters": "34",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x3",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)",
"CollectPEBSRecord": "2",
- "Counter": "33",
+ "Counter": "Fixed counter 1",
"EventName": "CPU_CLK_UNHALTED.THREAD",
"PEBScounters": "33",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_atom"
},
@@ -105,12 +349,13 @@
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of instructions retired. (Fixed event)",
+ "BriefDescription": "Counts the total number of instructions retired. (Fixed event)",
"CollectPEBSRecord": "2",
- "Counter": "32",
+ "Counter": "Fixed counter 0",
"EventName": "INST_RETIRED.ANY",
"PEBS": "1",
"PEBScounters": "32",
@@ -119,6 +364,17 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the total number of instructions retired.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "PEBS": "1",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "2000003",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
@@ -162,6 +418,7 @@
"EventName": "MACHINE_CLEARS.DISAMBIGUATION",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "20003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_atom"
},
@@ -173,6 +430,7 @@
"EventName": "MACHINE_CLEARS.MRN_NUKE",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x80",
"Unit": "cpu_atom"
},
@@ -182,9 +440,9 @@
"Counter": "0,1,2,3,4,5",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.PAGE_FAULT",
- "PEBS": "1",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "20003",
+ "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_atom"
},
@@ -196,6 +454,7 @@
"EventName": "MACHINE_CLEARS.SLOW",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "20003",
+ "Speculative": "1",
"UMask": "0x6f",
"Unit": "cpu_atom"
},
@@ -207,17 +466,19 @@
"EventName": "MACHINE_CLEARS.SMC",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "20003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of issue slots not consumed due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing uops from the UROM until a specified older uop retires.",
+ "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
"EventCode": "0x75",
"EventName": "SERIALIZATION.NON_C01_MS_SCB",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_atom"
},
@@ -229,6 +490,7 @@
"EventName": "TOPDOWN_BAD_SPECULATION.ALL",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"Unit": "cpu_atom"
},
{
@@ -239,6 +501,7 @@
"EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_atom"
},
@@ -250,6 +513,7 @@
"EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x3",
"Unit": "cpu_atom"
},
@@ -261,6 +525,7 @@
"EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_atom"
},
@@ -272,6 +537,7 @@
"EventName": "TOPDOWN_BAD_SPECULATION.NUKE",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_atom"
},
@@ -283,6 +549,7 @@
"EventName": "TOPDOWN_BE_BOUND.ALL",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"Unit": "cpu_atom"
},
{
@@ -293,6 +560,7 @@
"EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_atom"
},
@@ -304,6 +572,7 @@
"EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_atom"
},
@@ -315,6 +584,7 @@
"EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_atom"
},
@@ -326,6 +596,7 @@
"EventName": "TOPDOWN_BE_BOUND.REGISTER",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_atom"
},
@@ -337,6 +608,7 @@
"EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x40",
"Unit": "cpu_atom"
},
@@ -348,6 +620,7 @@
"EventName": "TOPDOWN_BE_BOUND.SERIALIZATION",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_atom"
},
@@ -359,6 +632,7 @@
"EventName": "TOPDOWN_FE_BOUND.ALL",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"Unit": "cpu_atom"
},
{
@@ -369,6 +643,7 @@
"EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_atom"
},
@@ -380,6 +655,7 @@
"EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x40",
"Unit": "cpu_atom"
},
@@ -391,6 +667,7 @@
"EventName": "TOPDOWN_FE_BOUND.CISC",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_atom"
},
@@ -402,6 +679,7 @@
"EventName": "TOPDOWN_FE_BOUND.DECODE",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_atom"
},
@@ -413,17 +691,19 @@
"EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x8d",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.",
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
"EventCode": "0x71",
"EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x72",
"Unit": "cpu_atom"
},
@@ -435,6 +715,7 @@
"EventName": "TOPDOWN_FE_BOUND.ITLB",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_atom"
},
@@ -446,6 +727,7 @@
"EventName": "TOPDOWN_FE_BOUND.OTHER",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x80",
"Unit": "cpu_atom"
},
@@ -457,6 +739,7 @@
"EventName": "TOPDOWN_FE_BOUND.PREDECODE",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_atom"
},
@@ -527,6 +810,7 @@
"EventName": "ARITH.DIVIDER_ACTIVE",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x9",
"Unit": "cpu_core"
},
@@ -539,6 +823,7 @@
"EventName": "ARITH.DIV_ACTIVE",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x9",
"Unit": "cpu_core"
},
@@ -551,11 +836,24 @@
"EventName": "ARITH.FP_DIVIDER_ACTIVE",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "This event counts the cycles the integer divider is busy.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb0",
+ "EventName": "ARITH.IDIV_ACTIVE",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event ARITH.IDIV_ACTIVE",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
@@ -563,10 +861,23 @@
"EventName": "ARITH.INT_DIVIDER_ACTIVE",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.ANY",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "100003",
+ "Speculative": "1",
+ "UMask": "0x1f",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "All branch instructions retired.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
@@ -709,7 +1020,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "number of branch instructions retired that were mispredicted and taken. Non PEBS",
+ "BriefDescription": "number of branch instructions retired that were mispredicted and taken.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc5",
@@ -757,6 +1068,42 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.C01",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.C02",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Core clocks when the thread is in the C0.1 or C0.2 or running a PAUSE in C0 ACPI state.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xec",
+ "EventName": "CPU_CLK_UNHALTED.C0_WAIT",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x70",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Cycle counts are evenly distributed between active threads in the Core.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
@@ -764,6 +1111,7 @@
"EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -775,22 +1123,24 @@
"EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "25003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "CPU_CLK_UNHALTED.PAUSE",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x40",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "CPU_CLK_UNHALTED.PAUSE_INST",
"Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
"EdgeDetect": "1",
@@ -798,6 +1148,7 @@
"EventName": "CPU_CLK_UNHALTED.PAUSE_INST",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x40",
"Unit": "cpu_core"
},
@@ -808,26 +1159,41 @@
"EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Reference cycles when the core is not in halt state.",
"CollectPEBSRecord": "2",
- "Counter": "34",
+ "Counter": "Fixed counter 2",
"EventName": "CPU_CLK_UNHALTED.REF_TSC",
"PEBScounters": "34",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x3",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x3c",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Core cycles when the thread is not in halt state",
"CollectPEBSRecord": "2",
- "Counter": "33",
+ "Counter": "Fixed counter 1",
"EventName": "CPU_CLK_UNHALTED.THREAD",
"PEBScounters": "33",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -839,6 +1205,7 @@
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"Unit": "cpu_core"
},
{
@@ -850,6 +1217,7 @@
"EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
@@ -862,6 +1230,7 @@
"EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -874,6 +1243,7 @@
"EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
@@ -886,6 +1256,7 @@
"EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0xc",
"Unit": "cpu_core"
},
@@ -898,6 +1269,7 @@
"EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x5",
"Unit": "cpu_core"
},
@@ -910,6 +1282,7 @@
"EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -921,6 +1294,7 @@
"EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -932,6 +1306,7 @@
"EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -943,6 +1318,7 @@
"EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
@@ -954,6 +1330,7 @@
"EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
@@ -966,6 +1343,7 @@
"EventName": "EXE_ACTIVITY.BOUND_ON_LOADS",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x21",
"Unit": "cpu_core"
},
@@ -978,10 +1356,23 @@
"EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x40",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Cycles no uop executed while RS was not empty, the SB was not full and there was no outstanding load.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xa6",
+ "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Instruction decoders utilized in a cycle",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
@@ -989,13 +1380,14 @@
"EventName": "INST_DECODED.DECODERS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of instructions retired. Fixed Counter - architectural event",
"CollectPEBSRecord": "2",
- "Counter": "32",
+ "Counter": "Fixed counter 0",
"EventName": "INST_RETIRED.ANY",
"PEBS": "1",
"PEBScounters": "32",
@@ -1015,7 +1407,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "INST_RETIRED.MACRO_FUSED",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
@@ -1026,7 +1418,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "Number of all retired NOP instructions.",
+ "BriefDescription": "Retired NOP instructions.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
@@ -1039,7 +1431,7 @@
{
"BriefDescription": "Precise instruction retired with PEBS precise-distribution",
"CollectPEBSRecord": "2",
- "Counter": "32",
+ "Counter": "Fixed counter 0",
"EventName": "INST_RETIRED.PREC_DIST",
"PEBS": "1",
"PEBScounters": "32",
@@ -1048,7 +1440,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "INST_RETIRED.REP_ITERATION",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
@@ -1066,6 +1458,7 @@
"EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "500009",
+ "Speculative": "1",
"UMask": "0x80",
"Unit": "cpu_core"
},
@@ -1077,11 +1470,12 @@
"EventName": "INT_MISC.RECOVERY_CYCLES",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "500009",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
@@ -1090,6 +1484,7 @@
"MSRValue": "0x7",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"TakenAlone": "1",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -1102,11 +1497,12 @@
"EventName": "INT_MISC.UOP_DROPPING",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "INT_VEC_RETIRED.128BIT",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
@@ -1117,7 +1513,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "INT_VEC_RETIRED.256BIT",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
@@ -1150,7 +1546,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "INT_VEC_RETIRED.MUL_256",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
@@ -1161,7 +1557,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "INT_VEC_RETIRED.SHUFFLES",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
@@ -1172,7 +1568,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "INT_VEC_RETIRED.VNNI_128",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
@@ -1183,7 +1579,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "INT_VEC_RETIRED.VNNI_256",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
@@ -1201,6 +1597,7 @@
"EventName": "LD_BLOCKS.ADDRESS_ALIAS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -1212,6 +1609,7 @@
"EventName": "LD_BLOCKS.NO_SR",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x88",
"Unit": "cpu_core"
},
@@ -1223,6 +1621,7 @@
"EventName": "LD_BLOCKS.STORE_FORWARD",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x82",
"Unit": "cpu_core"
},
@@ -1234,6 +1633,7 @@
"EventName": "LOAD_HIT_PREFETCH.SWPF",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1244,8 +1644,9 @@
"CounterMask": "1",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_ACTIVE",
- "PEBScounters": "0,1,2,3",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1256,8 +1657,9 @@
"CounterMask": "6",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_OK",
- "PEBScounters": "0,1,2,3",
+ "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1269,6 +1671,7 @@
"EventName": "LSD.UOPS",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1282,6 +1685,7 @@
"EventName": "MACHINE_CLEARS.COUNT",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1293,17 +1697,19 @@
"EventName": "MACHINE_CLEARS.SMC",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "MISC2_RETIRED.LFENCE",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe0",
"EventName": "MISC2_RETIRED.LFENCE",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "400009",
+ "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
@@ -1326,6 +1732,7 @@
"EventName": "RESOURCE_STALLS.SB",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
@@ -1337,6 +1744,7 @@
"EventName": "RESOURCE_STALLS.SCOREBOARD",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -1348,6 +1756,7 @@
"EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "10000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -1357,6 +1766,7 @@
"EventCode": "0xa4",
"EventName": "TOPDOWN.BAD_SPEC_SLOTS",
"SampleAfterValue": "10000003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -1366,27 +1776,30 @@
"EventCode": "0xa4",
"EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
"SampleAfterValue": "10000003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "TOPDOWN.MEMORY_BOUND_SLOTS",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.MEMORY_BOUND_SLOTS",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "10000003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
"CollectPEBSRecord": "2",
- "Counter": "35",
+ "Counter": "Fixed counter 3",
"EventName": "TOPDOWN.SLOTS",
"PEBScounters": "35",
"SampleAfterValue": "10000003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -1398,17 +1811,19 @@
"EventName": "TOPDOWN.SLOTS_P",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "10000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "UOPS_DECODED.DEC0_UOPS",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3",
"EventCode": "0x76",
"EventName": "UOPS_DECODED.DEC0_UOPS",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1420,6 +1835,7 @@
"EventName": "UOPS_DISPATCHED.PORT_0",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1431,6 +1847,7 @@
"EventName": "UOPS_DISPATCHED.PORT_1",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -1442,6 +1859,7 @@
"EventName": "UOPS_DISPATCHED.PORT_2_3_10",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -1453,6 +1871,7 @@
"EventName": "UOPS_DISPATCHED.PORT_4_9",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
@@ -1464,6 +1883,7 @@
"EventName": "UOPS_DISPATCHED.PORT_5_11",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
@@ -1475,6 +1895,7 @@
"EventName": "UOPS_DISPATCHED.PORT_6",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x40",
"Unit": "cpu_core"
},
@@ -1486,6 +1907,7 @@
"EventName": "UOPS_DISPATCHED.PORT_7_8",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x80",
"Unit": "cpu_core"
},
@@ -1498,6 +1920,7 @@
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -1510,6 +1933,7 @@
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -1522,6 +1946,7 @@
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -1534,6 +1959,7 @@
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -1546,6 +1972,7 @@
"EventName": "UOPS_EXECUTED.CYCLES_GE_1",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1558,6 +1985,7 @@
"EventName": "UOPS_EXECUTED.CYCLES_GE_2",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1570,6 +1998,7 @@
"EventName": "UOPS_EXECUTED.CYCLES_GE_3",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1582,6 +2011,7 @@
"EventName": "UOPS_EXECUTED.CYCLES_GE_4",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1595,6 +2025,7 @@
"Invert": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1608,6 +2039,7 @@
"Invert": "1",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1619,6 +2051,7 @@
"EventName": "UOPS_EXECUTED.THREAD",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1630,6 +2063,7 @@
"EventName": "UOPS_EXECUTED.X87",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
@@ -1641,6 +2075,7 @@
"EventName": "UOPS_ISSUED.ANY",
"PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
@@ -1657,7 +2092,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "Retired uops except the last uop of each instruction.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
@@ -1668,7 +2103,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "TBD",
+ "BriefDescription": "UOPS_RETIRED.MS",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
@@ -1718,4 +2153,4 @@
"UMask": "0x2",
"Unit": "cpu_core"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json b/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json
index 50de82c29944..b1ae349f5f21 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json
@@ -3,7 +3,7 @@
"BriefDescription": "This 48-bit fixed counter counts the UCLK cycles",
"Counter": "Fixed",
"CounterType": "PGMABLE",
- "EventCode": "0xff",
+ "EventCode": "0xff",
"EventName": "UNC_CLOCK.SOCKET",
"PerPkg": "1",
"Unit": "CLOCK"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json
index 1cc39aa032e1..12baf768ad8d 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json
@@ -7,6 +7,7 @@
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
+ "Speculative": "1",
"UMask": "0xe",
"Unit": "cpu_atom"
},
@@ -18,17 +19,55 @@
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "2000003",
+ "Speculative": "1",
"UMask": "0xe",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a DTLB miss when load subsequently retires.",
+ "BriefDescription": "Counts the number of page walks initiated by an instruction fetch that missed the first and second level TLBs.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.MISS_CAUSED_WALK",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "1000003",
+ "Speculative": "1",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks due to an instruction fetch that miss the PDE (Page Directory Entry) cache.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.PDE_CACHE_MISS",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "2000003",
+ "Speculative": "1",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.",
+ "CollectPEBSRecord": "2",
+ "Counter": "0,1,2,3,4,5",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "PEBScounters": "0,1,2,3,4,5",
+ "SampleAfterValue": "200003",
+ "Speculative": "1",
+ "UMask": "0xe",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.",
"CollectPEBSRecord": "2",
"Counter": "0,1,2,3,4,5",
"EventCode": "0x05",
"EventName": "LD_HEAD.DTLB_MISS_AT_RET",
"PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
+ "Speculative": "1",
"UMask": "0x90",
"Unit": "cpu_atom"
},
@@ -40,6 +79,7 @@
"EventName": "DTLB_LOAD_MISSES.STLB_HIT",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
@@ -52,6 +92,7 @@
"EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
@@ -63,6 +104,7 @@
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0xe",
"Unit": "cpu_core"
},
@@ -74,6 +116,7 @@
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
@@ -85,6 +128,7 @@
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -96,6 +140,7 @@
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -107,6 +152,7 @@
"EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
@@ -118,6 +164,7 @@
"EventName": "DTLB_STORE_MISSES.STLB_HIT",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
@@ -130,6 +177,7 @@
"EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
@@ -141,6 +189,7 @@
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0xe",
"Unit": "cpu_core"
},
@@ -152,6 +201,7 @@
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
@@ -163,6 +213,7 @@
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -174,6 +225,7 @@
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -185,6 +237,7 @@
"EventName": "DTLB_STORE_MISSES.WALK_PENDING",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
@@ -196,6 +249,7 @@
"EventName": "ITLB_MISSES.STLB_HIT",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
@@ -208,6 +262,7 @@
"EventName": "ITLB_MISSES.WALK_ACTIVE",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
@@ -219,6 +274,7 @@
"EventName": "ITLB_MISSES.WALK_COMPLETED",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0xe",
"Unit": "cpu_core"
},
@@ -230,6 +286,7 @@
"EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
@@ -241,6 +298,7 @@
"EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
@@ -252,7 +310,8 @@
"EventName": "ITLB_MISSES.WALK_PENDING",
"PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
+ "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/bonnell/cache.json b/tools/perf/pmu-events/arch/x86/bonnell/cache.json
index 71653bfe7093..86582bb8aa39 100644
--- a/tools/perf/pmu-events/arch/x86/bonnell/cache.json
+++ b/tools/perf/pmu-events/arch/x86/bonnell/cache.json
@@ -743,4 +743,4 @@
"SampleAfterValue": "10000",
"UMask": "0x2"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/bonnell/floating-point.json b/tools/perf/pmu-events/arch/x86/bonnell/floating-point.json
index f8055ff47f19..1fa347d07c98 100644
--- a/tools/perf/pmu-events/arch/x86/bonnell/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/bonnell/floating-point.json
@@ -258,4 +258,4 @@
"SampleAfterValue": "2000000",
"UMask": "0x2"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/bonnell/frontend.json b/tools/perf/pmu-events/arch/x86/bonnell/frontend.json
index e852eb2cc878..21fe5fe229aa 100644
--- a/tools/perf/pmu-events/arch/x86/bonnell/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/bonnell/frontend.json
@@ -88,4 +88,4 @@
"SampleAfterValue": "2000000",
"UMask": "0x1"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/bonnell/memory.json b/tools/perf/pmu-events/arch/x86/bonnell/memory.json
index 2aa4c41f528e..f8b45b6fb4d3 100644
--- a/tools/perf/pmu-events/arch/x86/bonnell/memory.json
+++ b/tools/perf/pmu-events/arch/x86/bonnell/memory.json
@@ -151,4 +151,4 @@
"SampleAfterValue": "200000",
"UMask": "0x86"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/bonnell/other.json b/tools/perf/pmu-events/arch/x86/bonnell/other.json
index 114c062e7e96..e0bdcfbfa9dc 100644
--- a/tools/perf/pmu-events/arch/x86/bonnell/other.json
+++ b/tools/perf/pmu-events/arch/x86/bonnell/other.json
@@ -447,4 +447,4 @@
"SampleAfterValue": "200000",
"UMask": "0xc0"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/bonnell/pipeline.json b/tools/perf/pmu-events/arch/x86/bonnell/pipeline.json
index 896b738e59b6..f5123c99a7ba 100644
--- a/tools/perf/pmu-events/arch/x86/bonnell/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/bonnell/pipeline.json
@@ -353,4 +353,4 @@
"SampleAfterValue": "2000000",
"UMask": "0x10"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/bonnell/virtual-memory.json b/tools/perf/pmu-events/arch/x86/bonnell/virtual-memory.json
index c2363b8e61b4..e8512c585572 100644
--- a/tools/perf/pmu-events/arch/x86/bonnell/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/bonnell/virtual-memory.json
@@ -121,4 +121,4 @@
"SampleAfterValue": "200000",
"UMask": "0x3"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
index 91d23341eabd..d65afe3d0b06 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
@@ -130,44 +130,26 @@
"MetricName": "FLOPc_SMT"
},
{
- "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width)",
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
"MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) ) / ( 2 * CPU_CLK_UNHALTED.THREAD )",
"MetricGroup": "Cor;Flops;HPC",
"MetricName": "FP_Arith_Utilization",
- "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting."
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
},
{
- "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). SMT version; use when SMT is enabled and measuring per logical CPU.",
"MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
"MetricGroup": "Cor;Flops;HPC_SMT",
"MetricName": "FP_Arith_Utilization_SMT",
- "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting. SMT version; use when SMT is enabled and measuring per logical CPU."
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common). SMT version; use when SMT is enabled and measuring per logical CPU."
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
"MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
"MetricName": "ILP"
},
{
- "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * (BR_MISP_RETIRED.ALL_BRANCHES * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / CPU_CLK_UNHALTED.THREAD) / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY )) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) * (4 * CPU_CLK_UNHALTED.THREAD) / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "Branch_Misprediction_Cost"
- },
- {
- "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (BR_MISP_RETIRED.ALL_BRANCHES * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / CPU_CLK_UNHALTED.THREAD) / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY )) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BrMispredicts_SMT",
- "MetricName": "Branch_Misprediction_Cost_SMT"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "IpMispredict"
- },
- {
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
"MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
"MetricGroup": "SMT",
@@ -257,41 +239,52 @@
"MetricName": "Instructions"
},
{
+ "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "Retire"
+ },
+ {
+ "BriefDescription": "",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "Execute"
+ },
+ {
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )",
"MetricGroup": "DSB;Fed;FetchBW",
"MetricName": "DSB_Coverage"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles)",
- "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "Load_Miss_Real_Latency",
- "PublicDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles). Latency may be overestimated for multi-load instructions - e.g. repeat strings."
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "IpMispredict"
},
{
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBound;MemoryBW",
- "MetricName": "MLP"
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * (BR_MISP_RETIRED.ALL_BRANCHES * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / CPU_CLK_UNHALTED.THREAD) / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY )) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) * (4 * CPU_CLK_UNHALTED.THREAD) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "Branch_Misprediction_Cost"
},
{
- "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "L1D_Cache_Fill_BW"
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (BR_MISP_RETIRED.ALL_BRANCHES * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / CPU_CLK_UNHALTED.THREAD) / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY )) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts_SMT",
+ "MetricName": "Branch_Misprediction_Cost_SMT"
},
{
- "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "L2_Cache_Fill_BW"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "Load_Miss_Real_Latency"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "L3_Cache_Fill_BW"
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBound;MemoryBW",
+ "MetricName": "MLP"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
@@ -306,13 +299,13 @@
"MetricName": "L2MPKI"
},
{
- "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
+ "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY",
"MetricGroup": "Mem;CacheMisses;Offcore",
"MetricName": "L2MPKI_All"
},
{
- "BriefDescription": "L2 cache misses per kilo instruction for all demand loads (including speculative)",
+ "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
"MetricGroup": "Mem;CacheMisses",
"MetricName": "L2MPKI_Load"
@@ -349,6 +342,48 @@
"MetricName": "Page_Walks_Utilization_SMT"
},
{
+ "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "L1D_Cache_Fill_BW"
+ },
+ {
+ "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "L2_Cache_Fill_BW"
+ },
+ {
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "L3_Cache_Fill_BW"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "(64 * L1D.REPLACEMENT / 1000000000 / duration_time)",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "L1D_Cache_Fill_BW_1T"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "(64 * L2_LINES_IN.ALL / 1000000000 / duration_time)",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "L2_Cache_Fill_BW_1T"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "(64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time)",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "L3_Cache_Fill_BW_1T"
+ },
+ {
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "0",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "L3_Cache_Access_BW_1T"
+ },
+ {
"BriefDescription": "Average CPU Utilization",
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "HPC;Summary",
@@ -364,7 +399,8 @@
"BriefDescription": "Giga Floating Point Operations Per Second",
"MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
"MetricGroup": "Cor;Flops;HPC",
- "MetricName": "GFLOPs"
+ "MetricName": "GFLOPs",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/cache.json b/tools/perf/pmu-events/arch/x86/broadwell/cache.json
index 890412f02e06..f3d7fced28b6 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/cache.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/cache.json
@@ -3407,4 +3407,4 @@
"SampleAfterValue": "100003",
"UMask": "0x10"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json
index 9ad37dddb354..6322116d0d46 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json
@@ -190,4 +190,4 @@
"SampleAfterValue": "2000003",
"UMask": "0x3"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/frontend.json b/tools/perf/pmu-events/arch/x86/broadwell/frontend.json
index f0bcb945ff76..37ce8034b2ed 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/frontend.json
@@ -292,4 +292,4 @@
"SampleAfterValue": "2000003",
"UMask": "0x1"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/memory.json b/tools/perf/pmu-events/arch/x86/broadwell/memory.json
index f4eebecf371f..2a7797738159 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/memory.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/memory.json
@@ -3050,4 +3050,4 @@
"SampleAfterValue": "2000003",
"UMask": "0x40"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/other.json b/tools/perf/pmu-events/arch/x86/broadwell/other.json
index 4b360fe96698..917d145d5227 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/other.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/other.json
@@ -41,4 +41,4 @@
"SampleAfterValue": "2000003",
"UMask": "0x1"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json
index 18d21b94a4b9..e9a604e2d67c 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json
@@ -1377,4 +1377,4 @@
"SampleAfterValue": "2000003",
"UMask": "0x1"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/uncore-cache.json b/tools/perf/pmu-events/arch/x86/broadwell/uncore-cache.json
new file mode 100644
index 000000000000..d1805b3a5e3d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwell/uncore-cache.json
@@ -0,0 +1,152 @@
+[
+ {
+ "BriefDescription": "L3 Lookup any request that access cache and found line in E or S-state",
+ "Counter": "0,1",
+ "EventCode": "0x34",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_ES",
+ "PerPkg": "1",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in E or S-state.",
+ "UMask": "0x86",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "L3 Lookup any request that access cache and found line in I-state",
+ "Counter": "0,1",
+ "EventCode": "0x34",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_I",
+ "PerPkg": "1",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in I-state.",
+ "UMask": "0x88",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "L3 Lookup any request that access cache and found line in M-state",
+ "Counter": "0,1",
+ "EventCode": "0x34",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_M",
+ "PerPkg": "1",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in M-state.",
+ "UMask": "0x81",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "L3 Lookup any request that access cache and found line in MESI-state",
+ "Counter": "0,1",
+ "EventCode": "0x34",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_MESI",
+ "PerPkg": "1",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in MESI-state.",
+ "UMask": "0x8f",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "L3 Lookup read request that access cache and found line in E or S-state",
+ "Counter": "0,1",
+ "EventCode": "0x34",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_ES",
+ "PerPkg": "1",
+ "PublicDescription": "L3 Lookup read request that access cache and found line in E or S-state.",
+ "UMask": "0x16",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "L3 Lookup read request that access cache and found line in I-state",
+ "Counter": "0,1",
+ "EventCode": "0x34",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_I",
+ "PerPkg": "1",
+ "PublicDescription": "L3 Lookup read request that access cache and found line in I-state.",
+ "UMask": "0x18",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "L3 Lookup read request that access cache and found line in M-state",
+ "Counter": "0,1",
+ "EventCode": "0x34",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_M",
+ "PerPkg": "1",
+ "PublicDescription": "L3 Lookup read request that access cache and found line in M-state.",
+ "UMask": "0x11",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "L3 Lookup read request that access cache and found line in any MESI-state",
+ "Counter": "0,1",
+ "EventCode": "0x34",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_MESI",
+ "PerPkg": "1",
+ "PublicDescription": "L3 Lookup read request that access cache and found line in any MESI-state.",
+ "UMask": "0x1f",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "L3 Lookup write request that access cache and found line in E or S-state",
+ "Counter": "0,1",
+ "EventCode": "0x34",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_ES",
+ "PerPkg": "1",
+ "PublicDescription": "L3 Lookup write request that access cache and found line in E or S-state.",
+ "UMask": "0x26",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "L3 Lookup write request that access cache and found line in M-state",
+ "Counter": "0,1",
+ "EventCode": "0x34",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_M",
+ "PerPkg": "1",
+ "PublicDescription": "L3 Lookup write request that access cache and found line in M-state.",
+ "UMask": "0x21",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "L3 Lookup write request that access cache and found line in MESI-state",
+ "Counter": "0,1",
+ "EventCode": "0x34",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_MESI",
+ "PerPkg": "1",
+ "PublicDescription": "L3 Lookup write request that access cache and found line in MESI-state.",
+ "UMask": "0x2f",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
+ "Counter": "0,1",
+ "EventCode": "0x22",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_XCORE",
+ "PerPkg": "1",
+ "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
+ "UMask": "0x48",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
+ "Counter": "0,1",
+ "EventCode": "0x22",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_XCORE",
+ "PerPkg": "1",
+ "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
+ "UMask": "0x44",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+ "Counter": "0,1",
+ "EventCode": "0x22",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION",
+ "PerPkg": "1",
+ "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+ "UMask": "0x81",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
+ "Counter": "0,1",
+ "EventCode": "0x22",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE",
+ "PerPkg": "1",
+ "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
+ "UMask": "0x41",
+ "Unit": "CBO"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/uncore-other.json b/tools/perf/pmu-events/arch/x86/broadwell/uncore-other.json
new file mode 100644
index 000000000000..73c2261e1e94
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwell/uncore-other.json
@@ -0,0 +1,82 @@
+[
+ {
+ "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
+ "Counter": "0,1",
+ "EventCode": "0x84",
+ "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+ "PerPkg": "1",
+ "PublicDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
+ "UMask": "0x01",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
+ "Counter": "0,",
+ "EventCode": "0x80",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
+ "PerPkg": "1",
+ "PublicDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
+ "UMask": "0x01",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.;",
+ "Counter": "0,",
+ "CounterMask": "1",
+ "EventCode": "0x80",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
+ "PerPkg": "1",
+ "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+ "UMask": "0x01",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Each cycle count number of 'valid' coherent Data Read entries that are in DirectData mode. Such entry is defined as valid when it is allocated till data sent to Core (first chunk, IDI0). Applicable for IA Cores' requests in normal case.",
+ "Counter": "0,",
+ "EventCode": "0x80",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.DRD_DIRECT",
+ "PerPkg": "1",
+ "PublicDescription": "Each cycle count number of valid coherent Data Read entries that are in DirectData mode. Such entry is defined as valid when it is allocated till data sent to Core (first chunk, IDI0). Applicable for IA Cores' requests in normal case.",
+ "UMask": "0x02",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+ "Counter": "0,1",
+ "EventCode": "0x81",
+ "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+ "PerPkg": "1",
+ "PublicDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+ "UMask": "0x01",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Number of Core coherent Data Read entries allocated in DirectData mode",
+ "Counter": "0,1",
+ "EventCode": "0x81",
+ "EventName": "UNC_ARB_TRK_REQUESTS.DRD_DIRECT",
+ "PerPkg": "1",
+ "PublicDescription": "Number of Core coherent Data Read entries allocated in DirectData mode.",
+ "UMask": "0x02",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.",
+ "Counter": "0,1",
+ "EventCode": "0x81",
+ "EventName": "UNC_ARB_TRK_REQUESTS.WRITES",
+ "PerPkg": "1",
+ "PublicDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.",
+ "UMask": "0x20",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles",
+ "Counter": "FIXED",
+ "EventCode": "0xff",
+ "EventName": "UNC_CLOCK.SOCKET",
+ "PerPkg": "1",
+ "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+ "Unit": "CLOCK"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/uncore.json b/tools/perf/pmu-events/arch/x86/broadwell/uncore.json
deleted file mode 100644
index 28e1e159a3cb..000000000000
--- a/tools/perf/pmu-events/arch/x86/broadwell/uncore.json
+++ /dev/null
@@ -1,278 +0,0 @@
-[
- {
- "Unit": "CBO",
- "EventCode": "0x22",
- "UMask": "0x41",
- "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE",
- "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
- "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "CBO",
- "EventCode": "0x22",
- "UMask": "0x81",
- "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION",
- "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
- "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "CBO",
- "EventCode": "0x22",
- "UMask": "0x44",
- "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_XCORE",
- "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
- "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "CBO",
- "EventCode": "0x22",
- "UMask": "0x48",
- "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_XCORE",
- "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
- "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "CBO",
- "EventCode": "0x34",
- "UMask": "0x11",
- "EventName": "UNC_CBO_CACHE_LOOKUP.READ_M",
- "BriefDescription": "L3 Lookup read request that access cache and found line in M-state",
- "PublicDescription": "L3 Lookup read request that access cache and found line in M-state.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "CBO",
- "EventCode": "0x34",
- "UMask": "0x21",
- "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_M",
- "BriefDescription": "L3 Lookup write request that access cache and found line in M-state",
- "PublicDescription": "L3 Lookup write request that access cache and found line in M-state.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "CBO",
- "EventCode": "0x34",
- "UMask": "0x81",
- "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_M",
- "BriefDescription": "L3 Lookup any request that access cache and found line in M-state",
- "PublicDescription": "L3 Lookup any request that access cache and found line in M-state.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "CBO",
- "EventCode": "0x34",
- "UMask": "0x18",
- "EventName": "UNC_CBO_CACHE_LOOKUP.READ_I",
- "BriefDescription": "L3 Lookup read request that access cache and found line in I-state",
- "PublicDescription": "L3 Lookup read request that access cache and found line in I-state.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "CBO",
- "EventCode": "0x34",
- "UMask": "0x88",
- "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_I",
- "BriefDescription": "L3 Lookup any request that access cache and found line in I-state",
- "PublicDescription": "L3 Lookup any request that access cache and found line in I-state.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "CBO",
- "EventCode": "0x34",
- "UMask": "0x1f",
- "EventName": "UNC_CBO_CACHE_LOOKUP.READ_MESI",
- "BriefDescription": "L3 Lookup read request that access cache and found line in any MESI-state",
- "PublicDescription": "L3 Lookup read request that access cache and found line in any MESI-state.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "CBO",
- "EventCode": "0x34",
- "UMask": "0x2f",
- "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_MESI",
- "BriefDescription": "L3 Lookup write request that access cache and found line in MESI-state",
- "PublicDescription": "L3 Lookup write request that access cache and found line in MESI-state.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "CBO",
- "EventCode": "0x34",
- "UMask": "0x8f",
- "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_MESI",
- "BriefDescription": "L3 Lookup any request that access cache and found line in MESI-state",
- "PublicDescription": "L3 Lookup any request that access cache and found line in MESI-state.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "CBO",
- "EventCode": "0x34",
- "UMask": "0x86",
- "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_ES",
- "BriefDescription": "L3 Lookup any request that access cache and found line in E or S-state",
- "PublicDescription": "L3 Lookup any request that access cache and found line in E or S-state.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "CBO",
- "EventCode": "0x34",
- "UMask": "0x16",
- "EventName": "UNC_CBO_CACHE_LOOKUP.READ_ES",
- "BriefDescription": "L3 Lookup read request that access cache and found line in E or S-state",
- "PublicDescription": "L3 Lookup read request that access cache and found line in E or S-state.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "CBO",
- "EventCode": "0x34",
- "UMask": "0x26",
- "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_ES",
- "BriefDescription": "L3 Lookup write request that access cache and found line in E or S-state",
- "PublicDescription": "L3 Lookup write request that access cache and found line in E or S-state.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "iMPH-U",
- "EventCode": "0x80",
- "UMask": "0x01",
- "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
- "BriefDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
- "PublicDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
- "Counter": "0,",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "iMPH-U",
- "EventCode": "0x80",
- "UMask": "0x02",
- "EventName": "UNC_ARB_TRK_OCCUPANCY.DRD_DIRECT",
- "BriefDescription": "Each cycle count number of 'valid' coherent Data Read entries that are in DirectData mode. Such entry is defined as valid when it is allocated till data sent to Core (first chunk, IDI0). Applicable for IA Cores' requests in normal case.",
- "PublicDescription": "Each cycle count number of 'valid' coherent Data Read entries that are in DirectData mode. Such entry is defined as valid when it is allocated till data sent to Core (first chunk, IDI0). Applicable for IA Cores' requests in normal case.",
- "Counter": "0,",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "iMPH-U",
- "EventCode": "0x81",
- "UMask": "0x01",
- "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
- "BriefDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
- "PublicDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "iMPH-U",
- "EventCode": "0x81",
- "UMask": "0x02",
- "EventName": "UNC_ARB_TRK_REQUESTS.DRD_DIRECT",
- "BriefDescription": "Number of Core coherent Data Read entries allocated in DirectData mode",
- "PublicDescription": "Number of Core coherent Data Read entries allocated in DirectData mode.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "iMPH-U",
- "EventCode": "0x81",
- "UMask": "0x20",
- "EventName": "UNC_ARB_TRK_REQUESTS.WRITES",
- "BriefDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.",
- "PublicDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "iMPH-U",
- "EventCode": "0x84",
- "UMask": "0x01",
- "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
- "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
- "PublicDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
- "Counter": "0,1",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "iMPH-U",
- "EventCode": "0x80",
- "UMask": "0x01",
- "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
- "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.;",
- "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
- "Counter": "0,",
- "CounterMask": "1",
- "Invert": "0",
- "EdgeDetect": "0"
- },
- {
- "Unit": "NCU",
- "EventCode": "0x0",
- "UMask": "0x01",
- "EventName": "UNC_CLOCK.SOCKET",
- "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles",
- "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.",
- "Counter": "FIXED",
- "CounterMask": "0",
- "Invert": "0",
- "EdgeDetect": "0"
- }
-] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json b/tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json
index 818a8b132c08..6a6de8790f25 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json
@@ -385,4 +385,4 @@
"SampleAfterValue": "100007",
"UMask": "0x20"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
index 73b6865a769d..b6fdf5ba2c9a 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
@@ -47,7 +47,7 @@
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)",
"MetricGroup": "TopdownL1",
"MetricName": "Retiring",
- "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided."
+ "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. "
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
@@ -130,44 +130,26 @@
"MetricName": "FLOPc_SMT"
},
{
- "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width)",
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
"MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) ) / ( 2 * CPU_CLK_UNHALTED.THREAD )",
"MetricGroup": "Cor;Flops;HPC",
"MetricName": "FP_Arith_Utilization",
- "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting."
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
},
{
- "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). SMT version; use when SMT is enabled and measuring per logical CPU.",
"MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
"MetricGroup": "Cor;Flops;HPC_SMT",
"MetricName": "FP_Arith_Utilization_SMT",
- "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting. SMT version; use when SMT is enabled and measuring per logical CPU."
+ "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common). SMT version; use when SMT is enabled and measuring per logical CPU."
},
{
- "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
"MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
"MetricName": "ILP"
},
{
- "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * (BR_MISP_RETIRED.ALL_BRANCHES * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / CPU_CLK_UNHALTED.THREAD) / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY )) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) * (4 * CPU_CLK_UNHALTED.THREAD) / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BrMispredicts",
- "MetricName": "Branch_Misprediction_Cost"
- },
- {
- "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (BR_MISP_RETIRED.ALL_BRANCHES * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / CPU_CLK_UNHALTED.THREAD) / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY )) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BrMispredicts_SMT",
- "MetricName": "Branch_Misprediction_Cost_SMT"
- },
- {
- "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Bad;BadSpec;BrMispredicts",
- "MetricName": "IpMispredict"
- },
- {
"BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
"MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
"MetricGroup": "SMT",
@@ -204,7 +186,7 @@
"MetricName": "IpTB"
},
{
- "BriefDescription": "Branch instructions per taken branch.",
+ "BriefDescription": "Branch instructions per taken branch. ",
"MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;PGO",
"MetricName": "BpTkBranch"
@@ -257,41 +239,52 @@
"MetricName": "Instructions"
},
{
+ "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
+ "MetricGroup": "Pipeline;Ret",
+ "MetricName": "Retire"
+ },
+ {
+ "BriefDescription": "",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+ "MetricName": "Execute"
+ },
+ {
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
- "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )",
"MetricGroup": "DSB;Fed;FetchBW",
"MetricName": "DSB_Coverage"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles)",
- "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
- "MetricGroup": "Mem;MemoryBound;MemoryLat",
- "MetricName": "Load_Miss_Real_Latency",
- "PublicDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles). Latency may be overestimated for multi-load instructions - e.g. repeat strings."
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "IpMispredict"
},
{
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "Mem;MemoryBound;MemoryBW",
- "MetricName": "MLP"
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * (BR_MISP_RETIRED.ALL_BRANCHES * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / CPU_CLK_UNHALTED.THREAD) / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY )) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) * (4 * CPU_CLK_UNHALTED.THREAD) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "Branch_Misprediction_Cost"
},
{
- "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "L1D_Cache_Fill_BW"
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (BR_MISP_RETIRED.ALL_BRANCHES * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / CPU_CLK_UNHALTED.THREAD) / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY )) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts_SMT",
+ "MetricName": "Branch_Misprediction_Cost_SMT"
},
{
- "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
- "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "L2_Cache_Fill_BW"
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "Load_Miss_Real_Latency"
},
{
- "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
- "MetricGroup": "Mem;MemoryBW",
- "MetricName": "L3_Cache_Fill_BW"
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Mem;MemoryBound;MemoryBW",
+ "MetricName": "MLP"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
@@ -306,13 +299,13 @@
"MetricName": "L2MPKI"
},
{
- "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
+ "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY",
"MetricGroup": "Mem;CacheMisses;Offcore",
"MetricName": "L2MPKI_All"
},
{
- "BriefDescription": "L2 cache misses per kilo instruction for all demand loads (including speculative)",
+ "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads (including speculative)",
"MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
"MetricGroup": "Mem;CacheMisses",
"MetricName": "L2MPKI_Load"
@@ -349,6 +342,48 @@
"MetricName": "Page_Walks_Utilization_SMT"
},
{
+ "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "L1D_Cache_Fill_BW"
+ },
+ {
+ "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "L2_Cache_Fill_BW"
+ },
+ {
+ "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "L3_Cache_Fill_BW"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "(64 * L1D.REPLACEMENT / 1000000000 / duration_time)",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "L1D_Cache_Fill_BW_1T"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "(64 * L2_LINES_IN.ALL / 1000000000 / duration_time)",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "L2_Cache_Fill_BW_1T"
+ },
+ {
+ "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "(64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time)",
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "L3_Cache_Fill_BW_1T"
+ },
+ {
+ "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "0",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "L3_Cache_Access_BW_1T"
+ },
+ {
"BriefDescription": "Average CPU Utilization",
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "HPC;Summary",
@@ -364,7 +399,8 @@
"BriefDescription": "Giga Floating Point Operations Per Second",
"MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
"MetricGroup": "Cor;Flops;HPC",
- "MetricName": "GFLOPs"
+ "MetricName": "GFLOPs",
+ "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
},
{
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
@@ -415,6 +451,12 @@
"MetricName": "Socket_CLKS"
},
{
+ "BriefDescription": "Uncore frequency per die [GHZ]",
+ "MetricExpr": "cbox_0@event\\=0x0@ / #num_dies / duration_time / 1000000000",
+ "MetricGroup": "SoC",
+ "MetricName": "UNCORE_FREQ"
+ },
+ {
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
"MetricGroup": "Branches;OS",
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/cache.json b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json
index 0f4de912d099..4b77181b2c53 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/cache.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json
@@ -806,4 +806,4 @@
"SampleAfterValue": "100003",
"UMask": "0x10"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json
index fdf5dc40b835..46cf18490140 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json
@@ -190,4 +190,4 @@
"SampleAfterValue": "2000003",
"UMask": "0x3"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/frontend.json b/tools/perf/pmu-events/arch/x86/broadwellde/frontend.json
index f0bcb945ff76..37ce8034b2ed 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/frontend.json
@@ -292,4 +292,4 @@
"SampleAfterValue": "2000003",
"UMask": "0x1"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/memory.json b/tools/perf/pmu-events/arch/x86/broadwellde/memory.json
index 604059e7eb58..a3a5cc6dab42 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/memory.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/memory.json
@@ -429,4 +429,4 @@
"SampleAfterValue": "2000003",
"UMask": "0x40"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/other.json b/tools/perf/pmu-events/arch/x86/broadwellde/other.json
index 4b360fe96698..917d145d5227 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/other.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/other.json
@@ -41,4 +41,4 @@
"SampleAfterValue": "2000003",
"UMask": "0x1"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json
index 7580b8af0d13..85654037b768 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json
@@ -1378,4 +1378,4 @@
"SampleAfterValue": "2000003",
"UMask": "0x1"
}
-] \ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json
index 58ed6d33d1f4..c4d154944ab6 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json
@@ -1,316 +1,3729 @@
[
{
- "BriefDescription": "Uncore cache clock ticks",
+ "BriefDescription": "Bounce Control",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xA",
+ "EventName": "UNC_C_BOUNCE_CONTROL",
+ "PerPkg": "1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Uncore Clocks",
"Counter": "0,1,2,3",
"EventName": "UNC_C_CLOCKTICKS",
"PerPkg": "1",
"Unit": "CBO"
},
{
- "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)",
+ "BriefDescription": "Counter 0 Occupancy",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1F",
+ "EventName": "UNC_C_COUNTER0_OCCUPANCY",
+ "PerPkg": "1",
+ "PublicDescription": "Since occupancy counts can only be captured in the Cbo's 0 counter, this event allows a user to capture occupancy related information by filtering the Cbo occupancy count captured in Counter 0. The filtering available is found in the control register - threshold, invert and edge detect. E.g. setting threshold to 1 can effectively monitor how many cycles the monitored queue has an entry.",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "FaST wire asserted",
+ "Counter": "0,1",
+ "EventCode": "0x9",
+ "EventName": "UNC_C_FAST_ASSERTED",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles either the local distress or incoming distress signals are asserted. Incoming distress includes both up and dn.",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Cache Lookups; Any Request",
"Counter": "0,1,2,3",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.ANY",
- "Filter": "filter_state=0x1",
"PerPkg": "1",
- "ScaleUnit": "64Bytes",
+ "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set umask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.; Filters for any transaction originating from the IPQ or IRQ. This does not include lookups originating from the ISMQ.",
"UMask": "0x11",
"Unit": "CBO"
},
{
- "BriefDescription": "M line evictions from LLC (writebacks to memory)",
+ "BriefDescription": "Cache Lookups; Data Read Request",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x34",
+ "EventName": "UNC_C_LLC_LOOKUP.DATA_READ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set umask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.; Read transactions",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Cache Lookups; Lookups that Match NID",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x34",
+ "EventName": "UNC_C_LLC_LOOKUP.NID",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set umask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.; Qualify one of the other subevents by the Target NID. The NID is programmed in Cn_MSR_PMON_BOX_FILTER.nid. In conjunction with STATE = I, it is possible to monitor misses to specific NIDs in the system.",
+ "UMask": "0x41",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Cache Lookups; Any Read Request",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x34",
+ "EventName": "UNC_C_LLC_LOOKUP.READ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set umask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.; Read transactions",
+ "UMask": "0x21",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Cache Lookups; External Snoop Request",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x34",
+ "EventName": "UNC_C_LLC_LOOKUP.REMOTE_SNOOP",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set umask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.; Filters for only snoop requests coming from the remote socket(s) through the IPQ.",
+ "UMask": "0x9",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Cache Lookups; Write Requests",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x34",
+ "EventName": "UNC_C_LLC_LOOKUP.WRITE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set umask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.; Writeback transactions from L2 to the LLC. This includes all write transactions -- both Cacheable and UC.",
+ "UMask": "0x5",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Lines Victimized; Lines in E state",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_C_LLC_VICTIMS.E_STATE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.",
+ "UMask": "0x2",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Lines Victimized",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_C_LLC_VICTIMS.F_STATE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.",
+ "UMask": "0x8",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Lines Victimized; Lines in S State",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_C_LLC_VICTIMS.I_STATE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.",
+ "UMask": "0x4",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Lines Victimized",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_C_LLC_VICTIMS.MISS",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.",
+ "UMask": "0x10",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Lines Victimized; Lines in M state",
"Counter": "0,1,2,3",
"EventCode": "0x37",
"EventName": "UNC_C_LLC_VICTIMS.M_STATE",
"PerPkg": "1",
- "ScaleUnit": "64Bytes",
+ "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.",
"UMask": "0x1",
"Unit": "CBO"
},
{
- "BriefDescription": "LLC misses - demand and prefetch data reads - excludes LLC prefetches. Derived from unc_c_tor_inserts.miss_opcode",
+ "BriefDescription": "Lines Victimized; Victimized Lines that Match NID",
"Counter": "0,1,2,3",
- "EventCode": "0x35",
- "EventName": "LLC_MISSES.DATA_READ",
- "Filter": "filter_opc=0x182",
+ "EventCode": "0x37",
+ "EventName": "UNC_C_LLC_VICTIMS.NID",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.; Qualify one of the other subevents by the Target NID. The NID is programmed in Cn_MSR_PMON_BOX_FILTER.nid. In conjunction with STATE = I, it is possible to monitor misses to specific NIDs in the system.",
+ "UMask": "0x40",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Cbo Misc; DRd hitting non-M with raw CV=0",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x39",
+ "EventName": "UNC_C_MISC.CVZERO_PREFETCH_MISS",
+ "PerPkg": "1",
+ "PublicDescription": "Miscellaneous events in the Cbo.",
+ "UMask": "0x20",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Cbo Misc; Clean Victim with raw CV=0",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x39",
+ "EventName": "UNC_C_MISC.CVZERO_PREFETCH_VICTIM",
+ "PerPkg": "1",
+ "PublicDescription": "Miscellaneous events in the Cbo.",
+ "UMask": "0x10",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Cbo Misc; RFO HitS",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x39",
+ "EventName": "UNC_C_MISC.RFO_HIT_S",
+ "PerPkg": "1",
+ "PublicDescription": "Miscellaneous events in the Cbo.; Number of times that an RFO hit in S state. This is useful for determining if it might be good for a workload to use RspIWB instead of RspSWB.",
+ "UMask": "0x8",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Cbo Misc; Silent Snoop Eviction",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x39",
+ "EventName": "UNC_C_MISC.RSPI_WAS_FSE",
"PerPkg": "1",
- "ScaleUnit": "64Bytes",
+ "PublicDescription": "Miscellaneous events in the Cbo.; Counts the number of times when a Snoop hit in FSE states and triggered a silent eviction. This is useful because this information is lost in the PRE encodings.",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Cbo Misc",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x39",
+ "EventName": "UNC_C_MISC.STARTED",
+ "PerPkg": "1",
+ "PublicDescription": "Miscellaneous events in the Cbo.",
+ "UMask": "0x4",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Cbo Misc; Write Combining Aliasing",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x39",
+ "EventName": "UNC_C_MISC.WC_ALIASING",
+ "PerPkg": "1",
+ "PublicDescription": "Miscellaneous events in the Cbo.; Counts the number of times that a USWC write (WCIL(F)) transaction hit in the LLC in M state, triggering a WBMtoI followed by the USWC write. This occurs when there is WC aliasing.",
+ "UMask": "0x2",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LRU Queue; LRU Age 0",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x3C",
+ "EventName": "UNC_C_QLRU.AGE0",
+ "PerPkg": "1",
+ "PublicDescription": "How often age was set to 0",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LRU Queue; LRU Age 1",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x3C",
+ "EventName": "UNC_C_QLRU.AGE1",
+ "PerPkg": "1",
+ "PublicDescription": "How often age was set to 1",
+ "UMask": "0x2",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LRU Queue; LRU Age 2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x3C",
+ "EventName": "UNC_C_QLRU.AGE2",
+ "PerPkg": "1",
+ "PublicDescription": "How often age was set to 2",
+ "UMask": "0x4",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LRU Queue; LRU Age 3",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x3C",
+ "EventName": "UNC_C_QLRU.AGE3",
+ "PerPkg": "1",
+ "PublicDescription": "How often age was set to 3",
+ "UMask": "0x8",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LRU Queue; LRU Bits Decremented",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x3C",
+ "EventName": "UNC_C_QLRU.LRU_DECREMENT",
+ "PerPkg": "1",
+ "PublicDescription": "How often all LRU bits were decremented by 1",
+ "UMask": "0x10",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LRU Queue; Non-0 Aged Victim",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x3C",
+ "EventName": "UNC_C_QLRU.VICTIM_NON_ZERO",
+ "PerPkg": "1",
+ "PublicDescription": "How often we picked a victim that had a non-zero age",
+ "UMask": "0x20",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "AD Ring In Use; All",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1B",
+ "EventName": "UNC_C_RING_AD_USED.ALL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the AD ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.",
+ "UMask": "0xF",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "AD Ring In Use; Down",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1B",
+ "EventName": "UNC_C_RING_AD_USED.CCW",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the AD ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.",
+ "UMask": "0xC",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "AD Ring In Use; Up",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1B",
+ "EventName": "UNC_C_RING_AD_USED.CW",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the AD ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.",
"UMask": "0x3",
"Unit": "CBO"
},
{
- "BriefDescription": "LLC misses - Uncacheable reads (from cpu) . Derived from unc_c_tor_inserts.miss_opcode",
+ "BriefDescription": "AD Ring In Use; Down and Even",
"Counter": "0,1,2,3",
- "EventCode": "0x35",
- "EventName": "LLC_MISSES.UNCACHEABLE",
- "Filter": "filter_opc=0x187",
+ "EventCode": "0x1B",
+ "EventName": "UNC_C_RING_AD_USED.DOWN_EVEN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the AD ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.; Filters for the Down and Even ring polarity.",
+ "UMask": "0x4",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "AD Ring In Use; Down and Odd",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1B",
+ "EventName": "UNC_C_RING_AD_USED.DOWN_ODD",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the AD ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.; Filters for the Down and Odd ring polarity.",
+ "UMask": "0x8",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "AD Ring In Use; Up and Even",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1B",
+ "EventName": "UNC_C_RING_AD_USED.UP_EVEN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the AD ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.; Filters for the Up and Even ring polarity.",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "AD Ring In Use; Up and Odd",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1B",
+ "EventName": "UNC_C_RING_AD_USED.UP_ODD",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the AD ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.; Filters for the Up and Odd ring polarity.",
+ "UMask": "0x2",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "AK Ring In Use; All",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1C",
+ "EventName": "UNC_C_RING_AK_USED.ALL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the AK ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.",
+ "UMask": "0xF",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "AK Ring In Use; Down",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1C",
+ "EventName": "UNC_C_RING_AK_USED.CCW",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the AK ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.",
+ "UMask": "0xC",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "AK Ring In Use; Up",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1C",
+ "EventName": "UNC_C_RING_AK_USED.CW",
"PerPkg": "1",
- "ScaleUnit": "64Bytes",
+ "PublicDescription": "Counts the number of cycles that the AK ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.",
"UMask": "0x3",
"Unit": "CBO"
},
{
- "BriefDescription": "MMIO reads. Derived from unc_c_tor_inserts.miss_opcode",
+ "BriefDescription": "AK Ring In Use; Down and Even",
"Counter": "0,1,2,3",
- "EventCode": "0x35",
- "EventName": "LLC_MISSES.MMIO_READ",
- "Filter": "filter_opc=0x187,filter_nc=1",
+ "EventCode": "0x1C",
+ "EventName": "UNC_C_RING_AK_USED.DOWN_EVEN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the AK ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.; Filters for the Down and Even ring polarity.",
+ "UMask": "0x4",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "AK Ring In Use; Down and Odd",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1C",
+ "EventName": "UNC_C_RING_AK_USED.DOWN_ODD",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the AK ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.; Filters for the Down and Odd ring polarity.",
+ "UMask": "0x8",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "AK Ring In Use; Up and Even",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1C",
+ "EventName": "UNC_C_RING_AK_USED.UP_EVEN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the AK ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.; Filters for the Up and Even ring polarity.",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "AK Ring In Use; Up and Odd",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1C",
+ "EventName": "UNC_C_RING_AK_USED.UP_ODD",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the AK ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.; Filters for the Up and Odd ring polarity.",
+ "UMask": "0x2",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "BL Ring in Use; All",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1D",
+ "EventName": "UNC_C_RING_BL_USED.ALL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the BL ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.",
+ "UMask": "0xF",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "BL Ring in Use; Down",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1D",
+ "EventName": "UNC_C_RING_BL_USED.CCW",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the BL ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.",
+ "UMask": "0xC",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "BL Ring in Use; Up",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1D",
+ "EventName": "UNC_C_RING_BL_USED.CW",
"PerPkg": "1",
- "ScaleUnit": "64Bytes",
+ "PublicDescription": "Counts the number of cycles that the BL ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.",
"UMask": "0x3",
"Unit": "CBO"
},
{
- "BriefDescription": "MMIO writes. Derived from unc_c_tor_inserts.miss_opcode",
+ "BriefDescription": "BL Ring in Use; Down and Even",
"Counter": "0,1,2,3",
- "EventCode": "0x35",
- "EventName": "LLC_MISSES.MMIO_WRITE",
- "Filter": "filter_opc=0x18f,filter_nc=1",
+ "EventCode": "0x1D",
+ "EventName": "UNC_C_RING_BL_USED.DOWN_EVEN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the BL ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.; Filters for the Down and Even ring polarity.",
+ "UMask": "0x4",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "BL Ring in Use; Down and Odd",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1D",
+ "EventName": "UNC_C_RING_BL_USED.DOWN_ODD",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the BL ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.; Filters for the Down and Odd ring polarity.",
+ "UMask": "0x8",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "BL Ring in Use; Up and Even",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1D",
+ "EventName": "UNC_C_RING_BL_USED.UP_EVEN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the BL ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.; Filters for the Up and Even ring polarity.",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "BL Ring in Use; Up and Odd",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1D",
+ "EventName": "UNC_C_RING_BL_USED.UP_ODD",
"PerPkg": "1",
- "ScaleUnit": "64Bytes",
+ "PublicDescription": "Counts the number of cycles that the BL ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.; Filters for the Up and Odd ring polarity.",
+ "UMask": "0x2",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Number of LLC responses that bounced on the Ring.; AD",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5",
+ "EventName": "UNC_C_RING_BOUNCES.AD",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Number of LLC responses that bounced on the Ring.; AK",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5",
+ "EventName": "UNC_C_RING_BOUNCES.AK",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Number of LLC responses that bounced on the Ring.; BL",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5",
+ "EventName": "UNC_C_RING_BOUNCES.BL",
+ "PerPkg": "1",
+ "UMask": "0x4",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Number of LLC responses that bounced on the Ring.; Snoops of processor's cache.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5",
+ "EventName": "UNC_C_RING_BOUNCES.IV",
+ "PerPkg": "1",
+ "UMask": "0x10",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "IV Ring in Use; Any",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1E",
+ "EventName": "UNC_C_RING_IV_USED.ANY",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the IV ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. There is only 1 IV ring in BDX. Therefore, if one wants to monitor the Even ring, they should select both UP_EVEN and DN_EVEN. To monitor the Odd ring, they should select both UP_ODD and DN_ODD.; Filters any polarity",
+ "UMask": "0xF",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "IV Ring in Use; Any",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1E",
+ "EventName": "UNC_C_RING_IV_USED.DN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the IV ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. There is only 1 IV ring in BDX. Therefore, if one wants to monitor the Even ring, they should select both UP_EVEN and DN_EVEN. To monitor the Odd ring, they should select both UP_ODD and DN_ODD.; Filters any polarity",
+ "UMask": "0xC",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "IV Ring in Use; Down",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1E",
+ "EventName": "UNC_C_RING_IV_USED.DOWN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the IV ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. There is only 1 IV ring in BDX. Therefore, if one wants to monitor the Even ring, they should select both UP_EVEN and DN_EVEN. To monitor the Odd ring, they should select both UP_ODD and DN_ODD.; Filters for Down polarity",
+ "UMask": "0xCC",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "IV Ring in Use; Any",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1E",
+ "EventName": "UNC_C_RING_IV_USED.UP",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of cycles that the IV ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. There is only 1 IV ring in BDX. Therefore, if one wants to monitor the Even ring, they should select both UP_EVEN and DN_EVEN. To monitor the Odd ring, they should select both UP_ODD and DN_ODD.; Filters any polarity",
"UMask": "0x3",
"Unit": "CBO"
},
{
- "BriefDescription": "LLC prefetch misses for RFO. Derived from unc_c_tor_inserts.miss_opcode",
+ "BriefDescription": "AD",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x6",
+ "EventName": "UNC_C_RING_SINK_STARVED.AD",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "AK",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x6",
+ "EventName": "UNC_C_RING_SINK_STARVED.AK",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "BL",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x6",
+ "EventName": "UNC_C_RING_SINK_STARVED.BL",
+ "PerPkg": "1",
+ "UMask": "0x4",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "IV",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x6",
+ "EventName": "UNC_C_RING_SINK_STARVED.IV",
+ "PerPkg": "1",
+ "UMask": "0x8",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Number of cycles the Cbo is actively throttling traffic onto the Ring in order to limit bounce traffic.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x7",
+ "EventName": "UNC_C_RING_SRC_THRTL",
+ "PerPkg": "1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Ingress Arbiter Blocking Cycles; IPQ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x12",
+ "EventName": "UNC_C_RxR_EXT_STARVED.IPQ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts cycles in external starvation. This occurs when one of the ingress queues is being starved by the other queues.; IPQ is externally starved and therefore we are blocking the IRQ.",
+ "UMask": "0x2",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Ingress Arbiter Blocking Cycles; IRQ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x12",
+ "EventName": "UNC_C_RxR_EXT_STARVED.IRQ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts cycles in external starvation. This occurs when one of the ingress queues is being starved by the other queues.; IRQ is externally starved and therefore we are blocking the IPQ.",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Ingress Arbiter Blocking Cycles; ISMQ_BID",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x12",
+ "EventName": "UNC_C_RxR_EXT_STARVED.ISMQ_BIDS",
+ "PerPkg": "1",
+ "PublicDescription": "Counts cycles in external starvation. This occurs when one of the ingress queues is being starved by the other queues.; Number of times that the ISMQ Bid.",
+ "UMask": "0x8",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Ingress Arbiter Blocking Cycles; PRQ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x12",
+ "EventName": "UNC_C_RxR_EXT_STARVED.PRQ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts cycles in external starvation. This occurs when one of the ingress queues is being starved by the other queues.",
+ "UMask": "0x4",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Ingress Allocations; IPQ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x13",
+ "EventName": "UNC_C_RxR_INSERTS.IPQ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts number of allocations per cycle into the specified Ingress queue.",
+ "UMask": "0x4",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Ingress Allocations; IRQ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x13",
+ "EventName": "UNC_C_RxR_INSERTS.IRQ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts number of allocations per cycle into the specified Ingress queue.",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Ingress Allocations; IRQ Rejected",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x13",
+ "EventName": "UNC_C_RxR_INSERTS.IRQ_REJ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts number of allocations per cycle into the specified Ingress queue.",
+ "UMask": "0x2",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Ingress Allocations; PRQ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x13",
+ "EventName": "UNC_C_RxR_INSERTS.PRQ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts number of allocations per cycle into the specified Ingress queue.",
+ "UMask": "0x10",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Ingress Allocations; PRQ Rejected",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x13",
+ "EventName": "UNC_C_RxR_INSERTS.PRQ_REJ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts number of allocations per cycle into the specified Ingress queue.",
+ "UMask": "0x20",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Ingress Internal Starvation Cycles; IPQ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x14",
+ "EventName": "UNC_C_RxR_INT_STARVED.IPQ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts cycles in internal starvation. This occurs when one (or more) of the entries in the ingress queue are being starved out by other entries in that queue.; Cycles with the IPQ in Internal Starvation.",
+ "UMask": "0x4",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Ingress Internal Starvation Cycles; IRQ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x14",
+ "EventName": "UNC_C_RxR_INT_STARVED.IRQ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts cycles in internal starvation. This occurs when one (or more) of the entries in the ingress queue are being starved out by other entries in that queue.; Cycles with the IRQ in Internal Starvation.",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Ingress Internal Starvation Cycles; ISMQ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x14",
+ "EventName": "UNC_C_RxR_INT_STARVED.ISMQ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts cycles in internal starvation. This occurs when one (or more) of the entries in the ingress queue are being starved out by other entries in that queue.; Cycles with the ISMQ in Internal Starvation.",
+ "UMask": "0x8",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Ingress Internal Starvation Cycles; PRQ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x14",
+ "EventName": "UNC_C_RxR_INT_STARVED.PRQ",
+ "PerPkg": "1",
+ "PublicDescription": "Counts cycles in internal starvation. This occurs when one (or more) of the entries in the ingress queue are being starved out by other entries in that queue.",
+ "UMask": "0x10",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Probe Queue Retries; Address Conflict",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x31",
+ "EventName": "UNC_C_RxR_IPQ_RETRY.ADDR_CONFLICT",
+ "PerPkg": "1",
+ "PublicDescription": "Number of times a snoop (probe) request had to retry. Filters exist to cover some of the common retry cases.; Counts the number of times that a request from the IPQ was retried because of a TOR reject from an address conflict. Address conflicts out of the IPQ should be rare. They will generally only occur if two different sockets are sending requests to the same address at the same time. This is a true conflict case, unlike the IPQ Address Conflict which is commonly caused by prefetching characteristics.",
+ "UMask": "0x4",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Probe Queue Retries; Any Reject",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x31",
+ "EventName": "UNC_C_RxR_IPQ_RETRY.ANY",
+ "PerPkg": "1",
+ "PublicDescription": "Number of times a snoop (probe) request had to retry. Filters exist to cover some of the common retry cases.; Counts the number of times that a request from the IPQ was retried because of a TOR reject. TOR rejects from the IPQ can be caused by the Egress being full or Address Conflicts.",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Probe Queue Retries; No Egress Credits",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x31",