diff options
Diffstat (limited to 'tools/perf')
399 files changed, 20358 insertions, 9374 deletions
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index ac841bc5c35b..6d148a40551c 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only include ../../scripts/Makefile.include include ../../scripts/utilities.mak diff --git a/tools/perf/Documentation/db-export.txt b/tools/perf/Documentation/db-export.txt new file mode 100644 index 000000000000..52ffccb02d55 --- /dev/null +++ b/tools/perf/Documentation/db-export.txt @@ -0,0 +1,41 @@ +Database Export +=============== + +perf tool's python scripting engine: + + tools/perf/util/scripting-engines/trace-event-python.c + +supports scripts: + + tools/perf/scripts/python/export-to-sqlite.py + tools/perf/scripts/python/export-to-postgresql.py + +which export data to a SQLite3 or PostgreSQL database. + +The export process provides records with unique sequential ids which allows the +data to be imported directly to a database and provides the relationships +between tables. + +Over time it is possible to continue to expand the export while maintaining +backward and forward compatibility, by following some simple rules: + +1. Because of the nature of SQL, existing tables and columns can continue to be +used so long as the names and meanings (and to some extent data types) remain +the same. + +2. New tables and columns can be added, without affecting existing SQL queries, +so long as the new names are unique. + +3. Scripts that use a database (e.g. exported-sql-viewer.py) can maintain +backward compatibility by testing for the presence of new tables and columns +before using them. e.g. function IsSelectable() in exported-sql-viewer.py + +4. The export scripts themselves maintain forward compatibility (i.e. an existing +script will continue to work with new versions of perf) by accepting a variable +number of arguments (e.g. def call_return_table(*x)) i.e. 
perf can pass more +arguments which old scripts will ignore. + +5. The scripting engine tests for the existence of script handler functions +before calling them. The scripting engine can also test for the support of new +or optional features by checking for the existence and value of script global +variables. diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index 115eaacc455f..50c5b60101bd 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -88,21 +88,51 @@ smaller. To represent software control flow, "branches" samples are produced. By default a branch sample is synthesized for every single branch. To get an idea what -data is available you can use the 'perf script' tool with no parameters, which -will list all the samples. +data is available you can use the 'perf script' tool with all itrace sampling +options, which will list all the samples. perf record -e intel_pt//u ls - perf script + perf script --itrace=ibxwpe An interesting field that is not printed by default is 'flags' which can be displayed as follows: - perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags + perf script --itrace=ibxwpe -F+flags The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, system, asynchronous, interrupt, transaction abort, trace begin, trace end, and in transaction, respectively. +Another interesting field that is not printed by default is 'ipc' which can be +displayed as follows: + + perf script --itrace=be -F+ipc + +There are two ways that instructions-per-cycle (IPC) can be calculated depending +on the recording. + +If the 'cyc' config term (see config terms section below) was used, then IPC is +calculated using the cycle count from CYC packets, otherwise MTC packets are +used - refer to the 'mtc' config term. When MTC is used, however, the values +are less accurate because the timing is less accurate. 
+ +Because Intel PT does not update the cycle count on every branch or instruction, +the values will often be zero. When there are values, they will be the number +of instructions and number of cycles since the last update, and thus represent +the average IPC since the last IPC for that event type. Note IPC for "branches" +events is calculated separately from IPC for "instructions" events. + +Also note that the IPC instruction count may or may not include the current +instruction. If the cycle count is associated with an asynchronous branch +(e.g. page fault or interrupt), then the instruction count does not include the +current instruction, otherwise it does. That is consistent with whether or not +that instruction has retired when the cycle count is updated. + +Another note, in the case of "branches" events, non-taken branches are not +presently sampled, so IPC values for them do not appear e.g. a CYC packet with a +TNT packet that starts with a non-taken branch. To see every possible IPC +value, "instructions" events can be used e.g. --itrace=i0ns + While it is possible to create scripts to analyze the data, an alternative approach is available to export the data to a sqlite or postgresql database. Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, @@ -713,7 +743,7 @@ Having no option is the same as which, in turn, is the same as - --itrace=ibxwpe + --itrace=cepwx The letters are: diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 462b3cde0675..e4aa268d2e38 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -564,9 +564,12 @@ llvm.*:: llvm.clang-bpf-cmd-template:: Cmdline template. Below lines show its default value. Environment variable is used to pass options. 
- "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS \ - -Wno-unused-value -Wno-pointer-sign -working-directory \ - $WORKING_DIR -c $CLANG_SOURCE -target bpf -O2 -o -" + "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ + "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \ + "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \ + "-Wno-unused-value -Wno-pointer-sign " \ + "-working-directory $WORKING_DIR " \ + "-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE" llvm.clang-opt:: Options passed to clang. diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index da7809b15cc9..d5cc15e651cf 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -90,9 +90,10 @@ OPTIONS -c:: --compute:: - Differential computation selection - delta, ratio, wdiff, delta-abs - (default is delta-abs). Default can be changed using diff.compute - config option. See COMPARISON METHODS section for more info. + Differential computation selection - delta, ratio, wdiff, cycles, + delta-abs (default is delta-abs). Default can be changed using + diff.compute config option. See COMPARISON METHODS section for + more info. -p:: --period:: @@ -142,12 +143,14 @@ OPTIONS perf diff --time 0%-10%,30%-40% It also supports analyzing samples within a given time window - <start>,<stop>. Times have the format seconds.microseconds. If 'start' - is not given (i.e., time string is ',x.y') then analysis starts at - the beginning of the file. If stop time is not given (i.e, time - string is 'x.y,') then analysis goes to the end of the file. Time string is - 'a1.b1,c1.d1:a2.b2,c2.d2'. Use ':' to separate timestamps for different - perf.data files. + <start>,<stop>. Times have the format seconds.nanoseconds. If 'start' + is not given (i.e. time string is ',x.y') then analysis starts at + the beginning of the file. If stop time is not given (i.e. 
time + string is 'x.y,') then analysis goes to the end of the file. + Multiple ranges can be separated by spaces, which requires the argument + to be quoted e.g. --time "1234.567,1234.789 1235," + Time string is 'a1.b1,c1.d1:a2.b2,c2.d2'. Use ':' to separate timestamps + for different perf.data files. For example, we get the timestamp information from 'perf script'. @@ -278,6 +281,16 @@ If specified the 'Weighted diff' column is displayed with value 'd' computed as: - WEIGHT-A being the weight of the data file - WEIGHT-B being the weight of the baseline data file +cycles +~~~~~~ +If specified the '[Program Block Range] Cycles Diff' column is displayed. +It displays the cycles difference of same program basic block amongst +two perf.data. The program basic block is the code between two branches. + +'[Program Block Range]' indicates the range of a program basic block. +Source line is reported if it can be found otherwise uses symbol+offset +instead. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 138fb6e94b3c..18ed1b0fceb3 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -199,6 +199,18 @@ also be supplied. For example: perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ... +EVENT QUALIFIERS: + +It is also possible to add extra qualifiers to an event: + +percore: + +Sums up the event counts for all hardware threads in a core, e.g.: + + + perf stat -e cpu/event=0,umask=0x3,percore=1/ + + EVENT GROUPS ------------ diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 8fe4dffcadd0..15e0fa87241b 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -406,7 +406,8 @@ symbolic names, e.g. on x86, ax, si. To list the available registers use --intr-regs=ax,bx. 
The list of register is architecture dependent. --user-regs:: -Capture user registers at sample time. Same arguments as -I. +Similar to -I, but capture user registers at sample time. To list the available +user registers use --user-regs=\?. --running-time:: Record running and enabled time for read events (:S) @@ -459,12 +460,47 @@ Set affinity mask of trace reading thread according to the policy defined by 'mo node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer cpu - thread affinity mask is set to cpu of the processed mmap buffer +--mmap-flush=number:: + +Specify minimal number of bytes that is extracted from mmap data pages and +processed for output. One can specify the number using B/K/M/G suffixes. + +The maximal allowed value is a quarter of the size of mmaped data pages. + +The default option value is 1 byte which means that every time that the output +writing thread finds some new data in the mmaped buffer the data is extracted, +possibly compressed (-z) and written to the output, perf.data or pipe. + +Larger data chunks are compressed more effectively in comparison to smaller +chunks so extraction of larger chunks from the mmap data pages is preferable +from the perspective of output size reduction. + +Also in some cases executing fewer output write syscalls with bigger data size +can take less time than executing more output write syscalls with smaller data +size thus lowering runtime profiling overhead. + +-z:: +--compression-level[=n]:: +Produce compressed trace using specified level n (default: 1 - fastest compression, +22 - smallest trace) + --all-kernel:: Configure all used events to run in kernel space. --all-user:: Configure all used events to run in user space. +--kernel-callchains:: +Collect callchains only from kernel space. I.e. this option sets +perf_event_attr.exclude_callchain_user to 1. + +--user-callchains:: +Collect callchains only from user space. I.e. 
this option sets +perf_event_attr.exclude_callchain_kernel to 1. + +Don't use both --kernel-callchains and --user-callchains at the same time or no +callchains will be collected. + --timestamp-filename Append timestamp to output file name. diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index f441baa794ce..987261d158d4 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -89,7 +89,7 @@ OPTIONS - socket: processor socket number the task ran at the time of sample - srcline: filename and line number executed at the time of sample. The DWARF debugging info must be provided. - - srcfile: file name of the source file of the same. Requires dwarf + - srcfile: file name of the source file of the samples. Requires dwarf information. - weight: Event specific weight, e.g. memory latency or transaction abort cost. This is the global weight. @@ -412,12 +412,13 @@ OPTIONS --time:: Only analyze samples within given time window: <start>,<stop>. Times - have the format seconds.microseconds. If start is not given (i.e., time + have the format seconds.nanoseconds. If start is not given (i.e. time string is ',x.y') then analysis starts at the beginning of the file. If - stop time is not given (i.e, time string is 'x.y,') then analysis goes - to end of file. + stop time is not given (i.e. time string is 'x.y,') then analysis goes + to end of file. Multiple ranges can be separated by spaces, which + requires the argument to be quoted e.g. --time "1234.567,1234.789 1235," - Also support time percent with multiple time range. Time string is + Also support time percent with multiple time ranges. Time string is 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. 
For example: diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 9b0d04dd2a61..d4e2e18a5881 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -117,7 +117,7 @@ OPTIONS Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, - brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode. + brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -203,6 +203,9 @@ OPTIONS The synth field is used by synthesized events which may be created when Instruction Trace decoding. + The ipc (instructions per cycle) field is synthesized and may have a value when + Instruction Trace decoding. + Finally, a user may not set fields to none for all event types. i.e., -F "" is not allowed. @@ -313,6 +316,9 @@ OPTIONS --show-round-events Display finished round events i.e. events of type PERF_RECORD_FINISHED_ROUND. +--show-bpf-events + Display bpf events i.e. events of type PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT. + --demangle:: Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. @@ -355,12 +361,13 @@ include::itrace.txt[] --time:: Only analyze samples within given time window: <start>,<stop>. Times - have the format seconds.microseconds. If start is not given (i.e., time + have the format seconds.nanoseconds. If start is not given (i.e. time string is ',x.y') then analysis starts at the beginning of the file. If - stop time is not given (i.e, time string is 'x.y,') then analysis goes - to end of file. + stop time is not given (i.e. 
time string is 'x.y,') then analysis goes + to end of file. Multiple ranges can be separated by spaces, which + requires the argument to be quoted e.g. --time "1234.567,1234.789 1235," - Also support time percent with multipe time range. Time string is + Also support time percent with multiple time ranges. Time string is 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. For example: diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 39c05f89104e..930c51c01201 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -43,6 +43,10 @@ report:: param1 and param2 are defined as formats for the PMU in /sys/bus/event_source/devices/<pmu>/format/* + 'percore' is an event qualifier that sums up the event counts for all + hardware threads in a core. For example: + perf stat -A -a -e cpu/event,percore=1/,otherevent ... + - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/' where M, N, K are numbers (in decimal, hex, octal format). Acceptable values for each of 'config', 'config1' and 'config2' @@ -196,6 +200,13 @@ use --per-socket in addition to -a. (system-wide). The output includes the socket number and the number of online processors on that socket. This is useful to gauge the amount of aggregation. +--per-die:: +Aggregate counts per processor die for system-wide mode measurements. This +is a useful mode to detect imbalance between dies. To enable this mode, +use --per-die in addition to -a. (system-wide). The output includes the +die number and the number of online processors on that die. This is +useful to gauge the amount of aggregation. + --per-core:: Aggregate counts per physical processor for system-wide mode measurements. This is a useful mode to detect imbalance between physical cores. To enable this mode, @@ -235,6 +246,9 @@ Input file name. --per-socket:: Aggregate counts per processor socket for system-wide mode measurements. 
+--per-die:: +Aggregate counts per processor die for system-wide mode measurements. + --per-core:: Aggregate counts per physical processor for system-wide mode measurements. diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 44d89fb9c788..cfea87c6f38e 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -262,6 +262,11 @@ Default is to monitor all CPUS. The number of threads to run when synthesizing events for existing processes. By default, the number of threads equals to the number of online CPUs. +--namespaces:: + Record events of type PERF_RECORD_NAMESPACES and display it with the + 'cgroup_id' sort key. + + INTERACTIVE PROMPTING KEYS -------------------------- diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index 593ef49b273c..5f54feb19977 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -151,25 +151,45 @@ struct { HEADER_CPU_TOPOLOGY = 13, -String lists defining the core and CPU threads topology. -The string lists are followed by a variable length array -which contains core_id and socket_id of each cpu. -The number of entries can be determined by the size of the -section minus the sizes of both string lists. - struct { + /* + * First revision of HEADER_CPU_TOPOLOGY + * + * See 'struct perf_header_string_list' definition earlier + * in this file. 
+ */ + struct perf_header_string_list cores; /* Variable length */ struct perf_header_string_list threads; /* Variable length */ + + /* + * Second revision of HEADER_CPU_TOPOLOGY, older tools + * will not consider what comes next + */ + struct { uint32_t core_id; uint32_t socket_id; } cpus[nr]; /* Variable length records */ + /* 'nr' comes from previously processed HEADER_NRCPUS's nr_cpus_avail */ + + /* + * Third revision of HEADER_CPU_TOPOLOGY, older tools + * will not consider what comes next + */ + + struct perf_header_string_list dies; /* Variable length */ + uint32_t die_id[nr_cpus_avail]; /* from previously processed HEADER_NRCPUS, VLA */ }; Example: - sibling cores : 0-3 + sibling sockets : 0-7 + sibling dies : 0-3 + sibling dies : 4-7 sibling threads : 0-1 sibling threads : 2-3 + sibling threads : 4-5 + sibling threads : 6-7 HEADER_NUMA_TOPOLOGY = 14, @@ -272,6 +292,82 @@ struct { Two uint64_t for the time of first sample and the time of last sample. + HEADER_MEM_TOPOLOGY = 22, + +Physical memory map and its node assignments. + +The format of data in MEM_TOPOLOGY is as follows: + + 0 - version | for future changes + 8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes + 16 - count | number of nodes + +For each node we store map of physical indexes: + + 32 - node id | node index + 40 - size | size of bitmap + 48 - bitmap | bitmap of memory indexes that belongs to node + | /sys/devices/system/node/node<NODE>/memory<INDEX> + +The MEM_TOPOLOGY can be displayed with following command: + +$ perf report --header-only -I +... +# memory nodes (nr 1, block size 0x8000000): +# 0 [7G]: 0-23,32-69 + + HEADER_CLOCKID = 23, + +One uint64_t for the clockid frequency, specified, for instance, via 'perf +record -k' (see clock_gettime()), to enable timestamps derived metrics +conversion into wall clock time on the reporting stage. + + HEADER_DIR_FORMAT = 24, + +The data files layout is described by HEADER_DIR_FORMAT feature. 
Currently it +holds only version number (1): + + uint64_t version; + +The current version holds only the version value (1), which means that data files: + +- Follow the 'data.*' name format. + +- Contain raw events data in standard perf format as read from kernel (and need + to be sorted) + +Future versions are expected to describe different data files layout according +to special needs. + + HEADER_BPF_PROG_INFO = 25, + +struct bpf_prog_info_linear, which contains detailed information about +a BPF program, including type, id, tag, jited/xlated instructions, etc. + + HEADER_BPF_BTF = 26, + +Contains BPF Type Format (BTF). For more information about BTF, please +refer to Documentation/bpf/btf.rst. + +struct { + u32 id; + u32 data_size; + char data[]; +}; + + HEADER_COMPRESSED = 27, + +struct { + u32 version; + u32 type; + u32 level; + u32 ratio; + u32 mmap_len; +}; + +Indicates that trace contains records of PERF_RECORD_COMPRESSED type +that have perf_events records in compressed form. + other bits are reserved and should be ignored for now HEADER_FEAT_BITS = 256, @@ -437,6 +533,17 @@ struct auxtrace_error_event { Describes a header feature. These are records used in pipe-mode that contain information that otherwise would be in perf.data file's header. + PERF_RECORD_COMPRESSED = 81, + +struct compressed_event { + struct perf_event_header header; + char data[]; +}; + +The header is followed by compressed data frame that can be decompressed +into array of perf trace records. The size of the entire compressed event +record including the header is limited by the max value of header.size. + Event types Define the event attributes with their IDs. 
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 864e37597252..401f0ed67439 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -22,6 +22,8 @@ OPTIONS verbose - general debug messages ordered-events - ordered events object debug messages data-convert - data convert command debug messages + stderr - write debug output (option -v) to stderr + in browser mode --buildid-dir:: Setup buildid cache directory. It has higher priority than diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt index 869965d629ce..825745a645c1 100644 --- a/tools/perf/Documentation/tips.txt +++ b/tools/perf/Documentation/tips.txt @@ -38,6 +38,6 @@ To report cacheline events from previous recording: perf c2c report To browse sample contexts use perf report --sample 10 and select in context menu To separate samples by time use perf report --sort time,overhead,sym To set sample time separation other than 100ms with --sort time use --time-quantum -Add -I to perf report to sample register values visible in perf report context. +Add -I to perf record to sample register values, which will be visible in perf report sample context. To show IPC for sampling periods use perf record -e '{cycles,instructions}:S' and then browse context To show context switches in perf report sample context add --switch-events to perf record. 
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 627b7cada144..70f1ff4e2eb4 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -7,6 +7,8 @@ tools/lib/traceevent tools/lib/api tools/lib/bpf tools/lib/subcmd +tools/lib/argv_split.c +tools/lib/ctype.c tools/lib/hweight.c tools/lib/rbtree.c tools/lib/string.c @@ -16,3 +18,4 @@ tools/lib/find_bit.c tools/lib/bitmap.c tools/lib/str_error_r.c tools/lib/vsprintf.c +tools/lib/zalloc.c diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 6d65874e16c3..89ac5a1f1550 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only ifeq ($(src-perf),) src-perf := $(srctree)/tools/perf @@ -59,6 +60,10 @@ ifeq ($(SRCARCH),arm64) LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif +ifeq ($(SRCARCH),csky) + NO_PERF_REGS := 0 +endif + ifeq ($(ARCH),s390) NO_PERF_REGS := 0 NO_SYSCALL_TABLE := 0 @@ -77,7 +82,7 @@ endif # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures # to the check. -ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390)) +ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390 csky)) NO_LIBDW_DWARF_UNWIND := 1 endif @@ -152,6 +157,13 @@ endif FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf +ifdef LIBZSTD_DIR + LIBZSTD_CFLAGS := -I$(LIBZSTD_DIR)/lib + LIBZSTD_LDFLAGS := -L$(LIBZSTD_DIR)/lib +endif +FEATURE_CHECK_CFLAGS-libzstd := $(LIBZSTD_CFLAGS) +FEATURE_CHECK_LDFLAGS-libzstd := $(LIBZSTD_LDFLAGS) + FEATURE_CHECK_CFLAGS-bpf = -I. 
-I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi # include ARCH specific config -include $(src-perf)/arch/$(SRCARCH)/Makefile @@ -320,6 +332,10 @@ ifeq ($(feature-get_current_dir_name), 1) CFLAGS += -DHAVE_GET_CURRENT_DIR_NAME endif +ifeq ($(feature-gettid), 1) + CFLAGS += -DHAVE_GETTID +endif + ifdef NO_LIBELF NO_DWARF := 1 NO_DEMANGLE := 1 @@ -401,6 +417,9 @@ ifdef CORESIGHT $(call feature_check,libopencsd) ifeq ($(feature-libopencsd), 1) CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS) + ifeq ($(feature-reallocarray), 0) + CFLAGS += -DCOMPAT_NEED_REALLOCARRAY + endif LDFLAGS += $(LIBOPENCSD_LDFLAGS) EXTLIBS += $(OPENCSDLIBS) $(call detected,CONFIG_LIBOPENCSD) @@ -625,11 +644,15 @@ endif ifndef NO_SLANG ifneq ($(feature-libslang), 1) - msg := $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev); - NO_SLANG := 1 - else + ifneq ($(feature-libslang-include-subdir), 1) + msg := $(warning slang not found, disables TUI support. 
Please install slang-devel, libslang-dev or libslang2-dev); + NO_SLANG := 1 + else + CFLAGS += -DHAVE_SLANG_INCLUDE_SUBDIR + endif + endif + ifndef NO_SLANG # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h - CFLAGS += -I/usr/include/slang CFLAGS += -DHAVE_SLANG_SUPPORT EXTLIBS += -lslang $(call detected,CONFIG_SLANG) @@ -787,6 +810,19 @@ ifndef NO_LZMA endif endif +ifndef NO_LIBZSTD + ifeq ($(feature-libzstd), 1) + CFLAGS += -DHAVE_ZSTD_SUPPORT + CFLAGS += $(LIBZSTD_CFLAGS) + LDFLAGS += $(LIBZSTD_LDFLAGS) + EXTLIBS += -lzstd + $(call detected,CONFIG_ZSTD) + else + msg := $(warning No libzstd found, disables trace compression, please install libzstd-dev[el] and/or set LIBZSTD_DIR); + NO_LIBZSTD := 1 + endif +endif + ifndef NO_BACKTRACE ifeq ($(feature-backtrace), 1) CFLAGS += -DHAVE_BACKTRACE_SUPPORT diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index e8c9f77e9010..0fffd2bb6cd9 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only include ../scripts/Makefile.include include ../scripts/Makefile.arch @@ -108,6 +109,9 @@ include ../scripts/utilities.mak # streaming for record mode. Currently Posix AIO trace streaming is # supported only when linking with glibc. # +# Define NO_LIBZSTD if you do not want support of Zstandard based runtime +# trace compression in record mode. 
+# # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -416,6 +420,24 @@ fadvise_advice_tbl := $(srctree)/tools/perf/trace/beauty/fadvise.sh $(fadvise_advice_array): $(linux_uapi_dir)/in.h $(fadvise_advice_tbl) $(Q)$(SHELL) '$(fadvise_advice_tbl)' $(linux_uapi_dir) > $@ +fsmount_arrays := $(beauty_outdir)/fsmount_arrays.c +fsmount_tbls := $(srctree)/tools/perf/trace/beauty/fsmount.sh + +$(fsmount_arrays): $(linux_uapi_dir)/fs.h $(fsmount_tbls) + $(Q)$(SHELL) '$(fsmount_tbls)' $(linux_uapi_dir) > $@ + +fspick_arrays := $(beauty_outdir)/fspick_arrays.c +fspick_tbls := $(srctree)/tools/perf/trace/beauty/fspick.sh + +$(fspick_arrays): $(linux_uapi_dir)/fs.h $(fspick_tbls) + $(Q)$(SHELL) '$(fspick_tbls)' $(linux_uapi_dir) > $@ + +fsconfig_arrays := $(beauty_outdir)/fsconfig_arrays.c +fsconfig_tbls := $(srctree)/tools/perf/trace/beauty/fsconfig.sh + +$(fsconfig_arrays): $(linux_uapi_dir)/fs.h $(fsconfig_tbls) + $(Q)$(SHELL) '$(fsconfig_tbls)' $(linux_uapi_dir) > $@ + pkey_alloc_access_rights_array := $(beauty_outdir)/pkey_alloc_access_rights_array.c asm_generic_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/ pkey_alloc_access_rights_tbl := $(srctree)/tools/perf/trace/beauty/pkey_alloc_access_rights.sh @@ -490,6 +512,12 @@ mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh $(mount_flags_array): $(linux_uapi_dir)/fs.h $(mount_flags_tbl) $(Q)$(SHELL) '$(mount_flags_tbl)' $(linux_uapi_dir) > $@ +move_mount_flags_array := $(beauty_outdir)/move_mount_flags_array.c +move_mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/move_mount_flags.sh + +$(move_mount_flags_array): $(linux_uapi_dir)/fs.h $(move_mount_flags_tbl) + $(Q)$(SHELL) '$(move_mount_flags_tbl)' $(linux_uapi_dir) > $@ + prctl_option_array := $(beauty_outdir)/prctl_option_array.c prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/ prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh @@ -522,6 +550,12 @@ arch_errno_tbl := 
$(srctree)/tools/perf/trace/beauty/arch_errno_names.sh $(arch_errno_name_array): $(arch_errno_tbl) $(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@ +sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c +sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh + +$(sync_file_range_arrays): $(linux_uapi_dir)/fs.h $(sync_file_range_tbls) + $(Q)$(SHELL) '$(sync_file_range_tbls)' $(linux_uapi_dir) > $@ + all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) # Create python binding output directory if not already present @@ -625,6 +659,9 @@ build-dir = $(if $(__build-dir),$(__build-dir),.) prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) \ $(fadvise_advice_array) \ + $(fsconfig_arrays) \ + $(fsmount_arrays) \ + $(fspick_arrays) \ $(pkey_alloc_access_rights_array) \ $(sndrv_pcm_ioctl_array) \ $(sndrv_ctl_ioctl_array) \ @@ -635,12 +672,14 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(madvise_behavior_array) \ $(mmap_flags_array) \ $(mount_flags_array) \ + $(move_mount_flags_array) \ $(perf_ioctl_array) \ $(prctl_option_array) \ $(usbdevfs_ioctl_array) \ $(x86_arch_prctl_code_array) \ $(rename_flags_array) \ - $(arch_errno_name_array) + $(arch_errno_name_array) \ + $(sync_file_range_arrays) $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -919,9 +958,13 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ $(OUTPUT)pmu-events/pmu-events.c \ $(OUTPUT)$(fadvise_advice_array) \ + $(OUTPUT)$(fsconfig_arrays) \ + $(OUTPUT)$(fsmount_arrays) \ + $(OUTPUT)$(fspick_arrays) \ $(OUTPUT)$(madvise_behavior_array) \ $(OUTPUT)$(mmap_flags_array) \ $(OUTPUT)$(mount_flags_array) \ + $(OUTPUT)$(move_mount_flags_array) \ $(OUTPUT)$(drm_ioctl_array) \ 
$(OUTPUT)$(pkey_alloc_access_rights_array) \ $(OUTPUT)$(sndrv_ctl_ioctl_array) \ @@ -935,7 +978,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(usbdevfs_ioctl_array) \ $(OUTPUT)$(x86_arch_prctl_code_array) \ $(OUTPUT)$(rename_flags_array) \ - $(OUTPUT)$(arch_errno_name_array) + $(OUTPUT)$(arch_errno_name_array) \ + $(OUTPUT)$(sync_file_range_arrays) $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean # diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile index 18b13518d8d8..1d88fdab13bf 100644 --- a/tools/perf/arch/arm/Makefile +++ b/tools/perf/arch/arm/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c index f64516d5b23e..c7d1a69b894f 100644 --- a/tools/perf/arch/arm/annotate/instructions.c +++ b/tools/perf/arch/arm/annotate/instructions.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> +#include <linux/zalloc.h> #include <sys/types.h> #include <regex.h> diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c index 1ce6bdbda561..02014740a1aa 100644 --- a/tools/perf/arch/arm/util/auxtrace.c +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -6,6 +6,7 @@ #include <stdbool.h> #include <linux/coresight-pmu.h> +#include <linux/zalloc.h> #include "../../util/auxtrace.h" #include "../../util/evlist.h" diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 911426721170..4208974c24f8 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/log2.h> #include <linux/types.h> +#include <linux/zalloc.h> #include "cs-etm.h" #include "../../perf.h" @@ -22,6 +23,7 @@ #include "../../util/pmu.h" #include "../../util/thread_map.h" #include "../../util/cs-etm.h" +#include 
"../../util/util.h" #include <errno.h> #include <stdlib.h> @@ -31,12 +33,158 @@ struct cs_etm_recording { struct auxtrace_record itr; struct perf_pmu *cs_etm_pmu; struct perf_evlist *evlist; + int wrapped_cnt; + bool *wrapped; bool snapshot_mode; size_t snapshot_size; }; +static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = { + [CS_ETM_ETMCCER] = "mgmt/etmccer", + [CS_ETM_ETMIDR] = "mgmt/etmidr", +}; + +static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = { + [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0", + [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1", + [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2", + [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8", + [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus", +}; + static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu); +static int cs_etm_set_context_id(struct auxtrace_record *itr, + struct perf_evsel *evsel, int cpu) +{ + struct cs_etm_recording *ptr; + struct perf_pmu *cs_etm_pmu; + char path[PATH_MAX]; + int err = -EINVAL; + u32 val; + + ptr = container_of(itr, struct cs_etm_recording, itr); + cs_etm_pmu = ptr->cs_etm_pmu; + + if (!cs_etm_is_etmv4(itr, cpu)) + goto out; + + /* Get a handle on TRCIRD2 */ + snprintf(path, PATH_MAX, "cpu%d/%s", + cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR2]); + err = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val); + + /* There was a problem reading the file, bailing out */ + if (err != 1) { + pr_err("%s: can't read file %s\n", + CORESIGHT_ETM_PMU_NAME, path); + goto out; + } + + /* + * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID tracing + * is supported: + * 0b00000 Context ID tracing is not supported. + * 0b00100 Maximum of 32-bit Context ID size. + * All other values are reserved. 
+ */ + val = BMVAL(val, 5, 9); + if (!val || val != 0x4) { + err = -EINVAL; + goto out; + } + + /* All good, let the kernel know */ + evsel->attr.config |= (1 << ETM_OPT_CTXTID); + err = 0; + +out: + + return err; +} + +static int cs_etm_set_timestamp(struct auxtrace_record *itr, + struct perf_evsel *evsel, int cpu) +{ + struct cs_etm_recording *ptr; + struct perf_pmu *cs_etm_pmu; + char path[PATH_MAX]; + int err = -EINVAL; + u32 val; + + ptr = container_of(itr, struct cs_etm_recording, itr); + cs_etm_pmu = ptr->cs_etm_pmu; + + if (!cs_etm_is_etmv4(itr, cpu)) + goto out; + + /* Get a handle on TRCIRD0 */ + snprintf(path, PATH_MAX, "cpu%d/%s", + cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0]); + err = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val); + + /* There was a problem reading the file, bailing out */ + if (err != 1) { + pr_err("%s: can't read file %s\n", + CORESIGHT_ETM_PMU_NAME, path); + goto out; + } + + /* + * TRCIDR0.TSSIZE, bit [28-24], indicates whether global timestamping + * is supported: + * 0b00000 Global timestamping is not implemented + * 0b00110 Implementation supports a maximum timestamp of 48bits. + * 0b01000 Implementation supports a maximum timestamp of 64bits. 
+ */ + val &= GENMASK(28, 24); + if (!val) { + err = -EINVAL; + goto out; + } + + /* All good, let the kernel know */ + evsel->attr.config |= (1 << ETM_OPT_TS); + err = 0; + +out: + return err; +} + +static int cs_etm_set_option(struct auxtrace_record *itr, + struct perf_evsel *evsel, u32 option) +{ + int i, err = -EINVAL; + struct cpu_map *event_cpus = evsel->evlist->cpus; + struct cpu_map *online_cpus = cpu_map__new(NULL); + + /* Set option of each CPU we have */ + for (i = 0; i < cpu__max_cpu(); i++) { + if (!cpu_map__has(event_cpus, i) || + !cpu_map__has(online_cpus, i)) + continue; + + if (option & ETM_OPT_CTXTID) { + err = cs_etm_set_context_id(itr, evsel, i); + if (err) + goto out; + } + if (option & ETM_OPT_TS) { + err = cs_etm_set_timestamp(itr, evsel, i); + if (err) + goto out; + } + if (option & ~(ETM_OPT_CTXTID | ETM_OPT_TS)) + /* Nothing else is currently supported */ + goto out; + } + + err = 0; +out: + cpu_map__put(online_cpus); + return err; +} + static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr, struct record_opts *opts, const char *str) @@ -105,12 +253,16 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, container_of(itr, struct cs_etm_recording, itr); struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; struct perf_evsel *evsel, *cs_etm_evsel = NULL; - const struct cpu_map *cpus = evlist->cpus; + struct cpu_map *cpus = evlist->cpus; bool privileged = (geteuid() == 0 || perf_event_paranoid() < 0); + int err = 0; ptr->evlist = evlist; ptr->snapshot_mode = opts->auxtrace_snapshot_mode; + if (perf_can_record_switch_events()) + opts->record_switch_events = true; + evlist__for_each_entry(evlist, evsel) { if (evsel->attr.type == cs_etm_pmu->type) { if (cs_etm_evsel) { @@ -241,19 +393,25 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, /* * In the case of per-cpu mmaps, we need the CPU on the - * AUX event. + * AUX event. 
We also need the contextID in order to be notified + * when a context switch happened. */ - if (!cpu_map__empty(cpus)) + if (!cpu_map__empty(cpus)) { perf_evsel__set_sample_bit(cs_etm_evsel, CPU); + err = cs_etm_set_option(itr, cs_etm_evsel, + ETM_OPT_CTXTID | ETM_OPT_TS); + if (err) + goto out; + } + /* Add dummy event to keep tracking */ if (opts->full_auxtrace) { struct perf_evsel *tracking_evsel; - int err; err = parse_events(evlist, "dummy:u", NULL); if (err) - return err; + goto out; tracking_evsel = perf_evlist__last(evlist); perf_evlist__set_tracking_event(evlist, tracking_evsel); @@ -266,7 +424,8 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, perf_evsel__set_sample_bit(tracking_evsel, TIME); } - return 0; +out: + return err; } static u64 cs_etm_get_config(struct auxtrace_record *itr) @@ -314,6 +473,8 @@ static u64 cs_etmv4_get_config(struct auxtrace_record *itr) config_opts = cs_etm_get_config(itr); if (config_opts & BIT(ETM_OPT_CYCACC)) config |= BIT(ETM4_CFG_BIT_CYCACC); + if (config_opts & BIT(ETM_OPT_CTXTID)) + config |= BIT(ETM4_CFG_BIT_CTXTID); if (config_opts & BIT(ETM_OPT_TS)) config |= BIT(ETM4_CFG_BIT_TS); if (config_opts & BIT(ETM_OPT_RETSTK)) @@ -363,19 +524,6 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, (etmv3 * CS_ETMV3_PRIV_SIZE)); } -static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = { - [CS_ETM_ETMCCER] = "mgmt/etmccer", - [CS_ETM_ETMIDR] = "mgmt/etmidr", -}; - -static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = { - [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0", - [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1", - [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2", - [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8", - [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus", -}; - static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu) { bool ret = false; @@ -536,16 +684,131 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, return 0; } -static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused, 
+static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx) +{ + bool *wrapped; + int cnt = ptr->wrapped_cnt; + + /* Make @ptr->wrapped as big as @idx */ + while (cnt <= idx) + cnt++; + + /* + * Free'ed in cs_etm_recording_free(). Using realloc() to avoid + * cross compilation problems where the host's system supports + * reallocarray() but not the target. + */ + wrapped = realloc(ptr->wrapped, cnt * sizeof(bool)); + if (!wrapped) + return -ENOMEM; + + wrapped[cnt - 1] = false; + ptr->wrapped_cnt = cnt; + ptr->wrapped = wrapped; + + return 0; +} + +static bool cs_etm_buffer_has_wrapped(unsigned char *buffer, + size_t buffer_size, u64 head) +{ + u64 i, watermark; + u64 *buf = (u64 *)buffer; + size_t buf_size = buffer_size; + + /* + * We want to look the very last 512 byte (chosen arbitrarily) in + * the ring buffer. + */ + watermark = buf_size - 512; + + /* + * @head is continuously increasing - if its value is equal or greater + * than the size of the ring buffer, it has wrapped around. + */ + if (head >= buffer_size) + return true; + + /* + * The value of @head is somewhere within the size of the ring buffer. + * This can be that there hasn't been enough data to fill the ring + * buffer yet or the trace time was so long that @head has numerically + * wrapped around. To find we need to check if we have data at the very + * end of the ring buffer. We can reliably do this because mmap'ed + * pages are zeroed out and there is a fresh mapping with every new + * session. + */ + + /* @head is less than 512 byte from the end of the ring buffer */ + if (head > watermark) + watermark = head; + + /* + * Speed things up by using 64 bit transactions (see "u64 *buf" above) + */ + watermark >>= 3; + buf_size >>= 3; + + /* + * If we find trace data at the end of the ring buffer, @head has + * been there and has numerically wrapped around at least once. 
+ */ + for (i = watermark; i < buf_size; i++) + if (buf[i]) + return true; + + return false; +} + +static int cs_etm_find_snapshot(struct auxtrace_record *itr, int idx, struct auxtrace_mmap *mm, - unsigned char *data __maybe_unused, + unsigned char *data, u64 *head, u64 *old) { + int err; + bool wrapped; + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + + /* + * Allocate memory to keep track of wrapping if this is the first + * time we deal with this *mm. + */ + if (idx >= ptr->wrapped_cnt) { + err = cs_etm_alloc_wrapped_array(ptr, idx); + if (err) + return err; + } + + /* + * Check to see if *head has wrapped around. If it hasn't only the + * amount of data between *head and *old is snapshot'ed to avoid + * bloating the perf.data file with zeros. But as soon as *head has + * wrapped around the entire size of the AUX ring buffer it taken. + */ + wrapped = ptr->wrapped[idx]; + if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) { + wrapped = true; + ptr->wrapped[idx] = true; + } + pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n", __func__, idx, (size_t)*old, (size_t)*head, mm->len); - *old = *head; - *head += mm->len; + /* No wrap has occurred, we can just use *head and *old. */ + if (!wrapped) + return 0; + + /* + * *head has wrapped around - adjust *head and *old to pickup the + * entire content of the AUX buffer. 
+ */ + if (*head >= mm->len) { + *old = *head - mm->len; + } else { + *head += mm->len; + *old = *head - mm->len; + } return 0; } @@ -586,6 +849,8 @@ static void cs_etm_recording_free(struct auxtrace_record *itr) { struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); + + zfree(&ptr->wrapped); free(ptr); } diff --git a/tools/perf/arch/arm/util/dwarf-regs.c b/tools/perf/arch/arm/util/dwarf-regs.c index 8bb176a37990..fc5f71c91802 100644 --- a/tools/perf/arch/arm/util/dwarf-regs.c +++ b/tools/perf/arch/arm/util/dwarf-regs.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Mapping of DWARF debug register numbers into register names. * * Copyright (C) 2010 Will Deacon, ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <stddef.h> diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build index 36222e64bbf7..a7dd46a5b678 100644 --- a/tools/perf/arch/arm64/Build +++ b/tools/perf/arch/arm64/Build @@ -1,2 +1,2 @@ perf-y += util/ -perf-$(CONFIG_DWARF_UNWIND) += tests/ +perf-y += tests/ diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl index c88fd32563eb..459469b7222c 100755 --- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl +++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl @@ -56,7 +56,7 @@ create_table() echo "};" } -$gcc -E -dM -x c $input \ +$gcc -E -dM -x c -I $incpath/include/uapi $input \ |sed -ne 's/^#define __NR_//p' \ |sort -t' ' -k2 -nu \ |create_table diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build index 41707fea74b3..a61c06bdb757 100644 --- a/tools/perf/arch/arm64/tests/Build +++ b/tools/perf/arch/arm64/tests/Build @@ -1,4 +1,4 @@ perf-y += regs_load.o -perf-y += dwarf-unwind.o +perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o perf-y += 
arch-tests.o diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 5ccfce87e693..2c009aa74633 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/bitops.h> #include <linux/log2.h> +#include <linux/zalloc.h> #include <time.h> #include "../../util/cpumap.h" diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c index cd764a9fd098..b047b882c5b1 100644 --- a/tools/perf/arch/arm64/util/dwarf-regs.c +++ b/tools/perf/arch/arm64/util/dwarf-regs.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Mapping of DWARF debug register numbers into register names. * * Copyright (C) 2010 Will Deacon, ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <errno.h> diff --git a/tools/perf/arch/arm64/util/sym-handling.c b/tools/perf/arch/arm64/util/sym-handling.c index 0051b1ee8450..27fcf24d6850 100644 --- a/tools/perf/arch/arm64/util/sym-handling.c +++ b/tools/perf/arch/arm64/util/sym-handling.c @@ -1,7 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. * * Copyright (C) 2015 Naveen N. 
Rao, IBM Corporation */ diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index f3824ca7c20b..1a9e22f78c22 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -1,9 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdio.h> +#include <stdlib.h> #include "common.h" #include "../util/env.h" -#include "../util/util.h" #include "../util/debug.h" +#include <linux/zalloc.h> const char *const arc_triplets[] = { "arc-linux-", diff --git a/tools/perf/arch/csky/Build b/tools/perf/arch/csky/Build new file mode 100644 index 000000000000..e4e5f33c84d8 --- /dev/null +++ b/tools/perf/arch/csky/Build @@ -0,0 +1 @@ +perf-y += util/ diff --git a/tools/perf/arch/csky/Makefile b/tools/perf/arch/csky/Makefile new file mode 100644 index 000000000000..88c08eed9c7b --- /dev/null +++ b/tools/perf/arch/csky/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +ifndef NO_DWARF +PERF_HAVE_DWARF_REGS := 1 +endif diff --git a/tools/perf/arch/csky/annotate/instructions.c b/tools/perf/arch/csky/annotate/instructions.c new file mode 100644 index 000000000000..5337bfb7d5fc --- /dev/null +++ b/tools/perf/arch/csky/annotate/instructions.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. 
+ +#include <linux/compiler.h> + +static struct ins_ops *csky__associate_ins_ops(struct arch *arch, + const char *name) +{ + struct ins_ops *ops = NULL; + + /* catch all kind of jumps */ + if (!strcmp(name, "bt") || + !strcmp(name, "bf") || + !strcmp(name, "bez") || + !strcmp(name, "bnez") || + !strcmp(name, "bnezad") || + !strcmp(name, "bhsz") || + !strcmp(name, "bhz") || + !strcmp(name, "blsz") || + !strcmp(name, "blz") || + !strcmp(name, "br") || + !strcmp(name, "jmpi") || + !strcmp(name, "jmp")) + ops = &jump_ops; + + /* catch function call */ + if (!strcmp(name, "bsr") || + !strcmp(name, "jsri") || + !strcmp(name, "jsr")) + ops = &call_ops; + + /* catch function return */ + if (!strcmp(name, "rts")) + ops = &ret_ops; + + if (ops) + arch__associate_ins_ops(arch, name, ops); + return ops; +} + +static int csky__annotate_init(struct arch *arch, char *cpuid __maybe_unused) +{ + arch->initialized = true; + arch->objdump.comment_char = '/'; + arch->associate_instruction_ops = csky__associate_ins_ops; + + return 0; +} diff --git a/tools/perf/arch/csky/include/perf_regs.h b/tools/perf/arch/csky/include/perf_regs.h new file mode 100644 index 000000000000..8f336ea1161a --- /dev/null +++ b/tools/perf/arch/csky/include/perf_regs.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. 
+ +#ifndef ARCH_PERF_REGS_H +#define ARCH_PERF_REGS_H + +#include <stdlib.h> +#include <linux/types.h> +#include <asm/perf_regs.h> + +#define PERF_REGS_MASK ((1ULL << PERF_REG_CSKY_MAX) - 1) +#define PERF_REGS_MAX PERF_REG_CSKY_MAX +#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32 + +#define PERF_REG_IP PERF_REG_CSKY_PC +#define PERF_REG_SP PERF_REG_CSKY_SP + +static inline const char *perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_CSKY_A0: + return "a0"; + case PERF_REG_CSKY_A1: + return "a1"; + case PERF_REG_CSKY_A2: + return "a2"; + case PERF_REG_CSKY_A3: + return "a3"; + case PERF_REG_CSKY_REGS0: + return "regs0"; + case PERF_REG_CSKY_REGS1: + return "regs1"; + case PERF_REG_CSKY_REGS2: + return "regs2"; + case PERF_REG_CSKY_REGS3: + return "regs3"; + case PERF_REG_CSKY_REGS4: + return "regs4"; + case PERF_REG_CSKY_REGS5: + return "regs5"; + case PERF_REG_CSKY_REGS6: + return "regs6"; + case PERF_REG_CSKY_REGS7: + return "regs7"; + case PERF_REG_CSKY_REGS8: + return "regs8"; + case PERF_REG_CSKY_REGS9: + return "regs9"; + case PERF_REG_CSKY_SP: + return "sp"; + case PERF_REG_CSKY_LR: + return "lr"; + case PERF_REG_CSKY_PC: + return "pc"; +#if defined(__CSKYABIV2__) + case PERF_REG_CSKY_EXREGS0: + return "exregs0"; + case PERF_REG_CSKY_EXREGS1: + return "exregs1"; + case PERF_REG_CSKY_EXREGS2: + return "exregs2"; + case PERF_REG_CSKY_EXREGS3: + return "exregs3"; + case PERF_REG_CSKY_EXREGS4: + return "exregs4"; + case PERF_REG_CSKY_EXREGS5: + return "exregs5"; + case PERF_REG_CSKY_EXREGS6: + return "exregs6"; + case PERF_REG_CSKY_EXREGS7: + return "exregs7"; + case PERF_REG_CSKY_EXREGS8: + return "exregs8"; + case PERF_REG_CSKY_EXREGS9: + return "exregs9"; + case PERF_REG_CSKY_EXREGS10: + return "exregs10"; + case PERF_REG_CSKY_EXREGS11: + return "exregs11"; + case PERF_REG_CSKY_EXREGS12: + return "exregs12"; + case PERF_REG_CSKY_EXREGS13: + return "exregs13"; + case PERF_REG_CSKY_EXREGS14: + return "exregs14"; + case PERF_REG_CSKY_TLS: + return 
"tls"; + case PERF_REG_CSKY_HI: + return "hi"; + case PERF_REG_CSKY_LO: + return "lo"; +#endif + default: + return NULL; + } + + return NULL; +} + +#endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/csky/util/Build b/tools/perf/arch/csky/util/Build new file mode 100644 index 000000000000..1160bb2332ba --- /dev/null +++ b/tools/perf/arch/csky/util/Build @@ -0,0 +1,2 @@ +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/csky/util/dwarf-regs.c b/tools/perf/arch/csky/util/dwarf-regs.c new file mode 100644 index 000000000000..ca86ecaeacbb --- /dev/null +++ b/tools/perf/arch/csky/util/dwarf-regs.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. +// Mapping of DWARF debug register numbers into register names. + +#include <stddef.h> +#include <dwarf-regs.h> + +#if defined(__CSKYABIV2__) +#define CSKY_MAX_REGS 73 +const char *csky_dwarf_regs_table[CSKY_MAX_REGS] = { + /* r0 ~ r8 */ + "%a0", "%a1", "%a2", "%a3", "%regs0", "%regs1", "%regs2", "%regs3", + /* r9 ~ r15 */ + "%regs4", "%regs5", "%regs6", "%regs7", "%regs8", "%regs9", "%sp", + "%lr", + /* r16 ~ r23 */ + "%exregs0", "%exregs1", "%exregs2", "%exregs3", "%exregs4", + "%exregs5", "%exregs6", "%exregs7", + /* r24 ~ r31 */ + "%exregs8", "%exregs9", "%exregs10", "%exregs11", "%exregs12", + "%exregs13", "%exregs14", "%tls", + "%pc", NULL, NULL, NULL, "%hi", "%lo", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "%epc", +}; +#else +#define CSKY_MAX_REGS 57 +const char *csky_dwarf_regs_table[CSKY_MAX_REGS] = { + /* r0 ~ r8 */ + "%sp", "%regs9", "%a0", "%a1", "%a2", "%a3", "%regs0", "%regs1", + /* r9 ~ r15 */ + "%regs2", "%regs3", "%regs4", "%regs5", "%regs6", "%regs7", "%regs8", + "%lr", + NULL, NULL, NULL, NULL, NULL, 
NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "%epc", +}; +#endif + +const char *get_arch_regstr(unsigned int n) +{ + return (n < CSKY_MAX_REGS) ? csky_dwarf_regs_table[n] : NULL; +} diff --git a/tools/perf/arch/csky/util/unwind-libdw.c b/tools/perf/arch/csky/util/unwind-libdw.c new file mode 100644 index 000000000000..4bb4a06776e4 --- /dev/null +++ b/tools/perf/arch/csky/util/unwind-libdw.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. + +#include <elfutils/libdwfl.h> +#include "../../util/unwind-libdw.h" +#include "../../util/perf_regs.h" +#include "../../util/event.h" + +bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) +{ + struct unwind_info *ui = arg; + struct regs_dump *user_regs = &ui->sample->user_regs; + Dwarf_Word dwarf_regs[PERF_REG_CSKY_MAX]; + +#define REG(r) ({ \ + Dwarf_Word val = 0; \ + perf_reg_value(&val, user_regs, PERF_REG_CSKY_##r); \ + val; \ +}) + +#if defined(__CSKYABIV2__) + dwarf_regs[0] = REG(A0); + dwarf_regs[1] = REG(A1); + dwarf_regs[2] = REG(A2); + dwarf_regs[3] = REG(A3); + dwarf_regs[4] = REG(REGS0); + dwarf_regs[5] = REG(REGS1); + dwarf_regs[6] = REG(REGS2); + dwarf_regs[7] = REG(REGS3); + dwarf_regs[8] = REG(REGS4); + dwarf_regs[9] = REG(REGS5); + dwarf_regs[10] = REG(REGS6); + dwarf_regs[11] = REG(REGS7); + dwarf_regs[12] = REG(REGS8); + dwarf_regs[13] = REG(REGS9); + dwarf_regs[14] = REG(SP); + dwarf_regs[15] = REG(LR); + dwarf_regs[16] = REG(EXREGS0); + dwarf_regs[17] = REG(EXREGS1); + dwarf_regs[18] = REG(EXREGS2); + dwarf_regs[19] = REG(EXREGS3); + dwarf_regs[20] = REG(EXREGS4); + dwarf_regs[21] = REG(EXREGS5); + dwarf_regs[22] = REG(EXREGS6); + dwarf_regs[23] = REG(EXREGS7); + dwarf_regs[24] = REG(EXREGS8); + dwarf_regs[25] = REG(EXREGS9); + dwarf_regs[26] = 
REG(EXREGS10); + dwarf_regs[27] = REG(EXREGS11); + dwarf_regs[28] = REG(EXREGS12); + dwarf_regs[29] = REG(EXREGS13); + dwarf_regs[30] = REG(EXREGS14); + dwarf_regs[31] = REG(TLS); + dwarf_regs[32] = REG(PC); +#else + dwarf_regs[0] = REG(SP); + dwarf_regs[1] = REG(REGS9); + dwarf_regs[2] = REG(A0); + dwarf_regs[3] = REG(A1); + dwarf_regs[4] = REG(A2); + dwarf_regs[5] = REG(A3); + dwarf_regs[6] = REG(REGS0); + dwarf_regs[7] = REG(REGS1); + dwarf_regs[8] = REG(REGS2); + dwarf_regs[9] = REG(REGS3); + dwarf_regs[10] = REG(REGS4); + dwarf_regs[11] = REG(REGS5); + dwarf_regs[12] = REG(REGS6); + dwarf_regs[13] = REG(REGS7); + dwarf_regs[14] = REG(REGS8); + dwarf_regs[15] = REG(LR); +#endif + dwfl_thread_state_register_pc(thread, REG(PC)); + + return dwfl_thread_state_registers(thread, 0, PERF_REG_CSKY_MAX, + dwarf_regs); +} diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c index 98ac87052a74..4952890b9428 100644 --- a/tools/perf/arch/powerpc/util/dwarf-regs.c +++ b/tools/perf/arch/powerpc/util/dwarf-regs.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Mapping of DWARF debug register numbers into register names. * * Copyright (C) 2010 Ian Munsie, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
*/ #include <stddef.h> diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c index 34d5134681d9..f14102b85509 100644 --- a/tools/perf/arch/powerpc/util/perf_regs.c +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -2,12 +2,14 @@ #include <errno.h> #include <string.h> #include <regex.h> +#include <linux/zalloc.h> #include "../../perf.h" -#include "../../util/util.h" #include "../../util/perf_regs.h" #include "../../util/debug.h" +#include <linux/kernel.h> + const struct sample_reg sample_reg_masks[] = { SMPL_REG(r0, PERF_REG_POWERPC_R0), SMPL_REG(r1, PERF_REG_POWERPC_R1), diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 2918bb16c892..fc9c2f5fcd52 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Use DWARF Debug information to skip unnecessary callchain entries. * * Copyright (C) 2014 Sukadev Bhattiprolu, IBM Corporation. * Copyright (C) 2014 Ulrich Weigand, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <inttypes.h> #include <dwarf.h> diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 10a44e946f77..b0a67eaf2ce8 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -1,7 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. * * Copyright (C) 2015 Naveen N. 
Rao, IBM Corporation */ diff --git a/tools/perf/arch/powerpc/util/unwind-libunwind.c b/tools/perf/arch/powerpc/util/unwind-libunwind.c index 9e15f92ae49f..90a6beda20de 100644 --- a/tools/perf/arch/powerpc/util/unwind-libunwind.c +++ b/tools/perf/arch/powerpc/util/unwind-libunwind.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 2016 Chandan Kumar, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <errno.h> diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index dfa6e3103437..cb198787570a 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif diff --git a/tools/perf/arch/s390/util/auxtrace.c b/tools/perf/arch/s390/util/auxtrace.c index 44c857388897..0fe1be93f375 100644 --- a/tools/perf/arch/s390/util/auxtrace.c +++ b/tools/perf/arch/s390/util/auxtrace.c @@ -3,6 +3,7 @@ #include <linux/types.h> #include <linux/bitops.h> #include <linux/log2.h> +#include <linux/zalloc.h> #include "../../util/evlist.h" #include "../../util/auxtrace.h" diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c index 163b92f33998..8b0b018d896a 100644 --- a/tools/perf/arch/s390/util/header.c +++ b/tools/perf/arch/s390/util/header.c @@ -1,23 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Implementation of get_cpuid(). * * Copyright IBM Corp. 
2014, 2018 * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com> * Thomas Richter <tmricht@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include <sys/types.h> #include <unistd.h> #include <stdio.h> #include <string.h> -#include <ctype.h> +#include <linux/ctype.h> +#include <linux/kernel.h> +#include <linux/zalloc.h> #include "../../util/header.h" -#include "../../util/util.h" #define SYSINFO_MANU "Manufacturer:" #define SYSINFO_TYPE "Type:" diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c index 7e3961a4b292..f852f2a77e0a 100644 --- a/tools/perf/arch/s390/util/kvm-stat.c +++ b/tools/perf/arch/s390/util/kvm-stat.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Arch specific functions for perf kvm stat. * * Copyright 2014 IBM Corp. * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. 
*/ #include <errno.h> diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c index 0b2054007314..a19690a17291 100644 --- a/tools/perf/arch/s390/util/machine.c +++ b/tools/perf/arch/s390/util/machine.c @@ -5,16 +5,19 @@ #include "util.h" #include "machine.h" #include "api/fs/fs.h" +#include "debug.h" int arch__fix_module_text_start(u64 *start, const char *name) { + u64 m_start = *start; char path[PATH_MAX]; snprintf(path, PATH_MAX, "module/%.*s/sections/.text", (int)strlen(name) - 2, name + 1); - - if (sysfs__read_ull(path, (unsigned long long *)start) < 0) - return -1; + if (sysfs__read_ull(path, (unsigned long long *)start) < 0) { + pr_debug2("Using module %s start:%#lx\n", path, m_start); + *start = m_start; + } return 0; } diff --git a/tools/perf/arch/sh/Makefile b/tools/perf/arch/sh/Makefile index 7fbca175099e..88c08eed9c7b 100644 --- a/tools/perf/arch/sh/Makefile +++ b/tools/perf/arch/sh/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif diff --git a/tools/perf/arch/sh/util/dwarf-regs.c b/tools/perf/arch/sh/util/dwarf-regs.c index f8dfa89696f4..4b17fc86c73b 100644 --- a/tools/perf/arch/sh/util/dwarf-regs.c +++ b/tools/perf/arch/sh/util/dwarf-regs.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Mapping of DWARF debug register numbers into register names. * * Copyright (C) 2010 Matt Fleming <matt@console-pimps.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ #include <stddef.h> diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile index 275dea7ff59a..4031db72ba71 100644 --- a/tools/perf/arch/sparc/Makefile +++ b/tools/perf/arch/sparc/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif diff --git a/tools/perf/arch/sparc/util/dwarf-regs.c b/tools/perf/arch/sparc/util/dwarf-regs.c index b704fdb9237a..1282cb2dc7bd 100644 --- a/tools/perf/arch/sparc/util/dwarf-regs.c +++ b/tools/perf/arch/sparc/util/dwarf-regs.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Mapping of DWARF debug register numbers into register names. * * Copyright (C) 2010 David S. Miller <davem@davemloft.net> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
*/ #include <stddef.h> diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 92ee0b4378d4..b4e6f9e6204a 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -349,6 +349,12 @@ 425 common io_uring_setup __x64_sys_io_uring_setup 426 common io_uring_enter __x64_sys_io_uring_enter 427 common io_uring_register __x64_sys_io_uring_register +428 common open_tree __x64_sys_open_tree +429 common move_mount __x64_sys_move_mount +430 common fsopen __x64_sys_fsopen +431 common fsconfig __x64_sys_fsconfig +432 common fsmount __x64_sys_fsmount +433 common fspick __x64_sys_fspick # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 613709cfbbd0..c41c5affe4be 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -9,6 +9,7 @@ struct test; int test__rdpmc(struct test *test __maybe_unused, int subtest); int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest); int test__insn_x86(struct test *test __maybe_unused, int subtest); +int test__intel_pt_pkt_decoder(struct test *test, int subtest); int test__bp_modify(struct test *test, int subtest); #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h index 7f6d538f8a89..b7321337d100 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -8,9 +8,9 @@ void perf_regs_load(u64 *regs); +#define PERF_REGS_MAX PERF_REG_X86_XMM_MAX #ifndef HAVE_ARCH_X86_64_SUPPORT #define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1) -#define PERF_REGS_MAX PERF_REG_X86_32_MAX #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32 #else #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ @@ -18,7 +18,6 @@ void 
perf_regs_load(u64 *regs); (1ULL << PERF_REG_X86_FS) | \ (1ULL << PERF_REG_X86_GS)) #define PERF_REGS_MASK (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~REG_NOSUPPORT) -#define PERF_REGS_MAX PERF_REG_X86_64_MAX #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64 #endif #define PERF_REG_IP PERF_REG_X86_IP @@ -77,6 +76,28 @@ static inline const char *perf_reg_name(int id) case PERF_REG_X86_R15: return "R15"; #endif /* HAVE_ARCH_X86_64_SUPPORT */ + +#define XMM(x) \ + case PERF_REG_X86_XMM ## x: \ + case PERF_REG_X86_XMM ## x + 1: \ + return "XMM" #x; + XMM(0) + XMM(1) + XMM(2) + XMM(3) + XMM(4) + XMM(5) + XMM(6) + XMM(7) + XMM(8) + XMM(9) + XMM(10) + XMM(11) + XMM(12) + XMM(13) + XMM(14) + XMM(15) +#undef XMM default: return NULL; } diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 3d83d0c6982d..2997c506550c 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -4,5 +4,5 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o perf-y += arch-tests.o perf-y += rdpmc.o perf-y += perf-time-to-tsc.o -perf-$(CONFIG_AUXTRACE) += insn-x86.o +perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o perf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index d47d3f8e3c8e..6763135aec17 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -23,6 +23,10 @@ struct test arch_tests[] = { .desc = "x86 instruction decoder - new instructions", .func = test__insn_x86, }, + { + .desc = "Intel PT packet decoder", + .func = test__intel_pt_pkt_decoder, + }, #endif #if defined(__x86_64__) { diff --git a/tools/perf/arch/x86/tests/gen-insn-x86-dat.awk b/tools/perf/arch/x86/tests/gen-insn-x86-dat.awk index a21454835cd4..1a29f6379bde 100644 --- a/tools/perf/arch/x86/tests/gen-insn-x86-dat.awk +++ b/tools/perf/arch/x86/tests/gen-insn-x86-dat.awk @@ -1,15 +1,8 @@ #!/bin/awk -f +# SPDX-License-Identifier: 
GPL-2.0-only # gen-insn-x86-dat.awk: script to convert data for the insn-x86 test # Copyright (c) 2015, Intel Corporation. # -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. -# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. BEGIN { print "/*" diff --git a/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh b/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh index 2d4ef94cff98..0d0a003a9c5e 100755 --- a/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh +++ b/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh @@ -1,15 +1,8 @@ #!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only # gen-insn-x86-dat: generate data for the insn-x86 test # Copyright (c) 2015, Intel Corporation. # -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. -# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. 
set -e diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c index 90a4a8c58a62..94aa0b673b7f 100644 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -6,6 +6,7 @@ #include "evlist.h" #include "evsel.h" #include "arch-tests.h" +#include "util.h" #include <signal.h> #include <sys/mman.h> diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c new file mode 100644 index 000000000000..901bf1f449c4 --- /dev/null +++ b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <string.h> + +#include "intel-pt-decoder/intel-pt-pkt-decoder.h" + +#include "debug.h" +#include "tests/tests.h" +#include "arch-tests.h" + +/** + * struct test_data - Test data. + * @len: number of bytes to decode + * @bytes: bytes to decode + * @ctx: packet context to decode + * @packet: expected packet + * @new_ctx: expected new packet context + * @ctx_unchanged: the packet context must not change + */ +struct test_data { + int len; + u8 bytes[INTEL_PT_PKT_MAX_SZ]; + enum intel_pt_pkt_ctx ctx; + struct intel_pt_pkt packet; + enum intel_pt_pkt_ctx new_ctx; + int ctx_unchanged; +} data[] = { + /* Padding Packet */ + {1, {0}, 0, {INTEL_PT_PAD, 0, 0}, 0, 1 }, + /* Short Taken/Not Taken Packet */ + {1, {4}, 0, {INTEL_PT_TNT, 1, 0}, 0, 0 }, + {1, {6}, 0, {INTEL_PT_TNT, 1, 0x20ULL << 58}, 0, 0 }, + {1, {0x80}, 0, {INTEL_PT_TNT, 6, 0}, 0, 0 }, + {1, {0xfe}, 0, {INTEL_PT_TNT, 6, 0x3fULL << 58}, 0, 0 }, + /* Long Taken/Not Taken Packet */ + {8, {0x02, 0xa3, 2}, 0, {INTEL_PT_TNT, 1, 0xa302ULL << 47}, 0, 0 }, + {8, {0x02, 0xa3, 3}, 0, {INTEL_PT_TNT, 1, 0x1a302ULL << 47}, 0, 0 }, + {8, {0x02, 0xa3, 0, 0, 0, 0, 0, 0x80}, 0, {INTEL_PT_TNT, 47, 0xa302ULL << 1}, 0, 0 }, + {8, {0x02, 0xa3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 0, {INTEL_PT_TNT, 47, 0xffffffffffffa302ULL << 1}, 0, 0 }, + /* Target IP Packet */ 
+ {1, {0x0d}, 0, {INTEL_PT_TIP, 0, 0}, 0, 0 }, + {3, {0x2d, 1, 2}, 0, {INTEL_PT_TIP, 1, 0x201}, 0, 0 }, + {5, {0x4d, 1, 2, 3, 4}, 0, {INTEL_PT_TIP, 2, 0x4030201}, 0, 0 }, + {7, {0x6d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP, 3, 0x60504030201}, 0, 0 }, + {7, {0x8d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP, 4, 0x60504030201}, 0, 0 }, + {9, {0xcd, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP, 6, 0x807060504030201}, 0, 0 }, + /* Packet Generation Enable */ + {1, {0x11}, 0, {INTEL_PT_TIP_PGE, 0, 0}, 0, 0 }, + {3, {0x31, 1, 2}, 0, {INTEL_PT_TIP_PGE, 1, 0x201}, 0, 0 }, + {5, {0x51, 1, 2, 3, 4}, 0, {INTEL_PT_TIP_PGE, 2, 0x4030201}, 0, 0 }, + {7, {0x71, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGE, 3, 0x60504030201}, 0, 0 }, + {7, {0x91, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGE, 4, 0x60504030201}, 0, 0 }, + {9, {0xd1, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP_PGE, 6, 0x807060504030201}, 0, 0 }, + /* Packet Generation Disable */ + {1, {0x01}, 0, {INTEL_PT_TIP_PGD, 0, 0}, 0, 0 }, + {3, {0x21, 1, 2}, 0, {INTEL_PT_TIP_PGD, 1, 0x201}, 0, 0 }, + {5, {0x41, 1, 2, 3, 4}, 0, {INTEL_PT_TIP_PGD, 2, 0x4030201}, 0, 0 }, + {7, {0x61, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGD, 3, 0x60504030201}, 0, 0 }, + {7, {0x81, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGD, 4, 0x60504030201}, 0, 0 }, + {9, {0xc1, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP_PGD, 6, 0x807060504030201}, 0, 0 }, + /* Flow Update Packet */ + {1, {0x1d}, 0, {INTEL_PT_FUP, 0, 0}, 0, 0 }, + {3, {0x3d, 1, 2}, 0, {INTEL_PT_FUP, 1, 0x201}, 0, 0 }, + {5, {0x5d, 1, 2, 3, 4}, 0, {INTEL_PT_FUP, 2, 0x4030201}, 0, 0 }, + {7, {0x7d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_FUP, 3, 0x60504030201}, 0, 0 }, + {7, {0x9d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_FUP, 4, 0x60504030201}, 0, 0 }, + {9, {0xdd, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_FUP, 6, 0x807060504030201}, 0, 0 }, + /* Paging Information Packet */ + {8, {0x02, 0x43, 2, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0x60504030201}, 0, 0 }, + {8, {0x02, 0x43, 3, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0x60504030201 | (1ULL << 
63)}, 0, 0 }, + /* Mode Exec Packet */ + {2, {0x99, 0x00}, 0, {INTEL_PT_MODE_EXEC, 0, 16}, 0, 0 }, + {2, {0x99, 0x01}, 0, {INTEL_PT_MODE_EXEC, 0, 64}, 0, 0 }, + {2, {0x99, 0x02}, 0, {INTEL_PT_MODE_EXEC, 0, 32}, 0, 0 }, + /* Mode TSX Packet */ + {2, {0x99, 0x20}, 0, {INTEL_PT_MODE_TSX, 0, 0}, 0, 0 }, + {2, {0x99, 0x21}, 0, {INTEL_PT_MODE_TSX, 0, 1}, 0, 0 }, + {2, {0x99, 0x22}, 0, {INTEL_PT_MODE_TSX, 0, 2}, 0, 0 }, + /* Trace Stop Packet */ + {2, {0x02, 0x83}, 0, {INTEL_PT_TRACESTOP, 0, 0}, 0, 0 }, + /* Core:Bus Ratio Packet */ + {4, {0x02, 0x03, 0x12, 0}, 0, {INTEL_PT_CBR, 0, 0x12}, 0, 1 }, + /* Timestamp Counter Packet */ + {8, {0x19, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_TSC, 0, 0x7060504030201}, 0, 1 }, + /* Mini Time Counter Packet */ + {2, {0x59, 0x12}, 0, {INTEL_PT_MTC, 0, 0x12}, 0, 1 }, + /* TSC / MTC Alignment Packet */ + {7, {0x02, 0x73}, 0, {INTEL_PT_TMA, 0, 0}, 0, 1 }, + {7, {0x02, 0x73, 1, 2}, 0, {INTEL_PT_TMA, 0, 0x201}, 0, 1 }, + {7, {0x02, 0x73, 0, 0, 0, 0xff, 1}, 0, {INTEL_PT_TMA, 0x1ff, 0}, 0, 1 }, + {7, {0x02, 0x73, 0x80, 0xc0, 0, 0xff, 1}, 0, {INTEL_PT_TMA, 0x1ff, 0xc080}, 0, 1 }, + /* Cycle Count Packet */ + {1, {0x03}, 0, {INTEL_PT_CYC, 0, 0}, 0, 1 }, + {1, {0x0b}, 0, {INTEL_PT_CYC, 0, 1}, 0, 1 }, + {1, {0xfb}, 0, {INTEL_PT_CYC, 0, 0x1f}, 0, 1 }, + {2, {0x07, 2}, 0, {INTEL_PT_CYC, 0, 0x20}, 0, 1 }, + {2, {0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0xfff}, 0, 1 }, + {3, {0x07, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x1000}, 0, 1 }, + {3, {0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x7ffff}, 0, 1 }, + {4, {0x07, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x80000}, 0, 1 }, + {4, {0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x3ffffff}, 0, 1 }, + {5, {0x07, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x4000000}, 0, 1 }, + {5, {0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x1ffffffff}, 0, 1 }, + {6, {0x07, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x200000000}, 0, 1 }, + {6, {0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0xffffffffff}, 0, 1 }, + {7, {0x07, 1, 1, 1, 1, 1, 2}, 0, 
{INTEL_PT_CYC, 0, 0x10000000000}, 0, 1 }, + {7, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x7fffffffffff}, 0, 1 }, + {8, {0x07, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x800000000000}, 0, 1 }, + {8, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x3fffffffffffff}, 0, 1 }, + {9, {0x07, 1, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x40000000000000}, 0, 1 }, + {9, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x1fffffffffffffff}, 0, 1 }, + {10, {0x07, 1, 1, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x2000000000000000}, 0, 1 }, + {10, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe}, 0, {INTEL_PT_CYC, 0, 0xffffffffffffffff}, 0, 1 }, + /* Virtual-Machine Control Structure Packet */ + {7, {0x02, 0xc8, 1, 2, 3, 4, 5}, 0, {INTEL_PT_VMCS, 5, 0x504030201}, 0, 0 }, + /* Overflow Packet */ + {2, {0x02, 0xf3}, 0, {INTEL_PT_OVF, 0, 0}, 0, 0 }, + {2, {0x02, 0xf3}, INTEL_PT_BLK_4_CTX, {INTEL_PT_OVF, 0, 0}, 0, 0 }, + {2, {0x02, 0xf3}, INTEL_PT_BLK_8_CTX, {INTEL_PT_OVF, 0, 0}, 0, 0 }, + /* Packet Stream Boundary*/ + {16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, 0, {INTEL_PT_PSB, 0, 0}, 0, 0 }, + {16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, INTEL_PT_BLK_4_CTX, {INTEL_PT_PSB, 0, 0}, 0, 0 }, + {16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, INTEL_PT_BLK_8_CTX, {INTEL_PT_PSB, 0, 0}, 0, 0 }, + /* PSB End Packet */ + {2, {0x02, 0x23}, 0, {INTEL_PT_PSBEND, 0, 0}, 0, 0 }, + /* Maintenance Packet */ + {11, {0x02, 0xc3, 0x88, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_MNT, 0, 0x7060504030201}, 0, 1 }, + /* Write Data to PT Packet */ + {6, {0x02, 0x12, 1, 2, 3, 4}, 0, {INTEL_PT_PTWRITE, 0, 0x4030201}, 0, 0 }, + {10, {0x02, 0x32, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_PTWRITE, 1, 0x807060504030201}, 0, 0 }, + {6, {0x02, 0x92, 1, 2, 3, 4}, 0, 
{INTEL_PT_PTWRITE_IP, 0, 0x4030201}, 0, 0 }, + {10, {0x02, 0xb2, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_PTWRITE_IP, 1, 0x807060504030201}, 0, 0 }, + /* Execution Stop Packet */ + {2, {0x02, 0x62}, 0, {INTEL_PT_EXSTOP, 0, 0}, 0, 1 }, + {2, {0x02, 0xe2}, 0, {INTEL_PT_EXSTOP_IP, 0, 0}, 0, 1 }, + /* Monitor Wait Packet */ + {10, {0x02, 0xc2}, 0, {INTEL_PT_MWAIT, 0, 0}, 0, 0 }, + {10, {0x02, 0xc2, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_MWAIT, 0, 0x807060504030201}, 0, 0 }, + {10, {0x02, 0xc2, 0xff, 2, 3, 4, 7, 6, 7, 8}, 0, {INTEL_PT_MWAIT, 0, 0x8070607040302ff}, 0, 0 }, + /* Power Entry Packet */ + {4, {0x02, 0x22}, 0, {INTEL_PT_PWRE, 0, 0}, 0, 1 }, + {4, {0x02, 0x22, 1, 2}, 0, {INTEL_PT_PWRE, 0, 0x0201}, 0, 1 }, + {4, {0x02, 0x22, 0x80, 0x34}, 0, {INTEL_PT_PWRE, 0, 0x3480}, 0, 1 }, + {4, {0x02, 0x22, 0x00, 0x56}, 0, {INTEL_PT_PWRE, 0, 0x5600}, 0, 1 }, + /* Power Exit Packet */ + {7, {0x02, 0xa2}, 0, {INTEL_PT_PWRX, 0, 0}, 0, 1 }, + {7, {0x02, 0xa2, 1, 2, 3, 4, 5}, 0, {INTEL_PT_PWRX, 0, 0x504030201}, 0, 1 }, + {7, {0x02, 0xa2, 0xff, 0xff, 0xff, 0xff, 0xff}, 0, {INTEL_PT_PWRX, 0, 0xffffffffff}, 0, 1 }, + /* Block Begin Packet */ + {3, {0x02, 0x63, 0x00}, 0, {INTEL_PT_BBP, 0, 0}, INTEL_PT_BLK_8_CTX, 0 }, + {3, {0x02, 0x63, 0x80}, 0, {INTEL_PT_BBP, 1, 0}, INTEL_PT_BLK_4_CTX, 0 }, + {3, {0x02, 0x63, 0x1f}, 0, {INTEL_PT_BBP, 0, 0x1f}, INTEL_PT_BLK_8_CTX, 0 }, + {3, {0x02, 0x63, 0x9f}, 0, {INTEL_PT_BBP, 1, 0x1f}, INTEL_PT_BLK_4_CTX, 0 }, + /* 4-byte Block Item Packet */ + {5, {0x04}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0, 0}, INTEL_PT_BLK_4_CTX, 0 }, + {5, {0xfc}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0x1f, 0}, INTEL_PT_BLK_4_CTX, 0 }, + {5, {0x04, 1, 2, 3, 4}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0, 0x04030201}, INTEL_PT_BLK_4_CTX, 0 }, + {5, {0xfc, 1, 2, 3, 4}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0x1f, 0x04030201}, INTEL_PT_BLK_4_CTX, 0 }, + /* 8-byte Block Item Packet */ + {9, {0x04}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0, 0}, INTEL_PT_BLK_8_CTX, 0 }, + {9, {0xfc}, 
INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0x1f, 0}, INTEL_PT_BLK_8_CTX, 0 }, + {9, {0x04, 1, 2, 3, 4, 5, 6, 7, 8}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0, 0x0807060504030201}, INTEL_PT_BLK_8_CTX, 0 }, + {9, {0xfc, 1, 2, 3, 4, 5, 6, 7, 8}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0x1f, 0x0807060504030201}, INTEL_PT_BLK_8_CTX, 0 }, + /* Block End Packet */ + {2, {0x02, 0x33}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BEP, 0, 0}, 0, 0 }, + {2, {0x02, 0xb3}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 }, + {2, {0x02, 0x33}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP, 0, 0}, 0, 0 }, + {2, {0x02, 0xb3}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 }, + /* Terminator */ + {0, {0}, 0, {0, 0, 0}, 0, 0 }, +}; + +static int dump_packet(struct intel_pt_pkt *packet, u8 *bytes, int len) +{ + char desc[INTEL_PT_PKT_DESC_MAX]; + int ret, i; + + for (i = 0; i < len; i++) + pr_debug(" %02x", bytes[i]); + for (; i < INTEL_PT_PKT_MAX_SZ; i++) + pr_debug(" "); + pr_debug(" "); + ret = intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX); + if (ret < 0) { + pr_debug("intel_pt_pkt_desc failed!\n"); + return TEST_FAIL; + } + pr_debug("%s\n", desc); + + return TEST_OK; +} + +static void decoding_failed(struct test_data *d) +{ + pr_debug("Decoding failed!\n"); + pr_debug("Decoding: "); + dump_packet(&d->packet, d->bytes, d->len); +} + +static int fail(struct test_data *d, struct intel_pt_pkt *packet, int len, + enum intel_pt_pkt_ctx new_ctx) +{ + decoding_failed(d); + + if (len != d->len) + pr_debug("Expected length: %d Decoded length %d\n", + d->len, len); + + if (packet->type != d->packet.type) + pr_debug("Expected type: %d Decoded type %d\n", + d->packet.type, packet->type); + + if (packet->count != d->packet.count) + pr_debug("Expected count: %d Decoded count %d\n", + d->packet.count, packet->count); + + if (packet->payload != d->packet.payload) + pr_debug("Expected payload: 0x%llx Decoded payload 0x%llx\n", + (unsigned long long)d->packet.payload, + (unsigned long long)packet->payload); + + if (new_ctx != 
d->new_ctx) + pr_debug("Expected packet context: %d Decoded packet context %d\n", + d->new_ctx, new_ctx); + + return TEST_FAIL; +} + +static int test_ctx_unchanged(struct test_data *d, struct intel_pt_pkt *packet, + enum intel_pt_pkt_ctx ctx) +{ + enum intel_pt_pkt_ctx old_ctx = ctx; + + intel_pt_upd_pkt_ctx(packet, &ctx); + + if (ctx != old_ctx) { + decoding_failed(d); + pr_debug("Packet context changed!\n"); + return TEST_FAIL; + } + + return TEST_OK; +} + +static int test_one(struct test_data *d) +{ + struct intel_pt_pkt packet; + enum intel_pt_pkt_ctx ctx = d->ctx; + int ret; + + memset(&packet, 0xff, sizeof(packet)); + + /* Decode a packet */ + ret = intel_pt_get_packet(d->bytes, d->len, &packet, &ctx); + if (ret < 0 || ret > INTEL_PT_PKT_MAX_SZ) { + decoding_failed(d); + pr_debug("intel_pt_get_packet returned %d\n", ret); + return TEST_FAIL; + } + + /* Some packets must always leave the packet context unchanged */ + if (d->ctx_unchanged) { + int err; + + err = test_ctx_unchanged(d, &packet, INTEL_PT_NO_CTX); + if (err) + return err; + err = test_ctx_unchanged(d, &packet, INTEL_PT_BLK_4_CTX); + if (err) + return err; + err = test_ctx_unchanged(d, &packet, INTEL_PT_BLK_8_CTX); + if (err) + return err; + } + + /* Compare to the expected values */ + if (ret != d->len || packet.type != d->packet.type || + packet.count != d->packet.count || + packet.payload != d->packet.payload || ctx != d->new_ctx) + return fail(d, &packet, ret, ctx); + + pr_debug("Decoded ok:"); + ret = dump_packet(&d->packet, d->bytes, d->len); + + return ret; +} + +/* + * This test feeds byte sequences to the Intel PT packet decoder and checks the + * results. Changes to the packet context are also checked. 
+ */ +int test__intel_pt_pkt_decoder(struct test *test __maybe_unused, int subtest __maybe_unused) +{ + struct test_data *d = data; + int ret; + + for (d = data; d->len; d++) { + ret = test_one(d); + if (ret) + return ret; + } + + return TEST_OK; +} diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c index b135af62011c..d711268af330 100644 --- a/tools/perf/arch/x86/util/auxtrace.c +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -1,16 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * auxtrace.c: AUX area tracing support * Copyright (c) 2013-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #include <errno.h> diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c index 1f86ee8fb831..530934805710 100644 --- a/tools/perf/arch/x86/util/dwarf-regs.c +++ b/tools/perf/arch/x86/util/dwarf-regs.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. * Extracted from probe-finder.c * * Written by Masami Hiramatsu <mhiramat@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ #include <stddef.h> diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index 675a0213044d..a3a0b6884779 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> #include <linux/string.h> +#include <linux/zalloc.h> #include "../../util/machine.h" #include "../../util/tool.h" #include "../../util/map.h" -#include "../../util/util.h" #include "../../util/debug.h" #if defined(__x86_64__) diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 781df40b2966..ec5c1bb84095 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -1,16 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * intel-bts.c: Intel Processor Trace support * Copyright (c) 2013-2015, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * */ #include <errno.h> @@ -18,12 +9,12 @@ #include <linux/types.h> #include <linux/bitops.h> #include <linux/log2.h> +#include <linux/zalloc.h> #include "../../util/cpumap.h" #include "../../util/evsel.h" #include "../../util/evlist.h" #include "../../util/session.h" -#include "../../util/util.h" #include "../../util/pmu.h" #include "../../util/debug.h" #include "../../util/tsc.h" diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index ba8ecaf52200..609088c01e3a 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -1,16 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * intel_pt.c: Intel Processor Trace support * Copyright (c) 2013-2015, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * */ #include <errno.h> @@ -19,6 +10,7 @@ #include <linux/types.h> #include <linux/bitops.h> #include <linux/log2.h> +#include <linux/zalloc.h> #include <cpuid.h> #include "../../perf.h" diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c index 4520ac53caa9..1e9ec783b9a1 100644 --- a/tools/perf/arch/x86/util/machine.c +++ b/tools/perf/arch/x86/util/machine.c @@ -3,10 +3,11 @@ #include <linux/string.h> #include <stdlib.h> +#include "../../util/util.h" #include "../../util/machine.h" #include "../../util/map.h" #include "../../util/symbol.h" -#include "../../util/sane_ctype.h" +#include <linux/ctype.h> #include <symbol/kallsyms.h> diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index fead6b3b4206..0d7b77ff0ae6 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -2,9 +2,9 @@ #include <errno.h> #include <string.h> #include <regex.h> +#include <linux/zalloc.h> #include "../../perf.h" -#include "../../util/util.h" #include "../../util/perf_regs.h" #include "../../util/debug.h" @@ -31,6 +31,22 @@ const struct sample_reg sample_reg_masks[] = { SMPL_REG(R14, PERF_REG_X86_R14), SMPL_REG(R15, PERF_REG_X86_R15), #endif + SMPL_REG2(XMM0, PERF_REG_X86_XMM0), + SMPL_REG2(XMM1, PERF_REG_X86_XMM1), + SMPL_REG2(XMM2, PERF_REG_X86_XMM2), + SMPL_REG2(XMM3, PERF_REG_X86_XMM3), + SMPL_REG2(XMM4, PERF_REG_X86_XMM4), + SMPL_REG2(XMM5, PERF_REG_X86_XMM5), + SMPL_REG2(XMM6, PERF_REG_X86_XMM6), + SMPL_REG2(XMM7, PERF_REG_X86_XMM7), + SMPL_REG2(XMM8, PERF_REG_X86_XMM8), + SMPL_REG2(XMM9, PERF_REG_X86_XMM9), + SMPL_REG2(XMM10, PERF_REG_X86_XMM10), + SMPL_REG2(XMM11, PERF_REG_X86_XMM11), + SMPL_REG2(XMM12, PERF_REG_X86_XMM12), + SMPL_REG2(XMM13, PERF_REG_X86_XMM13), + SMPL_REG2(XMM14, PERF_REG_X86_XMM14), + SMPL_REG2(XMM15, PERF_REG_X86_XMM15), SMPL_REG_END }; @@ -254,3 +270,31 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op) return SDT_ARG_VALID; } + +uint64_t 
arch__intr_reg_mask(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .sample_type = PERF_SAMPLE_REGS_INTR, + .sample_regs_intr = PERF_REG_EXTENDED_MASK, + .precise_ip = 1, + .disabled = 1, + .exclude_kernel = 1, + }; + int fd; + /* + * In an unnamed union, init it here to build on older gcc versions + */ + attr.sample_period = 1; + + event_attr_init(&attr); + + fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + if (fd != -1) { + close(fd); + return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK); + } + + return PERF_REGS_MASK; +} diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile index 7fbca175099e..88c08eed9c7b 100644 --- a/tools/perf/arch/xtensa/Makefile +++ b/tools/perf/arch/xtensa/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c index 4dba76bfb4ce..12f5457300f5 100644 --- a/tools/perf/arch/xtensa/util/dwarf-regs.c +++ b/tools/perf/arch/xtensa/util/dwarf-regs.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Mapping of DWARF debug register numbers into register names. * * Copyright (c) 2015 Cadence Design Systems Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
*/ #include <stddef.h> diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 9aa3a674829b..a80797763e1f 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -18,6 +18,7 @@ #include <stdlib.h> #include <linux/compiler.h> #include <linux/kernel.h> +#include <linux/zalloc.h> #include <sys/time.h> #include "../util/stat.h" @@ -214,7 +215,7 @@ int bench_futex_hash(int argc, const char **argv) &worker[i].futex[nfutexes-1], t); } - free(worker[i].futex); + zfree(&worker[i].futex); } print_summary(); diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 8e9c4753e304..d02330a69745 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -12,6 +12,7 @@ #include <subcmd/parse-options.h> #include <linux/compiler.h> #include <linux/kernel.h> +#include <linux/zalloc.h> #include <errno.h> #include "bench.h" #include "futex.h" @@ -217,7 +218,7 @@ int bench_futex_lock_pi(int argc, const char **argv) worker[i].tid, worker[i].futex, t); if (multi) - free(worker[i].futex); + zfree(&worker[i].futex); } print_summary(); diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 0251dd348124..64dc994c72ea 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -9,7 +9,6 @@ #include "debug.h" #include "../perf.h" -#include "../util/util.h" #include <subcmd/parse-options.h> #include "../util/header.h" #include "../util/cloexec.h" @@ -24,6 +23,7 @@ #include <sys/time.h> #include <errno.h> #include <linux/time64.h> +#include <linux/zalloc.h> #define K 1024 diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index a7784554a80d..a640ca7aaada 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -11,7 +11,6 @@ #include "../perf.h" #include "../builtin.h" -#include "../util/util.h" #include <subcmd/parse-options.h> #include "../util/cloexec.h" @@ -35,6 +34,7 @@ #include <linux/kernel.h> 
#include <linux/time64.h> #include <linux/numa.h> +#include <linux/zalloc.h> #include <numa.h> #include <numaif.h> diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 67f9d9ffacfb..e0aa14faf2b5 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -8,11 +8,11 @@ */ #include "builtin.h" -#include "util/util.h" #include "util/color.h" #include <linux/list.h> #include "util/cache.h" #include <linux/rbtree.h> +#include <linux/zalloc.h> #include "util/symbol.h" #include "perf.h" @@ -159,8 +159,6 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter, struct perf_evsel *evsel = iter->evsel; int err; - hist__account_cycles(sample->branch_stack, al, sample, false); - bi = he->branch_info; err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); @@ -199,6 +197,8 @@ static int process_branch_callback(struct perf_evsel *evsel, if (a.map != NULL) a.map->dso->hit = 1; + hist__account_cycles(sample->branch_stack, al, sample, false); + ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann); return ret; } diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 334c77ffc1d9..b8e7c38ef221 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -17,7 +17,6 @@ * epoll ... 
Event poll performance */ #include "perf.h" -#include "util/util.h" #include <subcmd/parse-options.h> #include "builtin.h" #include "bench/bench.h" @@ -26,6 +25,7 @@ #include <stdlib.h> #include <string.h> #include <sys/prctl.h> +#include <linux/zalloc.h> typedef int (*bench_fn_t)(int argc, const char **argv); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 9e6cc868bdb4..e3776f5c2e01 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -15,9 +15,9 @@ #include <linux/compiler.h> #include <linux/kernel.h> #include <linux/stringify.h> +#include <linux/zalloc.h> #include <asm/bug.h> #include <sys/param.h> -#include "util.h" #include "debug.h" #include "builtin.h" #include <subcmd/parse-options.h> diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index d76f831f94c7..6c1284c87aaa 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -15,6 +15,7 @@ #include "util/debug.h" #include "util/config.h" #include <linux/string.h> +#include <stdlib.h> static bool use_system_config, use_user_config; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 6e7920793729..f6f5dd15bea7 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -16,10 +16,12 @@ #include "util/tool.h" #include "util/sort.h" #include "util/symbol.h" -#include "util/util.h" #include "util/data.h" #include "util/config.h" #include "util/time-utils.h" +#include "util/annotate.h" +#include "util/map.h" +#include <linux/zalloc.h> #include <errno.h> #include <inttypes.h> @@ -32,6 +34,7 @@ struct perf_diff { struct perf_time_interval *ptime_range; int range_size; int range_num; + bool has_br_stack; }; /* Diff command specific HPP columns. 
*/ @@ -44,6 +47,7 @@ enum { PERF_HPP_DIFF__WEIGHTED_DIFF, PERF_HPP_DIFF__FORMULA, PERF_HPP_DIFF__DELTA_ABS, + PERF_HPP_DIFF__CYCLES, PERF_HPP_DIFF__MAX_INDEX }; @@ -86,11 +90,14 @@ static s64 compute_wdiff_w2; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +static struct addr_location dummy_al; + enum { COMPUTE_DELTA, COMPUTE_RATIO, COMPUTE_WEIGHTED_DIFF, COMPUTE_DELTA_ABS, + COMPUTE_CYCLES, COMPUTE_MAX, }; @@ -99,6 +106,7 @@ const char *compute_names[COMPUTE_MAX] = { [COMPUTE_DELTA_ABS] = "delta-abs", [COMPUTE_RATIO] = "ratio", [COMPUTE_WEIGHTED_DIFF] = "wdiff", + [COMPUTE_CYCLES] = "cycles", }; static int compute = COMPUTE_DELTA_ABS; @@ -108,6 +116,7 @@ static int compute_2_hpp[COMPUTE_MAX] = { [COMPUTE_DELTA_ABS] = PERF_HPP_DIFF__DELTA_ABS, [COMPUTE_RATIO] = PERF_HPP_DIFF__RATIO, [COMPUTE_WEIGHTED_DIFF] = PERF_HPP_DIFF__WEIGHTED_DIFF, + [COMPUTE_CYCLES] = PERF_HPP_DIFF__CYCLES, }; #define MAX_COL_WIDTH 70 @@ -146,6 +155,10 @@ static struct header_column { [PERF_HPP_DIFF__FORMULA] = { .name = "Formula", .width = MAX_COL_WIDTH, + }, + [PERF_HPP_DIFF__CYCLES] = { + .name = "[Program Block Range] Cycles Diff", + .width = 70, } }; @@ -335,6 +348,31 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair, return -1; } +static void *block_hist_zalloc(size_t size) +{ + struct block_hist *bh; + + bh = zalloc(size + sizeof(*bh)); + if (!bh) + return NULL; + + return &bh->he; +} + +static void block_hist_free(void *he) +{ + struct block_hist *bh; + + bh = container_of(he, struct block_hist, he); + hists__delete_entries(&bh->block_hists); + free(bh); +} + +struct hist_entry_ops block_hist_ops = { + .new = block_hist_zalloc, + .free = block_hist_free, +}; + static int diff__process_sample_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -362,9 +400,22 @@ static int diff__process_sample_event(struct perf_tool *tool, goto out_put; } - if (!hists__add_entry(hists, &al, NULL, NULL, NULL, 
sample, true)) { - pr_warning("problem incrementing symbol period, skipping event\n"); - goto out_put; + if (compute != COMPUTE_CYCLES) { + if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, + true)) { + pr_warning("problem incrementing symbol period, " + "skipping event\n"); + goto out_put; + } + } else { + if (!hists__add_entry_ops(hists, &block_hist_ops, &al, NULL, + NULL, NULL, sample, true)) { + pr_warning("problem incrementing symbol period, " + "skipping event\n"); + goto out_put; + } + + hist__account_cycles(sample->branch_stack, &al, sample, false); } /* @@ -474,6 +525,203 @@ static void hists__baseline_only(struct hists *hists) } } +static int64_t block_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + struct block_info *bi_l = left->block_info; + struct block_info *bi_r = right->block_info; + int cmp; + + if (!bi_l->sym || !bi_r->sym) { + if (!bi_l->sym && !bi_r->sym) + return 0; + else if (!bi_l->sym) + return -1; + else + return 1; + } + + if (bi_l->sym == bi_r->sym) { + if (bi_l->start == bi_r->start) { + if (bi_l->end == bi_r->end) + return 0; + else + return (int64_t)(bi_r->end - bi_l->end); + } else + return (int64_t)(bi_r->start - bi_l->start); + } else { + cmp = strcmp(bi_l->sym->name, bi_r->sym->name); + return cmp; + } + + if (bi_l->sym->start != bi_r->sym->start) + return (int64_t)(bi_r->sym->start - bi_l->sym->start); + + return (int64_t)(bi_r->sym->end - bi_l->sym->end); +} + +static int64_t block_cycles_diff_cmp(struct hist_entry *left, + struct hist_entry *right) +{ + bool pairs_left = hist_entry__has_pairs(left); + bool pairs_right = hist_entry__has_pairs(right); + s64 l, r; + + if (!pairs_left && !pairs_right) + return 0; + + l = labs(left->diff.cycles); + r = labs(right->diff.cycles); + return r - l; +} + +static int64_t block_sort(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return block_cycles_diff_cmp(right, left); +} + 
+static void init_block_hist(struct block_hist *bh) +{ + __hists__init(&bh->block_hists, &bh->block_list); + perf_hpp_list__init(&bh->block_list); + + INIT_LIST_HEAD(&bh->block_fmt.list); + INIT_LIST_HEAD(&bh->block_fmt.sort_list); + bh->block_fmt.cmp = block_cmp; + bh->block_fmt.sort = block_sort; + perf_hpp_list__register_sort_field(&bh->block_list, + &bh->block_fmt); + bh->valid = true; +} + +static void init_block_info(struct block_info *bi, struct symbol *sym, + struct cyc_hist *ch, int offset) +{ + bi->sym = sym; + bi->start = ch->start; + bi->end = offset; + bi->cycles = ch->cycles; + bi->cycles_aggr = ch->cycles_aggr; + bi->num = ch->num; + bi->num_aggr = ch->num_aggr; +} + +static int process_block_per_sym(struct hist_entry *he) +{ + struct annotation *notes; + struct cyc_hist *ch; + struct block_hist *bh; + + if (!he->ms.map || !he->ms.sym) + return 0; + + notes = symbol__annotation(he->ms.sym); + if (!notes || !notes->src || !notes->src->cycles_hist) + return 0; + + bh = container_of(he, struct block_hist, he); + init_block_hist(bh); + + ch = notes->src->cycles_hist; + for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) { + if (ch[i].num_aggr) { + struct block_info *bi; + struct hist_entry *he_block; + + bi = block_info__new(); + if (!bi) + return -1; + + init_block_info(bi, he->ms.sym, &ch[i], i); + he_block = hists__add_entry_block(&bh->block_hists, + &dummy_al, bi); + if (!he_block) { + block_info__put(bi); + return -1; + } + } + } + + return 0; +} + +static int block_pair_cmp(struct hist_entry *a, struct hist_entry *b) +{ + struct block_info *bi_a = a->block_info; + struct block_info *bi_b = b->block_info; + int cmp; + + if (!bi_a->sym || !bi_b->sym) + return -1; + + cmp = strcmp(bi_a->sym->name, bi_b->sym->name); + + if ((!cmp) && (bi_a->start == bi_b->start) && (bi_a->end == bi_b->end)) + return 0; + + return -1; +} + +static struct hist_entry *get_block_pair(struct hist_entry *he, + struct hists *hists_pair) +{ + struct rb_root_cached 
*root = hists_pair->entries_in; + struct rb_node *next = rb_first_cached(root); + int cmp; + + while (next != NULL) { + struct hist_entry *he_pair = rb_entry(next, struct hist_entry, + rb_node_in); + + next = rb_next(&he_pair->rb_node_in); + + cmp = block_pair_cmp(he_pair, he); + if (!cmp) + return he_pair; + } + + return NULL; +} + +static void compute_cycles_diff(struct hist_entry *he, + struct hist_entry *pair) +{ + pair->diff.computed = true; + if (pair->block_info->num && he->block_info->num) { + pair->diff.cycles = + pair->block_info->cycles_aggr / pair->block_info->num_aggr - + he->block_info->cycles_aggr / he->block_info->num_aggr; + } +} + +static void block_hists_match(struct hists *hists_base, + struct hists *hists_pair) +{ + struct rb_root_cached *root = hists_base->entries_in; + struct rb_node *next = rb_first_cached(root); + + while (next != NULL) { + struct hist_entry *he = rb_entry(next, struct hist_entry, + rb_node_in); + struct hist_entry *pair = get_block_pair(he, hists_pair); + + next = rb_next(&he->rb_node_in); + + if (pair) { + hist_entry__add_pair(pair, he); + compute_cycles_diff(he, pair); + } + } +} + +static int filter_cb(struct hist_entry *he, void *arg __maybe_unused) +{ + /* Skip the calculation of column length in output_resort */ + he->filtered = true; + return 0; +} + static void hists__precompute(struct hists *hists) { struct rb_root_cached *root; @@ -486,6 +734,7 @@ static void hists__precompute(struct hists *hists) next = rb_first_cached(root); while (next != NULL) { + struct block_hist *bh, *pair_bh; struct hist_entry *he, *pair; struct data__file *d; int i; @@ -493,6 +742,9 @@ static void hists__precompute(struct hists *hists) he = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&he->rb_node_in); + if (compute == COMPUTE_CYCLES) + process_block_per_sym(he); + data__for_each_file_new(i, d) { pair = get_pair_data(he, d); if (!pair) @@ -509,6 +761,19 @@ static void hists__precompute(struct hists *hists) case 
COMPUTE_WEIGHTED_DIFF: compute_wdiff(he, pair); break; + case COMPUTE_CYCLES: + process_block_per_sym(pair); + bh = container_of(he, struct block_hist, he); + pair_bh = container_of(pair, struct block_hist, + he); + + if (bh->valid && pair_bh->valid) { + block_hists_match(&bh->block_hists, + &pair_bh->block_hists); + hists__output_resort_cb(&pair_bh->block_hists, + NULL, filter_cb); + } + break; default: BUG_ON(1); } @@ -720,6 +985,9 @@ static void hists__process(struct hists *hists) hists__precompute(hists); hists__output_resort(hists, NULL); + if (compute == COMPUTE_CYCLES) + symbol_conf.report_block = true; + hists__fprintf(hists, !quiet, 0, 0, 0, stdout, !symbol_conf.use_callchain); } @@ -873,6 +1141,31 @@ static int parse_time_str(struct data__file *d, char *abstime_ostr, return ret; } +static int check_file_brstack(void) +{ + struct data__file *d; + bool has_br_stack; + int i; + + data__for_each_file(i, d) { + d->session = perf_session__new(&d->data, false, &pdiff.tool); + if (!d->session) { + pr_err("Failed to open %s\n", d->data.path); + return -1; + } + + has_br_stack = perf_header__has_feat(&d->session->header, + HEADER_BRANCH_STACK); + perf_session__delete(d->session); + if (!has_br_stack) + return 0; + } + + /* Set only all files having branch stacks */ + pdiff.has_br_stack = true; + return 0; +} + static int __cmd_diff(void) { struct data__file *d; @@ -950,7 +1243,7 @@ static const struct option options[] = { OPT_BOOLEAN('b', "baseline-only", &show_baseline_only, "Show only items with match in baseline"), OPT_CALLBACK('c', "compute", &compute, - "delta,delta-abs,ratio,wdiff:w1,w2 (default delta-abs)", + "delta,delta-abs,ratio,wdiff:w1,w2 (default delta-abs),cycles", "Entries differential computation selection", setup_compute), OPT_BOOLEAN('p', "period", &show_period, @@ -1028,6 +1321,49 @@ static int hpp__entry_baseline(struct hist_entry *he, char *buf, size_t size) return ret; } +static int cycles_printf(struct hist_entry *he, struct hist_entry *pair, 
+ struct perf_hpp *hpp, int width) +{ + struct block_hist *bh = container_of(he, struct block_hist, he); + struct block_hist *bh_pair = container_of(pair, struct block_hist, he); + struct hist_entry *block_he; + struct block_info *bi; + char buf[128]; + char *start_line, *end_line; + + block_he = hists__get_entry(&bh_pair->block_hists, bh->block_idx); + if (!block_he) { + hpp->skip = true; + return 0; + } + + /* + * Avoid printing the warning "addr2line_init failed for ..." + */ + symbol_conf.disable_add2line_warn = true; + + bi = block_he->block_info; + + start_line = map__srcline(he->ms.map, bi->sym->start + bi->start, + he->ms.sym); + + end_line = map__srcline(he->ms.map, bi->sym->start + bi->end, + he->ms.sym); + + if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { + scnprintf(buf, sizeof(buf), "[%s -> %s] %4ld", + start_line, end_line, block_he->diff.cycles); + } else { + scnprintf(buf, sizeof(buf), "[%7lx -> %7lx] %4ld", + bi->start, bi->end, block_he->diff.cycles); + } + + free_srcline(start_line); + free_srcline(end_line); + + return scnprintf(hpp->buf, hpp->size, "%*s", width, buf); +} + static int __hpp__color_compare(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he, int comparison_method) @@ -1039,8 +1375,17 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, s64 wdiff; char pfmt[20] = " "; - if (!pair) + if (!pair) { + if (comparison_method == COMPUTE_CYCLES) { + struct block_hist *bh; + + bh = container_of(he, struct block_hist, he); + if (bh->block_idx) + hpp->skip = true; + } + goto no_print; + } switch (comparison_method) { case COMPUTE_DELTA: @@ -1075,6 +1420,8 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, return color_snprintf(hpp->buf, hpp->size, get_percent_color(wdiff), pfmt, wdiff); + case COMPUTE_CYCLES: + return cycles_printf(he, pair, hpp, dfmt->header_width); default: BUG_ON(1); } @@ -1104,6 +1451,12 @@ static int hpp__color_wdiff(struct perf_hpp_fmt *fmt, return 
__hpp__color_compare(fmt, hpp, he, COMPUTE_WEIGHTED_DIFF); } +static int hpp__color_cycles(struct perf_hpp_fmt *fmt, + struct perf_hpp *hpp, struct hist_entry *he) +{ + return __hpp__color_compare(fmt, hpp, he, COMPUTE_CYCLES); +} + static void hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size) { @@ -1305,6 +1658,10 @@ static void data__hpp_register(struct data__file *d, int idx) fmt->color = hpp__color_delta; fmt->sort = hist_entry__cmp_delta_abs; break; + case PERF_HPP_DIFF__CYCLES: + fmt->color = hpp__color_cycles; + fmt->sort = hist_entry__cmp_nop; + break; default: fmt->sort = hist_entry__cmp_nop; break; @@ -1385,6 +1742,13 @@ static int ui_init(void) case COMPUTE_DELTA_ABS: fmt->sort = hist_entry__cmp_delta_abs_idx; break; + case COMPUTE_CYCLES: + /* + * Should set since 'fmt->sort' is called without + * checking valid during sorting + */ + fmt->sort = hist_entry__cmp_nop; + break; default: BUG_ON(1); } @@ -1481,12 +1845,20 @@ int cmd_diff(int argc, const char **argv) if (quiet) perf_quiet_option(); + symbol__annotation_init(); + if (symbol__init(NULL) < 0) return -1; if (data_init(argc, argv) < 0) return -1; + if (check_file_brstack() < 0) + return -1; + + if (compute == COMPUTE_CYCLES && !pdiff.has_br_stack) + return -1; + if (ui_init() < 0) return -1; diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index f42f228e8899..66d5a6658daf 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * builtin-ftrace.c * * Copyright (c) 2013 LG Electronics, Namhyung Kim <namhyung@kernel.org> - * - * Released under the GPL v2. 
*/ #include "builtin.h" @@ -432,7 +431,7 @@ static void delete_filter_func(struct list_head *head) struct filter_entry *pos, *tmp; list_for_each_entry_safe(pos, tmp, head, list) { - list_del(&pos->list); + list_del_init(&pos->list); free(pos); } } diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 3d29d0524a89..a83af92fb0d1 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -14,8 +14,10 @@ #include <subcmd/help.h> #include "util/debug.h" #include <linux/kernel.h> +#include <linux/zalloc.h> #include <errno.h> #include <stdio.h> +#include <stdlib.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 24086b7f1b14..f4591a1438b4 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -224,7 +224,7 @@ static int perf_event__repipe_sample(struct perf_tool *tool, struct perf_evsel *evsel, struct machine *machine) { - if (evsel->handler) { + if (evsel && evsel->handler) { inject_handler f = evsel->handler; return f(tool, event, sample, evsel, machine); } @@ -837,6 +837,9 @@ int cmd_inject(int argc, const char **argv) if (inject.session == NULL) return -1; + if (zstd_init(&(inject.session->zstd_data), 0) < 0) + pr_warning("Decompression initialization failed.\n"); + if (inject.build_ids) { /* * to make sure the mmap records are ordered correctly @@ -867,6 +870,7 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); out_delete: + zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); return ret; } diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c index bc7a2bc7aed7..c1a44671b0b5 100644 --- a/tools/perf/builtin-kallsyms.c +++ b/tools/perf/builtin-kallsyms.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * builtin-kallsyms.c * * Builtin command: Look for a symbol in the running kernel and its modules * * Copyright (C) 2017, Red Hat Inc, 
Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include <inttypes.h> #include "builtin.h" diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index fa520f4b8095..9e5e60898083 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -4,7 +4,6 @@ #include "util/evlist.h" #include "util/evsel.h" -#include "util/util.h" #include "util/config.h" #include "util/map.h" #include "util/symbol.h" @@ -21,16 +20,18 @@ #include "util/cpumap.h" #include "util/debug.h" +#include "util/string2.h" #include <linux/kernel.h> #include <linux/rbtree.h> #include <linux/string.h> +#include <linux/zalloc.h> #include <errno.h> #include <inttypes.h> #include <locale.h> #include <regex.h> -#include "sane_ctype.h" +#include <linux/ctype.h> static int kmem_slab; static int kmem_page; @@ -1975,7 +1976,7 @@ int cmd_kmem(int argc, const char **argv) goto out_delete; } - kmem_page_size = tep_get_page_size(evsel->tp_format->pevent); + kmem_page_size = tep_get_page_size(evsel->tp_format->tep); symbol_conf.use_callchain = true; } diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index dbb6f737a3e2..b33c83489120 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -5,7 +5,6 @@ #include "util/evsel.h" #include "util/evlist.h" #include "util/term.h" -#include "util/util.h" #include "util/cache.h" #include "util/symbol.h" #include "util/thread.h" @@ -32,6 +31,7 @@ #include <linux/kernel.h> #include <linux/time64.h> +#include <linux/zalloc.h> #include <errno.h> #include <inttypes.h> #include <poll.h> diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index a8394b4f1167..e0312a1c4792 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -70,10 +70,11 @@ int cmd_list(int argc, const char **argv) print_symbol_events(NULL, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump); else if (strcmp(argv[i], "sw") == 0 || - 
strcmp(argv[i], "software") == 0) + strcmp(argv[i], "software") == 0) { print_symbol_events(NULL, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); - else if (strcmp(argv[i], "cache") == 0 || + print_tool_events(NULL, raw_dump); + } else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) @@ -113,6 +114,7 @@ int cmd_list(int argc, const char **argv) event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump); print_symbol_events(s, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); + print_tool_events(s, raw_dump); print_hwcache_events(s, raw_dump); print_pmu_events(s, raw_dump, !desc_flag, long_desc_flag, diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index b9810a8d350a..574e30ec6d7c 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -6,7 +6,6 @@ #include "util/evlist.h" #include "util/evsel.h" -#include "util/util.h" #include "util/cache.h" #include "util/symbol.h" #include "util/thread.h" @@ -30,6 +29,7 @@ #include <linux/list.h> #include <linux/hash.h> #include <linux/kernel.h> +#include <linux/zalloc.h> static struct perf_session *session; @@ -454,7 +454,7 @@ broken: /* broken lock sequence, discard it */ ls->discard = 1; bad_hist[BROKEN_ACQUIRE]++; - list_del(&seq->list); + list_del_init(&seq->list); free(seq); goto end; default: @@ -515,7 +515,7 @@ static int report_lock_acquired_event(struct perf_evsel *evsel, /* broken lock sequence, discard it */ ls->discard = 1; bad_hist[BROKEN_ACQUIRED]++; - list_del(&seq->list); + list_del_init(&seq->list); free(seq); goto end; default: @@ -570,7 +570,7 @@ static int report_lock_contended_event(struct perf_evsel *evsel, /* broken lock sequence, discard it */ ls->discard = 1; bad_hist[BROKEN_CONTENDED]++; - list_del(&seq->list); + list_del_init(&seq->list); free(seq); goto end; default: @@ -639,7 +639,7 @@ static int report_lock_release_event(struct perf_evsel 
*evsel, ls->nr_release++; free_seq: - list_del(&seq->list); + list_del_init(&seq->list); free(seq); end: return 0; diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 46d3c2deeb40..6418782951a4 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -1,24 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * builtin-probe.c * * Builtin probe command: Set up probe events by C expression * * Written by Masami Hiramatsu <mhiramat@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * */ #include <sys/utsname.h> #include <sys/types.h> @@ -33,7 +19,6 @@ #include "perf.h" #include "builtin.h" #include "namespaces.h" -#include "util/util.h" #include "util/strlist.h" #include "util/strfilter.h" #include "util/symbol.h" @@ -42,6 +27,7 @@ #include "util/probe-finder.h" #include "util/probe-event.h" #include "util/probe-file.h" +#include <linux/zalloc.h> #define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*" #define DEFAULT_FUNC_FILTER "!_*" diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4e2d953d4bc5..8779cee58185 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -11,7 +11,6 @@ #include "perf.h" #include "util/build-id.h" -#include "util/util.h" #include <subcmd/parse-options.h> #include "util/parse-events.h" #include "util/config.h" @@ -54,6 +53,7 @@ #include <sys/mman.h> #include <sys/wait.h> #include <linux/time64.h> +#include <linux/zalloc.h> struct switch_output { bool enabled; @@ -133,6 +133,11 @@ static int record__write(struct record *rec, struct perf_mmap *map __maybe_unuse return 0; } +static int record__aio_enabled(struct record *rec); +static int record__comp_enabled(struct record *rec); +static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size, + void *src, size_t src_size); + #ifdef HAVE_AIO_SUPPORT static int record__aio_write(struct aiocb *cblock, int trace_fd, void *buf, size_t size, off_t off) @@ -183,9 +188,9 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock) if (rem_size == 0) { cblock->aio_fildes = -1; /* - * md->refcount is incremented in perf_mmap__push() for - * every enqueued aio write request so decrement it because - * the request is now complete. + * md->refcount is incremented in record__aio_pushfn() for + * every aio write request started in record__aio_push() so + * decrement it because the request is now complete. 
*/ perf_mmap__put(md); rc = 1; @@ -240,18 +245,89 @@ static int record__aio_sync(struct perf_mmap *md, bool sync_all) } while (1); } -static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off) +struct record_aio { + struct record *rec; + void *data; + size_t size; +}; + +static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t size) { - struct record *rec = to; - int ret, trace_fd = rec->session->data->file.fd; + struct record_aio *aio = to; - rec->samples++; + /* + * map->base data pointed by buf is copied into free map->aio.data[] buffer + * to release space in the kernel buffer as fast as possible, calling + * perf_mmap__consume() from perf_mmap__push() function. + * + * That lets the kernel to proceed with storing more profiling data into + * the kernel buffer earlier than other per-cpu kernel buffers are handled. + * + * Coping can be done in two steps in case the chunk of profiling data + * crosses the upper bound of the kernel buffer. In this case we first move + * part of data from map->start till the upper bound and then the reminder + * from the beginning of the kernel buffer till the end of the data chunk. + */ + + if (record__comp_enabled(aio->rec)) { + size = zstd_compress(aio->rec->session, aio->data + aio->size, + perf_mmap__mmap_len(map) - aio->size, + buf, size); + } else { + memcpy(aio->data + aio->size, buf, size); + } + + if (!aio->size) { + /* + * Increment map->refcount to guard map->aio.data[] buffer + * from premature deallocation because map object can be + * released earlier than aio write request started on + * map->aio.data[] buffer is complete. + * + * perf_mmap__put() is done at record__aio_complete() + * after started aio request completion or at record__aio_push() + * if the request failed to start. 
+ */ + perf_mmap__get(map); + } + + aio->size += size; + + return size; +} + +static int record__aio_push(struct record *rec, struct perf_mmap *map, off_t *off) +{ + int ret, idx; + int trace_fd = rec->session->data->file.fd; + struct record_aio aio = { .rec = rec, .size = 0 }; + + /* + * Call record__aio_sync() to wait till map->aio.data[] buffer + * becomes available after previous aio write operation. + */ + + idx = record__aio_sync(map, false); + aio.data = map->aio.data[idx]; + ret = perf_mmap__push(map, &aio, record__aio_pushfn); + if (ret != 0) /* ret > 0 - no data, ret < 0 - error */ + return ret; - ret = record__aio_write(cblock, trace_fd, bf, size, off); + rec->samples++; + ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off); if (!ret) { - rec->bytes_written += size; + *off += aio.size; + rec->bytes_written += aio.size; if (switch_output_size(rec)) trigger_hit(&switch_output_trigger); + } else { + /* + * Decrement map->refcount incremented in record__aio_pushfn() + * back if record__aio_write() operation failed to start, otherwise + * map->refcount is decremented in record__aio_complete() after + * aio write operation finishes successfully. 
+ */ + perf_mmap__put(map); } return ret; @@ -273,7 +349,7 @@ static void record__aio_mmap_read_sync(struct record *rec) struct perf_evlist *evlist = rec->evlist; struct perf_mmap *maps = evlist->mmap; - if (!rec->opts.nr_cblocks) + if (!record__aio_enabled(rec)) return; for (i = 0; i < evlist->nr_mmaps; i++) { @@ -307,13 +383,8 @@ static int record__aio_parse(const struct option *opt, #else /* HAVE_AIO_SUPPORT */ static int nr_cblocks_max = 0; -static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused) -{ - return -1; -} - -static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused, - void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused) +static int record__aio_push(struct record *rec __maybe_unused, struct perf_mmap *map __maybe_unused, + off_t *off __maybe_unused) { return -1; } @@ -337,6 +408,67 @@ static int record__aio_enabled(struct record *rec) return rec->opts.nr_cblocks > 0; } +#define MMAP_FLUSH_DEFAULT 1 +static int record__mmap_flush_parse(const struct option *opt, + const char *str, + int unset) +{ + int flush_max; + struct record_opts *opts = (struct record_opts *)opt->value; + static struct parse_tag tags[] = { + { .tag = 'B', .mult = 1 }, + { .tag = 'K', .mult = 1 << 10 }, + { .tag = 'M', .mult = 1 << 20 }, + { .tag = 'G', .mult = 1 << 30 }, + { .tag = 0 }, + }; + + if (unset) + return 0; + + if (str) { + opts->mmap_flush = parse_tag_value(str, tags); + if (opts->mmap_flush == (int)-1) + opts->mmap_flush = strtol(str, NULL, 0); + } + + if (!opts->mmap_flush) + opts->mmap_flush = MMAP_FLUSH_DEFAULT; + + flush_max = perf_evlist__mmap_size(opts->mmap_pages); + flush_max /= 4; + if (opts->mmap_flush > flush_max) + opts->mmap_flush = flush_max; + + return 0; +} + +#ifdef HAVE_ZSTD_SUPPORT +static unsigned int comp_level_default = 1; + +static int record__parse_comp_level(const struct option *opt, const char *str, int unset) +{ + struct record_opts *opts = 
opt->value; + + if (unset) { + opts->comp_level = 0; + } else { + if (str) + opts->comp_level = strtol(str, NULL, 0); + if (!opts->comp_level) + opts->comp_level = comp_level_default; + } + + return 0; +} +#endif +static unsigned int comp_level_max = 22; + +static int record__comp_enabled(struct record *rec) +{ + return rec->opts.comp_level > 0; +} + static int process_synthesized_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample __maybe_unused, @@ -350,6 +482,11 @@ static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size { struct record *rec = to; + if (record__comp_enabled(rec)) { + size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size); + bf = map->data; + } + rec->samples++; return record__write(rec, map, bf, size); } @@ -546,7 +683,8 @@ static int record__mmap_evlist(struct record *rec, if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, opts->auxtrace_mmap_pages, opts->auxtrace_snapshot_mode, - opts->nr_cblocks, opts->affinity) < 0) { + opts->nr_cblocks, opts->affinity, + opts->mmap_flush, opts->comp_level) < 0) { if (errno == EPERM) { pr_err("Permission error mapping pages.\n" "Consider increasing " @@ -735,15 +873,46 @@ static void record__adjust_affinity(struct record *rec, struct perf_mmap *map) } } +static size_t process_comp_header(void *record, size_t increment) +{ + struct compressed_event *event = record; + size_t size = sizeof(*event); + + if (increment) { + event->header.size += increment; + return increment; + } + + event->header.type = PERF_RECORD_COMPRESSED; + event->header.size = size; + + return size; +} + +static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size, + void *src, size_t src_size) +{ + size_t compressed; + size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct compressed_event) - 1; + + compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size, + max_record_size, 
process_comp_header); + + session->bytes_transferred += src_size; + session->bytes_compressed += compressed; + + return compressed; +} + static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, - bool overwrite) + bool overwrite, bool synch) { u64 bytes_written = rec->bytes_written; int i; int rc = 0; struct perf_mmap *maps; int trace_fd = rec->data.file.fd; - off_t off; + off_t off = 0; if (!evlist) return 0; @@ -759,28 +928,33 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli off = record__aio_get_pos(trace_fd); for (i = 0; i < evlist->nr_mmaps; i++) { + u64 flush = 0; struct perf_mmap *map = &maps[i]; if (map->base) { record__adjust_affinity(rec, map); + if (synch) { + flush = map->flush; + map->flush = 1; + } if (!record__aio_enabled(rec)) { - if (perf_mmap__push(map, rec, record__pushfn) != 0) { + if (perf_mmap__push(map, rec, record__pushfn) < 0) { + if (synch) + map->flush = flush; rc = -1; goto out; } } else { - int idx; - /* - * Call record__aio_sync() to wait till map->data buffer - * becomes available after previous aio write request. 
- */ - idx = record__aio_sync(map, false); - if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) { + if (record__aio_push(rec, map, &off) < 0) { record__aio_set_pos(trace_fd, off); + if (synch) + map->flush = flush; rc = -1; goto out; } } + if (synch) + map->flush = flush; } if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && @@ -806,15 +980,15 @@ out: return rc; } -static int record__mmap_read_all(struct record *rec) +static int record__mmap_read_all(struct record *rec, bool synch) { int err; - err = record__mmap_read_evlist(rec, rec->evlist, false); + err = record__mmap_read_evlist(rec, rec->evlist, false, synch); if (err) return err; - return record__mmap_read_evlist(rec, rec->evlist, true); + return record__mmap_read_evlist(rec, rec->evlist, true, synch); } static void record__init_features(struct record *rec) @@ -841,6 +1015,8 @@ static void record__init_features(struct record *rec) perf_header__clear_feat(&session->header, HEADER_CLOCKID); perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); + if (!record__comp_enabled(rec)) + perf_header__clear_feat(&session->header, HEADER_COMPRESSED); perf_header__clear_feat(&session->header, HEADER_STAT); } @@ -934,7 +1110,7 @@ record__switch_output(struct record *rec, bool at_exit) rec->switch_output.cur_file = n; if (rec->switch_output.filenames[n]) { remove(rec->switch_output.filenames[n]); - free(rec->switch_output.filenames[n]); + zfree(&rec->switch_output.filenames[n]); } rec->switch_output.filenames[n] = new_filename; } else { @@ -1139,6 +1315,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) bool disabled = false, draining = false; struct perf_evlist *sb_evlist = NULL; int fd; + float ratio = 0; atexit(record__sig_exit); signal(SIGCHLD, sig_handler); @@ -1168,6 +1345,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) fd = perf_data__fd(data); rec->session = session; + if (zstd_init(&session->zstd_data, 
rec->opts.comp_level) < 0) { + pr_err("Compression initialization failed.\n"); + return -1; + } + + session->header.env.comp_type = PERF_COMP_ZSTD; + session->header.env.comp_level = rec->opts.comp_level; + record__init_features(rec); if (rec->opts.use_clockid && rec->opts.clockid_res_ns) @@ -1197,6 +1382,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) err = -1; goto out_child; } + session->header.env.comp_mmap_len = session->evlist->mmap_len; err = bpf__apply_obj_config(); if (err) { @@ -1340,7 +1526,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (trigger_is_hit(&switch_output_trigger) || done || draining) perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); - if (record__mmap_read_all(rec) < 0) { + if (record__mmap_read_all(rec, false) < 0) { trigger_error(&auxtrace_snapshot_trigger); trigger_error(&switch_output_trigger); err = -1; @@ -1441,8 +1627,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) record__synthesize_workload(rec, true); out_child: + record__mmap_read_all(rec, true); record__aio_mmap_read_sync(rec); + if (rec->session->bytes_transferred && rec->session->bytes_compressed) { + ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; + session->header.env.comp_ratio = ratio + 0.5; + } + if (forks) { int exit_status; @@ -1489,12 +1681,19 @@ out_child: else samples[0] = '\0'; - fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", + fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", perf_data__size(data) / 1024.0 / 1024.0, data->path, postfix, samples); + if (ratio) { + fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", + rec->session->bytes_transferred / 1024.0 / 1024.0, + ratio); + } + fprintf(stderr, " ]\n"); } out_delete_session: + zstd_fini(&session->zstd_data); perf_session__delete(session); if (!opts->no_bpf_event) @@ -1846,6 +2045,7 @@ static struct record 
record = { .uses_mmap = true, .default_per_cpu = true, }, + .mmap_flush = MMAP_FLUSH_DEFAULT, }, .tool = { .sample = process_sample_event, @@ -1912,6 +2112,9 @@ static struct option __record_options[] = { OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", "number of mmap data pages and AUX area tracing mmap pages", record__parse_mmap_pages), + OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", + "Minimal number of bytes that is extracted from mmap data pages (default: 1)", + record__mmap_flush_parse), OPT_BOOLEAN(0, "group", &record.opts.group, "put the counters into a counter group"), OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, @@ -1965,10 +2168,10 @@ static struct option __record_options[] = { "use per-thread mmaps"), OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", "sample selected machine registers on interrupt," - " use -I ? to list register names", parse_regs), + " use '-I?' to list register names", parse_intr_regs), OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", "sample selected machine registers on interrupt," - " use -I ? to list register names", parse_regs), + " use '--user-regs=?' 
to list register names", parse_user_regs), OPT_BOOLEAN(0, "running-time", &record.opts.running_time, "Record running/enabled time of read (:S) events"), OPT_CALLBACK('k', "clockid", &record.opts, @@ -1988,6 +2191,10 @@ static struct option __record_options[] = { OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, "Configure all used events to run in user space.", PARSE_OPT_EXCLUSIVE), + OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, + "collect kernel callchains"), + OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, + "collect user callchains"), OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", "clang binary to use for compiling BPF scriptlets"), OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", @@ -2016,6 +2223,11 @@ static struct option __record_options[] = { OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", record__parse_affinity), +#ifdef HAVE_ZSTD_SUPPORT + OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, + "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)", + record__parse_comp_level), +#endif OPT_END() }; @@ -2075,6 +2287,12 @@ int cmd_record(int argc, const char **argv) "cgroup monitoring only available in system-wide mode"); } + + if (rec->opts.comp_level != 0) { + pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); + rec->no_buildid = true; + } + if (rec->opts.record_switch_events && !perf_can_record_switch_events()) { ui__error("kernel does not support recording context switch events\n"); @@ -2220,10 +2438,14 @@ int cmd_record(int argc, const char **argv) if (rec->opts.nr_cblocks > nr_cblocks_max) rec->opts.nr_cblocks = nr_cblocks_max; - if (verbose > 0) - pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks); + pr_debug("nr_cblocks: %d\n", 
rec->opts.nr_cblocks); pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); + pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); + + if (rec->opts.comp_level > comp_level_max) + rec->opts.comp_level = comp_level_max; + pr_debug("comp level: %d\n", rec->opts.comp_level); err = __cmd_record(&record, argc, argv); out: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 4054eb1f98ac..abf0b9b8f566 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -8,7 +8,6 @@ */ #include "builtin.h" -#include "util/util.h" #include "util/config.h" #include "util/annotate.h" @@ -16,6 +15,7 @@ #include <linux/list.h> #include <linux/rbtree.h> #include <linux/err.h> +#include <linux/zalloc.h> #include "util/map.h" #include "util/symbol.h" #include "util/callchain.h" @@ -47,7 +47,7 @@ #include <errno.h> #include <inttypes.h> #include <regex.h> -#include "sane_ctype.h" +#include <linux/ctype.h> #include <signal.h> #include <linux/bitmap.h> #include <linux/stringify.h> @@ -136,9 +136,6 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, if (!ui__has_annotation() && !rep->symbol_ipc) return 0; - hist__account_cycles(sample->branch_stack, al, sample, - rep->nonany_branch_mode); - if (sort__mode == SORT_MODE__BRANCH) { bi = he->branch_info; err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); @@ -181,9 +178,6 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter, if (!ui__has_annotation() && !rep->symbol_ipc) return 0; - hist__account_cycles(sample->branch_stack, al, sample, - rep->nonany_branch_mode); - bi = he->branch_info; err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); if (err) @@ -282,6 +276,11 @@ static int process_sample_event(struct perf_tool *tool, if (al.map != NULL) al.map->dso->hit = 1; + if (ui__has_annotation() || rep->symbol_ipc) { + hist__account_cycles(sample->branch_stack, &al, sample, + rep->nonany_branch_mode); + } + ret = hist_entry_iter__add(&iter, &al, 
rep->max_stack, rep); if (ret < 0) pr_debug("problem adding hist entry, skipping event\n"); @@ -299,7 +298,7 @@ static int process_read_event(struct perf_tool *tool, struct report *rep = container_of(tool, struct report, tool); if (rep->show_threads) { - const char *name = evsel ? perf_evsel__name(evsel) : "unknown"; + const char *name = perf_evsel__name(evsel); int err = perf_read_values_add_value(&rep->show_threads_values, event->read.pid, event->read.tid, evsel->idx, @@ -942,8 +941,7 @@ parse_time_quantum(const struct option *opt, const char *arg, pr_err("time quantum cannot be 0"); return -1; } - while (isspace(*end)) - end++; + end = skip_spaces(end); if (*end == 0) return 0; if (!strcmp(end, "s")) { @@ -1259,6 +1257,9 @@ repeat: if (session == NULL) return -1; + if (zstd_init(&(session->zstd_data), 0) < 0) + pr_warning("Decompression initialization failed. Reported data may be incomplete.\n"); + if (report.queue_size) { ordered_events__set_alloc_size(&session->ordered_events, report.queue_size); @@ -1426,6 +1427,10 @@ repeat: &report.range_num); if (ret < 0) goto error; + + itrace_synth_opts__set_time_range(&itrace_synth_opts, + report.ptime_range, + report.range_num); } if (session->tevent.pevent && @@ -1447,9 +1452,11 @@ repeat: ret = 0; error: - if (report.ptime_range) + if (report.ptime_range) { + itrace_synth_opts__clear_time_range(&itrace_synth_opts); zfree(&report.ptime_range); - + } + zstd_fini(&(session->zstd_data)); perf_session__delete(session); return ret; } diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 275f2d92a7bf..56d1907b1215 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2,7 +2,6 @@ #include "builtin.h" #include "perf.h" -#include "util/util.h" #include "util/evlist.h" #include "util/cache.h" #include "util/evsel.h" @@ -15,6 +14,7 @@ #include "util/thread_map.h" #include "util/color.h" #include "util/stat.h" +#include "util/string2.h" #include "util/callchain.h" #include 
"util/time-utils.h" @@ -25,6 +25,7 @@ #include <linux/kernel.h> #include <linux/log2.h> +#include <linux/zalloc.h> #include <sys/prctl.h> #include <sys/resource.h> #include <inttypes.h> @@ -36,7 +37,7 @@ #include <api/fs/fs.h> #include <linux/time64.h> -#include "sane_ctype.h" +#include <linux/ctype.h> #define PR_SET_NAME 15 /* Set process name */ #define MAX_CPUS 4096 diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 61cfd8f70989..79367087bd18 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -14,7 +14,6 @@ #include "util/symbol.h" #include "util/thread.h" #include "util/trace-event.h" -#include "util/util.h" #include "util/evlist.h" #include "util/evsel.h" #include "util/sort.h" @@ -34,6 +33,7 @@ #include <linux/kernel.h> #include <linux/stringify.h> #include <linux/time64.h> +#include <linux/zalloc.h> #include <sys/utsname.h> #include "asm/bug.h" #include "util/mem-events.h" @@ -49,7 +49,7 @@ #include <unistd.h> #include <subcmd/pager.h> -#include "sane_ctype.h" +#include <linux/ctype.h> static char const *script_name; static char const *generate_script_lang; @@ -102,6 +102,7 @@ enum perf_output_field { PERF_OUTPUT_METRIC = 1U << 28, PERF_OUTPUT_MISC = 1U << 29, PERF_OUTPUT_SRCCODE = 1U << 30, + PERF_OUTPUT_IPC = 1U << 31, }; struct output_option { @@ -139,6 +140,7 @@ struct output_option { {.str = "metric", .field = PERF_OUTPUT_METRIC}, {.str = "misc", .field = PERF_OUTPUT_MISC}, {.str = "srccode", .field = PERF_OUTPUT_SRCCODE}, + {.str = "ipc", .field = PERF_OUTPUT_IPC}, }; enum { @@ -1268,6 +1270,20 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, return printed; } +static int perf_sample__fprintf_ipc(struct perf_sample *sample, + struct perf_event_attr *attr, FILE *fp) +{ + unsigned int ipc; + + if (!PRINT_FIELD(IPC) || !sample->cyc_cnt || !sample->insn_cnt) + return 0; + + ipc = (sample->insn_cnt * 100) / sample->cyc_cnt; + + return fprintf(fp, " \t IPC: %u.%02u (%" PRIu64 "/%" PRIu64 
") ", + ipc / 100, ipc % 100, sample->insn_cnt, sample->cyc_cnt); +} + static int perf_sample__fprintf_bts(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, @@ -1312,6 +1328,8 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample, printed += perf_sample__fprintf_addr(sample, thread, attr, fp); } + printed += perf_sample__fprintf_ipc(sample, attr, fp); + if (print_srcline_last) printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp); @@ -1606,6 +1624,7 @@ struct perf_script { bool show_namespace_events; bool show_lost_events; bool show_round_events; + bool show_bpf_events; bool allocated; bool per_event_dump; struct cpu_map *cpus; @@ -1858,6 +1877,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(PHYS_ADDR)) fprintf(fp, "%16" PRIx64, sample->phys_addr); + + perf_sample__fprintf_ipc(sample, attr, fp); + fprintf(fp, "\n"); if (PRINT_FIELD(SRCCODE)) { @@ -2318,6 +2340,41 @@ process_finished_round_event(struct perf_tool *tool __maybe_unused, return 0; } +static int +process_bpf_events(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct thread *thread; + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); + + if (machine__process_ksymbol(machine, event, sample) < 0) + return -1; + + if (!evsel->attr.sample_id_all) { + perf_event__fprintf(event, stdout); + return 0; + } + + thread = machine__findnew_thread(machine, sample->pid, sample->tid); + if (thread == NULL) { + pr_debug("problem processing MMAP event, skipping it.\n"); + return -1; + } + + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + event->header.type, stdout); + perf_event__fprintf(event, stdout); + } + + thread__put(thread); + return 0; +} + static void sig_handler(int sig 
__maybe_unused) { session_done = 1; @@ -2420,6 +2477,10 @@ static int __cmd_script(struct perf_script *script) script->tool.ordered_events = false; script->tool.finished_round = process_finished_round_event; } + if (script->show_bpf_events) { + script->tool.ksymbol = process_bpf_events; + script->tool.bpf_event = process_bpf_events; + } if (perf_script__setup_per_event_dump(script)) { pr_err("Couldn't create the per event dump files\n"); @@ -2819,7 +2880,7 @@ static int read_script_info(struct script_desc *desc, const char *filename) return -1; while (fgets(line, sizeof(line), fp)) { - p = ltrim(line); + p = skip_spaces(line); if (strlen(p) == 0) continue; if (*p != '#') @@ -2828,19 +2889,19 @@ static int read_script_info(struct script_desc *desc, const char *filename) if (strlen(p) && *p == '!') continue; - p = ltrim(p); + p = skip_spaces(p); if (strlen(p) && p[strlen(p) - 1] == '\n') p[strlen(p) - 1] = '\0'; if (!strncmp(p, "description:", strlen("description:"))) { p += strlen("description:"); - desc->half_liner = strdup(ltrim(p)); + desc->half_liner = strdup(skip_spaces(p)); continue; } if (!strncmp(p, "args:", strlen("args:"))) { p += strlen("args:"); - desc->args = strdup(ltrim(p)); + desc->args = strdup(skip_spaces(p)); continue; } } @@ -2947,7 +3008,7 @@ static int check_ev_match(char *dir_name, char *scriptname, return -1; while (fgets(line, sizeof(line), fp)) { - p = ltrim(line); + p = skip_spaces(line); if (*p == '#') continue; @@ -2957,7 +3018,7 @@ static int check_ev_match(char *dir_name, char *scriptname, break; p += 2; - p = ltrim(p); + p = skip_spaces(p); len = strcspn(p, " \t"); if (!len) break; @@ -3297,6 +3358,7 @@ static int parse_call_trace(const struct option *opt __maybe_unused, parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0); itrace_parse_synth_opts(opt, "cewp", 0); symbol_conf.nanosecs = true; + symbol_conf.pad_output_len_dso = 50; return 0; } @@ -3392,7 +3454,7 @@ int cmd_script(int argc, const char **argv) "Fields: 
comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,bpf-output,brstackinsn,brstackoff," - "callindent,insn,insnlen,synth,phys_addr,metric,misc", + "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), @@ -3438,6 +3500,8 @@ int cmd_script(int argc, const char **argv) "Show lost events (if recorded)"), OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events, "Show round events (if recorded)"), + OPT_BOOLEAN('\0', "show-bpf-events", &script.show_bpf_events, + "Show bpf related events (if recorded)"), OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump, "Dump trace output to files named by the monitored events"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), @@ -3458,6 +3522,15 @@ int cmd_script(int argc, const char **argv) "Time span of interest (start,stop)"), OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name, "Show inline function"), + OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory", + "guest mount directory under which every guest os" + " instance has a subdir"), + OPT_STRING(0, "guestvmlinux", &symbol_conf.default_guest_vmlinux_name, + "file", "file saving guest os vmlinux"), + OPT_STRING(0, "guestkallsyms", &symbol_conf.default_guest_kallsyms, + "file", "file saving guest os /proc/kallsyms"), + OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules, + "file", "file saving guest os /proc/modules"), OPT_END() }; const char * const script_subcommands[] = { "record", "report", NULL }; @@ -3477,6 +3550,16 @@ int cmd_script(int argc, const char **argv) argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, PARSE_OPT_STOP_AT_NON_OPTION); + if (symbol_conf.guestmount || + symbol_conf.default_guest_vmlinux_name || + symbol_conf.default_guest_kallsyms || + symbol_conf.default_guest_modules) { + /* + * 
Enable guest sample processing. + */ + perf_guest = true; + } + data.path = input_name; data.force = symbol_conf.force; @@ -3669,7 +3752,8 @@ int cmd_script(int argc, const char **argv) goto out_delete; uname(&uts); - if (!strcmp(uts.machine, session->header.env.arch) || + if (data.is_pipe || /* assume pipe_mode indicates native_arch */ + !strcmp(uts.machine, session->header.env.arch) || (!strcmp(uts.machine, "x86_64") && !strcmp(session->header.env.arch, "i386"))) native_arch = true; @@ -3765,6 +3849,10 @@ int cmd_script(int argc, const char **argv) &script.range_num); if (err < 0) goto out_delete; + + itrace_synth_opts__set_time_range(&itrace_synth_opts, + script.ptime_range, + script.range_num); } err = __cmd_script(&script); @@ -3772,8 +3860,10 @@ int cmd_script(int argc, const char **argv) flush_scripting(); out_delete: - if (script.ptime_range) + if (script.ptime_range) { + itrace_synth_opts__clear_time_range(&itrace_synth_opts); zfree(&script.ptime_range); + } perf_evlist__free_stats(session->evlist); perf_session__delete(session); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c3625ec374e0..b55a534b4de0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * builtin-stat.c * @@ -37,14 +38,11 @@ * Mike Galbraith <efault@gmx.de> * Paul Mackerras <paulus@samba.org> * Jaswinder Singh Rajput <jaswinder@kernel.org> - * - * Released under the GPL v2. 
(and only v2, not any later version) */ #include "perf.h" #include "builtin.h" #include "util/cgroup.h" -#include "util/util.h" #include <subcmd/parse-options.h> #include "util/parse-events.h" #include "util/pmu.h" @@ -68,6 +66,7 @@ #include "asm/bug.h" #include <linux/time64.h> +#include <linux/zalloc.h> #include <api/fs/fs.h> #include <errno.h> #include <signal.h> @@ -83,7 +82,7 @@ #include <sys/time.h> #include <sys/resource.h> -#include "sane_ctype.h" +#include <linux/ctype.h> #define DEFAULT_SEPARATOR " " #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" @@ -244,11 +243,25 @@ perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, process_synthesized_event, NULL); } +static int read_single_counter(struct perf_evsel *counter, int cpu, + int thread, struct timespec *rs) +{ + if (counter->tool_event == PERF_TOOL_DURATION_TIME) { + u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL; + struct perf_counts_values *count = + perf_counts(counter->counts, cpu, thread); + count->ena = count->run = val; + count->val = val; + return 0; + } + return perf_evsel__read_counter(counter, cpu, thread); +} + /* * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode */ -static int read_counter(struct perf_evsel *counter) +static int read_counter(struct perf_evsel *counter, struct timespec *rs) { int nthreads = thread_map__nr(evsel_list->threads); int ncpus, cpu, thread; @@ -275,7 +288,7 @@ static int read_counter(struct perf_evsel *counter) * (via perf_evsel__read_counter) and sets threir count->loaded. 
*/ if (!count->loaded && - perf_evsel__read_counter(counter, cpu, thread)) { + read_single_counter(counter, cpu, thread, rs)) { counter->counts->scaled = -1; perf_counts(counter->counts, cpu, thread)->ena = 0; perf_counts(counter->counts, cpu, thread)->run = 0; @@ -304,13 +317,13 @@ static int read_counter(struct perf_evsel *counter) return 0; } -static void read_counters(void) +static void read_counters(struct timespec *rs) { struct perf_evsel *counter; int ret; evlist__for_each_entry(evsel_list, counter) { - ret = read_counter(counter); + ret = read_counter(counter, rs); if (ret) pr_debug("failed to read counter %s\n", counter->name); @@ -323,11 +336,11 @@ static void process_interval(void) { struct timespec ts, rs; - read_counters(); - clock_gettime(CLOCK_MONOTONIC, &ts); diff_timespec(&rs, &ts, &ref_time); + read_counters(&rs); + if (STAT_RECORD) { if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL)) pr_err("failed to write stat round event\n"); @@ -593,7 +606,7 @@ try_again: * avoid arbitrary skew, we must read all counters before closing any * group leaders. 
*/ - read_counters(); + read_counters(&(struct timespec) { .tv_nsec = t1-t0 }); perf_evlist__close(evsel_list); return WEXITSTATUS(status); @@ -763,6 +776,8 @@ static struct option stat_options[] = { "stop workload and print counts after a timeout period in ms (>= 10ms)"), OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), + OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode, + "aggregate counts per processor die", AGGR_DIE), OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, @@ -787,6 +802,12 @@ static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, return cpu_map__get_socket(map, cpu, NULL); } +static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused, + struct cpu_map *map, int cpu) +{ + return cpu_map__get_die(map, cpu, NULL); +} + static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, struct cpu_map *map, int cpu) { @@ -827,12 +848,30 @@ static int perf_stat__get_socket_cached(struct perf_stat_config *config, return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); } +static int perf_stat__get_die_cached(struct perf_stat_config *config, + struct cpu_map *map, int idx) +{ + return perf_stat__get_aggr(config, perf_stat__get_die, map, idx); +} + static int perf_stat__get_core_cached(struct perf_stat_config *config, struct cpu_map *map, int idx) { return perf_stat__get_aggr(config, perf_stat__get_core, map, idx); } +static bool term_percore_set(void) +{ + struct perf_evsel *counter; + + evlist__for_each_entry(evsel_list, counter) { + if (counter->percore) + return true; + } + + return false; +} + static int perf_stat_init_aggr_mode(void) { int nr; @@ -845,6 +884,13 @@ static int perf_stat_init_aggr_mode(void) } stat_config.aggr_get_id = perf_stat__get_socket_cached; break; + case AGGR_DIE: + if 
(cpu_map__build_die_map(evsel_list->cpus, &stat_config.aggr_map)) { + perror("cannot build die map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_die_cached; + break; case AGGR_CORE: if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) { perror("cannot build core map"); @@ -853,6 +899,15 @@ static int perf_stat_init_aggr_mode(void) stat_config.aggr_get_id = perf_stat__get_core_cached; break; case AGGR_NONE: + if (term_percore_set()) { + if (cpu_map__build_core_map(evsel_list->cpus, + &stat_config.aggr_map)) { + perror("cannot build core map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_core_cached; + } + break; case AGGR_GLOBAL: case AGGR_THREAD: case AGGR_UNSET: @@ -901,21 +956,55 @@ static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) return cpu == -1 ? -1 : env->cpu[cpu].socket_id; } +static int perf_env__get_die(struct cpu_map *map, int idx, void *data) +{ + struct perf_env *env = data; + int die_id = -1, cpu = perf_env__get_cpu(env, map, idx); + + if (cpu != -1) { + /* + * Encode socket in bit range 15:8 + * die_id is relative to socket, + * we need a global id. So we combine + * socket + die id + */ + if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) + return -1; + + if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) + return -1; + + die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); + } + + return die_id; +} + static int perf_env__get_core(struct cpu_map *map, int idx, void *data) { struct perf_env *env = data; int core = -1, cpu = perf_env__get_cpu(env, map, idx); if (cpu != -1) { - int socket_id = env->cpu[cpu].socket_id; - /* - * Encode socket in upper 16 bits - * core_id is relative to socket, and + * Encode socket in bit range 31:24 + * encode die id in bit range 23:16 + * core_id is relative to socket and die, * we need a global id. So we combine - * socket + core id. 
+ * socket + die id + core id */ - core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); + if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) + return -1; + + if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) + return -1; + + if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) + return -1; + + core = (env->cpu[cpu].socket_id << 24) | + (env->cpu[cpu].die_id << 16) | + (env->cpu[cpu].core_id & 0xffff); } return core; @@ -927,6 +1016,12 @@ static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); } +static int perf_env__build_die_map(struct perf_env *env, struct cpu_map *cpus, + struct cpu_map **diep) +{ + return cpu_map__build_map(cpus, diep, perf_env__get_die, env); +} + static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, struct cpu_map **corep) { @@ -938,6 +1033,11 @@ static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_un { return perf_env__get_socket(map, idx, &perf_stat.session->header.env); } +static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, + struct cpu_map *map, int idx) +{ + return perf_env__get_die(map, idx, &perf_stat.session->header.env); +} static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, struct cpu_map *map, int idx) @@ -957,6 +1057,13 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) } stat_config.aggr_get_id = perf_stat__get_socket_file; break; + case AGGR_DIE: + if (perf_env__build_die_map(env, evsel_list->cpus, &stat_config.aggr_map)) { + perror("cannot build die map"); + return -1; + } + stat_config.aggr_get_id = perf_stat__get_die_file; + break; case AGGR_CORE: if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) { perror("cannot build core map"); @@ -1242,8 +1349,8 @@ static int add_default_attributes(void) 
fprintf(stderr, "Cannot set up top down events %s: %d\n", str, err); - free(str); parse_events_print_error(&errinfo, str); + free(str); return -1; } } else { @@ -1479,7 +1586,7 @@ static void runtime_stat_delete(struct perf_stat_config *config) for (i = 0; i < config->stats_num; i++) runtime_stat__exit(&config->stats[i]); - free(config->stats); + zfree(&config->stats); } static const char * const stat_report_usage[] = { @@ -1507,6 +1614,8 @@ static int __cmd_report(int argc, const char **argv) OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), + OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode, + "aggregate counts per processor die", AGGR_DIE), OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, @@ -1894,7 +2003,7 @@ int cmd_stat(int argc, const char **argv) perf_stat__exit_aggr_mode(); perf_evlist__free_stats(evsel_list); out: - free(stat_config.walltime_run); + zfree(&stat_config.walltime_run); if (smi_cost && smi_reset) sysfs__write_int(FREEZE_ON_SMI_PATH, 0); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 9b98687a27b9..4bde3fa245d1 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * builtin-timechart.c - make an svg timechart of system activity * @@ -5,11 +6,6 @@ * * Authors: * Arjan van de Ven <arjan@linux.intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. 
*/ #include <errno.h> @@ -17,9 +13,6 @@ #include <traceevent/event-parse.h> #include "builtin.h" - -#include "util/util.h" - #include "util/color.h" #include <linux/list.h> #include "util/cache.h" @@ -28,6 +21,7 @@ #include <linux/kernel.h> #include <linux/rbtree.h> #include <linux/time64.h> +#include <linux/zalloc.h> #include "util/symbol.h" #include "util/thread.h" #include "util/callchain.h" diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index fbbb0da43abb..b46b3c9f57a0 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * builtin-top.c * @@ -14,8 +15,6 @@ * Wu Fengguang <fengguang.wu@intel.com> * Mike Galbraith <efault@gmx.de> * Paul Mackerras <paulus@samba.org> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "builtin.h" @@ -41,6 +40,7 @@ #include "util/cpumap.h" #include "util/xyarray.h" #include "util/sort.h" +#include "util/string2.h" #include "util/term.h" #include "util/intlist.h" #include "util/parse-branch-options.h" @@ -76,7 +76,7 @@ #include <linux/time64.h> #include <linux/types.h> -#include "sane_ctype.h" +#include <linux/ctype.h> static volatile int done; static volatile int resize; @@ -101,7 +101,7 @@ static void perf_top__resize(struct perf_top *top) static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) { - struct perf_evsel *evsel = hists_to_evsel(he->hists); + struct perf_evsel *evsel; struct symbol *sym; struct annotation *notes; struct map *map; @@ -110,6 +110,8 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) if (!he || !he->ms.sym) return -1; + evsel = hists_to_evsel(he->hists); + sym = he->ms.sym; map = he->ms.map; @@ -226,7 +228,7 @@ static void perf_top__record_precise_ip(struct perf_top *top, static void perf_top__show_details(struct perf_top *top) { struct hist_entry *he = top->sym_filter_entry; - struct perf_evsel *evsel = hists_to_evsel(he->hists); 
+ struct perf_evsel *evsel; struct annotation *notes; struct symbol *symbol; int more; @@ -234,6 +236,8 @@ static void perf_top__show_details(struct perf_top *top) if (!he) return; + evsel = hists_to_evsel(he->hists); + symbol = he->ms.sym; notes = symbol__annotation(symbol); @@ -1208,11 +1212,14 @@ static int __cmd_top(struct perf_top *top) init_process_thread(top); + if (opts->record_namespaces) + top->tool.namespace_events = true; + ret = perf_event__synthesize_bpf_events(top->session, perf_event__process, &top->session->machines.host, &top->record_opts); if (ret < 0) - pr_warning("Couldn't synthesize bpf events.\n"); + pr_debug("Couldn't synthesize BPF events: Pre-existing BPF programs won't have symbols resolved.\n"); machine__synthesize_threads(&top->session->machines.host, &opts->target, top->evlist->threads, false, @@ -1500,6 +1507,8 @@ int cmd_top(int argc, const char **argv) OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"), OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize, "number of thread to run event synthesize"), + OPT_BOOLEAN(0, "namespaces", &opts->record_namespaces, + "Record namespaces events"), OPT_END() }; struct perf_evlist *sb_evlist = NULL; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f5b3a1e9c1dd..1aa2ed096f65 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * builtin-trace.c * @@ -12,8 +13,6 @@ * Initially based on the 'trace' prototype by Thomas Gleixner: * * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'") - * - * Released under the GPL v2. 
(and only v2, not any later version) */ #include <traceevent/event-parse.h> @@ -62,10 +61,11 @@ #include <linux/random.h> #include <linux/stringify.h> #include <linux/time64.h> +#include <linux/zalloc.h> #include <fcntl.h> #include <sys/sysmacros.h> -#include "sane_ctype.h" +#include <linux/ctype.h> #ifndef O_CLOEXEC # define O_CLOEXEC 02000000 @@ -403,6 +403,11 @@ static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, #define SCA_STRARRAY syscall_arg__scnprintf_strarray +size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + return strarray__scnprintf_flags(arg->parm, bf, size, arg->show_string_prefix, arg->val); +} + size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val) { size_t printed; @@ -482,6 +487,15 @@ static const char *bpf_cmd[] = { }; static DEFINE_STRARRAY(bpf_cmd, "BPF_"); +static const char *fsmount_flags[] = { + [1] = "CLOEXEC", +}; +static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_"); + +#include "trace/beauty/generated/fsconfig_arrays.c" + +static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_"); + static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1); @@ -642,6 +656,10 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, { .scnprintf = SCA_STRARRAY, \ .parm = &strarray__##array, } +#define STRARRAY_FLAGS(name, array) \ + { .scnprintf = SCA_STRARRAY_FLAGS, \ + .parm = &strarray__##array, } + #include "trace/beauty/arch_errno_names.c" #include "trace/beauty/eventfd.c" #include "trace/beauty/futex_op.c" @@ -713,6 +731,15 @@ static struct syscall_fmt { [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, }, { .name = "flock", .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, }, + { .name = "fsconfig", + .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, }, + { .name = "fsmount", + .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags), + [2] 
= { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, }, + { .name = "fspick", + .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, + [1] = { .scnprintf = SCA_FILENAME, /* path */ }, + [2] = { .scnprintf = SCA_FSPICK_FLAGS, /* flags */ }, }, }, { .name = "fstat", .alias = "newfstat", }, { .name = "fstatat", .alias = "newfstatat", }, { .name = "futex", @@ -775,6 +802,12 @@ static struct syscall_fmt { .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ }, [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */ .mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, }, + { .name = "move_mount", + .arg = { [0] = { .scnprintf = SCA_FDAT, /* from_dfd */ }, + [1] = { .scnprintf = SCA_FILENAME, /* from_pathname */ }, + [2] = { .scnprintf = SCA_FDAT, /* to_dfd */ }, + [3] = { .scnprintf = SCA_FILENAME, /* to_pathname */ }, + [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, /* flags */ }, }, }, { .name = "mprotect", .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, }, @@ -879,6 +912,8 @@ static struct syscall_fmt { .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, { .name = "symlinkat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, + { .name = "sync_file_range", + .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, /* flags */ }, }, }, { .name = "tgkill", .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, { .name = "tkill", @@ -937,8 +972,14 @@ struct syscall { struct syscall_arg_fmt *arg_fmt; }; +/* + * Must match what is in the BPF program: + * + * tools/perf/examples/bpf/augmented_raw_syscalls.c + */ struct bpf_map_syscall_entry { bool enabled; + u16 string_args_len[6]; }; /* @@ -998,10 +1039,10 @@ static struct thread_trace *thread_trace__new(void) { struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); - if (ttrace) + if (ttrace) { ttrace->files.max = -1; - - ttrace->syscall_stats = intlist__new(NULL); + ttrace->syscall_stats = intlist__new(NULL); + } 
return ttrace; } @@ -1192,8 +1233,17 @@ static void thread__set_filename_pos(struct thread *thread, const char *bf, static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size) { struct augmented_arg *augmented_arg = arg->augmented.args; + size_t printed = scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value); + /* + * So that the next arg with a payload can consume its augmented arg, i.e. for rename* syscalls + * we would have two strings, each prefixed by its size. + */ + int consumed = sizeof(*augmented_arg) + augmented_arg->size; + + arg->augmented.args = ((void *)arg->augmented.args) + consumed; + arg->augmented.size -= consumed; - return scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value); + return printed; } static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, @@ -1381,10 +1431,11 @@ static int syscall__set_arg_fmts(struct syscall *sc) if (sc->fmt && sc->fmt->arg[idx].scnprintf) continue; + len = strlen(field->name); + if (strcmp(field->type, "const char *") == 0 && - (strcmp(field->name, "filename") == 0 || - strcmp(field->name, "path") == 0 || - strcmp(field->name, "pathname") == 0)) + ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) || + strstr(field->name, "path") != NULL)) sc->arg_fmt[idx].scnprintf = SCA_FILENAME; else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr")) sc->arg_fmt[idx].scnprintf = SCA_PTR; @@ -1395,8 +1446,7 @@ static int syscall__set_arg_fmts(struct syscall *sc) else if ((strcmp(field->type, "int") == 0 || strcmp(field->type, "unsigned int") == 0 || strcmp(field->type, "long") == 0) && - (len = strlen(field->name)) >= 2 && - strcmp(field->name + len - 2, "fd") == 0) { + len >= 2 && strcmp(field->name + len - 2, "fd") == 0) { /* * /sys/kernel/tracing/events/syscalls/sys_enter* * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c @@ -1478,12 +1528,12 @@ static int 
trace__read_syscall_info(struct trace *trace, int id) static int trace__validate_ev_qualifier(struct trace *trace) { - int err = 0, i; - size_t nr_allocated; + int err = 0; + bool printed_invalid_prefix = false; struct str_node *pos; + size_t nr_used = 0, nr_allocated = strlist__nr_entries(trace->ev_qualifier); - trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); - trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr * + trace->ev_qualifier_ids.entries = malloc(nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0])); if (trace->ev_qualifier_ids.entries == NULL) { @@ -1493,9 +1543,6 @@ static int trace__validate_ev_qualifier(struct trace *trace) goto out; } - nr_allocated = trace->ev_qualifier_ids.nr; - i = 0; - strlist__for_each_entry(pos, trace->ev_qualifier) { const char *sc = pos->s; int id = syscalltbl__id(trace->sctbl, sc), match_next = -1; @@ -1505,17 +1552,18 @@ static int trace__validate_ev_qualifier(struct trace *trace) if (id >= 0) goto matches; - if (err == 0) { - fputs("Error:\tInvalid syscall ", trace->output); - err = -EINVAL; + if (!printed_invalid_prefix) { + pr_debug("Skipping unknown syscalls: "); + printed_invalid_prefix = true; } else { - fputs(", ", trace->output); + pr_debug(", "); } - fputs(sc, trace->output); + pr_debug("%s", sc); + continue; } matches: - trace->ev_qualifier_ids.entries[i++] = id; + trace->ev_qualifier_ids.entries[nr_used++] = id; if (match_next == -1) continue; @@ -1523,7 +1571,7 @@ matches: id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next); if (id < 0) break; - if (nr_allocated == trace->ev_qualifier_ids.nr) { + if (nr_allocated == nr_used) { void *entries; nr_allocated += 8; @@ -1536,20 +1584,19 @@ matches: } trace->ev_qualifier_ids.entries = entries; } - trace->ev_qualifier_ids.nr++; - trace->ev_qualifier_ids.entries[i++] = id; + trace->ev_qualifier_ids.entries[nr_used++] = id; } } - if (err < 0) { - fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" - 
"\nHint:\tand: 'man syscalls'\n", trace->output); -out_free: - zfree(&trace->ev_qualifier_ids.entries); - trace->ev_qualifier_ids.nr = 0; - } + trace->ev_qualifier_ids.nr = nr_used; out: + if (printed_invalid_prefix) + pr_debug("\n"); return err; +out_free: + zfree(&trace->ev_qualifier_ids.entries); + trace->ev_qualifier_ids.nr = 0; + goto out; } /* @@ -2676,6 +2723,25 @@ out_enomem: } #ifdef HAVE_LIBBPF_SUPPORT +static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry) +{ + struct syscall *sc = trace__syscall_info(trace, NULL, id); + int arg = 0; + + if (sc == NULL) + goto out; + + for (; arg < sc->nr_args; ++arg) { + entry->string_args_len[arg] = 0; + if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) { + /* Should be set like strace -s strsize */ + entry->string_args_len[arg] = PATH_MAX; + } + } +out: + for (; arg < 6; ++arg) + entry->string_args_len[arg] = 0; +} static int trace__set_ev_qualifier_bpf_filter(struct trace *trace) { int fd = bpf_map__fd(trace->syscalls.map); @@ -2688,6 +2754,9 @@ static int trace__set_ev_qualifier_bpf_filter(struct trace *trace) for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) { int key = trace->ev_qualifier_ids.entries[i]; + if (value.enabled) + trace__init_bpf_map_syscall_args(trace, key, &value); + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); if (err) break; @@ -2705,6 +2774,9 @@ static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled) int err = 0, key; for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) { + if (enabled) + trace__init_bpf_map_syscall_args(trace, key, &value); + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); if (err) break; @@ -3628,7 +3700,12 @@ static int trace__config(const char *var, const char *value, void *arg) struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", "event selector. 
use 'perf list' to list available events", parse_events_option); - err = parse_events_option(&o, value, 0); + /* + * We can't propagate parse_events_option() return, as it is 1 + * for failure while perf_config() expects -1. + */ + if (parse_events_option(&o, value, 0)) + err = -1; } else if (!strcmp(var, "trace.show_timestamp")) { trace->show_tstamp = perf_config_bool(var, value); } else if (!strcmp(var, "trace.show_duration")) { diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index 50df168be326..f470144d1a70 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -78,6 +78,8 @@ static void library_status(void) STATUS(HAVE_LZMA_SUPPORT, lzma); STATUS(HAVE_AUXTRACE_SUPPORT, get_cpuid); STATUS(HAVE_LIBBPF_SUPPORT, bpf); + STATUS(HAVE_AIO_SUPPORT, aio); + STATUS(HAVE_ZSTD_SUPPORT, zstd); } int cmd_version(int argc, const char **argv) diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index c68ee06cae63..f211c015cb76 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -105,6 +105,8 @@ check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/ex check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' +check include/linux/ctype.h '-I "isdigit("' +check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B' # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index f9b2161e1ca4..2f822bb51717 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -15,13 +15,20 @@ */ #include <unistd.h> +#include
<linux/limits.h> #include <pid_filter.h> /* bpf-output associated map */ bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); +/* + * string_args_len: one per syscall arg, 0 means not a string or don't copy it, + * PATH_MAX for copying everything, any other value to limit + * it a la 'strace -s strsize'. + */ struct syscall { bool enabled; + u16 string_args_len[6]; }; bpf_map(syscalls, ARRAY, int, struct syscall, 512); @@ -40,33 +47,74 @@ struct syscall_exit_args { struct augmented_filename { unsigned int size; - int reserved; - char value[256]; + int err; + char value[PATH_MAX]; }; -#define SYS_OPEN 2 -#define SYS_ACCESS 21 -#define SYS_OPENAT 257 - pid_filter(pids_filtered); +struct augmented_args_filename { + struct syscall_enter_args args; + struct augmented_filename filename; +}; + +bpf_map(augmented_filename_map, PERCPU_ARRAY, int, struct augmented_args_filename, 1); + +static inline +unsigned int augmented_filename__read(struct augmented_filename *augmented_filename, + const void *filename_arg, unsigned int filename_len) +{ + unsigned int len = sizeof(*augmented_filename); + int size = probe_read_str(&augmented_filename->value, filename_len, filename_arg); + + augmented_filename->size = augmented_filename->err = 0; + /* + * probe_read_str may return < 0, e.g. 
-EFAULT + * So we leave that in augmented_filename->err, which userspace can check + */ + if (size > 0) { + len -= sizeof(augmented_filename->value) - size; + len &= sizeof(augmented_filename->value) - 1; + augmented_filename->size = size; + } else { + /* + * So that userspace notices the error while still being able + * to skip this augmented arg record + */ + augmented_filename->err = size; + len = offsetof(struct augmented_filename, value); + } + + return len; +} + SEC("raw_syscalls:sys_enter") int sys_enter(struct syscall_enter_args *args) { - struct { - struct syscall_enter_args args; - struct augmented_filename filename; - } augmented_args; + struct augmented_args_filename *augmented_args; + /* + * We start len, the amount of data that will be in the perf ring + * buffer, if this is not filtered out by one of pid_filter__has(), + * syscall->enabled, etc, with the non-augmented raw syscall payload, + * i.e. sizeof(augmented_args->args). + * + * We'll add to this as we add augmented syscalls right after that + * initial, non-augmented raw_syscalls:sys_enter payload. + */ + unsigned int len = sizeof(augmented_args->args); struct syscall *syscall; - unsigned int len = sizeof(augmented_args); - const void *filename_arg = NULL; + int key = 0; + + augmented_args = bpf_map_lookup_elem(&augmented_filename_map, &key); + if (augmented_args == NULL) + return 1; if (pid_filter__has(&pids_filtered, getpid())) return 0; - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); + probe_read(&augmented_args->args, sizeof(augmented_args->args), args); - syscall = bpf_map_lookup_elem(&syscalls, &augmented_args.args.syscall_nr); + syscall = bpf_map_lookup_elem(&syscalls, &augmented_args->args.syscall_nr); if (syscall == NULL || !syscall->enabled) return 0; /* @@ -109,30 +157,70 @@ int sys_enter(struct syscall_enter_args *args) * * after the ctx memory access to prevent their down stream merging.
*/ - switch (augmented_args.args.syscall_nr) { - case SYS_ACCESS: - case SYS_OPEN: filename_arg = (const void *)args->args[0]; + /* + * For now copy just the first string arg, we need to improve the protocol + * and have more than one. + * + * Using the unrolled loop is not working, only when we do it manually, + * check this out later... + + u8 arg; +#pragma clang loop unroll(full) + for (arg = 0; arg < 6; ++arg) { + if (syscall->string_args_len[arg] != 0) { + filename_len = syscall->string_args_len[arg]; + filename_arg = (const void *)args->args[arg]; __asm__ __volatile__("": : :"memory"); - break; - case SYS_OPENAT: filename_arg = (const void *)args->args[1]; - break; - } - - if (filename_arg != NULL) { - augmented_args.filename.reserved = 0; - augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, - sizeof(augmented_args.filename.value), - filename_arg); - if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { - len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; - len &= sizeof(augmented_args.filename.value) - 1; + break; } - } else { - len = sizeof(augmented_args.args); } + verifier log: + +; if (syscall->string_args_len[arg] != 0) { +37: (69) r3 = *(u16 *)(r0 +2) + R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1_w=inv0 R2_w=map_value(id=0,off=2,ks=4,vs=14,imm=0) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm +; if (syscall->string_args_len[arg] != 0) { +38: (55) if r3 != 0x0 goto pc+5 + R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1=inv0 R2=map_value(id=0,off=2,ks=4,vs=14,imm=0) R3=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm +39: (b7) r1 = 1 +; if (syscall->string_args_len[arg] != 0) { +40: (bf) r2 = r0 +41: (07) r2 += 4 +42: (69) r3 = *(u16 *)(r0 +4) + R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1_w=inv1 R2_w=map_value(id=0,off=4,ks=4,vs=14,imm=0) R3_w=inv0 R6=ctx(id=0,off=0,imm=0) 
R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm +; if (syscall->string_args_len[arg] != 0) { +43: (15) if r3 == 0x0 goto pc+32 + R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1=inv1 R2=map_value(id=0,off=4,ks=4,vs=14,imm=0) R3=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm +; filename_arg = (const void *)args->args[arg]; +44: (67) r1 <<= 3 +45: (bf) r3 = r6 +46: (0f) r3 += r1 +47: (b7) r5 = 64 +48: (79) r3 = *(u64 *)(r3 +16) +dereference of modified ctx ptr R3 off=8 disallowed +processed 46 insns (limit 1000000) max_states_per_insn 0 total_states 12 peak_states 12 mark_read 7 + */ + +#define __loop_iter(arg) \ + if (syscall->string_args_len[arg] != 0) { \ + unsigned int filename_len = syscall->string_args_len[arg]; \ + const void *filename_arg = (const void *)args->args[arg]; \ + if (filename_len <= sizeof(augmented_args->filename.value)) \ + len += augmented_filename__read(&augmented_args->filename, filename_arg, filename_len); +#define loop_iter_first() __loop_iter(0); } +#define loop_iter(arg) else __loop_iter(arg); } +#define loop_iter_last(arg) else __loop_iter(arg); __asm__ __volatile__("": : :"memory"); } + + loop_iter_first() + loop_iter(1) + loop_iter(2) + loop_iter(3) + loop_iter(4) + loop_iter_last(5) + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, augmented_args, len); } SEC("raw_syscalls:sys_exit") diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index f7eb63cbbc65..88108598d6e9 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -45,10 +45,12 @@ static char jit_path[PATH_MAX]; static void *marker_addr; +#ifndef HAVE_GETTID static inline pid_t gettid(void) { 
return (pid_t)syscall(__NR_gettid); } +#endif static int get_e_machine(struct jitheader *hdr) { diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c index aea7b1fe85aa..c441a34cb1c0 100644 --- a/tools/perf/jvmti/libjvmti.c +++ b/tools/perf/jvmti/libjvmti.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> +#include <linux/string.h> #include <sys/types.h> #include <stdio.h> #include <string.h> @@ -162,8 +163,7 @@ copy_class_filename(const char * class_sign, const char * file_name, char * resu result[i] = '\0'; } else { /* fallback case */ - size_t file_name_len = strlen(file_name); - strncpy(result, file_name, file_name_len < max_length ? file_name_len : max_length); + strlcpy(result, file_name, max_length); } } diff --git a/tools/perf/perf-with-kcore.sh b/tools/perf/perf-with-kcore.sh index 7e47a7cbc195..0b96545c8184 100644 --- a/tools/perf/perf-with-kcore.sh +++ b/tools/perf/perf-with-kcore.sh @@ -1,15 +1,8 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only # perf-with-kcore: use perf with a copy of kcore # Copyright (c) 2014, Intel Corporation. # -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. -# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. set -e @@ -111,11 +104,6 @@ fix_buildid_cache_permissions() USER_HOME=$(bash <<< "echo ~$SUDO_USER") - if [ "$HOME" != "$USER_HOME" ] ; then - echo "Fix unnecessary because root has a home: $HOME" >&2 - exit 1 - fi - echo "Fixing buildid cache permissions" find "$USER_HOME/.debug" -xdev -type d ! 
-user "$SUDO_USER" -ls -exec chown "$SUDO_USER" \{\} \; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 72df4b6fa36f..97e2628ea5dd 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -29,6 +29,7 @@ #include <sys/stat.h> #include <unistd.h> #include <linux/kernel.h> +#include <linux/zalloc.h> const char perf_usage_string[] = "perf [--version] [--help] [OPTIONS] COMMAND [ARGS]"; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index c59743def8d3..74d0124d38f3 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -26,7 +26,7 @@ static inline unsigned long long rdclock(void) } #ifndef MAX_NR_CPUS -#define MAX_NR_CPUS 1024 +#define MAX_NR_CPUS 2048 #endif extern const char *input_name; @@ -61,6 +61,8 @@ struct record_opts { bool record_switch_events; bool all_kernel; bool all_user; + bool kernel_callchains; + bool user_callchains; bool tail_synthesize; bool overwrite; bool ignore_missing_thread; @@ -85,6 +87,8 @@ struct record_opts { u64 clockid_res_ns; int nr_cblocks; int affinity; + int mmap_flush; + unsigned int comp_level; }; enum perf_affinity { diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a57-a72/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a57-a72/core-imp-def.json new file mode 100644 index 000000000000..0ac9b7927450 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a57-a72/core-imp-def.json @@ -0,0 +1,179 @@ +[ + { + "ArchStdEvent": "L1D_CACHE_RD", + }, + { + "ArchStdEvent": "L1D_CACHE_WR", + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD", + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR", + }, + { + "ArchStdEvent": "L1D_CACHE_WB_VICTIM", + }, + { + "ArchStdEvent": "L1D_CACHE_WB_CLEAN", + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL", + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD", + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR", + }, + { + "ArchStdEvent": "L2D_CACHE_RD", + }, + { + "ArchStdEvent": "L2D_CACHE_WR", + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD", + }, + { + "ArchStdEvent": 
"L2D_CACHE_REFILL_WR", + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM", + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN", + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL", + }, + { + "ArchStdEvent": "BUS_ACCESS_RD", + }, + { + "ArchStdEvent": "BUS_ACCESS_WR", + }, + { + "ArchStdEvent": "BUS_ACCESS_SHARED", + }, + { + "ArchStdEvent": "BUS_ACCESS_NOT_SHARED", + }, + { + "ArchStdEvent": "BUS_ACCESS_NORMAL", + }, + { + "ArchStdEvent": "BUS_ACCESS_PERIPH", + }, + { + "ArchStdEvent": "MEM_ACCESS_RD", + }, + { + "ArchStdEvent": "MEM_ACCESS_WR", + }, + { + "ArchStdEvent": "UNALIGNED_LD_SPEC", + }, + { + "ArchStdEvent": "UNALIGNED_ST_SPEC", + }, + { + "ArchStdEvent": "UNALIGNED_LDST_SPEC", + }, + { + "ArchStdEvent": "LDREX_SPEC", + }, + { + "ArchStdEvent": "STREX_PASS_SPEC", + }, + { + "ArchStdEvent": "STREX_FAIL_SPEC", + }, + { + "ArchStdEvent": "LD_SPEC", + }, + { + "ArchStdEvent": "ST_SPEC", + }, + { + "ArchStdEvent": "LDST_SPEC", + }, + { + "ArchStdEvent": "DP_SPEC", + }, + { + "ArchStdEvent": "ASE_SPEC", + }, + { + "ArchStdEvent": "VFP_SPEC", + }, + { + "ArchStdEvent": "PC_WRITE_SPEC", + }, + { + "ArchStdEvent": "CRYPTO_SPEC", + }, + { + "ArchStdEvent": "BR_IMMED_SPEC", + }, + { + "ArchStdEvent": "BR_RETURN_SPEC", + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC", + }, + { + "ArchStdEvent": "ISB_SPEC", + }, + { + "ArchStdEvent": "DSB_SPEC", + }, + { + "ArchStdEvent": "DMB_SPEC", + }, + { + "ArchStdEvent": "EXC_UNDEF", + }, + { + "ArchStdEvent": "EXC_SVC", + }, + { + "ArchStdEvent": "EXC_PABORT", + }, + { + "ArchStdEvent": "EXC_DABORT", + }, + { + "ArchStdEvent": "EXC_IRQ", + }, + { + "ArchStdEvent": "EXC_FIQ", + }, + { + "ArchStdEvent": "EXC_SMC", + }, + { + "ArchStdEvent": "EXC_HVC", + }, + { + "ArchStdEvent": "EXC_TRAP_PABORT", + }, + { + "ArchStdEvent": "EXC_TRAP_DABORT", + }, + { + "ArchStdEvent": "EXC_TRAP_OTHER", + }, + { + "ArchStdEvent": "EXC_TRAP_IRQ", + }, + { + "ArchStdEvent": "EXC_TRAP_FIQ", + }, + { + "ArchStdEvent": "RC_LD_SPEC", + }, + { + "ArchStdEvent": 
"RC_ST_SPEC", + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json new file mode 100644 index 000000000000..0d1556fcdffe --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json @@ -0,0 +1,44 @@ +[ + { + "EventCode": "0x02", + "EventName": "uncore_hisi_ddrc.flux_wcmd", + "BriefDescription": "DDRC write commands", + "PublicDescription": "DDRC write commands", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x03", + "EventName": "uncore_hisi_ddrc.flux_rcmd", + "BriefDescription": "DDRC read commands", + "PublicDescription": "DDRC read commands", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x04", + "EventName": "uncore_hisi_ddrc.flux_wr", + "BriefDescription": "DDRC precharge commands", + "PublicDescription": "DDRC precharge commands", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x05", + "EventName": "uncore_hisi_ddrc.act_cmd", + "BriefDescription": "DDRC active commands", + "PublicDescription": "DDRC active commands", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x06", + "EventName": "uncore_hisi_ddrc.rnk_chg", + "BriefDescription": "DDRC rank commands", + "PublicDescription": "DDRC rank commands", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x07", + "EventName": "uncore_hisi_ddrc.rw_chg", + "BriefDescription": "DDRC read and write changes", + "PublicDescription": "DDRC read and write changes", + "Unit": "hisi_sccl,ddrc", + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json new file mode 100644 index 000000000000..447d3064de90 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json @@ -0,0 +1,51 @@ +[ + { + "EventCode": "0x00", + "EventName": "uncore_hisi_hha.rx_ops_num", + "BriefDescription": "The number of all operations received by the HHA", + "PublicDescription": "The 
number of all operations received by the HHA", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x01", + "EventName": "uncore_hisi_hha.rx_outer", + "BriefDescription": "The number of all operations received by the HHA from another socket", + "PublicDescription": "The number of all operations received by the HHA from another socket", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x02", + "EventName": "uncore_hisi_hha.rx_sccl", + "BriefDescription": "The number of all operations received by the HHA from another SCCL in this socket", + "PublicDescription": "The number of all operations received by the HHA from another SCCL in this socket", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x1c", + "EventName": "uncore_hisi_hha.rd_ddr_64b", + "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 64 bytes", + "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 64 bytes", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x1d", + "EventName": "uncore_hisi_hha.wr_dr_64b", + "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", + "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x1e", + "EventName": "uncore_hisi_hha.rd_ddr_128b", + "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes", + "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x1f", + "EventName": "uncore_hisi_hha.wr_ddr_128b", + "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes", + "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes", + "Unit": "hisi_sccl,hha", + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json
b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json new file mode 100644 index 000000000000..ca48747642e1 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json @@ -0,0 +1,37 @@ +[ + { + "EventCode": "0x00", + "EventName": "uncore_hisi_l3c.rd_cpipe", + "BriefDescription": "Total read accesses", + "PublicDescription": "Total read accesses", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x01", + "EventName": "uncore_hisi_l3c.wr_cpipe", + "BriefDescription": "Total write accesses", + "PublicDescription": "Total write accesses", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x02", + "EventName": "uncore_hisi_l3c.rd_hit_cpipe", + "BriefDescription": "Total read hits", + "PublicDescription": "Total read hits", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x03", + "EventName": "uncore_hisi_l3c.wr_hit_cpipe", + "BriefDescription": "Total write hits", + "PublicDescription": "Total write hits", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x04", + "EventName": "uncore_hisi_l3c.victim_num", + "BriefDescription": "l3c precharge commands", + "PublicDescription": "l3c precharge commands", + "Unit": "hisi_sccl,l3c", + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 59cd8604b0bd..927fcddcb4aa 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -12,7 +12,10 @@ # # #Family-model,Version,Filename,EventType -0x00000000410fd03[[:xdigit:]],v1,arm/cortex-a53,core +0x00000000410fd030,v1,arm/cortex-a53,core +0x00000000420f1000,v1,arm/cortex-a53,core +0x00000000410fd070,v1,arm/cortex-a57-a72,core +0x00000000410fd080,v1,arm/cortex-a57-a72,core 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core 0x00000000480fd010,v1,hisilicon/hip08,core diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json 
index e7a3524b748f..68618152ea2c 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json @@ -4,7 +4,7 @@ "EventCode": "128", "EventName": "L1D_RO_EXCL_WRITES", "BriefDescription": "L1D Read-only Exclusive Writes", - "PublicDescription": "Counter:128 Name:L1D_RO_EXCL_WRITES A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line" + "PublicDescription": "L1D_RO_EXCL_WRITES A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line" }, { "Unit": "CPU-M-CF", diff --git a/tools/perf/pmu-events/arch/x86/bonnell/frontend.json b/tools/perf/pmu-events/arch/x86/bonnell/frontend.json index 935b7dcf067d..ef69540ab61d 100644 --- a/tools/perf/pmu-events/arch/x86/bonnell/frontend.json +++ b/tools/perf/pmu-events/arch/x86/bonnell/frontend.json @@ -77,7 +77,7 @@ "UMask": "0x1", "EventName": "UOPS.MS_CYCLES", "SampleAfterValue": "2000000", - "BriefDescription": "This event counts the cycles where 1 or more uops are issued by the micro-sequencer (MS), including microcode assists and inserted flows, and written to the IQ. ", + "BriefDescription": "This event counts the cycles where 1 or more uops are issued by the micro-sequencer (MS), including microcode assists and inserted flows, and written to the IQ.", "CounterMask": "1" } ]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/bonnell/pipeline.json b/tools/perf/pmu-events/arch/x86/bonnell/pipeline.json index b2e681c78466..09c6de13de20 100644 --- a/tools/perf/pmu-events/arch/x86/bonnell/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/bonnell/pipeline.json @@ -189,7 +189,7 @@ "UMask": "0x8", "EventName": "BR_MISSP_TYPE_RETIRED.IND_CALL", "SampleAfterValue": "200000", - "BriefDescription": "Mispredicted indirect calls, including both register and memory indirect. " + "BriefDescription": "Mispredicted indirect calls, including both register and memory indirect." }, { "EventCode": "0x89", diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json index 00bfdb5c5acb..212b117a8ffb 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json @@ -1,164 +1,352 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. 
For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. 
For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. 
For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. 
", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", - "MetricGroup": "Frontend", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. 
This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on 
else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + 
cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "Branch_Misprediction_Cost" }, { + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts_SMT", + "MetricName": 
"Branch_Misprediction_Cost_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" + }, + { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. 
Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * ( DTLB_STORE_MISSES.WALK_COMPLETED + DTLB_LOAD_MISSES.WALK_COMPLETED + ITLB_MISSES.WALK_COMPLETED ) ) / cycles", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * ( DTLB_STORE_MISSES.WALK_COMPLETED + DTLB_LOAD_MISSES.WALK_COMPLETED + ITLB_MISSES.WALK_COMPLETED ) ) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": 
"64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - 
"MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": 
"C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/broadwell/cache.json b/tools/perf/pmu-events/arch/x86/broadwell/cache.json index 0b080b0352d8..7938bf5689ab 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/cache.json @@ -56,10 +56,10 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. 
Only not rejected loads are counted.", + "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x41", + "UMask": "0xc1", "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", "SampleAfterValue": "200003", "BriefDescription": "Demand Data Read requests that hit L2 cache", @@ -68,7 +68,7 @@ { "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x42", + "UMask": "0xc2", "EventName": "L2_RQSTS.RFO_HIT", "SampleAfterValue": "200003", "BriefDescription": "RFO requests that hit L2 cache.", @@ -77,7 +77,7 @@ { "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x44", + "UMask": "0xc4", "EventName": "L2_RQSTS.CODE_RD_HIT", "SampleAfterValue": "200003", "BriefDescription": "L2 cache hits when fetching instructions, code reads.", @@ -87,7 +87,7 @@ "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that hit L2 cache. L3 prefetch new types.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x50", + "UMask": "0xd0", "EventName": "L2_RQSTS.L2_PF_HIT", "SampleAfterValue": "200003", "BriefDescription": "L2 prefetch requests that hit L2 cache", @@ -433,7 +433,7 @@ }, { "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x41", @@ -445,7 +445,7 @@ }, { "PEBS": "1", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split store uops retired to the architected path. 
A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x42", @@ -771,2628 +771,2628 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts demand data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010001 ", + "MSRValue": "0x0000010001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that have any response type.", + "BriefDescription": "Counts demand data reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020001 ", + "MSRValue": "0x0080020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts demand data reads", 
"Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020001 ", + "MSRValue": "0x0100020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020001 ", + "MSRValue": "0x0200020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020001 ", + "MSRValue": "0x0400020001", "Counter": "0,1,2,3", "UMask": 
"0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020001 ", + "MSRValue": "0x1000020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020001 ", + "MSRValue": "0x3F80020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0001 ", + "MSRValue": "0x00803C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0001 ", + "MSRValue": "0x01003C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0001 ", + "MSRValue": "0x02003C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0001 ", + "MSRValue": "0x04003C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": 
"Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0001 ", + "MSRValue": "0x10003C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0001 ", + "MSRValue": "0x3F803C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010002 ", + "MSRValue": "0x0000010002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.", + "BriefDescription": "Counts all demand data writes (RFOs) have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0002 ", + "MSRValue": "0x00803C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0002 ", + "MSRValue": "0x01003C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0002 ", + "MSRValue": "0x02003C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0002 ", + "MSRValue": "0x04003C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0002 ", + "MSRValue": "0x10003C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0002 ", + "MSRValue": "0x3F803C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010004 ", + "MSRValue": "0x0000010004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that have any response type.", + "BriefDescription": "Counts all demand code reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020004 ", + 
"MSRValue": "0x0080020004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020004 ", + "MSRValue": "0x0100020004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020004 ", + "MSRValue": "0x0200020004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be 
programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020004 ", + "MSRValue": "0x0400020004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020004 ", + "MSRValue": "0x1000020004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020004 ", + "MSRValue": "0x3F80020004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", - 
"MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0004 ", + "MSRValue": "0x00803C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0004 ", + "MSRValue": "0x01003C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0004 ", + "MSRValue": "0x02003C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0004 ", + "MSRValue": "0x04003C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0004 ", + "MSRValue": "0x10003C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0004 ", + "MSRValue": "0x3F803C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3.", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts writebacks (modified to exclusive) that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive) have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010008 ", + "MSRValue": "0x0000010008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts writebacks (modified to exclusive) that have any response type.", + "BriefDescription": "Counts writebacks (modified to exclusive) have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - 
"MSRValue": "0x0080020008 ", + "MSRValue": "0x0080020008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020008 ", + "MSRValue": "0x0100020008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020008 ", + "MSRValue": "0x0200020008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - 
"PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020008 ", + "MSRValue": "0x0400020008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020008 ", + "MSRValue": "0x1000020008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020008 ", + "MSRValue": "0x3F80020008", "Counter": "0,1,2,3", "UMask": 
"0x1", "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0008 ", + "MSRValue": "0x00803C0008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0008 ", + "MSRValue": "0x01003C0008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0008 ", + "MSRValue": "0x02003C0008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0008 ", + "MSRValue": "0x04003C0008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0008 ", + "MSRValue": "0x10003C0008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0008 ", + "MSRValue": "0x3F803C0008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3.", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010010 ", + "MSRValue": "0x0000010010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": 
"Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020010 ", + "MSRValue": "0x0080020010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020010 ", + "MSRValue": "0x0100020010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020010 ", + "MSRValue": "0x0200020010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": 
"PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020010 ", + "MSRValue": "0x0400020010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020010 ", + "MSRValue": "0x1000020010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to 
specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020010 ", + "MSRValue": "0x3F80020010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0010 ", + "MSRValue": "0x00803C0010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0010 ", + "MSRValue": "0x01003C0010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0010 ", + "MSRValue": "0x02003C0010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0010 ", + "MSRValue": "0x04003C0010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0010 ", + "MSRValue": "0x10003C0010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0010 ", + "MSRValue": "0x3F803C0010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010020 ", + "MSRValue": "0x0000010020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + 
"PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020020 ", + "MSRValue": "0x0080020020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020020 ", + "MSRValue": "0x0100020020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020020 ", + "MSRValue": "0x0200020020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & 
SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020020 ", + "MSRValue": "0x0400020020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020020 ", + "MSRValue": "0x1000020020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore 
transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020020 ", + "MSRValue": "0x3F80020020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0020 ", + "MSRValue": "0x00803C0020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0020 ", + "MSRValue": "0x01003C0020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0020 ", + "MSRValue": "0x02003C0020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0020 ", + "MSRValue": "0x04003C0020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, 
and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0020 ", + "MSRValue": "0x10003C0020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0020 ", + "MSRValue": "0x3F803C0020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010040 ", + "MSRValue": "0x0000010040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that have any response type.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020040 ", + "MSRValue": "0x0080020040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all 
prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020040 ", + "MSRValue": "0x0100020040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020040 ", + "MSRValue": "0x0200020040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020040 ", + "MSRValue": "0x0400020040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", 
"SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020040 ", + "MSRValue": "0x1000020040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020040 ", + "MSRValue": "0x3F80020040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with 
no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0040 ", + "MSRValue": "0x00803C0040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0040 ", + "MSRValue": "0x01003C0040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0040 ", + "MSRValue": "0x02003C0040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0040 ", + "MSRValue": "0x04003C0040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0040 ", + "MSRValue": "0x10003C0040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0040 ", + "MSRValue": "0x3F803C0040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010080 ", + "MSRValue": "0x0000010080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit 
value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020080 ", + "MSRValue": "0x0080020080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020080 ", + "MSRValue": "0x0100020080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020080 ", + "MSRValue": "0x0200020080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020080 ", + "MSRValue": "0x0400020080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020080 ", + "MSRValue": "0x1000020080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": 
"0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020080 ", + "MSRValue": "0x3F80020080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0080 ", + "MSRValue": "0x00803C0080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are 
not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0080 ", + "MSRValue": "0x01003C0080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0080 ", + "MSRValue": "0x02003C0080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0080 ", + "MSRValue": "0x04003C0080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0080 ", + "MSRValue": "0x10003C0080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0080 ", + "MSRValue": "0x3F803C0080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010100 ", + "MSRValue": "0x0000010100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to 
specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020100 ", + "MSRValue": "0x0080020100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020100 ", + "MSRValue": "0x0100020100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020100 ", + "MSRValue": "0x0200020100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": 
"0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020100 ", + "MSRValue": "0x0400020100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020100 ", + "MSRValue": "0x1000020100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with 
specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020100 ", + "MSRValue": "0x3F80020100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0100 ", + "MSRValue": "0x00803C0100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0100 ", + "MSRValue": "0x01003C0100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0100 ", + "MSRValue": "0x02003C0100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0100 ", + "MSRValue": "0x04003C0100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0100 ", + "MSRValue": "0x10003C0100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0100 ", + "MSRValue": "0x3F803C0100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010200 ", + "MSRValue": "0x0000010200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that have any response type.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify 
attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020200 ", + "MSRValue": "0x0080020200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020200 ", + "MSRValue": "0x0100020200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020200 ", + "MSRValue": "0x0200020200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": 
"0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020200 ", + "MSRValue": "0x0400020200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020200 ", + "MSRValue": "0x1000020200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a 
specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020200 ", + "MSRValue": "0x3F80020200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0200 ", + "MSRValue": "0x00803C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0200 ", + "MSRValue": "0x01003C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0200 ", + "MSRValue": "0x02003C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0200 ", + "MSRValue": "0x04003C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0200 ", + "MSRValue": "0x10003C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0200 ", + "MSRValue": "0x3F803C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000018000 ", + "MSRValue": "0x0000018000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that have any response type.", + "BriefDescription": "Counts any other requests have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - 
"MSRValue": "0x0080028000 ", + "MSRValue": "0x0080028000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100028000 ", + "MSRValue": "0x0100028000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200028000 ", + "MSRValue": "0x0200028000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event 
select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400028000 ", + "MSRValue": "0x0400028000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000028000 ", + "MSRValue": "0x1000028000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80028000 ", + "MSRValue": "0x3F80028000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": 
"OTHER & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c8000 ", + "MSRValue": "0x00803C8000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c8000 ", + "MSRValue": "0x01003C8000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c8000 ", + "MSRValue": "0x02003C8000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c8000 ", + "MSRValue": "0x04003C8000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c8000 ", + "MSRValue": "0x10003C8000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c8000 ", + "MSRValue": "0x3F803C8000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010090 ", + "MSRValue": "0x0000010090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that have any response type.", + "BriefDescription": "Counts all prefetch data reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020090 ", + "MSRValue": "0x0080020090", 
"Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020090 ", + "MSRValue": "0x0100020090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020090 ", + "MSRValue": "0x0200020090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a 
specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020090 ", + "MSRValue": "0x0400020090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020090 ", + "MSRValue": "0x1000020090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020090 ", + "MSRValue": "0x3F80020090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": 
"0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0090 ", + "MSRValue": "0x00803C0090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0090 ", + "MSRValue": "0x01003C0090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0090 ", + "MSRValue": "0x02003C0090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0090 ", + "MSRValue": "0x04003C0090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0090 ", + "MSRValue": "0x10003C0090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0090 ", + "MSRValue": "0x3F803C0090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that hit in the L3.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010120 ", + "MSRValue": "0x0000010120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that have any response type.", + "BriefDescription": "Counts prefetch RFOs have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020120 ", + "MSRValue": "0x0080020120", "Counter": "0,1,2,3", "UMask": "0x1", 
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020120 ", + "MSRValue": "0x0100020120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020120 ", + "MSRValue": "0x0200020120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a 
dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020120 ", + "MSRValue": "0x0400020120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020120 ", + "MSRValue": "0x1000020120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020120 ", + "MSRValue": "0x3F80020120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts prefetch RFOs", 
"Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0120 ", + "MSRValue": "0x00803C0120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0120 ", + "MSRValue": "0x01003C0120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0120 ", + "MSRValue": "0x02003C0120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0120 ", + "MSRValue": "0x04003C0120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0120 ", + "MSRValue": "0x10003C0120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0120 ", + "MSRValue": "0x3F803C0120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that hit in the L3.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010240 ", + "MSRValue": "0x0000010240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that have any response type.", + "BriefDescription": "Counts all prefetch code reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020240 ", + "MSRValue": "0x0080020240", "Counter": 
"0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020240 ", + "MSRValue": "0x0100020240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020240 ", + "MSRValue": "0x0200020240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair 
of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020240 ", + "MSRValue": "0x0400020240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020240 ", + "MSRValue": "0x1000020240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020240 ", + "MSRValue": "0x3F80020240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + 
"MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0240 ", + "MSRValue": "0x00803C0240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0240 ", + "MSRValue": "0x01003C0240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0240 ", + "MSRValue": "0x02003C0240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0240 ", + "MSRValue": "0x04003C0240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0240 ", + "MSRValue": "0x10003C0240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0240 ", + "MSRValue": "0x3F803C0240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that hit in the L3.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010091 ", + "MSRValue": "0x0000010091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that have any response type.", + "BriefDescription": "Counts all demand & prefetch data reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - 
"MSRValue": "0x0080020091 ", + "MSRValue": "0x0080020091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020091 ", + "MSRValue": "0x0100020091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020091 ", + "MSRValue": "0x0200020091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": 
"0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020091 ", + "MSRValue": "0x0400020091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020091 ", + "MSRValue": "0x1000020091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020091 ", + "MSRValue": "0x3F80020091", 
"Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0091 ", + "MSRValue": "0x00803C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0091 ", + "MSRValue": "0x01003C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0091 ", + "MSRValue": "0x02003C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0091 ", + "MSRValue": "0x04003C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0091 ", + "MSRValue": "0x10003C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0091 ", + "MSRValue": "0x3F803C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3.", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010122 ", + "MSRValue": "0x0000010122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that have any response type.", + "BriefDescription": "Counts all demand & prefetch RFOs have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": 
"0x0080020122 ", + "MSRValue": "0x0080020122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020122 ", + "MSRValue": "0x0100020122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020122 ", + "MSRValue": "0x0200020122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be 
programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020122 ", + "MSRValue": "0x0400020122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020122 ", + "MSRValue": "0x1000020122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020122 ", + "MSRValue": "0x3F80020122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": 
"0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0122 ", + "MSRValue": "0x00803C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0122 ", + "MSRValue": "0x01003C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0122 ", + "MSRValue": "0x02003C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0122 ", + "MSRValue": "0x04003C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0122 ", + "MSRValue": "0x10003C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0122 ", + "MSRValue": "0x3F803C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json index 689d478dae93..15291239c128 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json @@ -1,24 +1,26 @@ [ { - "PublicDescription": "This event counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x8", "Errata": "BDM30", "EventName": "OTHER_ASSISTS.AVX_TO_SSE", "SampleAfterValue": "100003", - "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.", + "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable (Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses 
PEBS) of the event that counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x10", "Errata": "BDM30", "EventName": "OTHER_ASSISTS.SSE_TO_AVX", "SampleAfterValue": "100003", - "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.", + "BriefDescription": "Number of transitions from legacy SSE to AVX-256 when penalty applicable (Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -45,7 +47,7 @@ "UMask": "0x3", "EventName": "FP_ARITH_INST_RETIRED.SCALAR", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. (RSQRT for single precision?)", "CounterHTOff": "0,1,2,3" }, { @@ -54,7 +56,7 @@ "UMask": "0x4", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. 
Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "CounterHTOff": "0,1,2,3" }, { @@ -63,7 +65,7 @@ "UMask": "0x8", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "CounterHTOff": "0,1,2,3" }, { @@ -72,7 +74,7 @@ "UMask": "0x10", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. 
Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "CounterHTOff": "0,1,2,3" }, { @@ -81,7 +83,7 @@ "UMask": "0x15", "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", "SampleAfterValue": "2000006", - "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "CounterHTOff": "0,1,2,3" }, { @@ -90,7 +92,7 @@ "UMask": "0x20", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "CounterHTOff": "0,1,2,3" }, { @@ -99,7 +101,7 @@ "UMask": "0x2a", "EventName": "FP_ARITH_INST_RETIRED.SINGLE", "SampleAfterValue": "2000005", - "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "CounterHTOff": "0,1,2,3" }, { @@ -108,57 +110,62 @@ "UMask": "0x3c", "EventName": "FP_ARITH_INST_RETIRED.PACKED", "SampleAfterValue": "2000004", - "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 
(RSQRT for single-precision?)", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "FP_ASSIST.X87_OUTPUT", "SampleAfterValue": "100003", - "BriefDescription": "Number of X87 assists due to output value.", + "BriefDescription": "output - Numeric Overflow, Numeric Underflow, Inexact Result (Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "FP_ASSIST.X87_INPUT", "SampleAfterValue": "100003", - "BriefDescription": "Number of X87 assists due to input value.", + "BriefDescription": "input - Invalid Operation, Denormal Operand, SNaN Operand (Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. 
Counting covers only cases involving penalties that require micro-code assist intervention.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "FP_ASSIST.SIMD_OUTPUT", "SampleAfterValue": "100003", - "BriefDescription": "Number of SIMD FP assists due to Output values", + "BriefDescription": "SSE* FP micro-code assist when output value is invalid. (Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts any input SSE* floating-point (FP) assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "FP_ASSIST.SIMD_INPUT", "SampleAfterValue": "100003", - "BriefDescription": "Number of SIMD FP assists due to input values", + "BriefDescription": "Any input SSE* FP Assist - (Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", + "PEBS": "1", + "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. 
If an input and output assist are detected on the same cycle the event increments by 1. Uses PEBS.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x1e", "EventName": "FP_ASSIST.ANY", "SampleAfterValue": "100003", - "BriefDescription": "Cycles with any input/output SSE or FP assist", + "BriefDescription": "Counts any FP_ASSIST umask was incrementing (Precise Event)", "CounterMask": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/broadwell/frontend.json b/tools/perf/pmu-events/arch/x86/broadwell/frontend.json index 7142c76d7f11..aa4a5d762f21 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/frontend.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/frontend.json @@ -211,7 +211,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.", + "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. 
Instruction Decode Queue (IDQ) delivers four uops.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -274,7 +274,7 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", + "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. 
Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.", "EventCode": "0xAB", "Counter": "0,1,2,3", "UMask": "0x2", diff --git a/tools/perf/pmu-events/arch/x86/broadwell/memory.json b/tools/perf/pmu-events/arch/x86/broadwell/memory.json index c9154cebbdf0..b6b5247d3d5a 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/memory.json @@ -311,7 +311,7 @@ }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above four.", + "PublicDescription": "Counts randomly selected loads with latency value being above four.", "EventCode": "0xCD", "MSRValue": "0x4", "Counter": "3", @@ -320,13 +320,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", "SampleAfterValue": "100003", - "BriefDescription": "Loads with latency value being above 4", + "BriefDescription": "Randomly selected loads with latency value being above 4", "TakenAlone": "1", "CounterHTOff": "3" }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above eight.", + "PublicDescription": "Counts randomly selected loads with latency value being above eight.", "EventCode": "0xCD", "MSRValue": "0x8", "Counter": "3", @@ -335,13 +335,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", "SampleAfterValue": "50021", - "BriefDescription": "Loads with latency value being above 8", + "BriefDescription": "Randomly selected loads with latency value being above 8", "TakenAlone": "1", "CounterHTOff": "3" }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above 16.", + "PublicDescription": 
"Counts randomly selected loads with latency value being above 16.", "EventCode": "0xCD", "MSRValue": "0x10", "Counter": "3", @@ -350,13 +350,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", "SampleAfterValue": "20011", - "BriefDescription": "Loads with latency value being above 16", + "BriefDescription": "Randomly selected loads with latency value being above 16", "TakenAlone": "1", "CounterHTOff": "3" }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above 32.", + "PublicDescription": "Counts randomly selected loads with latency value being above 32.", "EventCode": "0xCD", "MSRValue": "0x20", "Counter": "3", @@ -365,13 +365,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", "SampleAfterValue": "100007", - "BriefDescription": "Loads with latency value being above 32", + "BriefDescription": "Randomly selected loads with latency value being above 32", "TakenAlone": "1", "CounterHTOff": "3" }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above 64.", + "PublicDescription": "Counts randomly selected loads with latency value being above 64.", "EventCode": "0xCD", "MSRValue": "0x40", "Counter": "3", @@ -380,13 +380,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", "SampleAfterValue": "2003", - "BriefDescription": "Loads with latency value being above 64", + "BriefDescription": "Randomly selected loads with latency value being above 64", "TakenAlone": "1", "CounterHTOff": "3" }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above 128.", + "PublicDescription": "Counts randomly selected loads with latency value being above 128.", "EventCode": "0xCD", "MSRValue": "0x80", "Counter": "3", @@ -395,13 +395,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "SampleAfterValue": "1009", - "BriefDescription": "Loads with latency value being 
above 128", + "BriefDescription": "Randomly selected loads with latency value being above 128", "TakenAlone": "1", "CounterHTOff": "3" }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above 256.", + "PublicDescription": "Counts randomly selected loads with latency value being above 256.", "EventCode": "0xCD", "MSRValue": "0x100", "Counter": "3", @@ -410,13 +410,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "SampleAfterValue": "503", - "BriefDescription": "Loads with latency value being above 256", + "BriefDescription": "Randomly selected loads with latency value being above 256", "TakenAlone": "1", "CounterHTOff": "3" }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above 512.", + "PublicDescription": "Counts randomly selected loads with latency value being above 512.", "EventCode": "0xCD", "MSRValue": "0x200", "Counter": "3", @@ -425,2620 +425,2620 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", "SampleAfterValue": "101", - "BriefDescription": "Loads with latency value being above 512", + "BriefDescription": "Randomly selected loads with latency value being above 512", "TakenAlone": "1", "CounterHTOff": "3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020001 ", + "MSRValue": "0x2000020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts demand data reads", 
"Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0001 ", + "MSRValue": "0x20003C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000001 ", + "MSRValue": "0x0084000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", 
"EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000001 ", + "MSRValue": "0x0104000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000001 ", + "MSRValue": "0x0204000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000001 ", + "MSRValue": "0x0404000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts demand data reads", "Offcore": "1", 
"CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000001 ", + "MSRValue": "0x1004000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000001 ", + "MSRValue": "0x2004000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000001 ", + "MSRValue": "0x3F84000001", "Counter": "0,1,2,3", "UMask": 
"0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000001 ", + "MSRValue": "0x00BC000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000001 ", + "MSRValue": "0x013C000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - 
"PublicDescription": "Counts demand data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000001 ", + "MSRValue": "0x023C000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000001 ", + "MSRValue": "0x043C000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the target was non-DRAM system address. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0002 ", + "MSRValue": "0x20003C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000002 ", + "MSRValue": "0x3F84000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000002 ", + "MSRValue": "0x00BC000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000002 ", + "MSRValue": "0x013C000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000002 ", + "MSRValue": "0x023C000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000002 ", + "MSRValue": "0x043C000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020004 ", + "MSRValue": "0x2000020004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0004 ", + "MSRValue": "0x20003C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000004 ", + "MSRValue": "0x0084000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { 
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000004 ", + "MSRValue": "0x0104000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000004 ", + "MSRValue": "0x0204000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000004 ", + "MSRValue": "0x0404000004", "Counter": "0,1,2,3", "UMask": "0x1", 
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000004 ", + "MSRValue": "0x1004000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000004 ", + "MSRValue": "0x2004000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific 
pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000004 ", + "MSRValue": "0x3F84000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000004 ", + "MSRValue": "0x00BC000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000004 ", + "MSRValue": "0x013C000004", "Counter": 
"0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000004 ", + "MSRValue": "0x023C000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000004 ", + "MSRValue": "0x043C000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - 
"PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020008 ", + "MSRValue": "0x2000020008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0008 ", + "MSRValue": "0x20003C0008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": 
"Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000008 ", + "MSRValue": "0x0084000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000008 ", + "MSRValue": "0x0104000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000008 ", + "MSRValue": "0x0204000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": 
"Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000008 ", + "MSRValue": "0x0404000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000008 ", + "MSRValue": "0x1004000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", 
"EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000008 ", + "MSRValue": "0x2004000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000008 ", + "MSRValue": "0x3F84000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts writebacks (modified to exclusive) that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000008 ", + "MSRValue": "0x00BC000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts writebacks (modified to exclusive) that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000008 ", + "MSRValue": "0x013C000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts writebacks (modified to exclusive) that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000008 ", + "MSRValue": "0x023C000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts writebacks (modified to exclusive) that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000008 ", + "MSRValue": "0x043C000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020010 ", + "MSRValue": "0x2000020010", "Counter": 
"0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0010 ", + "MSRValue": "0x20003C0010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000010 ", + "MSRValue": "0x0084000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - 
"BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000010 ", + "MSRValue": "0x0104000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000010 ", + "MSRValue": "0x0204000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and 
predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000010 ", + "MSRValue": "0x0404000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000010 ", + "MSRValue": "0x1004000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000010 ", + "MSRValue": "0x2004000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000010 ", + "MSRValue": "0x3F84000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000010 ", + "MSRValue": "0x00BC000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000010 ", + "MSRValue": "0x013C000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000010 ", + "MSRValue": "0x023C000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000010 ", + "MSRValue": "0x043C000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - 
"MSRValue": "0x2000020020 ", + "MSRValue": "0x2000020020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0020 ", + "MSRValue": "0x20003C0020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000020 ", + "MSRValue": "0x0084000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 
0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000020 ", + "MSRValue": "0x0104000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000020 ", + "MSRValue": "0x0204000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event 
codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000020 ", + "MSRValue": "0x0404000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000020 ", + "MSRValue": "0x1004000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000020 ", + "MSRValue": "0x2004000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000020 ", + "MSRValue": "0x3F84000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000020 ", + "MSRValue": "0x00BC000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000020 ", + "MSRValue": "0x013C000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000020 ", + "MSRValue": "0x023C000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000020 ", + "MSRValue": "0x043C000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": 
"0x2000020040 ", + "MSRValue": "0x2000020040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0040 ", + "MSRValue": "0x20003C0040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000040 ", + "MSRValue": "0x0084000040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000040 ", + "MSRValue": "0x0104000040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000040 ", + "MSRValue": "0x0204000040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", 
"Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000040 ", + "MSRValue": "0x0404000040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000040 ", + "MSRValue": "0x1004000040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + 
"PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000040 ", + "MSRValue": "0x2004000040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000040 ", + "MSRValue": "0x3F84000040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000040 ", + "MSRValue": "0x00BC000040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000040 ", + "MSRValue": "0x013C000040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000040 ", + "MSRValue": "0x023C000040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000040 ", + "MSRValue": "0x043C000040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to 
LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020080 ", + "MSRValue": "0x2000020080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0080 ", + "MSRValue": "0x20003C0080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000080 ", + "MSRValue": "0x0084000080", "Counter": "0,1,2,3", 
"UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000080 ", + "MSRValue": "0x0104000080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000080 ", + "MSRValue": "0x0204000080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring 
data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000080 ", + "MSRValue": "0x0404000080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000080 ", + "MSRValue": "0x1004000080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the 
offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000080 ", + "MSRValue": "0x2004000080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000080 ", + "MSRValue": "0x3F84000080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000080 ", + "MSRValue": "0x00BC000080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000080 ", + "MSRValue": "0x013C000080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000080 ", + "MSRValue": "0x023C000080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000080 ", + "MSRValue": "0x043C000080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to 
LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020100 ", + "MSRValue": "0x2000020100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0100 ", + "MSRValue": "0x20003C0100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000100 ", + "MSRValue": "0x0084000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000100 ", + "MSRValue": "0x0104000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000100 ", + "MSRValue": "0x0204000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - 
"PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000100 ", + "MSRValue": "0x0404000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000100 ", + "MSRValue": "0x1004000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 
0xBB", - "MSRValue": "0x2004000100 ", + "MSRValue": "0x2004000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000100 ", + "MSRValue": "0x3F84000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000100 ", + "MSRValue": "0x00BC000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000100 ", + "MSRValue": "0x013C000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000100 ", + "MSRValue": "0x023C000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000100 ", + "MSRValue": "0x043C000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 
0xBB", - "MSRValue": "0x2000020200 ", + "MSRValue": "0x2000020200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0200 ", + "MSRValue": "0x20003C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000200 ", + "MSRValue": "0x0084000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000200 ", + "MSRValue": "0x0104000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000200 ", + "MSRValue": "0x0204000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", 
"CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000200 ", + "MSRValue": "0x0404000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000200 ", + "MSRValue": "0x1004000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that 
bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000200 ", + "MSRValue": "0x2004000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000200 ", + "MSRValue": "0x3F84000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000200 ", + "MSRValue": "0x00BC000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000200 ", + "MSRValue": "0x013C000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000200 ", + "MSRValue": "0x023C000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000200 ", + "MSRValue": "0x043C000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - 
"MSRValue": "0x2000028000 ", + "MSRValue": "0x2000028000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c8000 ", + "MSRValue": "0x20003C8000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084008000 ", + "MSRValue": "0x0084008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts any other requests", 
"Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104008000 ", + "MSRValue": "0x0104008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204008000 ", + "MSRValue": "0x0204008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404008000 ", + "MSRValue": "0x0404008000", "Counter": "0,1,2,3", "UMask": "0x1", 
"EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004008000 ", + "MSRValue": "0x1004008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004008000 ", + "MSRValue": "0x2004008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and 
predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84008000 ", + "MSRValue": "0x3F84008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc008000 ", + "MSRValue": "0x00BC008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c008000 ", + "MSRValue": "0x013C008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + 
"MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c008000 ", + "MSRValue": "0x023C008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c008000 ", + "MSRValue": "0x043C008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to 
specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020090 ", + "MSRValue": "0x2000020090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0090 ", + "MSRValue": "0x20003C0090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000090 ", + "MSRValue": "0x0084000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - 
"MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000090 ", + "MSRValue": "0x0104000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000090 ", + "MSRValue": "0x0204000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine 
mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000090 ", + "MSRValue": "0x0404000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000090 ", + "MSRValue": "0x1004000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000090 ", + "MSRValue": "0x2004000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": 
"100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000090 ", + "MSRValue": "0x3F84000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000090 ", + "MSRValue": "0x00BC000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000090 ", + "MSRValue": "0x013C000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000090 ", + "MSRValue": "0x023C000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000090 ", + "MSRValue": "0x043C000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020120 ", + "MSRValue": "0x2000020120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0120 ", + "MSRValue": "0x20003C0120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000120 ", + "MSRValue": "0x0084000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a 
specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000120 ", + "MSRValue": "0x0104000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000120 ", + "MSRValue": "0x0204000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000120 ", + "MSRValue": "0x0404000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 
0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000120 ", + "MSRValue": "0x1004000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000120 ", + "MSRValue": "0x2004000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts 
prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000120 ", + "MSRValue": "0x3F84000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000120 ", + "MSRValue": "0x00BC000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000120 ", + "MSRValue": "0x013C000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": 
"Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000120 ", + "MSRValue": "0x023C000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000120 ", + "MSRValue": "0x043C000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020240 
", + "MSRValue": "0x2000020240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0240 ", + "MSRValue": "0x20003C0240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000240 ", + "MSRValue": "0x0084000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + 
"BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000240 ", + "MSRValue": "0x0104000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000240 ", + "MSRValue": "0x0204000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 
0xBB", - "MSRValue": "0x0404000240 ", + "MSRValue": "0x0404000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000240 ", + "MSRValue": "0x1004000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000240 ", + "MSRValue": "0x2004000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", 
"CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000240 ", + "MSRValue": "0x3F84000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000240 ", + "MSRValue": "0x00BC000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all 
prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000240 ", + "MSRValue": "0x013C000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000240 ", + "MSRValue": "0x023C000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000240 ", + "MSRValue": "0x043C000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS & 
SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020091 ", + "MSRValue": "0x2000020091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the target was non-DRAM system address. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0091 ", + "MSRValue": "0x20003C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000091 ", + "MSRValue": "0x0084000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000091 ", + "MSRValue": 
"0x0104000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000091 ", + "MSRValue": "0x0204000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000091 ", + "MSRValue": "0x0404000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": 
"0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000091 ", + "MSRValue": "0x1004000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000091 ", + "MSRValue": "0x2004000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000091 ", + "MSRValue": 
"0x3F84000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000091 ", + "MSRValue": "0x00BC000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000091 ", + "MSRValue": "0x013C000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS & 
SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000091 ", + "MSRValue": "0x023C000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000091 ", + "MSRValue": "0x043C000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a 
dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020122 ", + "MSRValue": "0x2000020122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0122 ", + "MSRValue": "0x20003C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000122 ", + "MSRValue": "0x0084000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000122 ", + "MSRValue": "0x0104000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000122 ", + "MSRValue": "0x0204000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and 
with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000122 ", + "MSRValue": "0x0404000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000122 ", + "MSRValue": "0x1004000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000122 ", + "MSRValue": "0x2004000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", 
"SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000122 ", + "MSRValue": "0x3F84000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000122 ", + "MSRValue": "0x00BC000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000122 ", + "MSRValue": "0x013C000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000122 ", + "MSRValue": "0x023C000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000122 ", + "MSRValue": "0x043C000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json index 999cf3066363..bb25574b8d21 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json @@ -1,7 +1,6 @@ [ { "PublicDescription": "This event counts the number of instructions retired from execution. 
For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. \nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. \nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", - "EventCode": "0x00", "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", @@ -11,7 +10,6 @@ }, { "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", @@ -20,7 +18,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "AnyThread": "1", @@ -31,7 +28,6 @@ }, { "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. 
This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. \nNote: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. This event is clocked by base clock (100 Mhz) on Sandy Bridge. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", @@ -317,7 +313,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts stalls occurred due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", + "PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). 
Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", "EventCode": "0x87", "Counter": "0,1,2,3", "UMask": "0x1", @@ -786,8 +782,8 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.", - "EventCode": "0xA2", + "PublicDescription": "This event counts resource-related stall cycles.", + "EventCode": "0xa2", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "RESOURCE_STALLS.ANY", @@ -973,6 +969,7 @@ "CounterHTOff": "2" }, { + "PublicDescription": "Number of Uops delivered by the LSD.", "EventCode": "0xA8", "Counter": "0,1,2,3", "UMask": "0x1", @@ -1147,7 +1144,8 @@ "CounterHTOff": "1" }, { - "PublicDescription": "This event counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts FP operations retired. 
For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", "EventCode": "0xC0", "Counter": "0,1,2,3", "UMask": "0x2", @@ -1157,12 +1155,12 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PEBS": "1", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x40", "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", "SampleAfterValue": "100003", - "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -1178,26 +1176,28 @@ "Data_LA": "1" }, { - "PublicDescription": "This event counts cycles without actually retired uops.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts cycles without actually retired uops.", "EventCode": "0xC2", "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "UOPS_RETIRED.STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Cycles no executable uops retired (Precise Event)", "CounterMask": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", + "PEBS": "1", + "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to PEBS uops retired event.", "EventCode": "0xC2", "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "UOPS_RETIRED.TOTAL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 10 actually retired uops.", + "BriefDescription": "Number of cycles using always true condition applied to PEBS uops retired event.", "CounterMask": "10", "CounterHTOff": "0,1,2,3" }, @@ -1320,13 +1320,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts not taken branch instructions retired.", + "PEBS": "1", + 
"PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts not taken branch instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "BR_INST_RETIRED.NOT_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch instructions retired.", + "BriefDescription": "Counts all not taken macro branch instructions retired. (Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -1341,14 +1342,15 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts far branch instructions retired.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x40", "Errata": "BDW98", "EventName": "BR_INST_RETIRED.FAR_BRANCH", "SampleAfterValue": "100007", - "BriefDescription": "Far branch instructions retired.", + "BriefDescription": "Counts the number of far branch instructions retired.(Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/cache.json b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json index 4ad425312bdc..bf243fe2a0ec 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json @@ -439,7 +439,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. 
A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -451,7 +451,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json index 0d04bf9db000..e2f0540625a2 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json @@ -1,6 +1,5 @@ [ { - "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", "Counter": "Fixed counter 0", @@ -10,7 +9,6 @@ "CounterHTOff": "Fixed counter 0" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state", "Counter": "Fixed counter 1", @@ -20,7 +18,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "Counter": "Fixed counter 1", @@ -30,7 +27,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", "Counter": "Fixed counter 2", @@ -322,7 +318,7 @@ "BriefDescription": "Stalls caused by changing prefix length of the instruction.", "Counter": "0,1,2,3", "EventName": "ILD_STALL.LCP", - 
"PublicDescription": "This event counts stalls occurred due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", + "PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json index 5a7f1ec24200..c6f9762f32c0 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json @@ -1,164 +1,370 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. 
For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. 
For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. 
For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. 
", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", - "MetricGroup": "Frontend", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. 
This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on 
else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + 
cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "Branch_Misprediction_Cost" + }, + { + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts_SMT", + "MetricName": 
"Branch_Misprediction_Cost_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" }, { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. 
Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7 * ( DTLB_STORE_MISSES.WALK_COMPLETED + DTLB_LOAD_MISSES.WALK_COMPLETED + ITLB_MISSES.WALK_COMPLETED ) ) / ( 2 * cycles )", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED) ) / (2*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles))", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7 * ( DTLB_STORE_MISSES.WALK_COMPLETED + DTLB_LOAD_MISSES.WALK_COMPLETED + ITLB_MISSES.WALK_COMPLETED ) ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 
cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* 
FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { + "MetricExpr": "1000000000 * ( cbox@event\\=0x36\\,umask\\=0x3\\,filter_opc\\=0x182@ / cbox@event\\=0x35\\,umask\\=0x3\\,filter_opc\\=0x182@ ) / ( cbox_0@event\\=0x0@ / duration_time )", + "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). 
Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_Lat", + "MetricName": "DRAM_Read_Latency" + }, + { + "MetricExpr": "cbox@event\\=0x36\\,umask\\=0x3\\,filter_opc\\=0x182@ / cbox@event\\=0x36\\,umask\\=0x3\\,filter_opc\\=0x182\\,thresh\\=1@", + "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_Parallel_Reads" + }, + { + "MetricExpr": "cbox_0@event\\=0x0@", + "BriefDescription": "Socket actual clocks when any core is active on that socket", + "MetricGroup": "", + "MetricName": "Socket_CLKS" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per 
package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/cache.json b/tools/perf/pmu-events/arch/x86/broadwellx/cache.json index 141b1080429d..75a3098d5775 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/cache.json @@ -57,17 +57,17 @@ }, { "EventCode": "0x24", - "UMask": "0x41", + "UMask": "0xc1", "BriefDescription": "Demand Data Read requests that hit L2 cache", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.", + "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache.", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x24", - "UMask": "0x42", + "UMask": "0xc2", "BriefDescription": "RFO requests that hit L2 cache.", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.RFO_HIT", @@ -76,7 +76,7 @@ }, { "EventCode": "0x24", - "UMask": "0x44", + "UMask": "0xc4", "BriefDescription": "L2 cache hits when fetching instructions, code reads.", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.CODE_RD_HIT", @@ -85,7 +85,7 @@ }, { "EventCode": "0x24", - "UMask": "0x50", + "UMask": "0xd0", "BriefDescription": "L2 prefetch requests that hit L2 cache", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.L2_PF_HIT", @@ -396,24 +396,24 @@ { "EventCode": "0xD0", "UMask": "0x11", - "BriefDescription": "Retired load uops that miss the STLB. 
(Precise Event - PEBS)", + "BriefDescription": "Retired load uops that miss the STLB.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This event counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x12", - "BriefDescription": "Retired store uops that miss the STLB. (Precise Event - PEBS)", + "BriefDescription": "Retired store uops that miss the STLB.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This event counts store uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -421,37 +421,37 @@ { "EventCode": "0xD0", "UMask": "0x21", - "BriefDescription": "Retired load uops with locked access. 
(Precise Event - PEBS)", + "BriefDescription": "Retired load uops with locked access.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "Errata": "BDM35", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with locked access retired to the architected path.", + "PublicDescription": "This event counts load uops with locked access retired to the architected path.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x41", - "BriefDescription": "Retired load uops that split across a cacheline boundary.(Precise Event - PEBS)", + "BriefDescription": "Retired load uops that split across a cacheline boundary.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This event counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x42", - "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)", + "BriefDescription": "Retired store uops that split across a cacheline boundary.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This event counts line-splitted store uops retired to the architected path. 
A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -459,24 +459,24 @@ { "EventCode": "0xD0", "UMask": "0x81", - "BriefDescription": "All retired load uops. (Precise Event - PEBS)", + "BriefDescription": "All retired load uops.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.", + "PublicDescription": "This event counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x82", - "BriefDescription": "Retired store uops that split across a cacheline boundary. 
(Precise Event - PEBS)", + "BriefDescription": "All retired store uops.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", + "PublicDescription": "This event counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", "SampleAfterValue": "2000003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -484,69 +484,69 @@ { "EventCode": "0xD1", "UMask": "0x1", - "BriefDescription": "Retired load uops with L1 cache hits as data sources. (Precise Event - PEBS)", + "BriefDescription": "Retired load uops with L1 cache hits as data sources.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data source were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.", + "PublicDescription": "This event counts retired load uops which data sources were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. 
This event also counts SW prefetches independent of the actual data source.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x2", - "BriefDescription": "Retired load uops with L2 cache hits as data sources. (Precise Event - PEBS)", + "BriefDescription": "Retired load uops with L2 cache hits as data sources.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "Errata": "BDM35", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.", + "PublicDescription": "This event counts retired load uops which data sources were hits in the mid-level (L2) cache.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x4", - "BriefDescription": "Hit in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS)", + "BriefDescription": "Retired load uops which data sources were data hits in L3 without snoops required.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", "Errata": "BDM100", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This event counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x8", - "BriefDescription": "Retired load uops misses in L1 cache as data sources. 
Uses PEBS.", + "BriefDescription": "Retired load uops misses in L1 cache as data sources.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", + "PublicDescription": "This event counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x10", - "BriefDescription": "Retired load uops with L2 cache misses as data sources. Uses PEBS.", + "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.", + "PublicDescription": "This event counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x20", - "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS).", + "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -558,83 +558,84 @@ { "EventCode": "0xD1", "UMask": "0x40", - "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. 
(Precise Event - PEBS)", + "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.", + "PublicDescription": "This event counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x1", - "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. 
(Precise Event - PEBS)", + "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS", "Errata": "BDM100", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", + "PublicDescription": "This event counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x2", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS)", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "Errata": "BDM100", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", + "PublicDescription": "This event counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. 
(Precise Event - PEBS)", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "Errata": "BDM100", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", + "PublicDescription": "This event counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x8", - "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required. (Precise Event - PEBS)", + "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE", "Errata": "BDM100", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This event counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD3", "UMask": "0x1", + "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM", "Errata": "BDE70, BDM100", - "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. 
This is a precise event.", + "PublicDescription": "Retired load uop whose Data Source was: local DRAM either Snoop not needed or Snoop Miss (RspI).", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD3", "UMask": "0x4", - "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)", + "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -646,7 +647,7 @@ { "EventCode": "0xD3", "UMask": "0x10", - "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)", + "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -658,7 +659,7 @@ { "EventCode": "0xD3", "UMask": "0x20", - "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)", + "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -810,12 +811,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all requests that hit in the L3", - "MSRValue": "0x3f803c8fff", + "BriefDescription": "Counts all requests hit in the L3", + "MSRValue": "0x3F803C8FFF", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all requests hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -823,12 +824,12 @@ "Offcore": "1", 
"EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c07f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C07F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -836,12 +837,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c07f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C07F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in 
either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -849,12 +850,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0244", + "BriefDescription": "Counts all demand & prefetch code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0244", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -862,12 +863,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": 
"0x10003c0122", + "BriefDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -875,12 +876,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0122", + "BriefDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs hit in the L3 and the 
snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -888,12 +889,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c0091", + "BriefDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -901,12 +902,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0091", + "BriefDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - 
"PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -914,12 +915,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3", - "MSRValue": "0x3f803c0200", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads hit in the L3", + "MSRValue": "0x3F803C0200", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -927,12 +928,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3", - "MSRValue": "0x3f803c0100", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3", + "MSRValue": "0x3F803C0100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", 
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -940,12 +941,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c0002", + "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -953,12 +954,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3", - "MSRValue": "0x3f803c0002", + "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3", + "MSRValue": 
"0x3F803C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json index d7b9d9c9c518..ba0e0c4e74eb 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json @@ -42,7 +42,7 @@ { "EventCode": "0xC7", "UMask": "0x3", - "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. (RSQRT for single precision?)", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.SCALAR", "SampleAfterValue": "2000003", @@ -51,7 +51,7 @@ { "EventCode": "0xC7", "UMask": "0x4", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. 
Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", "SampleAfterValue": "2000003", @@ -60,7 +60,7 @@ { "EventCode": "0xC7", "UMask": "0x8", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", "SampleAfterValue": "2000003", @@ -69,7 +69,7 @@ { "EventCode": "0xC7", "UMask": "0x10", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. 
Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", "SampleAfterValue": "2000003", @@ -78,7 +78,7 @@ { "EventCode": "0xC7", "UMask": "0x15", - "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", "SampleAfterValue": "2000006", @@ -87,7 +87,7 @@ { "EventCode": "0xc7", "UMask": "0x20", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", "SampleAfterValue": "2000003", @@ -96,7 +96,7 @@ { "EventCode": "0xC7", "UMask": "0x2a", - "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.SINGLE", "SampleAfterValue": "2000005", @@ -105,7 +105,7 @@ { "EventCode": "0xC7", "UMask": "0x3c", - "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. 
Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. (RSQRT for single-precision?)", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.PACKED", "SampleAfterValue": "2000004", diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/memory.json b/tools/perf/pmu-events/arch/x86/broadwellx/memory.json index d79a5cfea44b..ecb413bb67ca 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/memory.json @@ -170,11 +170,11 @@ { "EventCode": "0xc8", "UMask": "0x4", - "BriefDescription": "Number of times HLE abort was triggered (PEBS)", + "BriefDescription": "Number of times HLE abort was triggered", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "HLE_RETIRED.ABORTED", - "PublicDescription": "Number of times HLE abort was triggered (PEBS).", + "PublicDescription": "Number of times HLE abort was triggered.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -251,11 +251,11 @@ { "EventCode": "0xc9", "UMask": "0x4", - "BriefDescription": "Number of times RTM abort was triggered (PEBS)", + "BriefDescription": "Number of times RTM abort was triggered", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "RTM_RETIRED.ABORTED", - "PublicDescription": "Number of times RTM abort was triggered (PEBS).", + "PublicDescription": "Number of times RTM abort was triggered .", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -312,14 +312,14 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 4", + "BriefDescription": "Randomly selected loads with latency value being above 4", "PEBS": "2", "MSRValue": "0x4", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts 
loads with latency value being above four.", + "PublicDescription": "Counts randomly selected loads with latency value being above four.", "TakenAlone": "1", "SampleAfterValue": "100003", "CounterHTOff": "3" @@ -327,14 +327,14 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 8", + "BriefDescription": "Randomly selected loads with latency value being above 8", "PEBS": "2", "MSRValue": "0x8", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts loads with latency value being above eight.", + "PublicDescription": "Counts randomly selected loads with latency value being above eight.", "TakenAlone": "1", "SampleAfterValue": "50021", "CounterHTOff": "3" @@ -342,14 +342,14 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 16", + "BriefDescription": "Randomly selected loads with latency value being above 16", "PEBS": "2", "MSRValue": "0x10", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts loads with latency value being above 16.", + "PublicDescription": "Counts randomly selected loads with latency value being above 16.", "TakenAlone": "1", "SampleAfterValue": "20011", "CounterHTOff": "3" @@ -357,14 +357,14 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 32", + "BriefDescription": "Randomly selected loads with latency value being above 32", "PEBS": "2", "MSRValue": "0x20", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts loads with latency value being above 32.", + "PublicDescription": "Counts randomly selected loads with latency value being above 32.", "TakenAlone": "1", "SampleAfterValue": 
"100007", "CounterHTOff": "3" @@ -372,14 +372,14 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 64", + "BriefDescription": "Randomly selected loads with latency value being above 64", "PEBS": "2", "MSRValue": "0x40", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts loads with latency value being above 64.", + "PublicDescription": "Counts randomly selected loads with latency value being above 64.", "TakenAlone": "1", "SampleAfterValue": "2003", "CounterHTOff": "3" @@ -387,14 +387,14 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 128", + "BriefDescription": "Randomly selected loads with latency value being above 128", "PEBS": "2", "MSRValue": "0x80", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts loads with latency value being above 128.", + "PublicDescription": "Counts randomly selected loads with latency value being above 128.", "TakenAlone": "1", "SampleAfterValue": "1009", "CounterHTOff": "3" @@ -402,14 +402,14 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 256", + "BriefDescription": "Randomly selected loads with latency value being above 256", "PEBS": "2", "MSRValue": "0x100", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts loads with latency value being above 256.", + "PublicDescription": "Counts randomly selected loads with latency value being above 256.", "TakenAlone": "1", "SampleAfterValue": "503", "CounterHTOff": "3" @@ -417,14 +417,14 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 512", + "BriefDescription": 
"Randomly selected loads with latency value being above 512", "PEBS": "2", "MSRValue": "0x200", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts loads with latency value being above 512.", + "PublicDescription": "Counts randomly selected loads with latency value being above 512.", "TakenAlone": "1", "SampleAfterValue": "101", "CounterHTOff": "3" @@ -433,12 +433,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all requests that miss in the L3", - "MSRValue": "0x3fbfc08fff", + "BriefDescription": "Counts all requests miss in the L3", + "MSRValue": "0x3FBFC08FFF", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all requests miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -446,12 +446,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache", - "MSRValue": "0x087fc007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and clean or shared data is transferred from remote cache", + "MSRValue": "0x087FC007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only 
with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and clean or shared data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -459,12 +459,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache", - "MSRValue": "0x103fc007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the modified data is transferred from remote cache", + "MSRValue": "0x103FC007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the modified data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -472,12 +472,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram", - "MSRValue": "0x063bc007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the data is returned from remote dram", + "MSRValue": "0x063BC007F7", "Counter": "0,1,2,3", "EventName": 
"OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the data is returned from remote dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -485,12 +485,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram", - "MSRValue": "0x06040007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the data is returned from local dram", + "MSRValue": "0x06040007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -498,12 +498,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3", - "MSRValue": "0x3fbfc007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss in the L3", + "MSRValue": 
"0x3FBFC007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -511,12 +511,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch code reads miss the L3 and the data is returned from local dram", "MSRValue": "0x0604000244", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -524,12 +524,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch code reads that miss in the L3", - "MSRValue": "0x3fbfc00244", + "BriefDescription": "Counts all demand & prefetch code reads miss in the L3", + "MSRValue": "0x3FBFC00244", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": 
"0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -537,12 +537,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch RFOs miss the L3 and the data is returned from local dram", "MSRValue": "0x0604000122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -550,12 +550,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3", - "MSRValue": "0x3fbfc00122", + "BriefDescription": "Counts all demand & prefetch RFOs miss in the L3", + "MSRValue": "0x3FBFC00122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter 
MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -563,12 +563,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache", - "MSRValue": "0x087fc00091", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and clean or shared data is transferred from remote cache", + "MSRValue": "0x087FC00091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and clean or shared data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -576,12 +576,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache", - "MSRValue": "0x103fc00091", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and the modified data is transferred from remote cache", + "MSRValue": "0x103FC00091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is 
transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and the modified data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -589,12 +589,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram", - "MSRValue": "0x063bc00091", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from remote dram", + "MSRValue": "0x063BC00091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from remote dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -602,12 +602,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from local dram", "MSRValue": "0x0604000091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads 
that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -615,12 +615,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3", - "MSRValue": "0x3fbfc00091", + "BriefDescription": "Counts all demand & prefetch data reads miss in the L3", + "MSRValue": "0x3FBFC00091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -628,12 +628,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3", - "MSRValue": "0x3fbfc00200", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads miss in the L3", + "MSRValue": "0x3FBFC00200", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with 
specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -641,12 +641,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3", - "MSRValue": "0x3fbfc00100", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs miss in the L3", + "MSRValue": "0x3FBFC00100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -654,12 +654,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache", - "MSRValue": "0x103fc00002", + "BriefDescription": "Counts all demand data writes (RFOs) miss the L3 and the modified data is transferred from remote cache", + "MSRValue": "0x103FC00002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine 
mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) miss the L3 and the modified data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -667,12 +667,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3", - "MSRValue": "0x3fbfc00002", + "BriefDescription": "Counts all demand data writes (RFOs) miss in the L3", + "MSRValue": "0x3FBFC00002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json index 0d04bf9db000..c2f6932a5817 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json @@ -1,6 +1,5 @@ [ { - "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", "Counter": "Fixed counter 0", @@ -10,7 +9,6 @@ "CounterHTOff": "Fixed counter 0" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state", "Counter": "Fixed counter 1", @@ -20,7 +18,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "Counter": "Fixed counter 1", @@ 
-30,7 +27,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", "Counter": "Fixed counter 2", @@ -322,7 +318,7 @@ "BriefDescription": "Stalls caused by changing prefix length of the instruction.", "Counter": "0,1,2,3", "EventName": "ILD_STALL.LCP", - "PublicDescription": "This event counts stalls occurred due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", + "PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -786,12 +782,12 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xa2", "UMask": "0x1", "BriefDescription": "Resource-related stall cycles", "Counter": "0,1,2,3", "EventName": "RESOURCE_STALLS.ANY", - "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. 
This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "PublicDescription": "This event counts resource-related stall cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1168,12 +1164,12 @@ { "EventCode": "0xC2", "UMask": "0x1", - "BriefDescription": "Actually retired uops. (Precise Event - PEBS)", + "BriefDescription": "Actually retired uops.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "UOPS_RETIRED.ALL", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.", + "PublicDescription": "This event counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1204,11 +1200,11 @@ { "EventCode": "0xC2", "UMask": "0x2", - "BriefDescription": "Retirement slots used. (Precise Event - PEBS)", + "BriefDescription": "Retirement slots used.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of retirement slots used.", + "PublicDescription": "This event counts the number of retirement slots used.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1266,33 +1262,33 @@ { "EventCode": "0xC4", "UMask": "0x1", - "BriefDescription": "Conditional branch instructions retired. 
(Precise Event - PEBS)", + "BriefDescription": "Conditional branch instructions retired.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.CONDITIONAL", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", + "PublicDescription": "This event counts conditional branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC4", "UMask": "0x2", - "BriefDescription": "Direct and indirect near call instructions retired. (Precise Event - PEBS)", + "BriefDescription": "Direct and indirect near call instructions retired.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_CALL", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", + "PublicDescription": "This event counts both direct and indirect near call instructions retired.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC4", "UMask": "0x2", - "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). (Precise Event - PEBS)", + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect macro near call instructions retired (captured in ring 3).", + "PublicDescription": "This event counts both direct and indirect macro near call instructions retired (captured in ring 3).", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1311,11 +1307,11 @@ { "EventCode": "0xC4", "UMask": "0x8", - "BriefDescription": "Return instructions retired. 
(Precise Event - PEBS)", + "BriefDescription": "Return instructions retired.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", + "PublicDescription": "This event counts return instructions retired.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1332,11 +1328,11 @@ { "EventCode": "0xC4", "UMask": "0x20", - "BriefDescription": "Taken branch instructions retired. (Precise Event - PEBS)", + "BriefDescription": "Taken branch instructions retired.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", + "PublicDescription": "This event counts taken branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1364,11 +1360,11 @@ { "EventCode": "0xC5", "UMask": "0x1", - "BriefDescription": "Mispredicted conditional branch instructions retired. (Precise Event - PEBS)", + "BriefDescription": "Mispredicted conditional branch instructions retired.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.", + "PublicDescription": "This event counts mispredicted conditional branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1386,22 +1382,22 @@ { "EventCode": "0xC5", "UMask": "0x8", - "BriefDescription": "This event counts the number of mispredicted ret instructions retired.(Precise Event)", + "BriefDescription": "This event counts the number of mispredicted ret instructions retired. 
Non PEBS", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.RET", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted return instructions retired.", + "PublicDescription": "This event counts mispredicted return instructions retired.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC5", "UMask": "0x20", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).", + "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index 71e9737f4614..a382b115633d 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -1,164 +1,394 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). 
Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. 
Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. 
Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )", - "MetricGroup": "Frontend", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. 
", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ))", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. 
This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on 
else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address 
Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "Branch_Misprediction_Cost" + }, + { + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts_SMT", + 
"MetricName": "Branch_Misprediction_Cost_SMT" }, { + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" + }, + { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. 
Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / 
sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Access_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + 
FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x35\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )", + "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). 
Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_Lat", + "MetricName": "DRAM_Read_Latency" + }, + { + "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1\\\\\\,config\\=0x40433@", + "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_Parallel_Reads" + }, + { + "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\\\\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ ) if 1 if 1 == 1 else 0 else 0", + "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches", + "MetricGroup": "Memory_Lat", + "MetricName": "MEM_PMM_Read_Latency" + }, + { + "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time ) if 1 if 1 == 1 else 0 else 0", + "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "PMM_Read_BW" + }, + { + "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time ) if 1 if 1 == 1 else 0 else 0", + "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "PMM_Write_BW" + }, + { + "MetricExpr": "cha_0@event\\=0x0@", + "BriefDescription": "Socket actual clocks when any core is active on that socket", + "MetricGroup": "", + "MetricName": "Socket_CLKS" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": 
"C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/goldmont/cache.json b/tools/perf/pmu-events/arch/x86/goldmont/cache.json index f8bbe087b0f8..52a105666afc 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/cache.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/cache.json @@ -77,7 +77,8 @@ "UMask": "0x21", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Locked load uops retired (Precise event capable)" + "BriefDescription": "Locked load uops retired (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -88,7 +89,8 @@ "UMask": "0x41", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)" + 
"BriefDescription": "Load uops retired that split a cache-line (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -99,7 +101,8 @@ "UMask": "0x42", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "SampleAfterValue": "200003", - "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)" + "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -110,7 +113,8 @@ "UMask": "0x43", "EventName": "MEM_UOPS_RETIRED.SPLIT", "SampleAfterValue": "200003", - "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)" + "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -121,7 +125,8 @@ "UMask": "0x81", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired (Precise event capable)" + "BriefDescription": "Load uops retired (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -132,7 +137,8 @@ "UMask": "0x82", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "SampleAfterValue": "200003", - "BriefDescription": "Store uops retired (Precise event capable)" + "BriefDescription": "Store uops retired (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -143,7 +149,8 @@ "UMask": "0x83", "EventName": "MEM_UOPS_RETIRED.ALL", "SampleAfterValue": "200003", - "BriefDescription": "Memory uops retired (Precise event capable)" + "BriefDescription": "Memory uops retired (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -154,7 +161,8 @@ "UMask": "0x1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that hit L1 data cache (Precise event capable)" + "BriefDescription": "Load uops retired that hit L1 data cache (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -165,7 +173,8 @@ "UMask": "0x2", "EventName": 
"MEM_LOAD_UOPS_RETIRED.L2_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that hit L2 (Precise event capable)" + "BriefDescription": "Load uops retired that hit L2 (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -176,7 +185,8 @@ "UMask": "0x8", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)" + "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -187,7 +197,8 @@ "UMask": "0x10", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that missed L2 (Precise event capable)" + "BriefDescription": "Load uops retired that missed L2 (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -198,7 +209,8 @@ "UMask": "0x20", "EventName": "MEM_LOAD_UOPS_RETIRED.HITM", "SampleAfterValue": "200003", - "BriefDescription": "Memory uop retired where cross core or cross module HITM occurred (Precise event capable)" + "BriefDescription": "Memory uop retired where cross core or cross module HITM occurred (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -209,7 +221,8 @@ "UMask": "0x40", "EventName": "MEM_LOAD_UOPS_RETIRED.WCB_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Loads retired that hit WCB (Precise event capable)" + "BriefDescription": "Loads retired that hit WCB (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -220,26 +233,14 @@ "UMask": "0x80", "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Loads retired that came from DRAM (Precise event capable)" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response 
is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x40000032b7 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_READ.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" + "BriefDescription": "Loads retired that came from DRAM (Precise event capable)", + "Data_LA": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x36000032b7 ", + "MSRValue": "0x36000032b7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.ANY", @@ -252,7 +253,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x10000032b7 ", + "MSRValue": "0x10000032b7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.HITM_OTHER_CORE", @@ -265,7 +266,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x04000032b7 ", + "MSRValue": "0x04000032b7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -278,20 +279,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x02000032b7 ", + "MSRValue": "0x02000032b7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x00000432b7 ", + "MSRValue": "0x00000432b7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT", @@ -302,35 +303,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x00000132b7 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_READ.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000000022 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_RFO.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000022 ", + "MSRValue": "0x3600000022", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.ANY", @@ -343,7 +318,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000022 ", + "MSRValue": "0x1000000022", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HITM_OTHER_CORE", @@ -356,7 +331,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000022 ", + "MSRValue": "0x0400000022", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -369,20 +344,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000022 ", + "MSRValue": "0x0200000022", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040022 ", + "MSRValue": "0x0000040022", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT", @@ -393,32 +368,6 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010022 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000003091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600003091", @@ -466,7 +415,7 @@ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data reads (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -484,35 +433,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000013091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads (demand & prefetch) that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000003010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600003010 ", + "MSRValue": "0x3600003010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.ANY", @@ -525,7 +448,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000003010 ", + "MSRValue": "0x1000003010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HITM_OTHER_CORE", @@ -538,7 +461,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400003010 ", + "MSRValue": "0x0400003010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -551,20 +474,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200003010 ", + "MSRValue": "0x0200003010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000043010 ", + "MSRValue": "0x0000043010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_HIT", @@ -575,48 +498,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000013010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts requests to the uncore subsystem that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x3600008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.ANY", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000008000 ", + "MSRValue": "0x1000008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE", @@ -629,7 +513,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400008000 ", + "MSRValue": "0x0400008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -642,20 +526,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200008000 ", + "MSRValue": "0x0200008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts requests to the uncore subsystem that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000048000 ", + "MSRValue": "0x0000048000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT", @@ -668,7 +552,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000018000 ", + "MSRValue": "0x0000018000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE", @@ -679,22 +563,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000004800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600004800 ", + "MSRValue": "0x3600004800", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.ANY", @@ -705,48 +576,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x1000004800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0400004800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0200004800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000044800 ", + "MSRValue": "0x0000044800", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_HIT", @@ -757,35 +589,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000014800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000004000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600004000 ", + "MSRValue": "0x3600004000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.ANY", @@ -798,7 +604,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000004000 ", + "MSRValue": "0x1000004000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", @@ -811,7 +617,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400004000 ", + "MSRValue": "0x0400004000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -824,20 +630,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200004000 ", + "MSRValue": "0x0200004000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. 
", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000044000 ", + "MSRValue": "0x0000044000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_HIT", @@ -848,35 +654,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000014000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000002000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600002000 ", + "MSRValue": "0x3600002000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.ANY", @@ -889,7 +669,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000002000 ", + "MSRValue": "0x1000002000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE", @@ -902,7 +682,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400002000 ", + "MSRValue": "0x0400002000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -915,20 +695,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200002000 ", + "MSRValue": "0x0200002000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000042000 ", + "MSRValue": "0x0000042000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT", @@ -939,35 +719,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000012000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache lines requests by software prefetch instructions that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000001000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600001000 ", + "MSRValue": "0x3600001000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.ANY", @@ -980,7 +734,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000001000 ", + "MSRValue": "0x1000001000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HITM_OTHER_CORE", @@ -993,7 +747,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400001000 ", + "MSRValue": "0x0400001000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -1006,20 +760,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data cache lines requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200001000 ", + "MSRValue": "0x0200001000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts data cache lines requests by software prefetch instructions that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000041000 ", + "MSRValue": "0x0000041000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_HIT", @@ -1030,35 +784,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache lines requests by software prefetch instructions that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000011000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000800 ", + "MSRValue": "0x3600000800", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.ANY", @@ -1071,7 +799,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000800 ", + "MSRValue": "0x1000000800", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", @@ -1084,7 +812,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000800 ", + "MSRValue": "0x0400000800", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -1097,20 +825,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000800 ", + "MSRValue": "0x0200000800", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040800 ", + "MSRValue": "0x0000040800", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_HIT", @@ -1121,100 +849,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts bus lock and split lock requests that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000000400 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x3600000400 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.ANY", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x1000000400 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0400000400 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts bus lock and split lock requests that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0200000400 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that true miss for the L2 cache with a snoop miss in the other processor module. ", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts bus lock and split lock requests that hit the L2 cache. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000040400 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that hit the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000010400 ", + "MSRValue": "0x0000010400", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE", @@ -1225,113 +862,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000000200 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x3600000200 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.ANY", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x1000000200 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0400000200 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0200000200 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000040200 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that hit the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010200 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000100 ", + "MSRValue": "0x3600000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.ANY", @@ -1342,87 +875,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x1000000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0400000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0200000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that true miss for the L2 cache with a snoop miss in the other processor module. ", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that hit the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000080 ", + "MSRValue": "0x3600000080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.ANY", @@ -1433,87 +888,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x1000000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0400000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0200000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that true miss for the L2 cache with a snoop miss in the other processor module. ", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that hit the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000000020 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000020 ", + "MSRValue": "0x3600000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.ANY", @@ -1526,7 +903,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000020 ", + "MSRValue": "0x1000000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HITM_OTHER_CORE", @@ -1539,7 +916,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000020 ", + "MSRValue": "0x0400000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -1552,20 +929,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000020 ", + "MSRValue": "0x0200000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040020 ", + "MSRValue": "0x0000040020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT", @@ -1576,35 +953,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010020 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000000010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000010 ", + "MSRValue": "0x3600000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.ANY", @@ -1617,7 +968,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000010 ", + "MSRValue": "0x1000000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HITM_OTHER_CORE", @@ -1630,7 +981,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000010 ", + "MSRValue": "0x0400000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -1643,20 +994,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000010 ", + "MSRValue": "0x0200000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040010 ", + "MSRValue": "0x0000040010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_HIT", @@ -1667,35 +1018,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000000008 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.COREWB.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000008 ", + "MSRValue": "0x3600000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.ANY", @@ -1708,7 +1033,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000008 ", + "MSRValue": "0x1000000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.HITM_OTHER_CORE", @@ -1721,7 +1046,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000008 ", + "MSRValue": "0x0400000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -1734,20 +1059,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000008 ", + "MSRValue": "0x0200000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040008 ", + "MSRValue": "0x0000040008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L2_HIT", @@ -1758,22 +1083,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010008 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x4000000004 ", + "MSRValue": "0x4000000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING", @@ -1786,7 +1098,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000004 ", + "MSRValue": "0x3600000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.ANY", @@ -1797,22 +1109,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x1000000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000004 ", + "MSRValue": "0x0400000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -1825,20 +1124,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000004 ", + "MSRValue": "0x0200000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040004 ", + "MSRValue": "0x0000040004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT", @@ -1849,22 +1148,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x4000000002 ", + "MSRValue": "0x4000000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING", @@ -1877,7 +1163,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000002 ", + "MSRValue": "0x3600000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.ANY", @@ -1890,7 +1176,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000002 ", + "MSRValue": "0x1000000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HITM_OTHER_CORE", @@ -1903,7 +1189,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000002 ", + "MSRValue": "0x0400000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -1916,20 +1202,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000002 ", + "MSRValue": "0x0200000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that hit the L2 cache. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040002 ", + "MSRValue": "0x0000040002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT", @@ -1940,22 +1226,9 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", "PublicDescription": "Counts demand cacheable data reads of full cache lines that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x4000000001 ", + "MSRValue": "0x4000000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING", @@ -1968,7 +1241,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000001 ", + "MSRValue": "0x3600000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.ANY", @@ -1981,7 +1254,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000001 ", + "MSRValue": "0x1000000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HITM_OTHER_CORE", @@ -1994,7 +1267,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000001 ", + "MSRValue": "0x0400000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -2007,20 +1280,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand cacheable data reads of full cache lines that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000001 ", + "MSRValue": "0x0200000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data reads of full cache lines that true miss for the L2 cache with a snoop miss in the other processor module. 
", + "BriefDescription": "Counts demand cacheable data reads of full cache lines that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts demand cacheable data reads of full cache lines that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040001 ", + "MSRValue": "0x0000040001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT", @@ -2028,18 +1301,5 @@ "SampleAfterValue": "100007", "BriefDescription": "Counts demand cacheable data reads of full cache lines that hit the L2 cache.", "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand cacheable data reads of full cache lines that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data reads of full cache lines that have any transaction responses from the uncore subsystem.", - "Offcore": "1" } ]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmont/memory.json b/tools/perf/pmu-events/arch/x86/goldmont/memory.json index 690cebd12a94..197dc76d49dd 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/memory.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/memory.json @@ -30,265 +30,5 @@ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", "SampleAfterValue": "200003", "BriefDescription": "Machine clears due to memory ordering issue" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x20000032b7 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000022 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000003091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000003010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000004800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000004000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000002000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000001000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000400 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000200 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000020 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000008 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" } ]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json b/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json index 254788af8ab6..6342368accf8 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json @@ -1,7 +1,6 @@ [ { "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0. You cannot collect a PEBs record for this event.", - "EventCode": "0x00", "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", @@ -10,7 +9,6 @@ }, { "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1. You cannot collect a PEBs record for this event.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.CORE", @@ -19,7 +17,6 @@ }, { "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. This event uses fixed counter 2. 
You cannot collect a PEBs record for this event.", - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", @@ -188,7 +185,7 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. Self-modifying code (SMC) causes a severe penalty in all Intel architecture processors.", + "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. Self-modifying code (SMC) causes a severe penalty in all Intel\u00ae architecture processors.", "EventCode": "0xC3", "Counter": "0,1,2,3", "UMask": "0x1", diff --git a/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json b/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json index 9805198d3f5f..343d66bbd777 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json @@ -48,7 +48,8 @@ "UMask": "0x11", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that missed the DTLB (Precise event capable)" + "BriefDescription": "Load uops retired that missed the DTLB (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -59,7 +60,8 @@ "UMask": "0x12", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_STORES", "SampleAfterValue": "200003", - "BriefDescription": "Store uops retired that missed the DTLB (Precise event capable)" + "BriefDescription": "Store uops retired that missed the DTLB (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -70,6 +72,7 @@ "UMask": "0x13", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)" + 
"BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)", + "Data_LA": "1" } ]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json b/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json index b4791b443a66..5a6ac8285ad4 100644 --- a/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json +++ b/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json @@ -92,7 +92,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Locked load uops retired (Precise event capable)" + "BriefDescription": "Locked load uops retired (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -104,7 +105,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)" + "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -116,7 +118,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "SampleAfterValue": "200003", - "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)" + "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -128,7 +131,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT", "SampleAfterValue": "200003", - "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)" + "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -140,7 +144,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired (Precise event capable)" + "BriefDescription": "Load uops retired (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -152,7 +157,8 @@ "PEBScounters": "0,1,2,3", "EventName": 
"MEM_UOPS_RETIRED.ALL_STORES", "SampleAfterValue": "200003", - "BriefDescription": "Store uops retired (Precise event capable)" + "BriefDescription": "Store uops retired (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -164,7 +170,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL", "SampleAfterValue": "200003", - "BriefDescription": "Memory uops retired (Precise event capable)" + "BriefDescription": "Memory uops retired (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -176,7 +183,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that hit L1 data cache (Precise event capable)" + "BriefDescription": "Load uops retired that hit L1 data cache (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -188,7 +196,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that hit L2 (Precise event capable)" + "BriefDescription": "Load uops retired that hit L2 (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -200,7 +209,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)" + "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -212,7 +222,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that missed L2 (Precise event capable)" + "BriefDescription": "Load uops retired that missed L2 (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -224,7 +235,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.HITM", "SampleAfterValue": "200003", - "BriefDescription": "Memory uop retired where 
cross core or cross module HITM occurred (Precise event capable)" + "BriefDescription": "Memory uop retired where cross core or cross module HITM occurred (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -236,7 +248,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.WCB_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Loads retired that hit WCB (Precise event capable)" + "BriefDescription": "Loads retired that hit WCB (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -248,7 +261,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Loads retired that came from DRAM (Precise event capable)" + "BriefDescription": "Loads retired that came from DRAM (Precise event capable)", + "Data_LA": "1" }, { "CollectPEBSRecord": "1", @@ -292,7 +306,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data reads of full cache lines true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts demand cacheable data reads of full cache lines true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -367,7 +381,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line true miss for the L2 cache with a snoop miss in the other processor module. 
", + "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -442,7 +456,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -517,7 +531,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -592,7 +606,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -667,7 +681,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher true miss for the L2 cache with a snoop miss in the other processor module. 
", + "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -742,7 +756,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts bus lock and split lock requests true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -817,7 +831,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -892,7 +906,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -967,7 +981,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module. 
", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -1042,7 +1056,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -1117,7 +1131,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts requests to the uncore subsystem true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -1192,7 +1206,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -1267,7 +1281,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. 
", + "BriefDescription": "Counts data reads (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -1342,7 +1356,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -1417,7 +1431,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json b/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json index ccf1aed69197..e3fa1a0ba71b 100644 --- a/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json @@ -3,7 +3,6 @@ "PEBS": "2", "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0. 
You cannot collect a PEBs record for this event.", - "EventCode": "0x00", "Counter": "Fixed counter 0", "UMask": "0x1", "PEBScounters": "32", @@ -15,7 +14,6 @@ { "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1. You cannot collect a PEBs record for this event.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "PEBScounters": "33", @@ -27,7 +25,6 @@ { "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. This event uses fixed counter 2. You cannot collect a PEBs record for this event.", - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x3", "PEBScounters": "34", @@ -231,7 +228,7 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. Self-modifying code (SMC) causes a severe penalty in all Intel architecture processors.", + "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. 
Self-modifying code (SMC) causes a severe penalty in all Intel\u00ae architecture processors.", "EventCode": "0xC3", "Counter": "0,1,2,3", "UMask": "0x1", diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json b/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json index 0b53a3b0dfb8..0d32fd26ded1 100644 --- a/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json @@ -189,7 +189,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that missed the DTLB (Precise event capable)" + "BriefDescription": "Load uops retired that missed the DTLB (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -201,7 +202,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_STORES", "SampleAfterValue": "200003", - "BriefDescription": "Store uops retired that missed the DTLB (Precise event capable)" + "BriefDescription": "Store uops retired that missed the DTLB (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -213,6 +215,7 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)" + "BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)", + "Data_LA": "1" } ]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/haswell/cache.json b/tools/perf/pmu-events/arch/x86/haswell/cache.json index da4d6ddd4f92..7fb0ad8d8ca1 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/cache.json +++ b/tools/perf/pmu-events/arch/x86/haswell/cache.json @@ -63,10 +63,10 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Demand data read requests that hit L2 cache.", + "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x41", + "UMask": "0xc1", "Errata": "HSD78", "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", "SampleAfterValue": "200003", @@ -77,7 +77,7 @@ "PublicDescription": "Counts the number of store RFO requests that hit the L2 cache.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x42", + "UMask": "0xc2", "EventName": "L2_RQSTS.RFO_HIT", "SampleAfterValue": "200003", "BriefDescription": "RFO requests that hit L2 cache", @@ -87,7 +87,7 @@ "PublicDescription": "Number of instruction fetches that hit the L2 cache.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x44", + "UMask": "0xc4", "EventName": "L2_RQSTS.CODE_RD_HIT", "SampleAfterValue": "200003", "BriefDescription": "L2 cache hits when fetching instructions, code reads.", @@ -97,7 +97,7 @@ "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x50", + "UMask": "0xd0", "EventName": "L2_RQSTS.L2_PF_HIT", "SampleAfterValue": "200003", "BriefDescription": "L2 prefetch requests that hit L2 cache", @@ -610,7 +610,7 @@ "Errata": "HSD29, HSD25, HSM26, HSM30", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. 
", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, @@ -623,7 +623,7 @@ "Errata": "HSD29, HSD25, HSM26, HSM30", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. ", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, @@ -792,7 +792,6 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "", "EventCode": "0xf4", "Counter": "0,1,2,3", "UMask": "0x10", @@ -802,262 +801,262 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all requests hit in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c8fff", + "MSRValue": "0x3F803C8FFF", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.L3_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all requests that hit in the L3", + "BriefDescription": "Counts all requests hit in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "hit in 
the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c07f7", + "MSRValue": "0x10003C07F7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "BriefDescription": "hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c07f7", + "MSRValue": "0x04003C07F7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "BriefDescription": "hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch code reads that hit in the 
L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0244", + "MSRValue": "0x04003C0244", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "BriefDescription": "Counts all demand & prefetch code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0122", + "MSRValue": "0x10003C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - 
"BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "BriefDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0122", + "MSRValue": "0x04003C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "BriefDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + 
"PublicDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0091", + "MSRValue": "0x10003C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "BriefDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0091", + "MSRValue": "0x04003C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "BriefDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoops to sibling cores hit in either E/S state 
and the line is not forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads hit in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0200", + "MSRValue": "0x3F803C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads hit in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0100", + "MSRValue": "0x3F803C0100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all 
prefetch (that bring data to LLC only) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads hit in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0080", + "MSRValue": "0x3F803C0080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads hit in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads hit in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0040", + "MSRValue": "0x3F803C0040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads hit in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 Offcore response 
can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs hit in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0020", + "MSRValue": "0x3F803C0020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs hit in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads hit in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0010", + "MSRValue": "0x3F803C0010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads hit in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific 
event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0004", + "MSRValue": "0x10003C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "BriefDescription": "Counts all demand code reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0004", + "MSRValue": "0x04003C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "BriefDescription": "Counts all demand code reads hit in the L3 and the 
snoops to sibling cores hit in either E/S state and the line is not forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0002", + "MSRValue": "0x10003C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0002", + "MSRValue": 
"0x04003C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0001", + "MSRValue": "0x10003C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "BriefDescription": "Counts demand data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific 
pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0001", + "MSRValue": "0x04003C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "BriefDescription": "Counts demand data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/haswell/floating-point.json b/tools/perf/pmu-events/arch/x86/haswell/floating-point.json index f9843e5a9b42..f5a3beaa19fc 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/haswell/floating-point.json @@ -1,22 +1,26 @@ [ { + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x8", "Errata": "HSD56, HSM57", "EventName": "OTHER_ASSISTS.AVX_TO_SSE", "SampleAfterValue": "100003", - "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.", + "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x10", "Errata": "HSD56, HSM57", "EventName": "OTHER_ASSISTS.SSE_TO_AVX", "SampleAfterValue": "100003", - "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty 
applicable.", + "BriefDescription": "Number of transitions from legacy SSE to AVX-256 when penalty applicable", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -30,53 +34,58 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of X87 FP assists due to output values.", + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "FP_ASSIST.X87_OUTPUT", "SampleAfterValue": "100003", - "BriefDescription": "Number of X87 assists due to output value.", + "BriefDescription": "output - Numeric Overflow, Numeric Underflow, Inexact Result", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of X87 FP assists due to input values.", + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "FP_ASSIST.X87_INPUT", "SampleAfterValue": "100003", - "BriefDescription": "Number of X87 assists due to input value.", + "BriefDescription": "input - Invalid Operation, Denormal Operand, SNaN Operand", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of SIMD FP assists due to output values.", + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "FP_ASSIST.SIMD_OUTPUT", "SampleAfterValue": "100003", - "BriefDescription": "Number of SIMD FP assists due to Output values", + "BriefDescription": "SSE* FP micro-code assist when output value is invalid.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of SIMD FP assists due to input values.", + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "FP_ASSIST.SIMD_INPUT", "SampleAfterValue": "100003", - "BriefDescription": "Number of SIMD FP assists due to input values", + "BriefDescription": "Any input SSE* FP Assist", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles with any input/output SSE* or FP assists.", + "PEBS": "1", 
+ "PublicDescription": "", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x1e", "EventName": "FP_ASSIST.ANY", "SampleAfterValue": "100003", - "BriefDescription": "Cycles with any input/output SSE or FP assist", + "BriefDescription": "Counts any FP_ASSIST umask was incrementing", "CounterMask": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json index 5ab5c78fe580..21b27488b621 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json +++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json @@ -1,158 +1,322 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. 
For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. 
For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. 
For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. 
", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", - "MetricGroup": "Frontend", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. 
This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on 
else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION )) ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" }, { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": 
"CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / cycles", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + 
"BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { - 
"BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": 
"(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/haswell/memory.json b/tools/perf/pmu-events/arch/x86/haswell/memory.json index e5f9fa6655b3..ef13ed88e2ea 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/memory.json +++ b/tools/perf/pmu-events/arch/x86/haswell/memory.json @@ -298,7 +298,7 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", "SampleAfterValue": "100003", - "BriefDescription": "Loads with latency value being above 4.", + "BriefDescription": "Randomly selected loads with latency value being above 4.", "TakenAlone": "1", "CounterHTOff": "3" }, @@ -312,7 +312,7 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", "SampleAfterValue": "50021", - "BriefDescription": "Loads with latency value being above 8.", + "BriefDescription": "Randomly selected loads with latency value being above 8.", "TakenAlone": "1", "CounterHTOff": "3" }, @@ -326,7 +326,7 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", "SampleAfterValue": "20011", - "BriefDescription": "Loads with latency value being above 16.", + "BriefDescription": 
"Randomly selected loads with latency value being above 16.", "TakenAlone": "1", "CounterHTOff": "3" }, @@ -340,7 +340,7 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", "SampleAfterValue": "100003", - "BriefDescription": "Loads with latency value being above 32.", + "BriefDescription": "Randomly selected loads with latency value being above 32.", "TakenAlone": "1", "CounterHTOff": "3" }, @@ -354,7 +354,7 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", "SampleAfterValue": "2003", - "BriefDescription": "Loads with latency value being above 64.", + "BriefDescription": "Randomly selected loads with latency value being above 64.", "TakenAlone": "1", "CounterHTOff": "3" }, @@ -368,7 +368,7 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "SampleAfterValue": "1009", - "BriefDescription": "Loads with latency value being above 128.", + "BriefDescription": "Randomly selected loads with latency value being above 128.", "TakenAlone": "1", "CounterHTOff": "3" }, @@ -382,7 +382,7 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "SampleAfterValue": "503", - "BriefDescription": "Loads with latency value being above 256.", + "BriefDescription": "Randomly selected loads with latency value being above 256.", "TakenAlone": "1", "CounterHTOff": "3" }, @@ -396,280 +396,280 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", "SampleAfterValue": "101", - "BriefDescription": "Loads with latency value being above 512.", + "BriefDescription": "Randomly selected loads with latency value being above 512.", "TakenAlone": "1", "CounterHTOff": "3" }, { - "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": 
"Counts all requests miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc08fff", + "MSRValue": "0x3FFFC08FFF", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all requests that miss in the L3", + "BriefDescription": "Counts all requests miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "miss the L3 and the data is returned from local dram", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01004007f7", + "MSRValue": "0x01004007F7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram", + "BriefDescription": "miss the L3 and the data is returned from local dram", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc007f7", + "MSRValue": "0x3FFFC007F7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.ALL_READS.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3", + "BriefDescription": "miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads miss the L3 and the data is returned from local dram", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400244", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.L3_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch code reads miss the L3 and the data is returned from local dram", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00244", + "MSRValue": "0x3FFFC00244", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - 
"BriefDescription": "Counts all demand & prefetch code reads that miss in the L3", + "BriefDescription": "Counts all demand & prefetch code reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs miss the L3 and the data is returned from local dram", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch RFOs miss the L3 and the data is returned from local dram", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00122", + "MSRValue": "0x3FFFC00122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3", + "BriefDescription": "Counts all demand & prefetch RFOs miss in the L3", "Offcore": 
"1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from local dram", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from local dram", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00091", + "MSRValue": "0x3FFFC00091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3", + "BriefDescription": "Counts all demand & prefetch data reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads 
that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00200", + "MSRValue": "0x3FFFC00200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00100", + "MSRValue": "0x3FFFC00100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00080", + "MSRValue": "0x3FFFC00080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00040", + "MSRValue": "0x3FFFC00040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a 
dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00020", + "MSRValue": "0x3FFFC00020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00010", + "MSRValue": "0x3FFFC00010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads miss the L3 and the 
data is returned from local dram", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand code reads miss the L3 and the data is returned from local dram", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00004", + "MSRValue": "0x3FFFC00004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss in the L3", + "BriefDescription": "Counts all demand code reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) miss the L3 and the data is returned from local dram", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand data writes (RFOs) miss the L3 and the data is returned from local dram", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00002", + "MSRValue": "0x3FFFC00002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3", + "BriefDescription": "Counts all demand data writes (RFOs) miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads miss the L3 and the data is returned from local dram", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": 
"Counts demand data reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts demand data reads miss the L3 and the data is returned from local dram", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00001", + "MSRValue": "0x3FFFC00001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss in the L3", + "BriefDescription": "Counts demand data reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/haswell/pipeline.json b/tools/perf/pmu-events/arch/x86/haswell/pipeline.json index a4dcfce4a512..734d3873729e 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/haswell/pipeline.json @@ -1,7 +1,6 @@ [ { "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. INST_RETIRED.ANY is counted by a designated fixed counter, leaving the programmable counters available for other events. 
Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", - "EventCode": "0x00", "Counter": "Fixed counter 0", "UMask": "0x1", "Errata": "HSD140, HSD143", @@ -12,7 +11,6 @@ }, { "PublicDescription": "This event counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", @@ -21,7 +19,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "AnyThread": "1", @@ -32,7 +29,6 @@ }, { "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state.", - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", @@ -1071,7 +1067,8 @@ "CounterHTOff": "1" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts FP operations retired. 
For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", "EventCode": "0xC0", "Counter": "0,1,2,3", "UMask": "0x2", @@ -1081,13 +1078,13 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of microcode assists invoked by HW upon uop writeback.", + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x40", "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", "SampleAfterValue": "100003", - "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -1102,28 +1099,34 @@ "Data_LA": "1" }, { + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC2", "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "UOPS_RETIRED.STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Cycles no executable uops retired", "CounterMask": "1", "CounterHTOff": "0,1,2,3" }, { + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC2", "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "UOPS_RETIRED.TOTAL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 10 actually retired uops.", + "BriefDescription": "Number of cycles using always true condition applied to PEBS uops retired event.", "CounterMask": "10", "CounterHTOff": "0,1,2,3" }, { + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC2", "Invert": "1", "Counter": "0,1,2,3", @@ -1131,7 +1134,7 @@ "AnyThread": "1", "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Cycles no executable uops retired on core", "CounterMask": "1", "CounterHTOff": "0,1,2,3" }, @@ -1245,13 +1248,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts the 
number of not taken branch instructions retired.", + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "BR_INST_RETIRED.NOT_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch instructions retired.", + "BriefDescription": "Counts all not taken macro branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -1265,13 +1269,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of far branches retired.", + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x40", "EventName": "BR_INST_RETIRED.FAR_BRANCH", "SampleAfterValue": "100003", - "BriefDescription": "Far branch instructions retired.", + "BriefDescription": "Counts the number of far branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { diff --git a/tools/perf/pmu-events/arch/x86/haswellx/cache.json b/tools/perf/pmu-events/arch/x86/haswellx/cache.json index b2fbd617306a..a9e62d4357af 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/cache.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/cache.json @@ -64,18 +64,18 @@ }, { "EventCode": "0x24", - "UMask": "0x41", + "UMask": "0xc1", "BriefDescription": "Demand Data Read requests that hit L2 cache", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", "Errata": "HSD78", - "PublicDescription": "Demand data read requests that hit L2 cache.", + "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x24", - "UMask": "0x42", + "UMask": "0xc2", "BriefDescription": "RFO requests that hit L2 cache", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.RFO_HIT", @@ -85,7 +85,7 @@ }, { "EventCode": "0x24", - "UMask": "0x44", + "UMask": "0xc4", "BriefDescription": "L2 cache hits when fetching instructions, code reads.", "Counter": 
"0,1,2,3", "EventName": "L2_RQSTS.CODE_RD_HIT", @@ -95,7 +95,7 @@ }, { "EventCode": "0x24", - "UMask": "0x50", + "UMask": "0xd0", "BriefDescription": "L2 prefetch requests that hit L2 cache", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.L2_PF_HIT", @@ -416,7 +416,7 @@ { "EventCode": "0xD0", "UMask": "0x11", - "BriefDescription": "Retired load uops that miss the STLB. (precise Event)", + "BriefDescription": "Retired load uops that miss the STLB.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -428,7 +428,7 @@ { "EventCode": "0xD0", "UMask": "0x12", - "BriefDescription": "Retired store uops that miss the STLB. (precise Event)", + "BriefDescription": "Retired store uops that miss the STLB.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -441,7 +441,7 @@ { "EventCode": "0xD0", "UMask": "0x21", - "BriefDescription": "Retired load uops with locked access. (precise Event)", + "BriefDescription": "Retired load uops with locked access.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -453,34 +453,32 @@ { "EventCode": "0xD0", "UMask": "0x41", - "BriefDescription": "Retired load uops that split across a cacheline boundary. (precise Event)", + "BriefDescription": "Retired load uops that split across a cacheline boundary.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "Errata": "HSD29, HSM30", - "PublicDescription": "This event counts load uops retired which had memory addresses spilt across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x42", - "BriefDescription": "Retired store uops that split across a cacheline boundary. 
(precise Event)", + "BriefDescription": "Retired store uops that split across a cacheline boundary.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "Errata": "HSD29, HSM30", "L1_Hit_Indication": "1", - "PublicDescription": "This event counts store uops retired which had memory addresses spilt across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x81", - "BriefDescription": "All retired load uops. (precise Event)", + "BriefDescription": "All retired load uops.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -492,14 +490,13 @@ { "EventCode": "0xD0", "UMask": "0x82", - "BriefDescription": "All retired store uops. (precise Event)", + "BriefDescription": "All retired store uops.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "Errata": "HSD29, HSM30", "L1_Hit_Indication": "1", - "PublicDescription": "This event counts all store uops retired. This is a precise event.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -530,13 +527,13 @@ { "EventCode": "0xD1", "UMask": "0x4", - "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.", + "BriefDescription": "Retired load uops which data sources were data hits in L3 without snoops required.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30", - "PublicDescription": "This event counts retired load uops in which data sources were data hits in the L3 cache without snoops required. This does not include hardware prefetches. 
This is a precise event.", + "PublicDescription": "Retired load uops with L3 cache hits as data sources.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, @@ -549,19 +546,20 @@ "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "Errata": "HSM30", - "PublicDescription": "This event counts retired load uops in which data sources missed in the L1 cache. This does not include hardware prefetches. This is a precise event.", + "PublicDescription": "Retired load uops missed L1 cache as data sources.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x10", - "BriefDescription": "Retired load uops with L2 cache misses as data sources.", + "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "Errata": "HSD29, HSM30", + "PublicDescription": "Retired load uops missed L2. Unknown data source excluded.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, @@ -574,6 +572,7 @@ "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS", "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30", + "PublicDescription": "Retired load uops missed L3. Excludes unknown data source .", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -604,26 +603,24 @@ { "EventCode": "0xD2", "UMask": "0x2", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. ", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "Errata": "HSD29, HSD25, HSM26, HSM30", - "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HIT in an on-pkg core cache. This does not include hardware prefetches. 
This is a precise event.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. ", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "Errata": "HSD29, HSD25, HSM26, HSM30", - "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HITM (hit modified) in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, @@ -642,19 +639,20 @@ { "EventCode": "0xD3", "UMask": "0x1", + "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM", "Errata": "HSD74, HSD29, HSD25, HSM30", - "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.", + "PublicDescription": "This event counts retired load uops where the data came from local DRAM. 
This does not include hardware prefetches.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD3", "UMask": "0x4", - "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)", + "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -666,7 +664,7 @@ { "EventCode": "0xD3", "UMask": "0x10", - "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)", + "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -678,7 +676,7 @@ { "EventCode": "0xD3", "UMask": "0x20", - "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)", + "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -833,7 +831,6 @@ "BriefDescription": "Split locks in SQ", "Counter": "0,1,2,3", "EventName": "SQ_MISC.SPLIT_LOCK", - "PublicDescription": "", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -841,12 +838,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0001", + "BriefDescription": "Counts demand data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -854,12 +851,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c0001", + "BriefDescription": "Counts demand data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -867,12 +864,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0002", + "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoops to sibling 
cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -880,12 +877,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c0002", + "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", 
"SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -893,12 +890,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0004", + "BriefDescription": "Counts all demand code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -906,12 +903,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c0004", + "BriefDescription": "Counts all demand code reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore 
response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -919,12 +916,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3", - "MSRValue": "0x3f803c0010", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads hit in the L3", + "MSRValue": "0x3F803C0010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -932,12 +929,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3", - "MSRValue": "0x3f803c0020", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs hit in the L3", + "MSRValue": "0x3F803C0020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes 
and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -945,12 +942,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3", - "MSRValue": "0x3f803c0040", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads hit in the L3", + "MSRValue": "0x3F803C0040", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -958,12 +955,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3", - "MSRValue": "0x3f803c0080", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads hit in the L3", + "MSRValue": "0x3F803C0080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + 
"PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -971,12 +968,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3", - "MSRValue": "0x3f803c0100", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3", + "MSRValue": "0x3F803C0100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -984,12 +981,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3", - "MSRValue": "0x3f803c0200", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads hit in the L3", + "MSRValue": "0x3F803C0200", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads hit in the L3", "SampleAfterValue": "100003", 
"CounterHTOff": "0,1,2,3" }, @@ -997,12 +994,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0091", + "BriefDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1010,12 +1007,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c0091", + "BriefDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in 
M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1023,12 +1020,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0122", + "BriefDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1036,12 +1033,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c0122", + "BriefDescription": "Counts all 
demand & prefetch RFOs hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1049,12 +1046,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0244", + "BriefDescription": "Counts all demand & prefetch code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0244", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads hit in the L3 and the snoops to sibling 
cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1062,12 +1059,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c07f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C07F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1075,12 +1072,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c07f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C07F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": 
"0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1088,12 +1085,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all requests that hit in the L3", - "MSRValue": "0x3f803c8fff", + "BriefDescription": "Counts all requests hit in the L3", + "MSRValue": "0x3F803C8FFF", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all requests hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json index 5ab5c78fe580..e5aac148c941 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json @@ -1,158 +1,340 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's 
Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This includes slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to mispredicted branches is categorized under the Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This includes slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to mispredicted branches is categorized under the Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. 
Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessarily mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. 
Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessarily mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instructions per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. 
", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", - "MetricGroup": "Frontend", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. 
This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on 
else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION )) ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" }, { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": 
"CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / cycles", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + 
"BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { - 
"BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { + "MetricExpr": "1000000000 * ( cbox@event\\=0x36\\,umask\\=0x3\\,filter_opc\\=0x182@ / cbox@event\\=0x35\\,umask\\=0x3\\,filter_opc\\=0x182@ ) / ( cbox_0@event\\=0x0@ / duration_time )", + "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_Lat", + "MetricName": "DRAM_Read_Latency" + }, + { + "MetricExpr": "cbox@event\\=0x36\\,umask\\=0x3\\,filter_opc\\=0x182@ / cbox@event\\=0x36\\,umask\\=0x3\\,filter_opc\\=0x182\\,thresh\\=1@", + "BriefDescription": "Average number of parallel data read requests to external memory. 
Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_Parallel_Reads" + }, + { + "MetricExpr": "cbox_0@event\\=0x0@", + "BriefDescription": "Socket actual clocks when any core is active on that socket", + "MetricGroup": "", + "MetricName": "Socket_CLKS" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/haswellx/memory.json 
b/tools/perf/pmu-events/arch/x86/haswellx/memory.json index 56b0f24b8029..a42d5ce86b6f 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/memory.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/memory.json @@ -291,7 +291,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 4.", + "BriefDescription": "Randomly selected loads with latency value being above 4.", "PEBS": "2", "MSRValue": "0x4", "Counter": "3", @@ -305,7 +305,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 8.", + "BriefDescription": "Randomly selected loads with latency value being above 8.", "PEBS": "2", "MSRValue": "0x8", "Counter": "3", @@ -319,7 +319,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 16.", + "BriefDescription": "Randomly selected loads with latency value being above 16.", "PEBS": "2", "MSRValue": "0x10", "Counter": "3", @@ -333,7 +333,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 32.", + "BriefDescription": "Randomly selected loads with latency value being above 32.", "PEBS": "2", "MSRValue": "0x20", "Counter": "3", @@ -347,7 +347,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 64.", + "BriefDescription": "Randomly selected loads with latency value being above 64.", "PEBS": "2", "MSRValue": "0x40", "Counter": "3", @@ -361,7 +361,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 128.", + "BriefDescription": "Randomly selected loads with latency value being above 128.", "PEBS": "2", "MSRValue": "0x80", "Counter": "3", @@ -375,7 +375,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 256.", + "BriefDescription": "Randomly selected loads with latency value being above 256.", "PEBS": "2", "MSRValue": "0x100", 
"Counter": "3", @@ -389,7 +389,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 512.", + "BriefDescription": "Randomly selected loads with latency value being above 512.", "PEBS": "2", "MSRValue": "0x200", "Counter": "3", @@ -404,12 +404,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss in the L3", - "MSRValue": "0x3fbfc00001", + "BriefDescription": "Counts demand data reads miss in the L3", + "MSRValue": "0x3FBFC00001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -417,12 +417,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts demand data reads miss the L3 and the data is returned from local dram", "MSRValue": "0x0600400001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", 
"CounterHTOff": "0,1,2,3" }, @@ -430,12 +430,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3", - "MSRValue": "0x3fbfc00002", + "BriefDescription": "Counts all demand data writes (RFOs) miss in the L3", + "MSRValue": "0x3FBFC00002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -443,12 +443,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand data writes (RFOs) miss the L3 and the data is returned from local dram", "MSRValue": "0x0600400002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -456,12 +456,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes 
(RFOs) that miss the L3 and the modified data is transferred from remote cache", - "MSRValue": "0x103fc00002", + "BriefDescription": "Counts all demand data writes (RFOs) miss the L3 and the modified data is transferred from remote cache", + "MSRValue": "0x103FC00002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) miss the L3 and the modified data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -469,12 +469,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss in the L3", - "MSRValue": "0x3fbfc00004", + "BriefDescription": "Counts all demand code reads miss in the L3", + "MSRValue": "0x3FBFC00004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -482,12 +482,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand code 
reads miss the L3 and the data is returned from local dram", "MSRValue": "0x0600400004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -495,12 +495,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3", - "MSRValue": "0x3fbfc00010", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads miss in the L3", + "MSRValue": "0x3FBFC00010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -508,12 +508,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3", - "MSRValue": "0x3fbfc00020", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs miss in the L3", + "MSRValue": "0x3FBFC00020", "Counter": "0,1,2,3", "EventName": 
"OFFCORE_RESPONSE.PF_L2_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -521,12 +521,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3", - "MSRValue": "0x3fbfc00040", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads miss in the L3", + "MSRValue": "0x3FBFC00040", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -534,12 +534,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3", - "MSRValue": "0x3fbfc00080", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads miss in the L3", + "MSRValue": "0x3FBFC00080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts 
all prefetch (that bring data to LLC only) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -547,12 +547,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3", - "MSRValue": "0x3fbfc00100", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs miss in the L3", + "MSRValue": "0x3FBFC00100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -560,12 +560,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3", - "MSRValue": "0x3fbfc00200", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads miss in the L3", + "MSRValue": "0x3FBFC00200", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a 
specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -573,12 +573,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3", - "MSRValue": "0x3fbfc00091", + "BriefDescription": "Counts all demand & prefetch data reads miss in the L3", + "MSRValue": "0x3FBFC00091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -586,12 +586,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from local dram", "MSRValue": "0x0600400091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore 
transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -599,12 +599,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram", - "MSRValue": "0x063f800091", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from remote dram", + "MSRValue": "0x063F800091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from remote dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -612,12 +612,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache", - "MSRValue": "0x103fc00091", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and the modified data is transferred from remote cache", + "MSRValue": "0x103FC00091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event 
codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and the modified data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -625,12 +625,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache", - "MSRValue": "0x083fc00091", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and clean or shared data is transferred from remote cache", + "MSRValue": "0x083FC00091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and clean or shared data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -638,12 +638,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3", - "MSRValue": "0x3fbfc00122", + "BriefDescription": "Counts all demand & prefetch RFOs miss in the L3", + "MSRValue": "0x3FBFC00122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event 
codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -651,12 +651,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch RFOs miss the L3 and the data is returned from local dram", "MSRValue": "0x0600400122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -664,12 +664,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch code reads that miss in the L3", - "MSRValue": "0x3fbfc00244", + "BriefDescription": "Counts all demand & prefetch code reads miss in the L3", + "MSRValue": "0x3FBFC00244", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads miss 
in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -677,12 +677,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch code reads miss the L3 and the data is returned from local dram", "MSRValue": "0x0600400244", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -690,12 +690,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3", - "MSRValue": "0x3fbfc007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss in the L3", + "MSRValue": "0x3FBFC007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -703,12 +703,12 @@ 
"Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram", - "MSRValue": "0x06004007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the data is returned from local dram", + "MSRValue": "0x06004007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -716,12 +716,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram", - "MSRValue": "0x063f8007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the data is returned from remote dram", + "MSRValue": "0x063F8007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss 
the L3 and the data is returned from remote dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -729,12 +729,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache", - "MSRValue": "0x103fc007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the modified data is transferred from remote cache", + "MSRValue": "0x103FC007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the modified data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -742,12 +742,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache", - "MSRValue": "0x083fc007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and clean or shared data is transferred from remote cache", + "MSRValue": "0x083FC007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair 
of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and clean or shared data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -755,12 +755,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all requests that miss in the L3", - "MSRValue": "0x3fbfc08fff", + "BriefDescription": "Counts all requests miss in the L3", + "MSRValue": "0x3FBFC08FFF", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all requests miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json b/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json index 8a18bfe9e3e4..26f2888341ee 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json @@ -1,6 +1,5 @@ [ { - "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", "Counter": "Fixed counter 0", @@ -11,7 +10,6 @@ "CounterHTOff": "Fixed counter 0" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state.", "Counter": "Fixed counter 1", @@ -21,7 +19,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "Counter": 
"Fixed counter 1", @@ -31,7 +28,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", "Counter": "Fixed counter 2", @@ -1098,6 +1094,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "UOPS_RETIRED.ALL", + "PublicDescription": "Counts the number of micro-ops retired. Use Cmask=1 and invert to count active cycles or stalled cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1142,6 +1139,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "PublicDescription": "This event counts the number of retirement slots used each cycle. There are potentially 4 slots that can be used each cycle - meaning, 4 uops or 4 instructions could retire each cycle.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1201,6 +1199,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.CONDITIONAL", + "PublicDescription": "Counts the number of conditional branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1241,6 +1240,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PublicDescription": "Counts the number of near return instructions retired.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1261,6 +1261,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "PublicDescription": "Number of near taken branches retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1312,6 +1313,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "PublicDescription": "Number of near branch instructions retired that were taken but mispredicted.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/cache.json 
b/tools/perf/pmu-events/arch/x86/ivybridge/cache.json index 999a01bc6467..5f6cb2abc384 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/cache.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/cache.json @@ -1012,7 +1012,7 @@ "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address ", + "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address", "CounterHTOff": "0,1,2,3" }, { @@ -1036,7 +1036,7 @@ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data reads ", + "BriefDescription": "Counts all demand data reads", "CounterHTOff": "0,1,2,3" }, { @@ -1048,7 +1048,7 @@ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand rfo's ", + "BriefDescription": "Counts all demand rfo's", "CounterHTOff": "0,1,2,3" }, { @@ -1084,7 +1084,7 @@ "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch prefetch RFOs ", + "BriefDescription": "Counts all demand & prefetch prefetch RFOs", "CounterHTOff": "0,1,2,3" }, { @@ -1096,7 +1096,7 @@ "EventName": "OFFCORE_RESPONSE.ALL_READS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo references (demand & prefetch) ", + "BriefDescription": "Counts all data/code/rfo references (demand & prefetch)", "CounterHTOff": "0,1,2,3" } ]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json index 7c2679514efb..bc4d5fc284a0 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json @@ -1,164 +1,340 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. 
Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. 
This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. 
Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", - "MetricGroup": "Frontend", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. 
", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4 ) )", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. 
This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on 
else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + 
"BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" }, { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. 
Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / cycles", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + 
"MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.LLC_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average 
Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - 
"BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json b/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json index 0afbfd95ea30..2a0aad91d83d 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json @@ -1,6 +1,5 @@ [ { - "EventCode": "0x00", "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", @@ -9,7 +8,6 @@ "CounterHTOff": "Fixed counter 0" }, { - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", @@ -19,7 +17,6 @@ }, { "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "AnyThread": "1", @@ -29,7 +26,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json index 7c2679514efb..f3874b5f9995 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json @@ -1,164 +1,346 @@ [ { - 
"BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. 
For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. 
Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", - "MetricGroup": "Frontend", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. 
", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4 ) )", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. 
This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on 
else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + 
"BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" }, { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. 
Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / cycles", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + 
"MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.LLC_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average 
Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { + "MetricExpr": "cbox_0@event\\=0x0@", + "BriefDescription": "Socket actual clocks when any core is active on that socket", + "MetricGroup": "", + "MetricName": "Socket_CLKS" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": 
"(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json b/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json index 0afbfd95ea30..2a0aad91d83d 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json @@ -1,6 +1,5 @@ [ { - "EventCode": "0x00", "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", @@ -9,7 +8,6 @@ "CounterHTOff": "Fixed counter 0" }, { - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", @@ -19,7 +17,6 @@ }, { "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "AnyThread": "1", @@ -29,7 +26,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/cache.json b/tools/perf/pmu-events/arch/x86/jaketown/cache.json index ee22e4a5e30d..52dc6ef40e63 100644 --- 
a/tools/perf/pmu-events/arch/x86/jaketown/cache.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/cache.json @@ -31,7 +31,7 @@ }, { "PEBS": "1", - "PublicDescription": "This event counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This event counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x41", @@ -42,7 +42,7 @@ }, { "PEBS": "1", - "PublicDescription": "This event counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This event counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x42", @@ -179,7 +179,7 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts L1D data line replacements. Replacements occur when a new line is brought into the cache, causing eviction of a line loaded earlier. ", + "PublicDescription": "This event counts L1D data line replacements. 
Replacements occur when a new line is brought into the cache, causing eviction of a line loaded earlier.", "EventCode": "0x51", "Counter": "0,1,2,3", "UMask": "0x1", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json index fd7d7c438226..98c73e430b05 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json @@ -1,140 +1,232 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. 
Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. 
Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. 
For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", - "MetricGroup": "Frontend", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4 ) )", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": 
"Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( 
FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Average CPU Utilization", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* 
FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { + "MetricExpr": "cbox_0@event\\=0x0@", + "BriefDescription": "Socket actual clocks when any core is active on that socket", + "MetricGroup": "", + "MetricName": "Socket_CLKS" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": 
"Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/jaketown/pipeline.json b/tools/perf/pmu-events/arch/x86/jaketown/pipeline.json index 34a519d9bfa0..783a5b4a67b1 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/pipeline.json @@ -1,7 +1,6 @@ [ { - "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. 
", - "EventCode": "0x00", + "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers.", "Counter": "Fixed counter 1", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", @@ -10,8 +9,7 @@ "CounterHTOff": "Fixed counter 1" }, { - "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ", - "EventCode": "0x00", + "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. 
It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "Counter": "Fixed counter 2", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", @@ -20,8 +18,7 @@ "CounterHTOff": "Fixed counter 2" }, { - "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ", - "EventCode": "0x00", + "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "Counter": "Fixed counter 3", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", @@ -778,7 +775,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. 
The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store. See the table of not supported store forwards in the Intel? 64 and IA-32 Architectures Optimization Reference Manual. The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.", + "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceeding smaller uncompleted store. See the table of not supported store forwards in the Intel? 64 and IA-32 Architectures Optimization Reference Manual. The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.", "EventCode": "0x03", "Counter": "0,1,2,3", "UMask": "0x2", @@ -1098,7 +1095,6 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x2", "AnyThread": "1", diff --git a/tools/perf/pmu-events/arch/x86/knightslanding/cache.json b/tools/perf/pmu-events/arch/x86/knightslanding/cache.json index e434ec723001..e847b0fd696d 100644 --- a/tools/perf/pmu-events/arch/x86/knightslanding/cache.json +++ b/tools/perf/pmu-events/arch/x86/knightslanding/cache.json @@ -32,16 +32,16 @@ "BriefDescription": "Counts the number of L2 cache misses" }, { - "PublicDescription": "This event counts the number of core cycles the fetch stalls because of an icache miss. This is a cumulative count of cycles the NIP stalled for all icache misses. ", + "PublicDescription": "This event counts the number of core cycles the fetch stalls because of an icache miss. 
This is a cumulative count of cycles the NIP stalled for all icache misses.", "EventCode": "0x86", "Counter": "0,1", "UMask": "0x4", "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES", "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of core cycles the fetch stalls because of an icache miss. This is a cummulative count of core cycles the fetch stalled for all icache misses. " + "BriefDescription": "Counts the number of core cycles the fetch stalls because of an icache miss. This is a cummulative count of core cycles the fetch stalled for all icache misses." }, { - "PublicDescription": "This event counts the number of load micro-ops retired that miss in L1 Data cache. Note that prefetch misses will not be counted. ", + "PublicDescription": "This event counts the number of load micro-ops retired that miss in L1 Data cache. Note that prefetch misses will not be counted.", "EventCode": "0x04", "Counter": "0,1", "UMask": "0x1", @@ -115,29 +115,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000070 ", + "MSRValue": "0x4000000070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts any Prefetch requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. 
The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400070 ", + "MSRValue": "0x1000400070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400070 ", + "MSRValue": "0x0800400070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_FAR_TILE_E_F", @@ -148,29 +148,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080070 ", + "MSRValue": "0x1000080070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080070 ", + "MSRValue": "0x0800080070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 
", + "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010070 ", + "MSRValue": "0x0000010070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.ANY_RESPONSE", @@ -181,29 +181,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x40000032f7 ", + "MSRValue": "0x40000032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts any Read request that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x10004032f7 ", + "MSRValue": "0x10004032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x08004032f7 ", + "MSRValue": "0x08004032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_FAR_TILE_E_F", @@ -214,29 +214,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x10000832f7 ", + "MSRValue": "0x10000832f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x08000832f7 ", + "MSRValue": "0x08000832f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 
", + "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x00000132f7 ", + "MSRValue": "0x00000132f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.ANY_RESPONSE", @@ -247,29 +247,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000044 ", + "MSRValue": "0x4000000044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400044 ", + "MSRValue": "0x1000400044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400044 ", + "MSRValue": "0x0800400044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_FAR_TILE_E_F", @@ -280,29 +280,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080044 ", + "MSRValue": "0x1000080044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080044 ", + "MSRValue": "0x0800080044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 
", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010044 ", + "MSRValue": "0x0000010044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.ANY_RESPONSE", @@ -313,29 +313,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000022 ", + "MSRValue": "0x4000000022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts Demand cacheable data write requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400022 ", + "MSRValue": "0x1000400022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400022 ", + "MSRValue": "0x0800400022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_FAR_TILE_E_F", @@ -346,29 +346,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080022 ", + "MSRValue": "0x1000080022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080022 ", + "MSRValue": "0x0800080022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 
", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010022 ", + "MSRValue": "0x0000010022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE", @@ -379,29 +379,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000003091 ", + "MSRValue": "0x4000003091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000403091 ", + "MSRValue": "0x1000403091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800403091 ", + "MSRValue": "0x0800403091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_FAR_TILE_E_F", @@ -412,29 +412,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000083091 ", + "MSRValue": "0x1000083091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800083091 ", + "MSRValue": "0x0800083091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 
", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000013091 ", + "MSRValue": "0x0000013091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE", @@ -445,29 +445,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000008000 ", + "MSRValue": "0x4000008000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts any request that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000408000 ", + "MSRValue": "0x1000408000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800408000 ", + "MSRValue": "0x0800408000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_FAR_TILE_E_F", @@ -478,29 +478,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000088000 ", + "MSRValue": "0x1000088000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800088000 ", + "MSRValue": "0x0800088000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 
", + "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000018000 ", + "MSRValue": "0x0000018000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE", @@ -511,7 +511,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0000014800 ", + "MSRValue": "0x0000014800", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", @@ -522,7 +522,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0000014000 ", + "MSRValue": "0x0000014000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.ANY_RESPONSE", @@ -533,29 +533,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000002000 ", + "MSRValue": "0x4000002000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts L1 data HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000402000 ", + "MSRValue": "0x1000402000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800402000 ", + "MSRValue": "0x0800402000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_FAR_TILE_E_F", @@ -566,29 +566,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000082000 ", + "MSRValue": "0x1000082000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800082000 ", + "MSRValue": "0x0800082000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 
", + "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000012000 ", + "MSRValue": "0x0000012000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE", @@ -599,29 +599,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000001000 ", + "MSRValue": "0x4000001000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts Software Prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000401000 ", + "MSRValue": "0x1000401000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800401000 ", + "MSRValue": "0x0800401000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_FAR_TILE_E_F", @@ -632,29 +632,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000081000 ", + "MSRValue": "0x1000081000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800081000 ", + "MSRValue": "0x0800081000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 
", + "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000011000 ", + "MSRValue": "0x0000011000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.ANY_RESPONSE", @@ -665,7 +665,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0000010800 ", + "MSRValue": "0x0000010800", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.ANY_RESPONSE", @@ -676,29 +676,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000400 ", + "MSRValue": "0x4000000400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts Bus locks and split lock requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400400 ", + "MSRValue": "0x1000400400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400400 ", + "MSRValue": "0x0800400400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_FAR_TILE_E_F", @@ -709,29 +709,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080400 ", + "MSRValue": "0x1000080400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080400 ", + "MSRValue": "0x0800080400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 
", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010400 ", + "MSRValue": "0x0000010400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE", @@ -742,29 +742,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000200 ", + "MSRValue": "0x4000000200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400200 ", + "MSRValue": "0x1000400200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400200 ", + "MSRValue": "0x0800400200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_FAR_TILE_E_F", @@ -775,29 +775,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080200 ", + "MSRValue": "0x1000080200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080200 ", + "MSRValue": "0x0800080200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 
", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010200 ", + "MSRValue": "0x0000010200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.ANY_RESPONSE", @@ -808,18 +808,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000400100 ", + "MSRValue": "0x1000400100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400100 ", + "MSRValue": "0x0800400100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_FAR_TILE_E_F", @@ -830,29 +830,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080100 ", + "MSRValue": "0x1000080100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. 
", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080100 ", + "MSRValue": "0x0800080100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010100 ", + "MSRValue": "0x0000010100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.ANY_RESPONSE", @@ -863,29 +863,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000080 ", + "MSRValue": "0x4000000080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that are outstanding, per weighted cycle, from the time of the request to when any response is received. 
The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400080 ", + "MSRValue": "0x1000400080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400080 ", + "MSRValue": "0x0800400080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_FAR_TILE_E_F", @@ -896,29 +896,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080080 ", + "MSRValue": "0x1000080080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). 
that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080080 ", + "MSRValue": "0x0800080080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010080 ", + "MSRValue": "0x0000010080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.ANY_RESPONSE", @@ -929,29 +929,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000040 ", + "MSRValue": "0x4000000040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts L2 code HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. 
The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400040 ", + "MSRValue": "0x1000400040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400040 ", + "MSRValue": "0x0800400040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_FAR_TILE_E_F", @@ -962,29 +962,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080040 ", + "MSRValue": "0x1000080040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080040 ", + "MSRValue": "0x0800080040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 
", + "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010040 ", + "MSRValue": "0x0000010040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.ANY_RESPONSE", @@ -995,18 +995,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000400020 ", + "MSRValue": "0x1000400020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400020 ", + "MSRValue": "0x0800400020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_FAR_TILE_E_F", @@ -1017,29 +1017,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080020 ", + "MSRValue": "0x1000080020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. 
", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080020 ", + "MSRValue": "0x0800080020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000020020 ", + "MSRValue": "0x0000020020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE", @@ -1050,7 +1050,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0000010020 ", + "MSRValue": "0x0000010020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", @@ -1061,29 +1061,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000004 ", + "MSRValue": "0x4000000004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts demand code reads and prefetch code reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. 
The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400004 ", + "MSRValue": "0x1000400004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400004 ", + "MSRValue": "0x0800400004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_FAR_TILE_E_F", @@ -1094,29 +1094,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080004 ", + "MSRValue": "0x1000080004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. 
", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080004 ", + "MSRValue": "0x0800080004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010004 ", + "MSRValue": "0x0000010004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", @@ -1127,29 +1127,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000002 ", + "MSRValue": "0x4000000002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts Demand cacheable data writes that are outstanding, per weighted cycle, from the time of the request to when any response is received. 
The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400002 ", + "MSRValue": "0x1000400002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400002 ", + "MSRValue": "0x0800400002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_FAR_TILE_E_F", @@ -1160,29 +1160,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080002 ", + "MSRValue": "0x1000080002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. 
", + "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080002 ", + "MSRValue": "0x0800080002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010002 ", + "MSRValue": "0x0000010002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", @@ -1193,29 +1193,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000001 ", + "MSRValue": "0x4000000001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. 
The outstanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400001 ", + "MSRValue": "0x1000400001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400001 ", + "MSRValue": "0x0800400001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_FAR_TILE_E_F", @@ -1226,29 +1226,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080001 ", + "MSRValue": "0x1000080001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. 
", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080001 ", + "MSRValue": "0x0800080001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010001 ", + "MSRValue": "0x0000010001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", @@ -1259,722 +1259,722 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0002000001 ", + "MSRValue": "0x0002000001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000002 ", + "MSRValue": "0x0002000002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its 
own tile's L2 with data in M state ", + "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000004 ", + "MSRValue": "0x0002000004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000020 ", + "MSRValue": "0x0002000020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000080 ", + "MSRValue": "0x0002000080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). 
that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000100 ", + "MSRValue": "0x0002000100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000200 ", + "MSRValue": "0x0002000200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000400 ", + "MSRValue": "0x0002000400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002001000 ", + "MSRValue": "0x0002001000", "Counter": "0,1", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002002000 ", + "MSRValue": "0x0002002000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002008000 ", + "MSRValue": "0x0002008000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002003091 ", + "MSRValue": "0x0002003091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { 
"EventCode": "0xB7", - "MSRValue": "0x0002000022 ", + "MSRValue": "0x0002000022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000044 ", + "MSRValue": "0x0002000044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x00020032f7 ", + "MSRValue": "0x00020032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000070 ", + "MSRValue": "0x0002000070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in M state ", + 
"BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000001 ", + "MSRValue": "0x0004000001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000002 ", + "MSRValue": "0x0004000002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000004 ", + "MSRValue": "0x0004000004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000020 ", + "MSRValue": "0x0004000020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_E", 
"MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000040 ", + "MSRValue": "0x0004000040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts L2 code HW prefetches that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000080 ", + "MSRValue": "0x0004000080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). 
that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000100 ", + "MSRValue": "0x0004000100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000200 ", + "MSRValue": "0x0004000200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000400 ", + "MSRValue": "0x0004000400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004001000 ", + "MSRValue": "0x0004001000", "Counter": "0,1", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004002000 ", + "MSRValue": "0x0004002000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004008000 ", + "MSRValue": "0x0004008000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004003091 ", + "MSRValue": "0x0004003091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { 
"EventCode": "0xB7", - "MSRValue": "0x0004000022 ", + "MSRValue": "0x0004000022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000044 ", + "MSRValue": "0x0004000044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x00040032f7 ", + "MSRValue": "0x00040032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000070 ", + "MSRValue": "0x0004000070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in E state ", + 
"BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000001 ", + "MSRValue": "0x0008000001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000002 ", + "MSRValue": "0x0008000002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000004 ", + "MSRValue": "0x0008000004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000020 ", + "MSRValue": "0x0008000020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_S", 
"MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000080 ", + "MSRValue": "0x0008000080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000100 ", + "MSRValue": "0x0008000100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000200 ", + "MSRValue": "0x0008000200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which 
hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000400 ", + "MSRValue": "0x0008000400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008001000 ", + "MSRValue": "0x0008001000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008002000 ", + "MSRValue": "0x0008002000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008008000 ", + "MSRValue": "0x0008008000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_S", 
"MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008003091 ", + "MSRValue": "0x0008003091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000022 ", + "MSRValue": "0x0008000022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000044 ", + "MSRValue": "0x0008000044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data 
in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x00080032f7 ", + "MSRValue": "0x00080032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000001 ", + "MSRValue": "0x0010000001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000002 ", + "MSRValue": "0x0010000002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000004 ", + "MSRValue": "0x0010000004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit 
its own tile's L2 with data in F state ", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000020 ", + "MSRValue": "0x0010000020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000040 ", + "MSRValue": "0x0010000040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts L2 code HW prefetches that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000080 ", + "MSRValue": "0x0010000080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). 
that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000100 ", + "MSRValue": "0x0010000100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000200 ", + "MSRValue": "0x0010000200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000400 ", + "MSRValue": "0x0010000400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010001000 ", + "MSRValue": "0x0010001000", "Counter": "0,1", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010002000 ", + "MSRValue": "0x0010002000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010008000 ", + "MSRValue": "0x0010008000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010003091 ", + "MSRValue": "0x0010003091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { 
"EventCode": "0xB7", - "MSRValue": "0x0010000022 ", + "MSRValue": "0x0010000022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000044 ", + "MSRValue": "0x0010000044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x00100032f7 ", + "MSRValue": "0x00100032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000070 ", + "MSRValue": "0x0010000070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in F state ", + 
"BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1800180002 ", + "MSRValue": "0x1800180002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_NEAR_TILE", @@ -1985,7 +1985,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180004 ", + "MSRValue": "0x1800180004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_NEAR_TILE", @@ -1996,7 +1996,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180020 ", + "MSRValue": "0x1800180020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_NEAR_TILE", @@ -2007,7 +2007,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180040 ", + "MSRValue": "0x1800180040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_NEAR_TILE", @@ -2018,7 +2018,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180080 ", + "MSRValue": "0x1800180080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_NEAR_TILE", @@ -2029,7 +2029,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180100 ", + "MSRValue": "0x1800180100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_NEAR_TILE", @@ -2040,7 +2040,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180200 ", + "MSRValue": "0x1800180200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_NEAR_TILE", @@ -2051,7 +2051,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180400 ", + "MSRValue": "0x1800180400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_NEAR_TILE", @@ -2062,7 +2062,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800181000 ", + "MSRValue": "0x1800181000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_NEAR_TILE", @@ -2073,7 +2073,7 @@ }, 
{ "EventCode": "0xB7", - "MSRValue": "0x1800182000 ", + "MSRValue": "0x1800182000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_NEAR_TILE", @@ -2084,7 +2084,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800188000 ", + "MSRValue": "0x1800188000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_NEAR_TILE", @@ -2095,7 +2095,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800183091 ", + "MSRValue": "0x1800183091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_NEAR_TILE", @@ -2106,7 +2106,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180022 ", + "MSRValue": "0x1800180022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_NEAR_TILE", @@ -2117,7 +2117,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180044 ", + "MSRValue": "0x1800180044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_NEAR_TILE", @@ -2128,7 +2128,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x18001832f7 ", + "MSRValue": "0x18001832f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_NEAR_TILE", @@ -2139,7 +2139,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180070 ", + "MSRValue": "0x1800180070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_NEAR_TILE", @@ -2150,7 +2150,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400002 ", + "MSRValue": "0x1800400002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_FAR_TILE", @@ -2161,7 +2161,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400004 ", + "MSRValue": "0x1800400004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_FAR_TILE", @@ -2172,7 +2172,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400040 ", + "MSRValue": "0x1800400040", "Counter": "0,1", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_FAR_TILE", @@ -2183,7 +2183,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400080 ", + "MSRValue": "0x1800400080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_FAR_TILE", @@ -2194,7 +2194,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400100 ", + "MSRValue": "0x1800400100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_FAR_TILE", @@ -2205,7 +2205,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400400 ", + "MSRValue": "0x1800400400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_FAR_TILE", @@ -2216,7 +2216,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800401000 ", + "MSRValue": "0x1800401000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_FAR_TILE", @@ -2227,7 +2227,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800402000 ", + "MSRValue": "0x1800402000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_FAR_TILE", @@ -2238,7 +2238,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800408000 ", + "MSRValue": "0x1800408000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_FAR_TILE", @@ -2249,7 +2249,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800403091 ", + "MSRValue": "0x1800403091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_FAR_TILE", @@ -2260,7 +2260,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400022 ", + "MSRValue": "0x1800400022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_FAR_TILE", @@ -2271,7 +2271,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400044 ", + "MSRValue": "0x1800400044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_FAR_TILE", @@ -2282,7 +2282,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x18004032f7 ", + "MSRValue": "0x18004032f7", 
"Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_FAR_TILE", @@ -2293,7 +2293,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400070 ", + "MSRValue": "0x1800400070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_FAR_TILE", diff --git a/tools/perf/pmu-events/arch/x86/knightslanding/memory.json b/tools/perf/pmu-events/arch/x86/knightslanding/memory.json index 700652566200..c6bb16ba0f86 100644 --- a/tools/perf/pmu-events/arch/x86/knightslanding/memory.json +++ b/tools/perf/pmu-events/arch/x86/knightslanding/memory.json @@ -9,18 +9,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400070 ", + "MSRValue": "0x0100400070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts any Prefetch requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200070 ", + "MSRValue": "0x0080200070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.MCDRAM_NEAR", @@ -31,18 +31,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000070 ", + "MSRValue": "0x0101000070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts any Prefetch requests that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800070 ", + "MSRValue": "0x0080800070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.DDR_NEAR", @@ -53,18 +53,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x01004032f7 ", + "MSRValue": "0x01004032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts any Read request that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x00802032f7 ", + "MSRValue": "0x00802032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.MCDRAM_NEAR", @@ -75,18 +75,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x01010032f7 ", + "MSRValue": "0x01010032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts any Read request that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x00808032f7 ", + "MSRValue": "0x00808032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.DDR_NEAR", @@ -97,18 +97,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400044 ", + "MSRValue": "0x0100400044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200044 ", + "MSRValue": "0x0080200044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.MCDRAM_NEAR", @@ -119,18 +119,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000044 ", + "MSRValue": "0x0101000044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800044 ", + "MSRValue": "0x0080800044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.DDR_NEAR", @@ -141,18 +141,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400022 ", + "MSRValue": "0x0100400022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200022 ", + "MSRValue": "0x0080200022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.MCDRAM_NEAR", @@ -163,18 +163,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000022 ", + "MSRValue": "0x0101000022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800022 ", + "MSRValue": "0x0080800022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.DDR_NEAR", @@ -185,18 +185,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100403091 ", + "MSRValue": "0x0100403091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080203091 ", + "MSRValue": "0x0080203091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.MCDRAM_NEAR", @@ -207,18 +207,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101003091 ", + "MSRValue": "0x0101003091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080803091 ", + "MSRValue": "0x0080803091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.DDR_NEAR", @@ -229,18 +229,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100408000 ", + "MSRValue": "0x0100408000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts any request that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080208000 ", + "MSRValue": "0x0080208000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.MCDRAM_NEAR", @@ -251,18 +251,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101008000 ", + "MSRValue": "0x0101008000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts any request that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080808000 ", + "MSRValue": "0x0080808000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.DDR_NEAR", @@ -273,18 +273,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100402000 ", + "MSRValue": "0x0100402000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080202000 ", + "MSRValue": "0x0080202000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.MCDRAM_NEAR", @@ -295,18 +295,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101002000 ", + "MSRValue": "0x0101002000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080802000 ", + "MSRValue": "0x0080802000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.DDR_NEAR", @@ -317,18 +317,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100401000 ", + "MSRValue": "0x0100401000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts Software Prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080201000 ", + "MSRValue": "0x0080201000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.MCDRAM_NEAR", @@ -339,18 +339,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101001000 ", + "MSRValue": "0x0101001000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts Software Prefetches that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080801000 ", + "MSRValue": "0x0080801000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.DDR_NEAR", @@ -361,18 +361,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400400 ", + "MSRValue": "0x0100400400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200400 ", + "MSRValue": "0x0080200400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.MCDRAM_NEAR", @@ -383,18 +383,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000400 ", + "MSRValue": "0x0101000400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800400 ", + "MSRValue": "0x0080800400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.DDR_NEAR", @@ -405,18 +405,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400200 ", + "MSRValue": "0x0100400200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200200 ", + "MSRValue": "0x0080200200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.MCDRAM_NEAR", @@ -427,18 +427,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000200 ", + "MSRValue": "0x0101000200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800200 ", + "MSRValue": "0x0080800200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.DDR_NEAR", @@ -449,18 +449,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400100 ", + "MSRValue": "0x0100400100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.MCDRAM_FAR", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200100 ", + "MSRValue": "0x0080200100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.MCDRAM_NEAR", @@ -471,18 +471,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000100 ", + "MSRValue": "0x0101000100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.DDR_FAR", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800100 ", + "MSRValue": "0x0080800100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.DDR_NEAR", @@ -493,7 +493,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x2000020080 ", + "MSRValue": "0x2000020080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.NON_DRAM", @@ -504,18 +504,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400080 ", + "MSRValue": "0x0100400080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200080 ", + "MSRValue": "0x0080200080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.MCDRAM_NEAR", @@ -526,18 +526,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000080 ", + "MSRValue": "0x0101000080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). 
that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800080 ", + "MSRValue": "0x0080800080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.DDR_NEAR", @@ -548,18 +548,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400040 ", + "MSRValue": "0x0100400040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200040 ", + "MSRValue": "0x0080200040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.MCDRAM_NEAR", @@ -570,18 +570,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000040 ", + "MSRValue": "0x0101000040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800040 ", + "MSRValue": "0x0080800040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.DDR_NEAR", @@ -592,7 +592,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x2000020020 ", + "MSRValue": "0x2000020020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.NON_DRAM", @@ -603,18 +603,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400020 ", + "MSRValue": "0x0100400020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200020 ", + "MSRValue": "0x0080200020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.MCDRAM_NEAR", @@ -625,18 +625,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000020 ", + "MSRValue": "0x0101000020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800020 ", + "MSRValue": "0x0080800020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.DDR_NEAR", @@ -647,18 +647,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400004 ", + "MSRValue": "0x0100400004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200004 ", + "MSRValue": "0x0080200004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.MCDRAM_NEAR", @@ -669,18 +669,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000004 ", + "MSRValue": "0x0101000004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800004 ", + "MSRValue": "0x0080800004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.DDR_NEAR", @@ -691,18 +691,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400002 ", + "MSRValue": "0x0100400002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200002 ", + "MSRValue": "0x0080200002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.MCDRAM_NEAR", @@ -713,18 +713,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000002 ", + "MSRValue": "0x0101000002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800002 ", + "MSRValue": "0x0080800002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.DDR_NEAR", @@ -735,18 +735,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400001 ", + "MSRValue": "0x0100400001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200001 ", + "MSRValue": "0x0080200001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.MCDRAM_NEAR", @@ -757,18 +757,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000001 ", + "MSRValue": "0x0101000001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800001 ", + "MSRValue": "0x0080800001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.DDR_NEAR", @@ -779,7 +779,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600001 ", + "MSRValue": "0x0180600001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.MCDRAM", @@ -790,7 +790,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600002 ", + "MSRValue": "0x0180600002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.MCDRAM", @@ -801,7 +801,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600004 ", + "MSRValue": "0x0180600004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.MCDRAM", @@ -812,7 +812,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600020 ", + "MSRValue": "0x0180600020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.MCDRAM", @@ -823,7 +823,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600080 ", + "MSRValue": "0x0180600080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.MCDRAM", @@ -834,7 +834,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600100 ", + "MSRValue": "0x0180600100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.MCDRAM", @@ -845,7 +845,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600200 ", + "MSRValue": "0x0180600200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.MCDRAM", @@ -856,7 +856,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600400 ", + "MSRValue": "0x0180600400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.MCDRAM", @@ -867,7 +867,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180601000 ", + "MSRValue": "0x0180601000", "Counter": 
"0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.MCDRAM", @@ -878,7 +878,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180608000 ", + "MSRValue": "0x0180608000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.MCDRAM", @@ -889,7 +889,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180603091 ", + "MSRValue": "0x0180603091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.MCDRAM", @@ -900,7 +900,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600022 ", + "MSRValue": "0x0180600022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.MCDRAM", @@ -911,7 +911,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600044 ", + "MSRValue": "0x0180600044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.MCDRAM", @@ -922,7 +922,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x01806032f7 ", + "MSRValue": "0x01806032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.MCDRAM", @@ -933,7 +933,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600070 ", + "MSRValue": "0x0180600070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.MCDRAM", @@ -944,7 +944,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800001 ", + "MSRValue": "0x0181800001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.DDR", @@ -955,7 +955,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800002 ", + "MSRValue": "0x0181800002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.DDR", @@ -966,7 +966,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800004 ", + "MSRValue": "0x0181800004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.DDR", @@ -977,7 +977,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800020 ", + "MSRValue": "0x0181800020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.DDR", @@ -988,7 
+988,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800040 ", + "MSRValue": "0x0181800040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.DDR", @@ -999,7 +999,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800080 ", + "MSRValue": "0x0181800080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.DDR", @@ -1010,7 +1010,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800200 ", + "MSRValue": "0x0181800200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.DDR", @@ -1021,7 +1021,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800400 ", + "MSRValue": "0x0181800400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.DDR", @@ -1032,7 +1032,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181801000 ", + "MSRValue": "0x0181801000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.DDR", @@ -1043,7 +1043,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181802000 ", + "MSRValue": "0x0181802000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.DDR", @@ -1054,7 +1054,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181808000 ", + "MSRValue": "0x0181808000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.DDR", @@ -1065,7 +1065,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181803091 ", + "MSRValue": "0x0181803091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.DDR", @@ -1076,7 +1076,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800022 ", + "MSRValue": "0x0181800022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.DDR", @@ -1087,7 +1087,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800044 ", + "MSRValue": "0x0181800044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.DDR", @@ -1098,7 +1098,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x01818032f7 ", + 
"MSRValue": "0x01818032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.DDR", diff --git a/tools/perf/pmu-events/arch/x86/knightslanding/pipeline.json b/tools/perf/pmu-events/arch/x86/knightslanding/pipeline.json index bb5494cfb5ae..92e4ef2e22c6 100644 --- a/tools/perf/pmu-events/arch/x86/knightslanding/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/knightslanding/pipeline.json @@ -144,7 +144,7 @@ "BriefDescription": "Counts the number of micro-ops retired that are from the complex flows issued by the micro-sequencer (MS)." }, { - "PublicDescription": "This event counts the number of micro-ops (uops) retired. The processor decodes complex macro instructions into a sequence of simpler uops. Most instructions are composed of one or two uops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists. ", + "PublicDescription": "This event counts the number of micro-ops (uops) retired. The processor decodes complex macro instructions into a sequence of simpler uops. Most instructions are composed of one or two uops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists.", "EventCode": "0xC2", "Counter": "0,1", "UMask": "0x10", @@ -218,7 +218,7 @@ "UMask": "0x20", "EventName": "NO_ALLOC_CYCLES.RAT_STALL", "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of core cycles when no micro-ops are allocated and a RATstall (caused by reservation station full) is asserted. " + "BriefDescription": "Counts the number of core cycles when no micro-ops are allocated and a RATstall (caused by reservation station full) is asserted." 
}, { "PublicDescription": "This event counts the number of core cycles when no uops are allocated, the instruction queue is empty and the alloc pipe is stalled waiting for instructions to be fetched.", @@ -251,7 +251,7 @@ "UMask": "0x1f", "EventName": "RS_FULL_STALL.ALL", "SampleAfterValue": "200003", - "BriefDescription": "Counts the total number of core cycles the Alloc pipeline is stalled when any one of the reservation stations is full. " + "BriefDescription": "Counts the total number of core cycles the Alloc pipeline is stalled when any one of the reservation stations is full." }, { "EventCode": "0xC0", @@ -268,11 +268,10 @@ "UMask": "0x1", "EventName": "CYCLES_DIV_BUSY.ALL", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles the number of core cycles when divider is busy. Does not imply a stall waiting for the divider. " + "BriefDescription": "Cycles the number of core cycles when divider is busy. Does not imply a stall waiting for the divider." }, { "PublicDescription": "This event counts the number of instructions that retire. For instructions that consist of multiple micro-ops, this event counts exactly once, as the last micro-op of the instruction retires. The event continues counting while instructions retire, including during interrupt service routines caused by hardware interrupts, faults or traps.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", @@ -296,8 +295,7 @@ "BriefDescription": "Counts the number of unhalted reference clock cycles" }, { - "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. 
When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter\r\n", - "EventCode": "0x00", + "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter", "Counter": "Fixed counter 2", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", @@ -305,7 +303,6 @@ "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles" }, { - "EventCode": "0x00", "Counter": "Fixed counter 3", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", @@ -343,7 +340,7 @@ "UMask": "0x1", "EventName": "RECYCLEQ.LD_BLOCK_ST_FORWARD", "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of occurences a retired load gets blocked because its address partially overlaps with a store ", + "BriefDescription": "Counts the number of occurences a retired load gets blocked because its address partially overlaps with a store", "Data_LA": "1" }, { diff --git a/tools/perf/pmu-events/arch/x86/knightslanding/virtual-memory.json b/tools/perf/pmu-events/arch/x86/knightslanding/virtual-memory.json index f31594507f8c..9e493977771f 100644 --- a/tools/perf/pmu-events/arch/x86/knightslanding/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/knightslanding/virtual-memory.json @@ -36,7 +36,7 @@ "EdgeDetect": "1" }, { - "PublicDescription": "This event counts every cycle when an I-side (walks due to an instruction fetch) page walk is in 
progress. ", + "PublicDescription": "This event counts every cycle when an I-side (walks due to an instruction fetch) page walk is in progress.", "EventCode": "0x05", "Counter": "0,1", "UMask": "0x2", diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/cache.json b/tools/perf/pmu-events/arch/x86/sandybridge/cache.json index 16b04a20bc12..bb79e89c2049 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/cache.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/cache.json @@ -1,207 +1,200 @@ [ { - "PEBS": "1", - "EventCode": "0xD0", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x11", - "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that miss the STLB.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests that hit L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD0", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x12", - "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", - "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that miss the STLB.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x3", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD0", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x21", - "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load uops with locked access.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "L2_RQSTS.RFO_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests that hit L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts line-split load uops retired 
to the architected path. A line split is across 64B cache-line which includes a page split (4K).", - "EventCode": "0xD0", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8", + "EventName": "L2_RQSTS.RFO_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests that miss L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", - "EventCode": "0xD0", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", - "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", - "CounterHTOff": "0,1,2,3" + "UMask": "0xc", + "EventName": "L2_RQSTS.ALL_RFO", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests to L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts the number of load uops retired", - "EventCode": "0xD0", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", - "SampleAfterValue": "2000003", - "BriefDescription": "All retired load uops.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x10", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts the number of store uops retired.", - "EventCode": "0xD0", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x82", - "EventName": "MEM_UOPS_RETIRED.ALL_STORES", - "SampleAfterValue": 
"2000003", - "BriefDescription": "All retired store uops.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x20", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache misses when fetching instructions.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD1", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Retired load uops with L1 cache hits as data sources.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x30", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "SampleAfterValue": "200003", + "BriefDescription": "L2 code requests.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD1", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops with L2 cache hits as data sources.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x40", + "EventName": "L2_RQSTS.PF_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Requests from the L2 hardware prefetchers that hit L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts retired load uops that hit in the last-level (L3) cache without snoops required.", - "EventCode": "0xD1", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT", - "SampleAfterValue": "50021", - "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x80", + "EventName": "L2_RQSTS.PF_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD1", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": 
"0x40", - "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", - "CounterHTOff": "0,1,2,3" + "UMask": "0xc0", + "EventName": "L2_RQSTS.ALL_PF", + "SampleAfterValue": "200003", + "BriefDescription": "Requests from L2 hardware prefetchers.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD2", + "EventCode": "0x27", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache.", - "CounterHTOff": "0,1,2,3" - }, - { - "PEBS": "1", - "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a non-modified state in a neighboring core's private cache (same package). Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line. In this case, a snoop was required, and another L2 had the line in a non-modified state.", - "EventCode": "0xD2", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache.", - "CounterHTOff": "0,1,2,3" + "EventName": "L2_STORE_LOCK_RQSTS.MISS", + "SampleAfterValue": "200003", + "BriefDescription": "RFOs that miss cache lines.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a non-modified state in a neighboring core's private cache (same package). 
Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line. In this case, a snoop was required, and another L2 had the line in a modified state, so the line had to be invalidated in that L2 cache and transferred to the requesting L2.", - "EventCode": "0xD2", + "EventCode": "0x27", "Counter": "0,1,2,3", "UMask": "0x4", - "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC.", - "CounterHTOff": "0,1,2,3" + "EventName": "L2_STORE_LOCK_RQSTS.HIT_E", + "SampleAfterValue": "200003", + "BriefDescription": "RFOs that hit cache lines in E state.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD2", + "EventCode": "0x27", "Counter": "0,1,2,3", "UMask": "0x8", - "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required.", - "CounterHTOff": "0,1,2,3" + "EventName": "L2_STORE_LOCK_RQSTS.HIT_M", + "SampleAfterValue": "200003", + "BriefDescription": "RFOs that hit cache lines in M state.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts retired demand loads that missed the last-level (L3) cache. This means that the load is usually satisfied from memory in a client system or possibly from the remote socket in a server. 
Demand loads are non speculative load uops.", - "EventCode": "0xD4", + "EventCode": "0x27", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load uops with unknown information as data source in cache serviced the load.", - "CounterHTOff": "0,1,2,3" + "UMask": "0xf", + "EventName": "L2_STORE_LOCK_RQSTS.ALL", + "SampleAfterValue": "200003", + "BriefDescription": "RFOs that access cache lines in any state.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts L1D data line replacements. Replacements occur when a new line is brought into the cache, causing eviction of a line loaded earlier. ", - "EventCode": "0x51", + "EventCode": "0x28", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "L1D.REPLACEMENT", - "SampleAfterValue": "2000003", - "BriefDescription": "L1D data line replacements.", + "EventName": "L2_L1D_WB_RQSTS.MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Count the number of modified Lines evicted from L1 and missed L2. 
(Non-rejected WBs from the DCU.).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x51", + "EventCode": "0x28", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "L1D.ALLOCATED_IN_M", - "SampleAfterValue": "2000003", - "BriefDescription": "Allocated L1D data cache lines in M state.", + "EventName": "L2_L1D_WB_RQSTS.HIT_S", + "SampleAfterValue": "200003", + "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in S state.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x51", + "EventCode": "0x28", "Counter": "0,1,2,3", "UMask": "0x4", - "EventName": "L1D.EVICTION", - "SampleAfterValue": "2000003", - "BriefDescription": "L1D data cache lines in M state evicted due to replacement.", + "EventName": "L2_L1D_WB_RQSTS.HIT_E", + "SampleAfterValue": "200003", + "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in E state.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x51", + "EventCode": "0x28", "Counter": "0,1,2,3", "UMask": "0x8", - "EventName": "L1D.ALL_M_REPLACEMENT", - "SampleAfterValue": "2000003", - "BriefDescription": "Cache lines in M state evicted out of L1D due to Snoop HitM or dirty line replacement.", + "EventName": "L2_L1D_WB_RQSTS.HIT_M", + "SampleAfterValue": "200003", + "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in M state.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x28", + "Counter": "0,1,2,3", + "UMask": "0xf", + "EventName": "L2_L1D_WB_RQSTS.ALL", + "SampleAfterValue": "200003", + "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in any state.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x2E", + "Counter": "0,1,2,3", + "UMask": "0x41", + "EventName": "LONGEST_LAT_CACHE.MISS", + "SampleAfterValue": "100003", + "BriefDescription": "Core-originated cacheable demand requests missed LLC.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x2E", + "Counter": "0,1,2,3", + "UMask": 
"0x4f", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "SampleAfterValue": "100003", + "BriefDescription": "Core-originated cacheable demand requests that refer to LLC.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -224,12 +217,61 @@ "CounterHTOff": "2" }, { - "EventCode": "0x63", + "EventCode": "0x48", + "Counter": "2", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "CounterMask": "1", + "CounterHTOff": "2" + }, + { + "EventCode": "0x48", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION", + "EventName": "L1D_PEND_MISS.FB_FULL", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when L1D is locked.", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts L1D data line replacements. 
Replacements occur when a new line is brought into the cache, causing eviction of a line loaded earlier.", + "EventCode": "0x51", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "L1D.REPLACEMENT", + "SampleAfterValue": "2000003", + "BriefDescription": "L1D data line replacements.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x51", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "L1D.ALLOCATED_IN_M", + "SampleAfterValue": "2000003", + "BriefDescription": "Allocated L1D data cache lines in M state.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x51", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "L1D.EVICTION", + "SampleAfterValue": "2000003", + "BriefDescription": "L1D data cache lines in M state evicted due to replacement.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x51", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "L1D.ALL_M_REPLACEMENT", + "SampleAfterValue": "2000003", + "BriefDescription": "Cache lines in M state evicted out of L1D due to Snoop HitM or dirty line replacement.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -254,6 +296,16 @@ { "EventCode": "0x60", "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_C6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", "SampleAfterValue": "2000003", @@ -263,6 +315,16 @@ { "EventCode": "0x60", "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "SampleAfterValue": "2000003", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", + "CounterMask": "1", + "CounterHTOff": 
"0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", "SampleAfterValue": "2000003", @@ -280,6 +342,15 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "EventCode": "0x63", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when L1D is locked.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { "EventCode": "0xB0", "Counter": "0,1,2,3", "UMask": "0x1", @@ -325,148 +396,182 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x24", + "EventCode": "0xBF", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests that hit L2 cache.", + "UMask": "0x5", + "EventName": "L1D_BLOCKS.BANK_CONFLICT_CYCLES", + "SampleAfterValue": "100003", + "BriefDescription": "Cycles when dispatched loads are cancelled due to L1D bank conflicts with other load ports.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x24", + "PEBS": "1", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "L2_RQSTS.RFO_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that hit L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x11", + "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", + "SampleAfterValue": "100003", + "BriefDescription": "Retired load uops that miss the STLB. 
(Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x24", + "PEBS": "1", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "L2_RQSTS.RFO_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that miss L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x12", + "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", + "SampleAfterValue": "100003", + "BriefDescription": "Retired store uops that miss the STLB. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x24", + "PEBS": "1", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x21", + "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", + "SampleAfterValue": "100007", + "BriefDescription": "Retired load uops with locked access. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x24", + "PEBS": "1", + "PublicDescription": "This event counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K). (Precise Event - PEBS)", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache misses when fetching instructions.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x41", + "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", + "SampleAfterValue": "100003", + "BriefDescription": "Retired load uops that split across a cacheline boundary. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x24", + "PEBS": "1", + "PublicDescription": "This event counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K). 
(Precise Event - PEBS)", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "L2_RQSTS.PF_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L2 hardware prefetchers that hit L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x42", + "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", + "SampleAfterValue": "100003", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x24", + "PEBS": "1", + "PublicDescription": "This event counts the number of load uops retired (Precise Event)", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "L2_RQSTS.PF_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x81", + "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", + "SampleAfterValue": "2000003", + "BriefDescription": "All retired load uops. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x27", + "PEBS": "1", + "PublicDescription": "This event counts the number of store uops retired. (Precise Event - PEBS)", + "EventCode": "0xD0", + "Counter": "0,1,2,3", + "UMask": "0x82", + "EventName": "MEM_UOPS_RETIRED.ALL_STORES", + "SampleAfterValue": "2000003", + "BriefDescription": "All retired store uops. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" + }, + { + "PEBS": "1", + "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "L2_STORE_LOCK_RQSTS.MISS", - "SampleAfterValue": "200003", - "BriefDescription": "RFOs that miss cache lines.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", + "SampleAfterValue": "2000003", + "BriefDescription": "Retired load uops with L1 cache hits as data sources. 
(Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x27", + "PEBS": "1", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "L2_STORE_LOCK_RQSTS.HIT_E", - "SampleAfterValue": "200003", - "BriefDescription": "RFOs that hit cache lines in E state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2", + "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", + "SampleAfterValue": "100003", + "BriefDescription": "Retired load uops with L2 cache hits as data sources. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x27", + "PEBS": "1", + "PublicDescription": "This event counts retired load uops that hit in the last-level (L3) cache without snoops required. (Precise Event - PEBS)", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "L2_STORE_LOCK_RQSTS.HIT_M", - "SampleAfterValue": "200003", - "BriefDescription": "RFOs that hit cache lines in M state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4", + "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT", + "SampleAfterValue": "50021", + "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x27", + "PEBS": "1", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0xf", - "EventName": "L2_STORE_LOCK_RQSTS.ALL", - "SampleAfterValue": "200003", - "BriefDescription": "RFOs that access cache lines in any state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40", + "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", + "SampleAfterValue": "100003", + "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. 
(Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x28", + "PEBS": "1", + "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "L2_L1D_WB_RQSTS.MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Count the number of modified Lines evicted from L1 and missed L2. (Non-rejected WBs from the DCU.).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS", + "SampleAfterValue": "20011", + "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x28", + "PEBS": "1", + "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a non-modified state in a neighboring core's private cache (same package). Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line. In this case, a snoop was required, and another L2 had the line in a non-modified state. (Precise Event - PEBS)", + "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "L2_L1D_WB_RQSTS.HIT_S", - "SampleAfterValue": "200003", - "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in S state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT", + "SampleAfterValue": "20011", + "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x28", + "PEBS": "1", + "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a non-modified state in a neighboring core's private cache (same package). 
Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line. In this case, a snoop was required, and another L2 had the line in a modified state, so the line had to be invalidated in that L2 cache and transferred to the requesting L2. (Precise Event - PEBS)", + "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x4", - "EventName": "L2_L1D_WB_RQSTS.HIT_E", - "SampleAfterValue": "200003", - "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in E state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM", + "SampleAfterValue": "20011", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x28", + "PEBS": "1", + "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x8", - "EventName": "L2_L1D_WB_RQSTS.HIT_M", - "SampleAfterValue": "200003", - "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in M state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE", + "SampleAfterValue": "100003", + "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x28", + "PEBS": "1", + "PublicDescription": "This event counts retired demand loads that missed the last-level (L3) cache. This means that the load is usually satisfied from memory in a client system or possibly from the remote socket in a server. Demand loads are non speculative load uops. 
(Precise Event - PEBS)", + "EventCode": "0xD4", "Counter": "0,1,2,3", - "UMask": "0xf", - "EventName": "L2_L1D_WB_RQSTS.ALL", - "SampleAfterValue": "200003", - "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in any state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2", + "EventName": "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS", + "SampleAfterValue": "100007", + "BriefDescription": "Retired load uops with unknown information as data source in cache serviced the load. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xF0", @@ -623,24 +728,6 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x2E", - "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "LONGEST_LAT_CACHE.MISS", - "SampleAfterValue": "100003", - "BriefDescription": "Core-originated cacheable demand requests missed LLC.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x2E", - "Counter": "0,1,2,3", - "UMask": "0x4f", - "EventName": "LONGEST_LAT_CACHE.REFERENCE", - "SampleAfterValue": "100003", - "BriefDescription": "Core-originated cacheable demand requests that refer to LLC.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { "EventCode": "0xF4", "Counter": "0,1,2,3", "UMask": "0x10", @@ -650,93 +737,6 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x3", - "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "L2_RQSTS.ALL_RFO", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests to L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "L2_RQSTS.ALL_CODE_RD", - "SampleAfterValue": "200003", - "BriefDescription": "L2 code requests.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - 
"EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xc0", - "EventName": "L2_RQSTS.ALL_PF", - "SampleAfterValue": "200003", - "BriefDescription": "Requests from L2 hardware prefetchers.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xBF", - "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "L1D_BLOCKS.BANK_CONFLICT_CYCLES", - "SampleAfterValue": "100003", - "BriefDescription": "Cycles when dispatched loads are cancelled due to L1D bank conflicts with other load ports.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_C6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "Counter": "2", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "2" - }, - { - "EventCode": "0x48", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L1D_PEND_MISS.FB_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0244", "Counter": "0,1,2,3", @@ -1825,7 +1825,7 @@ 
"EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = DATA_INTO_CORE and RESPONSE = ANY_RESPONSE", + "BriefDescription": "REQUEST = DATA_INTO_CORE and RESPONSE = ANY_RESPONSE", "CounterHTOff": "0,1,2,3" }, { @@ -1837,7 +1837,7 @@ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_M.HITM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = DEMAND_RFO and RESPONSE = LLC_HIT_M and SNOOP = HITM", + "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = LLC_HIT_M and SNOOP = HITM", "CounterHTOff": "0,1,2,3" }, { @@ -1849,7 +1849,7 @@ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = PF_RFO and RESPONSE = ANY_RESPONSE", + "BriefDescription": "REQUEST = PF_RFO and RESPONSE = ANY_RESPONSE", "CounterHTOff": "0,1,2,3" }, { @@ -1861,7 +1861,7 @@ "EventName": "OFFCORE_RESPONSE.PF_L_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = PF_LLC_DATA_RD and RESPONSE = ANY_RESPONSE", + "BriefDescription": "REQUEST = PF_LLC_DATA_RD and RESPONSE = ANY_RESPONSE", "CounterHTOff": "0,1,2,3" }, { @@ -1873,7 +1873,7 @@ "EventName": "OFFCORE_RESPONSE.PF_L_IFETCH.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = PF_LLC_IFETCH and RESPONSE = ANY_RESPONSE", + "BriefDescription": "REQUEST = PF_LLC_IFETCH and RESPONSE = ANY_RESPONSE", "CounterHTOff": "0,1,2,3" } ]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/floating-point.json b/tools/perf/pmu-events/arch/x86/sandybridge/floating-point.json index 982eda48785e..ce26537c7d47 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/floating-point.json @@ -1,68 +1,5 @@ [ { - "EventCode": "0xC1", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OTHER_ASSISTS.AVX_STORE", - "SampleAfterValue": "100003", - "BriefDescription": "Number of GSSE memory assist for stores. GSSE microcode assist is being invoked whenever the hardware is unable to properly handle GSSE-256b operations.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC1", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OTHER_ASSISTS.AVX_TO_SSE", - "SampleAfterValue": "100003", - "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC1", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "OTHER_ASSISTS.SSE_TO_AVX", - "SampleAfterValue": "100003", - "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xCA", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "FP_ASSIST.X87_OUTPUT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of X87 assists due to output value.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xCA", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "FP_ASSIST.X87_INPUT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of X87 assists due to input value.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xCA", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "FP_ASSIST.SIMD_OUTPUT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of SIMD FP assists due to Output values.", 
- "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xCA", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "FP_ASSIST.SIMD_INPUT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of SIMD FP assists due to input values.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { "EventCode": "0x10", "Counter": "0,1,2,3", "UMask": "0x1", @@ -126,6 +63,69 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "EventCode": "0xC1", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "OTHER_ASSISTS.AVX_STORE", + "SampleAfterValue": "100003", + "BriefDescription": "Number of GSSE memory assist for stores. GSSE microcode assist is being invoked whenever the hardware is unable to properly handle GSSE-256b operations.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC1", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "OTHER_ASSISTS.AVX_TO_SSE", + "SampleAfterValue": "100003", + "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC1", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "OTHER_ASSISTS.SSE_TO_AVX", + "SampleAfterValue": "100003", + "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xCA", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "FP_ASSIST.X87_OUTPUT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of X87 assists due to output value.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xCA", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "FP_ASSIST.X87_INPUT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of X87 assists due to input value.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xCA", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "FP_ASSIST.SIMD_OUTPUT", + "SampleAfterValue": "100003", + "BriefDescription": 
"Number of SIMD FP assists due to Output values.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xCA", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "FP_ASSIST.SIMD_INPUT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of SIMD FP assists due to input values.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x1e", diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/frontend.json b/tools/perf/pmu-events/arch/x86/sandybridge/frontend.json index 1b7b1dd36c68..e58ed14a204c 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/frontend.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/frontend.json @@ -1,24 +1,5 @@ [ { - "EventCode": "0x80", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ICACHE.HIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads. both cacheable and noncacheable, including UC fetches.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of instruction cache, streaming buffer and victim cache misses. 
Counting includes unchacheable accesses.", - "EventCode": "0x80", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "ICACHE.MISSES", - "SampleAfterValue": "200003", - "BriefDescription": "Instruction cache, streaming buffer and victim cache misses.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x2", @@ -39,159 +20,201 @@ { "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_UOPS", + "UMask": "0x4", + "EventName": "IDQ.MITE_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_UOPS", + "UMask": "0x8", + "EventName": "IDQ.DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "IDQ.MS_MITE_UOPS", + "UMask": "0x8", + "EventName": "IDQ.DSB_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_UOPS", + "UMask": "0x10", + "EventName": 
"IDQ.MS_DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance. See the Intel? 64 and IA-32 Architectures Optimization Reference Manual for more information.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_CYCLES", + "UMask": "0x10", + "EventName": "IDQ.MS_DSB_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops not delivered to the back-end per cycle, per thread, when the back-end was not stalled. In the ideal case 4 uops can be delivered each cycle. The event counts the undelivered uops - so if 3 were delivered in one cycle, the counter would be incremented by 1 for that cycle (4 - 3). If the back-end is stalled, the count for this event is not incremented even when uops were not delivered, because the back-end would not have been able to accept them. 
This event is used in determining the front-end bound category of the top-down pipeline slots characterization.", - "EventCode": "0x9C", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", + "UMask": "0x10", + "EdgeDetect": "1", + "EventName": "IDQ.MS_DSB_OCCUR", "SampleAfterValue": "2000003", - "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled .", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequenser (MS) is busy.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x9C", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", + "UMask": "0x18", + "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops.", "CounterMask": "4", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x9C", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", + "UMask": "0x18", + "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xAB", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x1", - 
"EventName": "DSB2MITE_SWITCHES.COUNT", + "UMask": "0x20", + "EventName": "IDQ.MS_MITE_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches.", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the cycles attributed to a switch from the Decoded Stream Buffer (DSB), which holds decoded instructions, to the legacy decode pipeline. It excludes cycles when the back-end cannot accept new micro-ops. The penalty for these switches is potentially several cycles of instruction starvation, where no micro-ops are delivered to the back-end.", - "EventCode": "0xAB", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", + "UMask": "0x24", + "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", + "BriefDescription": "Cycles MITE is delivering 4 Uops.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xAC", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "DSB_FILL.OTHER_CANCEL", + "UMask": "0x24", + "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cases of cancelling valid DSB fill not because of exceeding way limit.", + "BriefDescription": "Cycles MITE is delivering any Uop.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xAC", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "DSB_FILL.EXCEED_DSB_LINES", + "UMask": "0x30", + "EventName": "IDQ.MS_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines.", + "BriefDescription": "Uops 
delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance. See the Intel\u00ae 64 and IA-32 Architectures Optimization Reference Manual for more information.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_CYCLES", + "UMask": "0x30", + "EventName": "IDQ.MS_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_CYCLES", + "UMask": "0x30", + "EdgeDetect": "1", + "EventName": "IDQ.MS_SWITCHES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_CYCLES", + "UMask": "0x3c", + "EventName": "IDQ.MITE_ALL_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", - "CounterMask": "1", + "BriefDescription": "Uops delivered to Instruction Decode 
Queue (IDQ) from MITE path.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x79", + "EventCode": "0x80", "Counter": "0,1,2,3", - "UMask": "0x10", - "EdgeDetect": "1", - "EventName": "IDQ.MS_DSB_OCCUR", + "UMask": "0x1", + "EventName": "ICACHE.HIT", "SampleAfterValue": "2000003", - "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequenser (MS) is busy.", - "CounterMask": "1", + "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads. both cacheable and noncacheable, including UC fetches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "This event counts the number of instruction cache, streaming buffer and victim cache misses. Counting includes unchacheable accesses.", + "EventCode": "0x80", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "ICACHE.MISSES", + "SampleAfterValue": "200003", + "BriefDescription": "Instruction cache, streaming buffer and victim cache misses.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts the number of uops not delivered to the back-end per cycle, per thread, when the back-end was not stalled. In the ideal case 4 uops can be delivered each cycle. The event counts the undelivered uops - so if 3 were delivered in one cycle, the counter would be incremented by 1 for that cycle (4 - 3). If the back-end is stalled, the count for this event is not incremented even when uops were not delivered, because the back-end would not have been able to accept them. 
This event is used in determining the front-end bound category of the top-down pipeline slots characterization.", + "EventCode": "0x9C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled .", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0x9C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0x9C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3" + }, + { "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -223,83 +246,60 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x18", - "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", + "EventCode": "0x9C", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x18", - "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", + "UMask": "0x1", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop.", + "BriefDescription": "Counts cycles FE delivered 4 uops or 
Resource Allocation Table (RAT) was stalling FE.", "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x79", + "EventCode": "0xAB", "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", + "UMask": "0x1", + "EventName": "DSB2MITE_SWITCHES.COUNT", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles MITE is delivering 4 Uops.", - "CounterMask": "4", + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x79", + "PublicDescription": "This event counts the cycles attributed to a switch from the Decoded Stream Buffer (DSB), which holds decoded instructions, to the legacy decode pipeline. It excludes cycles when the back-end cannot accept new micro-ops. The penalty for these switches is potentially several cycles of instruction starvation, where no micro-ops are delivered to the back-end.", + "EventCode": "0xAB", "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", + "UMask": "0x2", + "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles MITE is delivering any Uop.", - "CounterMask": "1", + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xAC", "Counter": "0,1,2,3", - "UMask": "0xa", - "EventName": "DSB_FILL.ALL_CANCEL", + "UMask": "0x2", + "EventName": "DSB_FILL.OTHER_CANCEL", "SampleAfterValue": "2000003", - "BriefDescription": "Cases of cancelling valid Decode Stream Buffer (DSB) fill not because of exceeding way limit.", + "BriefDescription": "Cases of cancelling valid DSB fill not because of exceeding way limit.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x9C", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", - "SampleAfterValue": "2000003", - 
"BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0x79", + "EventCode": "0xAC", "Counter": "0,1,2,3", - "UMask": "0x3c", - "EventName": "IDQ.MITE_ALL_UOPS", + "UMask": "0x8", + "EventName": "DSB_FILL.EXCEED_DSB_LINES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path.", + "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x79", + "EventCode": "0xAC", "Counter": "0,1,2,3", - "UMask": "0x30", - "EdgeDetect": "1", - "EventName": "IDQ.MS_SWITCHES", + "UMask": "0xa", + "EventName": "DSB_FILL.ALL_CANCEL", "SampleAfterValue": "2000003", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "CounterMask": "1", + "BriefDescription": "Cases of cancelling valid Decode Stream Buffer (DSB) fill not because of exceeding way limit.", "CounterHTOff": "0,1,2,3,4,5,6,7" } ]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/memory.json b/tools/perf/pmu-events/arch/x86/sandybridge/memory.json index e6dfa89d00f3..78c1a987f9a2 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/memory.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/memory.json @@ -1,5 +1,32 @@ [ { + "EventCode": "0x05", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "MISALIGN_MEM_REF.LOADS", + "SampleAfterValue": "2000003", + "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x05", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "MISALIGN_MEM_REF.STORES", + "SampleAfterValue": "2000003", + "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xBE", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "PAGE_WALKS.LLC_MISS", + "SampleAfterValue": "100003", + "BriefDescription": "Number of any page walk that had a miss in LLC. Does not necessary cause a SUSPEND.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { "PublicDescription": "This event counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from memory disambiguation, external snoops, or cross SMT-HW-thread snoop (stores) hitting load buffers. Machine clears can have a significant performance impact if they are happening frequently.", "EventCode": "0xC3", "Counter": "0,1,2,3", @@ -126,33 +153,6 @@ "CounterHTOff": "3" }, { - "EventCode": "0xBE", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "PAGE_WALKS.LLC_MISS", - "SampleAfterValue": "100003", - "BriefDescription": "Number of any page walk that had a miss in LLC. 
Does not necessary cause a SUSPEND.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x05", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MISALIGN_MEM_REF.LOADS", - "SampleAfterValue": "2000003", - "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x05", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MISALIGN_MEM_REF.STORES", - "SampleAfterValue": "2000003", - "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { "EventCode": "0xB7, 0xBB", "MSRValue": "0x300400244", "Counter": "0,1,2,3", @@ -367,7 +367,7 @@ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_MISS_LOCAL.DRAM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = ANY_REQUEST and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", + "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", "CounterHTOff": "0,1,2,3" }, { @@ -379,7 +379,7 @@ "EventName": "OFFCORE_RESPONSE.DATA_IN_SOCKET.LLC_MISS_LOCAL.ANY_LLC_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = DATA_IN_SOCKET and RESPONSE = LLC_MISS_LOCAL and SNOOP = ANY_LLC_HIT", + "BriefDescription": "REQUEST = DATA_IN_SOCKET and RESPONSE = LLC_MISS_LOCAL and SNOOP = ANY_LLC_HIT", "CounterHTOff": "0,1,2,3" }, { @@ -391,7 +391,7 @@ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_MISS_LOCAL.DRAM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = DEMAND_IFETCH and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", + "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", "CounterHTOff": "0,1,2,3" }, { @@ -403,7 +403,7 @@ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_MISS_LOCAL.DRAM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = 
PF_DATA_RD and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", + "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", "CounterHTOff": "0,1,2,3" }, { @@ -415,7 +415,7 @@ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_MISS_LOCAL.DRAM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = PF_RFO and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", + "BriefDescription": "REQUEST = PF_RFO and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", "CounterHTOff": "0,1,2,3" }, { @@ -427,7 +427,7 @@ "EventName": "OFFCORE_RESPONSE.PF_L_DATA_RD.LLC_MISS_LOCAL.DRAM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = PF_LLC_DATA_RD and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", + "BriefDescription": "REQUEST = PF_LLC_DATA_RD and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", "CounterHTOff": "0,1,2,3" }, { @@ -439,7 +439,7 @@ "EventName": "OFFCORE_RESPONSE.PF_L_IFETCH.LLC_MISS_LOCAL.DRAM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = PF_LLC_IFETCH and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", + "BriefDescription": "REQUEST = PF_LLC_IFETCH and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", "CounterHTOff": "0,1,2,3" } ]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/other.json b/tools/perf/pmu-events/arch/x86/sandybridge/other.json index 64b195b82c50..874eb40a2e0f 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/other.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/other.json @@ -9,6 +9,15 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "EventCode": "0x4E", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "HW_PRE_REQ.DL1_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Hardware Prefetch requests that miss the L1D cache. This accounts for both L1 streamer and IP-based (IPP) HW prefetchers. A request is being counted each time it access the cache & miss it, including if a block is applicable or if hit the Fill Buffer for .", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { "EventCode": "0x5C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -38,15 +47,6 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x4E", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "HW_PRE_REQ.DL1_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Hardware Prefetch requests that miss the L1D cache. This accounts for both L1 streamer and IP-based (IPP) HW prefetchers. A request is being counted each time it access the cache & miss it, including if a block is applicable or if hit the Fill Buffer for .", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { "EventCode": "0x63", "Counter": "0,1,2,3", "UMask": "0x1", diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/pipeline.json b/tools/perf/pmu-events/arch/x86/sandybridge/pipeline.json index 34a519d9bfa0..b7150f65f16d 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/pipeline.json @@ -1,289 +1,307 @@ [ { - "PublicDescription": "This event counts the number of instructions retired from execution. 
For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. ", - "EventCode": "0x00", - "Counter": "Fixed counter 1", + "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", + "Counter": "Fixed counter 2", + "UMask": "0x3", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the core is not in halt state.", + "CounterHTOff": "Fixed counter 2" + }, + { + "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers.", + "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", "SampleAfterValue": "2000003", "BriefDescription": "Instructions retired from execution.", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { - "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. 
The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ", - "EventCode": "0x00", - "Counter": "Fixed counter 2", + "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", + "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", "SampleAfterValue": "2000003", "BriefDescription": "Core cycles when the thread is not in halt state.", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" }, { - "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. 
This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ", - "EventCode": "0x00", - "Counter": "Fixed counter 3", - "UMask": "0x3", - "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "Counter": "Fixed counter 1", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the core is not in halt state.", - "CounterHTOff": "Fixed counter 3" - }, - { - "EventCode": "0x88", - "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL", - "SampleAfterValue": "200003", - "BriefDescription": "Not taken macro-conditional branches.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x88", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired macro-conditional branches.", + "UMask": "0x1", + "EventName": "LD_BLOCKS.DATA_UNKNOWN", + "SampleAfterValue": "100003", + "BriefDescription": "Loads delayed due to SB blocks, preceding store operations with known addresses but unknown data.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceeding smaller uncompleted store. See the table of not supported store forwards in the Intel\u00ae 64 and IA-32 Architectures Optimization Reference Manual. 
The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0x82", - "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects.", + "UMask": "0x2", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "SampleAfterValue": "100003", + "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0x84", - "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns.", + "UMask": "0x8", + "EventName": "LD_BLOCKS.NO_SR", + "SampleAfterValue": "100003", + "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0x88", - "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic.", + "UMask": "0x10", + "EventName": "LD_BLOCKS.ALL_BLOCK", + "SampleAfterValue": "100003", + "BriefDescription": "Number of cases where any load ends up with a valid block-code written to the load buffer (including blocks due to Memory Order Buffer (MOB), Data Cache Unit (DCU), TLB, but load has no DCU miss).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "PublicDescription": "Aliasing occurs when a load is issued after a store and their memory addresses are offset by 4K. 
This event counts the number of loads that aliased with a preceding store, resulting in an extended address check in the pipeline. The enhanced address check typically has a performance penalty of 5 cycles.", + "EventCode": "0x07", "Counter": "0,1,2,3", - "UMask": "0x90", - "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired direct near calls.", + "UMask": "0x1", + "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS", + "SampleAfterValue": "100003", + "BriefDescription": "False dependencies in MOB due to partial compare.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0x07", "Counter": "0,1,2,3", - "UMask": "0xa0", - "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired indirect calls.", + "UMask": "0x8", + "EventName": "LD_BLOCKS_PARTIAL.ALL_STA_BLOCK", + "SampleAfterValue": "100003", + "BriefDescription": "This event counts the number of times that load operations are temporarily blocked because of older stores, with addresses that are not yet known. A load operation may incur more than one block of this type.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0x0D", "Counter": "0,1,2,3", - "UMask": "0xc1", - "EventName": "BR_INST_EXEC.ALL_CONDITIONAL", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired macro-conditional branches.", + "UMask": "0x3", + "EventName": "INT_MISC.RECOVERY_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. 
whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0x0D", "Counter": "0,1,2,3", - "UMask": "0xc2", - "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.", + "UMask": "0x3", + "EdgeDetect": "1", + "EventName": "INT_MISC.RECOVERY_STALLS_COUNT", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of occurences waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0x0D", "Counter": "0,1,2,3", - "UMask": "0xc4", - "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired indirect branches excluding calls and returns.", + "UMask": "0x3", + "AnyThread": "1", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. 
misprediction or memory nuke).", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0x0D", "Counter": "0,1,2,3", - "UMask": "0xc8", - "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired indirect return branches.", + "UMask": "0x40", + "EventName": "INT_MISC.RAT_STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "PublicDescription": "This event counts the number of Uops issued by the front-end of the pipeilne to the back-end.", + "EventCode": "0x0E", "Counter": "0,1,2,3", - "UMask": "0xd0", - "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired direct near calls.", + "UMask": "0x1", + "EventName": "UOPS_ISSUED.ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x89", + "EventCode": "0x0E", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL", - "SampleAfterValue": "200003", - "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x89", + "EventCode": "0x0E", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL", - "SampleAfterValue": "200003", - 
"BriefDescription": "Taken speculative and retired mispredicted macro conditional branches.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "AnyThread": "1", + "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x89", + "EventCode": "0x14", "Counter": "0,1,2,3", - "UMask": "0x84", - "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.", + "UMask": "0x1", + "EventName": "ARITH.FPU_DIV_ACTIVE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when divider is busy executing divide operations.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x89", + "PublicDescription": "This event counts the number of the divide operations executed.", + "EventCode": "0x14", "Counter": "0,1,2,3", - "UMask": "0x88", - "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "ARITH.FPU_DIV", + "SampleAfterValue": "100003", + "BriefDescription": "Divide operations executed.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x89", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x90", - "EventName": "BR_MISP_EXEC.TAKEN_DIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired mispredicted direct near calls.", + "UMask": "0x0", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "SampleAfterValue": "2000003", + "BriefDescription": "Thread cycles when thread is not in halt state.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - 
"EventCode": "0x89", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0xa0", - "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", + "UMask": "0x0", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x89", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0xc1", - "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired mispredicted macro conditional branches.", + "UMask": "0x1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x89", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0xc4", - "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET", - "SampleAfterValue": "200003", - "BriefDescription": "Mispredicted indirect branches excluding calls and returns.", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x89", + "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0xd0", - "EventName": "BR_MISP_EXEC.ALL_DIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired mispredicted direct near calls.", + "UMask": "0x1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + 
"SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Thread cycles when thread is not in halt state.", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.UOPS", + "UMask": "0x2", + "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of Uops delivered by the LSD.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xA8", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_ACTIVE", + "UMask": "0x2", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "1", + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x87", + "EventCode": "0x4C", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "ILD_STALL.LCP", - "SampleAfterValue": "2000003", - "BriefDescription": "Stalls caused by changing prefix length of the instruction.", + "EventName": "LOAD_HIT_PRE.SW_PF", + "SampleAfterValue": "100003", + "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch.", "CounterHTOff": 
"0,1,2,3,4,5,6,7" }, { - "EventCode": "0x87", + "EventCode": "0x4C", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "ILD_STALL.IQ_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Stall cycles because IQ is full.", + "UMask": "0x2", + "EventName": "LOAD_HIT_PRE.HW_PF", + "SampleAfterValue": "100003", + "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0D", + "EventCode": "0x59", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "INT_MISC.RAT_STALL_CYCLES", + "UMask": "0x20", + "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread.", + "BriefDescription": "Increments the number of flags-merge uops in flight each cycle.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "This event counts the number of cycles spent executing performance-sensitive flags-merging uops. For example, shift CL (merge_arith_flags). For more details, See the Intel\u00ae 64 and IA-32 Architectures Optimization Reference Manual.", "EventCode": "0x59", "Counter": "0,1,2,3", "UMask": "0x20", - "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP", + "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Increments the number of flags-merge uops in flight each cycle.", + "BriefDescription": "Performance sensitive flags-merging uops added by Sandy Bridge u-arch.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles with at least one slow LEA uop being allocated. A uop is generally considered as slow LEA if it has three sources (for example, two sources and immediate) regardless of whether it is a result of LEA instruction or not. 
Examples of the slow LEA uop are or uops with base, index, and offset source operands using base and index reqisters, where base is EBR/RBP/R13, using RIP relative or 16-bit addressing modes. See the Intel? 64 and IA-32 Architectures Optimization Reference Manual for more details about slow LEA instructions.", + "PublicDescription": "This event counts the number of cycles with at least one slow LEA uop being allocated. A uop is generally considered as slow LEA if it has three sources (for example, two sources and immediate) regardless of whether it is a result of LEA instruction or not. Examples of the slow LEA uop are or uops with base, index, and offset source operands using base and index reqisters, where base is EBR/RBP/R13, using RIP relative or 16-bit addressing modes. See the Intel\u00ae 64 and IA-32 Architectures Optimization Reference Manual for more details about slow LEA instructions.", "EventCode": "0x59", "Counter": "0,1,2,3", "UMask": "0x40", @@ -302,48 +320,21 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "RESOURCE_STALLS.ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Resource-related stall cycles.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA2", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "RESOURCE_STALLS.LB", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts the cycles of stall due to lack of load buffers.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA2", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "RESOURCE_STALLS.RS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to no eligible RS entry available.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA2", + "EventCode": "0x5B", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "RESOURCE_STALLS.SB", + "UMask": "0xc", + "EventName": "RESOURCE_STALLS2.ALL_FL_EMPTY", "SampleAfterValue": 
"2000003", - "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "BriefDescription": "Cycles with either free list is empty.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0x5B", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "RESOURCE_STALLS.ROB", + "UMask": "0xf", + "EventName": "RESOURCE_STALLS2.ALL_PRF_CONTROL", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to re-order buffer full.", + "BriefDescription": "Resource stalls2 control structures full for physical registers.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -356,702 +347,663 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of Uops issued by the front-end of the pipeilne to the back-end.", - "EventCode": "0x0E", + "EventCode": "0x5B", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_ISSUED.ANY", + "UMask": "0x4f", + "EventName": "RESOURCE_STALLS2.OOO_RSRC", "SampleAfterValue": "2000003", - "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS).", + "BriefDescription": "Resource stalls out of order resources full.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0E", - "Invert": "1", + "EventCode": "0x5E", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_ISSUED.STALL_CYCLES", + "EventName": "RS_EVENTS.EMPTY_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0E", + "EventCode": "0x5E", "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", - "AnyThread": "1", - "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES", + "EdgeDetect": "1", + "EventName": 
"RS_EVENTS.EMPTY_END", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x5E", + "EventCode": "0x87", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "RS_EVENTS.EMPTY_CYCLES", + "EventName": "ILD_STALL.LCP", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.", + "BriefDescription": "Stalls caused by changing prefix length of the instruction.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xCC", + "EventCode": "0x87", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "UMask": "0x4", + "EventName": "ILD_STALL.IQ_FULL", "SampleAfterValue": "2000003", - "BriefDescription": "Count cases of saving new LBR.", + "BriefDescription": "Stall cycles because IQ is full.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event is incremented when self-modifying code (SMC) is detected, which causes a machine clear. 
Machine clears can have a significant performance impact if they are happening frequently.", - "EventCode": "0xC3", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MACHINE_CLEARS.SMC", - "SampleAfterValue": "100003", - "BriefDescription": "Self-modifying code (SMC) detected.", + "UMask": "0x41", + "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL", + "SampleAfterValue": "200003", + "BriefDescription": "Not taken macro-conditional branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", - "EventCode": "0xC3", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "MACHINE_CLEARS.MASKMOV", - "SampleAfterValue": "100003", - "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", + "UMask": "0x81", + "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired macro-conditional branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC0", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "INST_RETIRED.ANY_P", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of instructions retired. 
General Counter - architectural event.", + "UMask": "0x82", + "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts the number of micro-ops retired.", - "EventCode": "0xC2", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_RETIRED.ALL", - "SampleAfterValue": "2000003", - "BriefDescription": "Actually retired uops.", + "UMask": "0x84", + "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts the number of retirement slots used each cycle. There are potentially 4 slots that can be used each cycle - meaning, 4 micro-ops or 4 instructions could retire each cycle. 
This event is used in determining the 'Retiring' category of the Top-Down pipeline slots characterization.", - "EventCode": "0xC2", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "SampleAfterValue": "2000003", - "BriefDescription": "Retirement slots used.", + "UMask": "0x88", + "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC2", - "Invert": "1", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_RETIRED.STALL_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles without actually retired uops.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x90", + "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired direct near calls.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC2", - "Invert": "1", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 10 actually retired uops.", - "CounterMask": "10", - "CounterHTOff": "0,1,2,3" + "UMask": "0xa0", + "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired indirect calls.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC4", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BR_INST_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Conditional branch instructions retired.", + "UMask": "0xc1", + "EventName": "BR_INST_EXEC.ALL_CONDITIONAL", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired 
macro-conditional branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC4", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "SampleAfterValue": "100007", - "BriefDescription": "Direct and indirect near call instructions retired.", + "UMask": "0xc2", + "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC4", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", + "UMask": "0xc4", + "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired indirect branches excluding calls and returns.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC4", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "SampleAfterValue": "100007", - "BriefDescription": "Return instructions retired.", + "UMask": "0xc8", + "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired indirect return branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC4", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "BR_INST_RETIRED.NOT_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch instructions retired.", + "UMask": "0xd0", + "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired direct near calls.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC4", + "EventCode": "0x88", 
"Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Taken branch instructions retired.", + "UMask": "0xff", + "EventName": "BR_INST_EXEC.ALL_BRANCHES", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC4", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "SampleAfterValue": "100007", - "BriefDescription": "Far branch instructions retired.", + "UMask": "0x41", + "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL", + "SampleAfterValue": "200003", + "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "2", - "EventCode": "0xC4", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired. 
(Precise Event - PEBS).", - "CounterHTOff": "0,1,2,3" + "UMask": "0x81", + "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC5", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted conditional branch instructions retired.", + "UMask": "0x84", + "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC5", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "BR_MISP_RETIRED.NEAR_CALL", - "SampleAfterValue": "100007", - "BriefDescription": "Direct and indirect mispredicted near call instructions retired.", + "UMask": "0x88", + "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC5", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All mispredicted macro branch instructions retired.", + "UMask": "0x90", + "EventName": "BR_MISP_EXEC.TAKEN_DIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired mispredicted direct near calls.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC5", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "BR_MISP_RETIRED.NOT_TAKEN", - "SampleAfterValue": "400009", - 
"BriefDescription": "Mispredicted not taken branch instructions retired.", + "UMask": "0xa0", + "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC5", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_MISP_RETIRED.TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted taken branch instructions retired.", + "UMask": "0xc1", + "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired mispredicted macro conditional branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "2", - "PublicDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)", - "EventCode": "0xC5", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted macro branch instructions retired. 
(Precise Event - PEBS).", - "CounterHTOff": "0,1,2,3" + "UMask": "0xc4", + "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET", + "SampleAfterValue": "200003", + "BriefDescription": "Mispredicted indirect branches excluding calls and returns.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC1", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OTHER_ASSISTS.ITLB_MISS_RETIRED", - "SampleAfterValue": "100003", - "BriefDescription": "Retired instructions experiencing ITLB misses.", + "UMask": "0xd0", + "EventName": "BR_MISP_EXEC.ALL_DIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired mispredicted direct near calls.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x14", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ARITH.FPU_DIV_ACTIVE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when divider is busy executing divide operations.", + "UMask": "0xff", + "EventName": "BR_MISP_EXEC.ALL_BRANCHES", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired mispredicted macro conditional branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of the divide operations executed.", - "EventCode": "0x14", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "ARITH.FPU_DIV", - "SampleAfterValue": "100003", - "BriefDescription": "Divide operations executed.", - "CounterMask": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are dispatched to port 0.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_DISPATCHED.THREAD", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE", "SampleAfterValue": "2000003", - "BriefDescription": 
"Uops dispatched per thread.", + "BriefDescription": "Cycles per core when uops are dispatched to port 0.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "UOPS_DISPATCHED.CORE", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", "SampleAfterValue": "2000003", - "BriefDescription": "Uops dispatched from any thread.", + "BriefDescription": "Cycles per thread when uops are dispatched to port 1.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_0", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 0.", + "BriefDescription": "Cycles per core when uops are dispatched to port 1.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "UMask": "0xc", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 1.", + "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "UMask": "0xc", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 4.", + "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 2.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "UMask": "0x30", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", "SampleAfterValue": "2000003", - 
"BriefDescription": "Cycles per thread when uops are dispatched to port 5.", + "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "CYCLE_ACTIVITY.CYCLES_NO_DISPATCH", + "UMask": "0x30", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Each cycle there was no dispatch for this thread, increment by 1. Note this is connect to Umask 2. No dispatch can be deduced from the UOPS_EXECUTED event.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0x2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x40", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", "SampleAfterValue": "2000003", - "BriefDescription": "Each cycle there was a miss-pending demand load this thread, increment by 1. Note this is in DCU and connected to Umask 1. Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.", - "CounterMask": "2", - "CounterHTOff": "2" + "BriefDescription": "Cycles per thread when uops are dispatched to port 4.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", + "UMask": "0x40", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Each cycle there was a MLC-miss pending demand load this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. 
Note this is in MLC and connected to Umask 0.", - "CounterMask": "1", + "BriefDescription": "Cycles per core when uops are dispatched to port 4.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0x6", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x80", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", "SampleAfterValue": "2000003", - "BriefDescription": "Each cycle there was a miss-pending demand load this thread and no uops dispatched, increment by 1. Note this is in DCU and connected to Umask 1 and 2. Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.", - "CounterMask": "6", - "CounterHTOff": "2" + "BriefDescription": "Cycles per thread when uops are dispatched to port 5.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", + "UMask": "0x80", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Each cycle there was a MLC-miss pending demand load and no uops dispatched on this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. 
Note this is in MLC and connected to Umask 0 and 2.", - "CounterMask": "5", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles per core when uops are dispatched to port 5.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x4C", + "EventCode": "0xA2", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "LOAD_HIT_PRE.SW_PF", - "SampleAfterValue": "100003", - "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch.", + "EventName": "RESOURCE_STALLS.ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Resource-related stall cycles.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x4C", + "EventCode": "0xA2", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "LOAD_HIT_PRE.HW_PF", - "SampleAfterValue": "100003", - "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x03", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LD_BLOCKS.DATA_UNKNOWN", - "SampleAfterValue": "100003", - "BriefDescription": "Loads delayed due to SB blocks, preceding store operations with known addresses but unknown data.", + "EventName": "RESOURCE_STALLS.LB", + "SampleAfterValue": "2000003", + "BriefDescription": "Counts the cycles of stall due to lack of load buffers.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store. See the table of not supported store forwards in the Intel? 64 and IA-32 Architectures Optimization Reference Manual. 
The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.", - "EventCode": "0x03", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "LD_BLOCKS.STORE_FORWARD", - "SampleAfterValue": "100003", - "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding.", + "UMask": "0x4", + "EventName": "RESOURCE_STALLS.RS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles stalled due to no eligible RS entry available.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x03", + "EventCode": "0xA2", "Counter": "0,1,2,3", "UMask": "0x8", - "EventName": "LD_BLOCKS.NO_SR", - "SampleAfterValue": "100003", - "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", + "EventName": "RESOURCE_STALLS.SB", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x03", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "LD_BLOCKS.ALL_BLOCK", - "SampleAfterValue": "100003", - "BriefDescription": "Number of cases where any load ends up with a valid block-code written to the load buffer (including blocks due to Memory Order Buffer (MOB), Data Cache Unit (DCU), TLB, but load has no DCU miss).", + "UMask": "0xa", + "EventName": "RESOURCE_STALLS.LB_SB", + "SampleAfterValue": "2000003", + "BriefDescription": "Resource stalls due to load or store buffers all being in use.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Aliasing occurs when a load is issued after a store and their memory addresses are offset by 4K. This event counts the number of loads that aliased with a preceding store, resulting in an extended address check in the pipeline. 
The enhanced address check typically has a performance penalty of 5 cycles.", - "EventCode": "0x07", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS", - "SampleAfterValue": "100003", - "BriefDescription": "False dependencies in MOB due to partial compare.", + "UMask": "0xe", + "EventName": "RESOURCE_STALLS.MEM_RS", + "SampleAfterValue": "2000003", + "BriefDescription": "Resource stalls due to memory buffers or Reservation Station (RS) being fully utilized.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x07", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "LD_BLOCKS_PARTIAL.ALL_STA_BLOCK", - "SampleAfterValue": "100003", - "BriefDescription": "This event counts the number of times that load operations are temporarily blocked because of older stores, with addresses that are not yet known. A load operation may incur more than one block of this type.", + "UMask": "0x10", + "EventName": "RESOURCE_STALLS.ROB", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles stalled due to re-order buffer full.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB6", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "AGU_BYPASS_CANCEL.COUNT", - "SampleAfterValue": "100003", - "BriefDescription": "This event counts executed load operations with all the following traits: 1. addressing of the format [base + offset], 2. the offset is between 1 and 2047, 3. 
the address specified in the base register is in one page and the address [base+offset] is in an.", + "UMask": "0xf0", + "EventName": "RESOURCE_STALLS.OOO_RSRC", + "SampleAfterValue": "2000003", + "BriefDescription": "Resource stalls due to Rob being full, FCSW, MXCSR and OTHER.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "EventCode": "0xA3", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", + "BriefDescription": "Each cycle there was a MLC-miss pending demand load this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. Note this is in MLC and connected to Umask 0.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "Counter": "0,1,2,3", + "EventCode": "0xA3", + "Counter": "2", "UMask": "0x2", - "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Each cycle there was a miss-pending demand load this thread, increment by 1. Note this is in DCU and connected to Umask 1. 
Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.", + "CounterMask": "2", + "CounterHTOff": "2" }, { - "EventCode": "0xA1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE", + "UMask": "0x4", + "EventName": "CYCLE_ACTIVITY.CYCLES_NO_DISPATCH", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 0.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Each cycle there was no dispatch for this thread, increment by 1. Note this is connect to Umask 2. No dispatch can be deduced from the UOPS_EXECUTED event.", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xA1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE", + "UMask": "0x5", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 1.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Each cycle there was a MLC-miss pending demand load and no uops dispatched on this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. Note this is in MLC and connected to Umask 0 and 2.", + "CounterMask": "5", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x40", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x6", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 4.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Each cycle there was a miss-pending demand load this thread and no uops dispatched, increment by 1. 
Note this is in DCU and connected to Umask 1 and 2. Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.", + "CounterMask": "6", + "CounterHTOff": "2" }, { - "EventCode": "0xA1", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0x80", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE", + "UMask": "0x1", + "EventName": "LSD.UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 5.", + "BriefDescription": "Number of Uops delivered by the LSD.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "UMask": "0x1", + "EventName": "LSD.CYCLES_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2.", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "UMask": "0x1", + "EventName": "LSD.CYCLES_4_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3.", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0xc", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE", + "UMask": "0x1", + "EventName": "UOPS_DISPATCHED.THREAD", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 2.", + "BriefDescription": "Uops dispatched per thread.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + 
"EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x30", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE", + "UMask": "0x2", + "EventName": "UOPS_DISPATCHED.CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3.", + "BriefDescription": "Uops dispatched from any thread.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "2", - "EventCode": "0xC0", - "Counter": "1", - "UMask": "0x1", - "EventName": "INST_RETIRED.PREC_DIST", - "SampleAfterValue": "2000003", - "BriefDescription": "Instructions retired. (Precise Event - PEBS).", - "TakenAlone": "1", - "CounterHTOff": "1" - }, - { - "EventCode": "0x5B", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0xf", - "EventName": "RESOURCE_STALLS2.ALL_PRF_CONTROL", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "SampleAfterValue": "2000003", - "BriefDescription": "Resource stalls2 control structures full for physical registers.", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x5B", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "RESOURCE_STALLS2.ALL_FL_EMPTY", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with either free list is empty.", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "CounterMask": "2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "RESOURCE_STALLS.MEM_RS", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", "SampleAfterValue": "2000003", - "BriefDescription": "Resource stalls due to memory buffers or Reservation Station (RS) being fully utilized.", + "BriefDescription": "Cycles at least 3 
micro-op is executed from any thread on physical core.", + "CounterMask": "3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0xf0", - "EventName": "RESOURCE_STALLS.OOO_RSRC", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", "SampleAfterValue": "2000003", - "BriefDescription": "Resource stalls due to Rob being full, FCSW, MXCSR and OTHER.", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x5B", + "EventCode": "0xB1", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x4f", - "EventName": "RESOURCE_STALLS2.OOO_RSRC", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", "SampleAfterValue": "2000003", - "BriefDescription": "Resource stalls out of order resources full.", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xB6", "Counter": "0,1,2,3", - "UMask": "0xa", - "EventName": "RESOURCE_STALLS.LB_SB", - "SampleAfterValue": "2000003", - "BriefDescription": "Resource stalls due to load or store buffers all being in use.", + "UMask": "0x1", + "EventName": "AGU_BYPASS_CANCEL.COUNT", + "SampleAfterValue": "100003", + "BriefDescription": "This event counts executed load operations with all the following traits: 1. addressing of the format [base + offset], 2. the offset is between 1 and 2047, 3. 
the address specified in the base register is in one page and the address [base+offset] is in an.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0D", + "EventCode": "0xC0", "Counter": "0,1,2,3", - "UMask": "0x3", - "EventName": "INT_MISC.RECOVERY_CYCLES", + "UMask": "0x0", + "EventName": "INST_RETIRED.ANY_P", "SampleAfterValue": "2000003", - "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).", - "CounterMask": "1", + "BriefDescription": "Number of instructions retired. General Counter - architectural event.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles spent executing performance-sensitive flags-merging uops. For example, shift CL (merge_arith_flags). For more details, See the Intel? 64 and IA-32 Architectures Optimization Reference Manual.", - "EventCode": "0x59", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES", + "PEBS": "2", + "EventCode": "0xC0", + "Counter": "1", + "UMask": "0x1", + "EventName": "INST_RETIRED.PREC_DIST", "SampleAfterValue": "2000003", - "BriefDescription": "Performance sensitive flags-merging uops added by Sandy Bridge u-arch.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Instructions retired. (Precise Event - PEBS).", + "TakenAlone": "1", + "CounterHTOff": "1" }, { - "EventCode": "0x0D", + "EventCode": "0xC1", "Counter": "0,1,2,3", - "UMask": "0x3", - "EdgeDetect": "1", - "EventName": "INT_MISC.RECOVERY_STALLS_COUNT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of occurences waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. 
whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).", - "CounterMask": "1", + "UMask": "0x2", + "EventName": "OTHER_ASSISTS.ITLB_MISS_RETIRED", + "SampleAfterValue": "100003", + "BriefDescription": "Retired instructions experiencing ITLB misses.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xE6", + "PEBS": "1", + "PublicDescription": "This event counts the number of micro-ops retired. (Precise Event)", + "EventCode": "0xC2", "Counter": "0,1,2,3", - "UMask": "0x1f", - "EventName": "BACLEARS.ANY", - "SampleAfterValue": "100003", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "UMask": "0x1", + "EventName": "UOPS_RETIRED.ALL", + "SampleAfterValue": "2000003", + "BriefDescription": "Actually retired uops. (Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0xC2", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0xff", - "EventName": "BR_INST_EXEC.ALL_BRANCHES", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired branches.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "UOPS_RETIRED.STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles without actually retired uops.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x89", + "EventCode": "0xC2", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0xff", - "EventName": "BR_MISP_EXEC.ALL_BRANCHES", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired mispredicted macro conditional branches.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with less than 10 actually retired uops.", + "CounterMask": "10", + "CounterHTOff": 
"0,1,2,3" }, { "EventCode": "0xC2", @@ -1065,13 +1017,14 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xA8", + "PEBS": "1", + "PublicDescription": "This event counts the number of retirement slots used each cycle. There are potentially 4 slots that can be used each cycle - meaning, 4 micro-ops or 4 instructions could retire each cycle. This event is used in determining the 'Retiring' category of the Top-Down pipeline slots characterization. (Precise Event - PEBS)", + "EventCode": "0xC2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_4_UOPS", + "UMask": "0x2", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "4", + "BriefDescription": "Retirement slots used. (Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -1086,135 +1039,188 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x5E", - "Invert": "1", + "PublicDescription": "This event is incremented when self-modifying code (SMC) is detected, which causes a machine clear. Machine clears can have a significant performance impact if they are happening frequently.", + "EventCode": "0xC3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "RS_EVENTS.EMPTY_END", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. 
Could be useful to precisely locate Frontend Latency Bound issues.", - "CounterMask": "1", + "UMask": "0x4", + "EventName": "MACHINE_CLEARS.SMC", + "SampleAfterValue": "100003", + "BriefDescription": "Self-modifying code (SMC) detected.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x00", - "Counter": "Fixed counter 2", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "Fixed counter 2" + "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "MACHINE_CLEARS.MASKMOV", + "SampleAfterValue": "100003", + "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x0", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PEBS": "1", + "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "EventName": "BR_INST_RETIRED.CONDITIONAL", + 
"SampleAfterValue": "400009", + "BriefDescription": "Conditional branch instructions retired. (Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0D", + "PEBS": "1", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x3", - "AnyThread": "1", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", - "CounterMask": "1", + "UMask": "0x2", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect near call instructions retired. (Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "PEBS": "1", + "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "CounterMask": "1", + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). (Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "PEBS": "2", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "CounterMask": "2", + "UMask": "0x4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired. 
(Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" + }, + { + "PEBS": "1", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "SampleAfterValue": "100007", + "BriefDescription": "Return instructions retired. (Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "CounterMask": "3", + "UMask": "0x10", + "EventName": "BR_INST_RETIRED.NOT_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Not taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "PEBS": "1", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "CounterMask": "4", + "UMask": "0x20", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Taken branch instructions retired. 
(Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", - "Invert": "1", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "UMask": "0x40", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "SampleAfterValue": "100007", + "BriefDescription": "Far branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", - "EventCode": "0x3C", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", + "UMask": "0x0", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All mispredicted macro branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PEBS": "1", + "EventCode": "0xC5", "Counter": "0,1,2,3", "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted conditional branch instructions retired. (Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PEBS": "1", + "EventCode": "0xC5", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "EventName": "BR_MISP_RETIRED.NEAR_CALL", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect mispredicted near call instructions retired. 
(Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PEBS": "2", + "PublicDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" + }, + { + "PEBS": "1", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "BR_MISP_RETIRED.NOT_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted not taken branch instructions retired.(Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PEBS": "1", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "BR_MISP_RETIRED.TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted taken branch instructions retired. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xCC", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", "SampleAfterValue": "2000003", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "BriefDescription": "Count cases of saving new LBR.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xE6", + "Counter": "0,1,2,3", + "UMask": "0x1f", + "EventName": "BACLEARS.ANY", + "SampleAfterValue": "100003", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "CounterHTOff": "0,1,2,3,4,5,6,7" } ]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json index fd7d7c438226..cfeba5067bab 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json @@ -1,140 +1,226 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. 
Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. 
This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. 
Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", - "MetricGroup": "Frontend", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4 ) )", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": 
"Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( 
FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Average CPU Utilization", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* 
FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": 
"(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/virtual-memory.json b/tools/perf/pmu-events/arch/x86/sandybridge/virtual-memory.json index a654ab771fce..b8eccce5d75d 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/virtual-memory.json @@ -1,131 +1,131 @@ [ { - "EventCode": "0xAE", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ITLB.ITLB_FLUSH", - "SampleAfterValue": "100007", - "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x4F", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "EPT.WALK_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycle count for an Extended Page table walk. 
The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", + "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", + "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", "SampleAfterValue": "100003", - "BriefDescription": "Misses at all ITLB levels that cause page walks.", + "BriefDescription": "Load misses in all DTLB levels that cause page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x85", + "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "ITLB_MISSES.WALK_COMPLETED", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", "SampleAfterValue": "100003", - "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", + "BriefDescription": "Load misses at all DTLB levels that cause completed page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event count cycles when Page Miss Handler (PMH) is servicing page walks caused by ITLB misses.", - "EventCode": "0x85", + "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB load misses.", + "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x4", - "EventName": "ITLB_MISSES.WALK_DURATION", + "EventName": "DTLB_LOAD_MISSES.WALK_DURATION", "SampleAfterValue": "2000003", "BriefDescription": "Cycles when PMH is busy with page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x85", + "PublicDescription": "This event counts load operations that miss the first DTLB level but hit the second and do not cause any page walks. 
The penalty in this case is approximately 7 cycles.", + "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x10", - "EventName": "ITLB_MISSES.STLB_HIT", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", "SampleAfterValue": "100003", - "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", + "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x08", + "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", + "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK", "SampleAfterValue": "100003", - "BriefDescription": "Load misses in all DTLB levels that cause page walks.", + "BriefDescription": "Store misses in all DTLB levels that cause page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x08", + "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", "SampleAfterValue": "100003", - "BriefDescription": "Load misses at all DTLB levels that cause completed page walks.", + "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB load misses.", - "EventCode": "0x08", + "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x4", - "EventName": "DTLB_LOAD_MISSES.WALK_DURATION", + "EventName": "DTLB_STORE_MISSES.WALK_DURATION", "SampleAfterValue": "2000003", "BriefDescription": "Cycles when PMH is busy with page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts load operations that miss the first DTLB level but hit the second and do not cause any page walks. 
The penalty in this case is approximately 7 cycles.", - "EventCode": "0x08", + "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x10", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", "SampleAfterValue": "100003", - "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", + "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x49", + "EventCode": "0x4F", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "EPT.WALK_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycle count for an Extended Page table walk. The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x85", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK", + "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all DTLB levels that cause page walks.", + "BriefDescription": "Misses at all ITLB levels that cause page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x49", + "EventCode": "0x85", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "EventName": "ITLB_MISSES.WALK_COMPLETED", "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", + "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x49", + "PublicDescription": "This event count cycles when Page Miss Handler (PMH) is servicing page walks caused by ITLB misses.", + "EventCode": "0x85", "Counter": "0,1,2,3", "UMask": "0x4", - "EventName": 
"DTLB_STORE_MISSES.WALK_DURATION", + "EventName": "ITLB_MISSES.WALK_DURATION", "SampleAfterValue": "2000003", "BriefDescription": "Cycles when PMH is busy with page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x49", + "EventCode": "0x85", "Counter": "0,1,2,3", "UMask": "0x10", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "EventName": "ITLB_MISSES.STLB_HIT", "SampleAfterValue": "100003", - "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", + "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xAE", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "ITLB.ITLB_FLUSH", + "SampleAfterValue": "100007", + "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { diff --git a/tools/perf/pmu-events/arch/x86/silvermont/cache.json b/tools/perf/pmu-events/arch/x86/silvermont/cache.json index 82be7d1b8b81..805ef1436539 100644 --- a/tools/perf/pmu-events/arch/x86/silvermont/cache.json +++ b/tools/perf/pmu-events/arch/x86/silvermont/cache.json @@ -36,7 +36,7 @@ "BriefDescription": "L2 cache request misses" }, { - "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss. Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.\r\nCounts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. This will include cycles due to an ITLB miss, ICache miss and other events. \r\n", + "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. 
That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss. Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.\r\nCounts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. This will include cycles due to an ITLB miss, ICache miss and other events.", "EventCode": "0x86", "Counter": "0,1", "UMask": "0x4", diff --git a/tools/perf/pmu-events/arch/x86/silvermont/other.json b/tools/perf/pmu-events/arch/x86/silvermont/other.json new file mode 100644 index 000000000000..47814046fa9d --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/silvermont/other.json @@ -0,0 +1,20 @@ +[ + { + "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ITLB miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ITLB miss. Note: this event is not the same as page walk cycles to retrieve an instruction translation.", + "EventCode": "0x86", + "Counter": "0,1", + "UMask": "0x2", + "EventName": "FETCH_STALL.ITLB_FILL_PENDING_CYCLES", + "SampleAfterValue": "200003", + "BriefDescription": "Cycles code-fetch stalled due to an outstanding ITLB miss." + }, + { + "PublicDescription": "Counts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. This will include cycles due to an ITLB miss, ICache miss and other events.", + "EventCode": "0x86", + "Counter": "0,1", + "UMask": "0x3f", + "EventName": "FETCH_STALL.ALL", + "SampleAfterValue": "200003", + "BriefDescription": "Cycles code-fetch stalled due to any reason." + } +]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/silvermont/pipeline.json b/tools/perf/pmu-events/arch/x86/silvermont/pipeline.json index 7468af99190a..1ed62ad4cf77 100644 --- a/tools/perf/pmu-events/arch/x86/silvermont/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/silvermont/pipeline.json @@ -210,7 +210,7 @@ "UMask": "0x4", "EventName": "NO_ALLOC_CYCLES.MISPREDICTS", "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of cycles when no uops are allocated and the alloc pipe is stalled waiting for a mispredicted jump to retire. After the misprediction is detected, the front end will start immediately but the allocate pipe stalls until the mispredicted " + "BriefDescription": "Counts the number of cycles when no uops are allocated and the alloc pipe is stalled waiting for a mispredicted jump to retire. After the misprediction is detected, the front end will start immediately but the allocate pipe stalls until the mispredicted" }, { "EventCode": "0xCA", @@ -275,7 +275,6 @@ }, { "PublicDescription": "This event counts the number of instructions that retire. For instructions that consist of multiple micro-ops, this event counts exactly once, as the last micro-op of the instruction retires. The event continues counting while instructions retire, including during interrupt service routines caused by hardware interrupts, faults or traps. Background: Modern microprocessors employ extensive pipelining and speculative techniques. Since sometimes an instruction is started but never completed, the notion of \"retirement\" is introduced. A retired instruction is one that commits its states. Or stated differently, an instruction might be abandoned at some point. No instruction is truly finished until it retires. This counter measures the number of completed instructions. 
The fixed event is INST_RETIRED.ANY and the programmable event is INST_RETIRED.ANY_P.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", @@ -284,7 +283,6 @@ }, { "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. In systems with a constant core frequency, this event can give you a measurement of the elapsed time while the core was not in halt state by dividing the event count by the core frequency. This event is architecturally defined and is a designated fixed counter. CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.CORE_P use the core frequency which may change from time to time. CPU_CLK_UNHALTE.REF_TSC and CPU_CLK_UNHALTED.REF are not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. The fixed events are CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.REF_TSC and the programmable events are CPU_CLK_UNHALTED.CORE_P and CPU_CLK_UNHALTED.REF.", - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.CORE", @@ -293,7 +291,6 @@ }, { "PublicDescription": "Counts the number of reference cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. Divide this event count by core frequency to determine the elapsed time while the core was not in halt state. Divide this event count by core frequency to determine the elapsed time while the core was not in halt state. 
This event is architecturally defined and is a designated fixed counter. CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.CORE_P use the core frequency which may change from time to time. CPU_CLK_UNHALTE.REF_TSC and CPU_CLK_UNHALTED.REF are not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. The fixed events are CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.REF_TSC and the programmable events are CPU_CLK_UNHALTED.CORE_P and CPU_CLK_UNHALTED.REF.", - "EventCode": "0x00", "Counter": "Fixed counter 3", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", diff --git a/tools/perf/pmu-events/arch/x86/skylake/cache.json b/tools/perf/pmu-events/arch/x86/skylake/cache.json index 54bfe9e4045c..720458139049 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/cache.json +++ b/tools/perf/pmu-events/arch/x86/skylake/cache.json @@ -60,10 +60,10 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts the number of demand Data Read requests that hit L2 cache. 
Only non rejected loads are counted.", + "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x41", + "UMask": "0xc1", "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", "SampleAfterValue": "200003", "BriefDescription": "Demand Data Read requests that hit L2 cache", @@ -73,7 +73,7 @@ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x42", + "UMask": "0xc2", "EventName": "L2_RQSTS.RFO_HIT", "SampleAfterValue": "200003", "BriefDescription": "RFO requests that hit L2 cache", @@ -83,7 +83,7 @@ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x44", + "UMask": "0xc4", "EventName": "L2_RQSTS.CODE_RD_HIT", "SampleAfterValue": "200003", "BriefDescription": "L2 cache hits when fetching instructions, code reads.", @@ -482,7 +482,7 @@ }, { "PEBS": "1", - "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.\r\n", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x1", @@ -554,7 +554,7 @@ }, { "PEBS": "1", - "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. 
\r\n", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x40", @@ -661,13 +661,13 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache.", + "PublicDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", "EventCode": "0xF2", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "L2_LINES_OUT.USELESS_PREF", "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", + "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -690,249 +690,2238 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", 
+ "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + 
"PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC01C8000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10001C8000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x04001C8000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x02001C8000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x01001C8000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + 
"MSRValue": "0x00801C8000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00401C8000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0108000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000108000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400108000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200108000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": 
"OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100108000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080108000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040108000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + 
"SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", 
+ "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": 
"Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": 
"0x3fc0400001 ", + "MSRValue": "0x0200028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests have any response type.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0000018000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests have any response type.", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache 
prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches 
that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC01C0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": 
"0xB7, 0xBB", + "MSRValue": "0x10001C0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x04001C0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x02001C0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x01001C0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": 
"0x00801C0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00401C0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0100004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000100004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400100004", + "Counter": "0,1,2,3", + "UMask": 
"0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200100004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100100004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080100004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040100004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": 
"OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": 
"OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": 
"OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": 
"OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0020004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000020004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": 
"OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400020004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200020004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100020004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080020004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": 
"OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040020004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0000010004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + 
"SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + 
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC01C0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10001C0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x04001C0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x02001C0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": 
"0x01001C0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00801C0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00401C0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + 
"EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + 
"PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", 
+ "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + 
"BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0020002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP", + 
"MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000020002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400020002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200020002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100020002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080020002", + "Counter": "0,1,2,3", + 
"UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040020002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs) have any response type.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0000010002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs) have any response type.", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & ANY_SNOOP", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400001 ", + "MSRValue": "0x1000400001", 
"Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HITM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400001 ", + "MSRValue": "0x0400400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400001 ", + "MSRValue": "0x0200400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_MISS", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of 
event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400001 ", + "MSRValue": "0x0100400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400001 ", + "MSRValue": "0x0080400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NONE", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and 
predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c0001 ", + "MSRValue": "0x3FC01C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c0001 ", + "MSRValue": "0x10001C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c0001 ", + "MSRValue": "0x04001C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops sent to sibling cores return clean response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001c0001 ", + "MSRValue": "0x02001C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops sent to sibling cores return clean response.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c0001 ", + "MSRValue": "0x01001C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c0001 ", + "MSRValue": "0x00801C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00401C0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": 
"0xB7, 0xBB", + "MSRValue": "0x3FC0100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be 
programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0020001 ", + "MSRValue": "0x0080100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": 
"OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": 
"OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": 
"OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020001 ", + "MSRValue": "0x1000020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and 
predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020001 ", + "MSRValue": "0x0400020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020001 ", + "MSRValue": "0x0200020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020001 ", + "MSRValue": "0x0100020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": 
"DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020001 ", + "MSRValue": "0x0080020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040020001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010001 ", + "MSRValue": "0x0000010001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that have any response type.", + "BriefDescription": "Counts demand data reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/skylake/frontend.json b/tools/perf/pmu-events/arch/x86/skylake/frontend.json index 578dff5bd823..7fa95a35e3ca 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylake/frontend.json @@ -177,7 +177,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", + "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). 
Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -242,7 +242,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", + "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. 
Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.", "EventCode": "0xAB", "Counter": "0,1,2,3", "UMask": "0x2", @@ -253,7 +253,7 @@ }, { "PEBS": "1", - "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. \r\n", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", "EventCode": "0xC6", "MSRValue": "0x11", "Counter": "0,1,2,3", @@ -360,7 +360,7 @@ }, { "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. \r\n", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", "EventCode": "0xC6", "MSRValue": "0x400806", "Counter": "0,1,2,3", @@ -374,7 +374,7 @@ }, { "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.\r\n", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.", "EventCode": "0xC6", "MSRValue": "0x401006", "Counter": "0,1,2,3", @@ -388,7 +388,7 @@ }, { "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. 
During this period the front-end delivered no uops.\r\n", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.", "EventCode": "0xC6", "MSRValue": "0x402006", "Counter": "0,1,2,3", @@ -454,7 +454,7 @@ }, { "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.\r\n", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", "EventCode": "0xC6", "MSRValue": "0x100206", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/skylake/memory.json b/tools/perf/pmu-events/arch/x86/skylake/memory.json index 3bd8b712c889..f197b4c7695b 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/memory.json +++ b/tools/perf/pmu-events/arch/x86/skylake/memory.json @@ -215,7 +215,7 @@ "UMask": "0x4", "EventName": "HLE_RETIRED.ABORTED", "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one). 
", + "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -237,6 +237,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "EventCode": "0xC8", "Counter": "0,1,2,3", "UMask": "0x20", @@ -292,7 +293,7 @@ "UMask": "0x4", "EventName": "RTM_RETIRED.ABORTED", "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one). ", + "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -346,7 +347,7 @@ }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x4", "Counter": "0,1,2,3", @@ -354,13 +355,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", "SampleAfterValue": "100003", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles. 
Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x8", "Counter": "0,1,2,3", @@ -368,13 +369,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", "SampleAfterValue": "50021", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x10", "Counter": "0,1,2,3", @@ -382,13 +383,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", "SampleAfterValue": "20011", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. 
Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x20", "Counter": "0,1,2,3", @@ -396,13 +397,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", "SampleAfterValue": "100007", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x40", "Counter": "0,1,2,3", @@ -410,13 +411,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", "SampleAfterValue": "2003", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. 
Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x80", "Counter": "0,1,2,3", @@ -424,13 +425,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "SampleAfterValue": "1009", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x100", "Counter": "0,1,2,3", @@ -438,13 +439,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "SampleAfterValue": "503", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. 
Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x200", "Counter": "0,1,2,3", @@ -452,163 +453,1151 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", "SampleAfterValue": "101", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc000001 ", + "MSRValue": "0x3FFC408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x203C408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x103C408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + 
"PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x043C408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x023C408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x013C408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00BC408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x007C408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + 
"MSRValue": "0x3FC4008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2004008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1004008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0404008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0204008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0104008000", + 
"Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0084008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0044008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x20001C8000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000108000", + "Counter": "0,1,2,3", + "UMask": "0x1", + 
"EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FFC400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": 
"0xB7, 0xBB", + "MSRValue": "0x203C400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x103C400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x043C400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x023C400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": 
"0x013C400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00BC400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x007C400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC4000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2004000004", + "Counter": 
"0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1004000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0404000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0204000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0104000004", + 
"Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0084000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0044000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x20001C0004", + 
"Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000100004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000020004", + "Counter": "0,1,2,3", + 
"UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FFC400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x203C400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x103C400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x043C400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data 
writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x023C400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x013C400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00BC400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x007C400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC4000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + 
}, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2004000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1004000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0404000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0204000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0104000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": 
"100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0084000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0044000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x20001C0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": 
"OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000020002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FFC400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x203C400001", + 
"Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c000001 ", + "MSRValue": "0x103C400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HITM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000001 ", + "MSRValue": "0x043C400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine 
mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000001 ", + "MSRValue": "0x023C400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000001 ", + "MSRValue": "0x013C400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000001 ", + "MSRValue": "0x00BC400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NONE", + 
"BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4000001 ", + "MSRValue": "0x007C400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC4000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2004000001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": 
"0x1004000001 ", + "MSRValue": "0x1004000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000001 ", + "MSRValue": "0x0404000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000001 ", + "MSRValue": "0x0204000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000001 ", + "MSRValue": "0x0104000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000001 ", + "MSRValue": "0x0084000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0044000001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000400001", + "Counter": "0,1,2,3", + "UMask": 
"0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x20001C0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000020001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": 
"OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json index bc6d2afbcd8a..4a891fbbc4bb 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json @@ -1,7 +1,6 @@ [ { "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", - "EventCode": "0x00", "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", @@ -11,7 +10,6 @@ }, { "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. 
It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", @@ -20,7 +18,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "AnyThread": "1", @@ -31,7 +28,6 @@ }, { "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. 
Software should ignore this case.", - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", @@ -121,7 +117,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.", + "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to \u201cMixing Intel AVX and Intel SSE Code\u201d section of the Optimization Guide.", "EventCode": "0x0E", "Counter": "0,1,2,3", "UMask": "0x2", @@ -248,6 +244,16 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.", + "EventCode": "0x59", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. 
This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", "EventCode": "0x5E", "Counter": "0,1,2,3", @@ -361,8 +367,8 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts resource-related stall cycles. Reasons for stalls can be as follows:a. *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots).b. *any* u-arch structure got empty (like INT/SIMD FreeLists).c. FPU control word (FPCW), MXCSR.and others. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", - "EventCode": "0xA2", + "PublicDescription": "Counts resource-related stall cycles.", + "EventCode": "0xa2", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "RESOURCE_STALLS.ANY", @@ -735,7 +741,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.", + "PublicDescription": "This event counts cycles without actually retired uops.", "EventCode": "0xC2", "Invert": "1", "Counter": "0,1,2,3", @@ -759,6 +765,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Number of machine clears (nukes) of any type.", "EventCode": "0xC3", "Counter": "0,1,2,3", "UMask": "0x1", @@ -839,14 +846,15 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts not taken branch instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x10", "Errata": "SKL091", "EventName": "BR_INST_RETIRED.NOT_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch instructions retired.", + "BriefDescription": "Counts all not taken macro branch instructions 
retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -924,7 +932,7 @@ "UMask": "0x20", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken. ", + "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -938,6 +946,15 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "EventCode": "0xCC", + "Counter": "0,1,2,3", + "UMask": "0x40", + "EventName": "ROB_MISC_EVENTS.PAUSE_INST", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", "EventCode": "0xE6", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json index 71e9737f4614..2c95417a4dae 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json @@ -1,164 +1,364 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. 
Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This includes slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to mispredicted branches is categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This includes slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to mispredicted branches is categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations.
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. 
Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessarily mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired.
Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessarily mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )", - "MetricGroup": "Frontend", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch.
", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ))", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. 
This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on 
else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": 
"Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "Branch_Misprediction_Cost" }, { + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts_SMT", + "MetricName": "Branch_Misprediction_Cost_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative 
Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" + }, + { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. 
Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB /
sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Access_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( 
FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { + "MetricExpr": "arb@event\\=0x80\\,umask\\=0x2@ / arb@event\\=0x80\\,umask\\=0x2\\,thresh\\=1@", + "BriefDescription": "Average number of parallel data read requests to external memory. 
Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_Parallel_Reads" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json index 5c9940866acd..24df183693fa 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json +++ 
b/tools/perf/pmu-events/arch/x86/skylakex/cache.json @@ -61,17 +61,17 @@ }, { "EventCode": "0x24", - "UMask": "0x41", + "UMask": "0xc1", "BriefDescription": "Demand Data Read requests that hit L2 cache", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "PublicDescription": "Counts the number of demand Data Read requests that hit L2 cache. Only non rejected loads are counted.", + "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x24", - "UMask": "0x42", + "UMask": "0xc2", "BriefDescription": "RFO requests that hit L2 cache", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.RFO_HIT", @@ -81,7 +81,7 @@ }, { "EventCode": "0x24", - "UMask": "0x44", + "UMask": "0xc4", "BriefDescription": "L2 cache hits when fetching instructions, code reads.", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.CODE_RD_HIT", @@ -165,6 +165,7 @@ "BriefDescription": "Core-originated cacheable demand requests missed L3", "Counter": "0,1,2,3", "EventName": "LONGEST_LAT_CACHE.MISS", + "Errata": "SKL057", "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" @@ -175,28 +176,29 @@ "BriefDescription": "Core-originated cacheable demand requests that refer to L3", "Counter": "0,1,2,3", "EventName": "LONGEST_LAT_CACHE.REFERENCE", - "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. 
It does not include all accesses to the L3.", + "Errata": "SKL057", + "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x48", "UMask": "0x1", - "BriefDescription": "L1D miss outstandings duration in cycles", + "BriefDescription": "Cycles with L1D load Misses outstanding.", "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.PENDING", - "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "CounterMask": "1", + "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x48", "UMask": "0x1", - "BriefDescription": "Cycles with L1D load Misses outstanding.", + "BriefDescription": "L1D miss outstandings duration in cycles", "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", + "EventName": "L1D_PEND_MISS.PENDING", + "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of 
Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -234,21 +236,21 @@ { "EventCode": "0x60", "UMask": "0x1", - "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", - "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "CounterMask": "1", + "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). 
A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x60", "UMask": "0x1", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", - "CounterMask": "1", - "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", + "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -307,21 +309,21 @@ { "EventCode": "0x60", "UMask": "0x8", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", - "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. 
A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "CounterMask": "1", + "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x60", "UMask": "0x8", - "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", - "CounterMask": "1", - "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). 
See corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -486,7 +488,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_RETIRED.L1_HIT", - "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.\r\n", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -558,7 +560,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_RETIRED.FB_HIT", - "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. \r\n", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, @@ -690,6 +692,7 @@ "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.", "Counter": "0,1,2,3", "EventName": "L2_LINES_OUT.SILENT", + "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -699,17 +702,18 @@ "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. 
Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped", "Counter": "0,1,2,3", "EventName": "L2_LINES_OUT.NON_SILENT", - "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped.", + "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xF2", "UMask": "0x4", - "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", + "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", + "Deprecated": "1", "Counter": "0,1,2,3", "EventName": "L2_LINES_OUT.USELESS_PREF", - "PublicDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache.", + "PublicDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -736,12 +740,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that have any response type.", - "MSRValue": "0x0000010001 ", + "BriefDescription": "Counts demand data reads have any response type.", + "MSRValue": "0x0000010001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -749,12 +753,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0001 ", + "BriefDescription": "Counts demand data reads TBD TBD", + "MSRValue": "0x01003C0001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -762,25 +766,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0001 ", + "BriefDescription": "Counts demand data reads TBD TBD", + "MSRValue": "0x04003C0001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0001 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -788,12 +779,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0001 ", + "BriefDescription": "Counts demand data reads TBD TBD", + "MSRValue": "0x10003C0001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -801,12 +792,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that hit in the L3.", - "MSRValue": "0x3f803c0001 ", + "BriefDescription": "Counts demand data reads TBD TBD", + "MSRValue": "0x3F803C0001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -814,12 +805,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.", - "MSRValue": "0x0000010002 ", + "BriefDescription": "Counts all demand data writes (RFOs) have any response type.", + "MSRValue": "0x0000010002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -827,12 +818,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", + "MSRValue": "0x01003C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -840,12 +831,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", + "MSRValue": "0x04003C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -853,25 +844,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "DEMAND_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0002 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", + "MSRValue": "0x10003C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -879,12 +857,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3.", - "MSRValue": "0x3f803c0002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", + "MSRValue": "0x3F803C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -892,12 +870,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that have any response type.", - "MSRValue": "0x0000010004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", + "MSRValue": "0x0000010004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -905,12 +883,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", + "MSRValue": "0x01003C0004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -918,12 +896,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", + "MSRValue": "0x04003C0004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -931,25 +909,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0004 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", + "MSRValue": "0x10003C0004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -957,12 +922,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that hit in the L3.", - "MSRValue": "0x3f803c0004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", + "MSRValue": "0x3F803C0004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -970,12 +935,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.", - "MSRValue": "0x0000010010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads have any response type.", + "MSRValue": "0x0000010010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -983,12 +948,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", + "MSRValue": "0x01003C0010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -996,12 +961,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", + "MSRValue": "0x04003C0010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1009,25 +974,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "PF_L2_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0010 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", + "MSRValue": "0x10003C0010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1035,12 +987,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.", - "MSRValue": "0x3f803c0010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", + "MSRValue": "0x3F803C0010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1048,12 +1000,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.", - "MSRValue": "0x0000010020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.", + "MSRValue": "0x0000010020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1061,12 +1013,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", + "MSRValue": "0x01003C0020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1074,12 +1026,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", + "MSRValue": "0x04003C0020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1087,25 +1039,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "PF_L2_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0020 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", + "MSRValue": "0x10003C0020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1113,12 +1052,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.", - "MSRValue": "0x3f803c0020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", + "MSRValue": "0x3F803C0020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1126,12 +1065,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.", - "MSRValue": "0x0000010080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.", + "MSRValue": "0x0000010080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1139,12 +1078,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", + "MSRValue": "0x01003C0080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1152,25 +1091,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", + "MSRValue": "0x04003C0080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0080 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1178,12 +1104,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", + "MSRValue": "0x10003C0080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1191,12 +1117,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.", - "MSRValue": "0x3f803c0080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", + "MSRValue": "0x3F803C0080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1204,12 +1130,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.", - "MSRValue": "0x0000010100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.", + "MSRValue": "0x0000010100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1217,12 +1143,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", + "MSRValue": "0x01003C0100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1230,12 +1156,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", + "MSRValue": "0x04003C0100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1243,12 +1169,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "PF_L3_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", + "MSRValue": "0x10003C0100", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1256,12 +1182,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", + "MSRValue": "0x3F803C0100", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling 
cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1269,12 +1195,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.", - "MSRValue": "0x3f803c0100 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests have any response type.", + "MSRValue": "0x0000010400", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1282,12 +1208,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type.", - "MSRValue": "0x0000010400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", + "MSRValue": "0x01003C0400", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1295,12 +1221,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", + "MSRValue": "0x04003C0400", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1308,12 +1234,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", + "MSRValue": "0x10003C0400", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1321,12 +1247,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "PF_L1D_AND_SW & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", + "MSRValue": "0x3F803C0400", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1334,12 +1260,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0400 ", + "BriefDescription": "TBD have any response type.", + "MSRValue": "0x0000010490", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": 
"0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1347,12 +1273,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3.", - "MSRValue": "0x3f803c0400 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x01003C0490", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1360,12 +1286,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts any other requests that have any response type.", - "MSRValue": "0x0000018000 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x04003C0490", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1373,12 +1299,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c8000 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x10003C0490", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1386,12 +1312,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c8000 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3F803C0490", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1399,12 +1325,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "OTHER & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c8000 ", + "BriefDescription": "TBD have any response type.", + "MSRValue": "0x0000010120", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1412,12 +1338,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c8000 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x01003C0120", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1425,12 +1351,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts any other requests that hit in the L3.", - "MSRValue": "0x3f803c8000 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x04003C0120", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1438,12 +1364,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that have any response type.", - "MSRValue": "0x0000010490 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x10003C0120", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1451,12 +1377,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0490 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3F803C0120", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1464,12 +1390,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0490 ", + "BriefDescription": "TBD have any response type.", + "MSRValue": "0x0000010491", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1477,12 +1403,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0490 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x01003C0491", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1490,12 +1416,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0490 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x04003C0491", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1503,12 +1429,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that hit in the L3.", - "MSRValue": "0x3f803c0490 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x10003C0491", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1516,12 +1442,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that have any response type.", - "MSRValue": "0x0000010120 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3F803C0491", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1529,12 +1455,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0120 ", + "BriefDescription": "TBD have any response type.", + "MSRValue": "0x0000010122", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1542,12 +1468,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0120 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x01003C0122", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1555,12 +1481,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0120 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x04003C0122", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1568,12 +1494,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0120 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x10003C0122", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1581,12 +1507,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that hit in the L3.", - "MSRValue": "0x3f803c0120 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3F803C0122", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1594,12 +1520,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that have any response type.", - "MSRValue": "0x0000010491 ", + "BriefDescription": "Counts demand data reads", + "MSRValue": "0x08007C0001", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts demand data reads", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1607,12 +1532,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0491 ", + "BriefDescription": "Counts all demand data writes (RFOs)", + "MSRValue": "0x08007C0002", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1620,12 +1544,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0491 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "MSRValue": "0x08007C0004", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1633,12 +1556,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0491 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", + "MSRValue": "0x08007C0010", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1646,12 +1568,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0491 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", + "MSRValue": "0x08007C0020", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M 
state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1659,12 +1580,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3.", - "MSRValue": "0x3f803c0491 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", + "MSRValue": "0x08007C0080", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1672,12 +1592,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that have any response type.", - "MSRValue": "0x0000010122 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", + "MSRValue": "0x08007C0100", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1685,12 +1604,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0122 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests", + "MSRValue": "0x08007C0400", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1698,12 +1616,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0122 ", + "BriefDescription": "TBD", + "MSRValue": "0x08007C0490", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1711,12 +1628,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0122 ", + "BriefDescription": "TBD", + "MSRValue": "0x08007C0120", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1724,12 +1640,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0122 ", + "BriefDescription": "TBD", + "MSRValue": "0x08007C0491", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1737,12 +1652,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3.", - "MSRValue": "0x3f803c0122 ", + "BriefDescription": "TBD", + "MSRValue": "0x08007C0122", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json index 286ed1a37ec9..c5d0babe89fc 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json @@ -59,7 +59,6 @@ "BriefDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8)", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE", - "PublicDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -69,7 +68,6 @@ "BriefDescription": "Number of Packed Single-Precision FP 
arithmetic instructions (Use operation multiplier of 16)", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE", - "PublicDescription": "Number of Packed Single-Precision FP arithmetic instructions (Use operation multiplier of 16).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json index 403a4f89e9b2..4dc583cfb545 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json @@ -2,16 +2,6 @@ { "EventCode": "0x79", "UMask": "0x4", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", - "Counter": "0,1,2,3", - "EventName": "IDQ.MITE_UOPS", - "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x4", "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", "EventName": "IDQ.MITE_CYCLES", @@ -22,11 +12,11 @@ }, { "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", + "UMask": "0x4", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_UOPS", - "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. 
Counting includes uops that may 'bypass' the IDQ.", + "EventName": "IDQ.MITE_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -43,6 +33,16 @@ }, { "EventCode": "0x79", + "UMask": "0x8", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", + "Counter": "0,1,2,3", + "EventName": "IDQ.DSB_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", "UMask": "0x10", "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", @@ -55,22 +55,22 @@ { "EventCode": "0x79", "UMask": "0x18", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", - "CounterMask": "4", - "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", + "CounterMask": "1", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. 
Count includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "UMask": "0x18", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", - "CounterMask": "1", - "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", + "CounterMask": "4", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -87,22 +87,22 @@ { "EventCode": "0x79", "UMask": "0x24", - "BriefDescription": "Cycles MITE is delivering 4 Uops", + "BriefDescription": "Cycles MITE is delivering any Uop", "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", - "CounterMask": "4", - "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", + "CounterMask": "1", + "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. 
During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "UMask": "0x24", - "BriefDescription": "Cycles MITE is delivering any Uop", + "BriefDescription": "Cycles MITE is delivering 4 Uops", "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", - "CounterMask": "1", - "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", + "CounterMask": "4", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -118,24 +118,24 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EdgeDetect": "1", "EventCode": "0x79", "UMask": "0x30", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_SWITCHES", - "CounterMask": "1", - "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "EventName": "IDQ.MS_UOPS", + "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. 
Some instructions such as transcendentals may additionally generate uops from the MS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "EdgeDetect": "1", "EventCode": "0x79", "UMask": "0x30", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_UOPS", - "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.", + "EventName": "IDQ.MS_SWITCHES", + "CounterMask": "1", + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -177,67 +177,67 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "Invert": "1", "EventCode": "0x9C", "UMask": "0x1", - "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", + "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", - "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. 
Instruction Decode Queue (IDQ) delivers four uops.", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", + "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x9C", "UMask": "0x1", - "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", + "BriefDescription": "Cycles with less than 3 uops delivered by the front end.", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", - "CounterMask": "4", - "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE", + "CounterMask": "1", + "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x9C", "UMask": "0x1", - "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", + "BriefDescription": "Cycles with less than 2 uops delivered by the front end.", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", - "CounterMask": "3", - "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). 
IDQ_Uops_Not_Delivered.core >= 3.", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE", + "CounterMask": "2", + "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x9C", "UMask": "0x1", - "BriefDescription": "Cycles with less than 2 uops delivered by the front end.", + "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE", - "CounterMask": "2", - "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", + "CounterMask": "3", + "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x9C", "UMask": "0x1", - "BriefDescription": "Cycles with less than 3 uops delivered by the front end.", + "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE", - "CounterMask": "1", - "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", + "CounterMask": "4", + "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). 
IDQ_Uops_Not_Delivered.core =4.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", "EventCode": "0x9C", "UMask": "0x1", - "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", + "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", - "CounterMask": "1", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", + "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -247,20 +247,19 @@ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "Counter": "0,1,2,3", "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", - "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. 
Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", + "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. 
Precise Event.", "PEBS": "1", - "MSRValue": "0x11", + "MSRValue": "0x400406", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.DSB_MISS", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. \r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -268,11 +267,11 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x12", + "MSRValue": "0x200206", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.L1I_MISS", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2", "MSRIndex": "0x3F7", "TakenAlone": "1", "SampleAfterValue": "100007", @@ -281,11 +280,11 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x13", + "MSRValue": "0x400206", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.L2_MISS", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", "MSRIndex": "0x3F7", "TakenAlone": "1", "SampleAfterValue": "100007", @@ -294,13 +293,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.", + "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. 
Precise Event.", "PEBS": "1", - "MSRValue": "0x14", + "MSRValue": "0x15", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.ITLB_MISS", + "EventName": "FRONTEND_RETIRED.STLB_MISS", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", + "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -308,13 +307,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.", + "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.", "PEBS": "1", - "MSRValue": "0x15", + "MSRValue": "0x14", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.STLB_MISS", + "EventName": "FRONTEND_RETIRED.ITLB_MISS", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", + "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -322,11 +321,11 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. 
Precise Event.", "PEBS": "1", - "MSRValue": "0x400206", + "MSRValue": "0x13", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", + "EventName": "FRONTEND_RETIRED.L2_MISS", "MSRIndex": "0x3F7", "TakenAlone": "1", "SampleAfterValue": "100007", @@ -335,11 +334,11 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.", "PEBS": "1", - "MSRValue": "0x200206", + "MSRValue": "0x12", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2", + "EventName": "FRONTEND_RETIRED.L1I_MISS", "MSRIndex": "0x3F7", "TakenAlone": "1", "SampleAfterValue": "100007", @@ -348,12 +347,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.", "PEBS": "1", - "MSRValue": "0x400406", + "MSRValue": "0x11", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", + "EventName": "FRONTEND_RETIRED.DSB_MISS", "MSRIndex": "0x3F7", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. 
the decoded instruction-cache) miss.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -361,13 +361,12 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x400806", + "MSRValue": "0x300206", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. \r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -375,13 +374,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x401006", + "MSRValue": "0x100206", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. 
During this period the front-end delivered no uops.\r\n", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -389,13 +388,12 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x402006", + "MSRValue": "0x420006", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.\r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -403,11 +401,11 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. 
Precise Event.", "PEBS": "1", - "MSRValue": "0x404006", + "MSRValue": "0x410006", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", "MSRIndex": "0x3F7", "TakenAlone": "1", "SampleAfterValue": "100007", @@ -429,11 +427,11 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x410006", + "MSRValue": "0x404006", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", "MSRIndex": "0x3F7", "TakenAlone": "1", "SampleAfterValue": "100007", @@ -442,12 +440,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x420006", + "MSRValue": "0x402006", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", "MSRIndex": "0x3F7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. 
During this period the front-end delivered no uops.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -455,13 +454,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x100206", + "MSRValue": "0x401006", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.\r\n", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -469,12 +468,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. 
Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", "PEBS": "1", - "MSRValue": "0x300206", + "MSRValue": "0x400806", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", "MSRIndex": "0x3F7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json index e7f1aa31226d..48a9cdf81307 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json @@ -129,20 +129,20 @@ { "EventCode": "0x60", "UMask": "0x10", - "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", + "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", - "CounterMask": "1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", + "CounterMask": "6", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x60", "UMask": "0x10", - "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", + "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", - "CounterMask": "6", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", + 
"CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -210,7 +210,7 @@ { "EventCode": "0xC8", "UMask": "0x4", - "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one). ", + "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "HLE_RETIRED.ABORTED", @@ -242,6 +242,7 @@ "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "Counter": "0,1,2,3", "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", + "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -287,7 +288,7 @@ { "EventCode": "0xC9", "UMask": "0x4", - "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one). ", + "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "RTM_RETIRED.ABORTED", @@ -347,125 +348,125 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", "PEBS": "2", - "MSRValue": "0x4", + "MSRValue": "0x200", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles. 
Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "100003", + "SampleAfterValue": "101", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", "PEBS": "2", - "MSRValue": "0x8", + "MSRValue": "0x100", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "50021", + "SampleAfterValue": "503", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", "PEBS": "2", - "MSRValue": "0x10", + "MSRValue": "0x80", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles. 
Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "20011", + "SampleAfterValue": "1009", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", "PEBS": "2", - "MSRValue": "0x20", + "MSRValue": "0x40", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "100007", + "SampleAfterValue": "2003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", "PEBS": "2", - "MSRValue": "0x40", + "MSRValue": "0x20", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles. 
Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "2003", + "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", "PEBS": "2", - "MSRValue": "0x80", + "MSRValue": "0x10", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "1009", + "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", "PEBS": "2", - "MSRValue": "0x100", + "MSRValue": "0x8", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles. 
Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "503", + "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", "PEBS": "2", - "MSRValue": "0x200", + "MSRValue": "0x4", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "101", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss in the L3.", - "MSRValue": "0x3fbc000001 ", + "BriefDescription": "Counts demand data reads TBD TBD", + "MSRValue": "0x3FBC000001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -473,12 +474,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00001 ", + "BriefDescription": "Counts demand data reads TBD", + "MSRValue": "0x083FC00001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -486,12 +487,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00001 ", + "BriefDescription": "Counts demand data reads TBD", + "MSRValue": "0x103FC00001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -499,12 +500,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00001 ", + "BriefDescription": "Counts demand data reads TBD", + "MSRValue": "0x063FC00001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -512,12 +513,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800001 ", + "BriefDescription": "Counts demand data reads TBD", + "MSRValue": "0x063B800001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -525,12 +526,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000001 ", + "BriefDescription": "Counts demand data reads TBD", + "MSRValue": "0x0604000001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -538,12 +539,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3.", - "MSRValue": "0x3fbc000002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", + "MSRValue": "0x3FBC000002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -551,12 +552,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD", + "MSRValue": "0x083FC00002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -564,12 +565,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD", + "MSRValue": "0x103FC00002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -577,12 +578,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD", + "MSRValue": "0x063FC00002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -590,12 +591,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD", + "MSRValue": "0x063B800002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -603,12 +604,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD", + "MSRValue": "0x0604000002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -616,12 +617,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss in the L3.", - "MSRValue": "0x3fbc000004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", + "MSRValue": "0x3FBC000004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -629,12 +630,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", + "MSRValue": "0x083FC00004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -642,12 +643,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", + "MSRValue": "0x103FC00004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -655,12 +656,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", + "MSRValue": "0x063FC00004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -668,12 +669,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", + "MSRValue": "0x063B800004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -681,12 +682,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", + "MSRValue": "0x0604000004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -694,12 +695,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3.", - "MSRValue": "0x3fbc000010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", + "MSRValue": "0x3FBC000010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -707,12 +708,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", + "MSRValue": "0x083FC00010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -720,12 +721,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", + "MSRValue": "0x103FC00010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -733,12 +734,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", + "MSRValue": "0x063FC00010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -746,12 +747,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", + "MSRValue": "0x063B800010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -759,12 +760,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", + "MSRValue": "0x0604000010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -772,12 +773,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3.", - "MSRValue": "0x3fbc000020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", + "MSRValue": "0x3FBC000020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -785,12 +786,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", + "MSRValue": "0x083FC00020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -798,12 +799,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", + "MSRValue": "0x103FC00020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -811,12 +812,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", + "MSRValue": "0x063FC00020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -824,12 +825,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", + "MSRValue": "0x063B800020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -837,12 +838,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", + "MSRValue": "0x0604000020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -850,12 +851,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3.", - "MSRValue": "0x3fbc000080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", + "MSRValue": "0x3FBC000080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -863,12 +864,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", + "MSRValue": "0x083FC00080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -876,12 +877,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", + "MSRValue": "0x103FC00080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -889,12 +890,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", + "MSRValue": "0x063FC00080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -902,12 +903,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", + "MSRValue": "0x063B800080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -915,12 +916,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", + "MSRValue": "0x0604000080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -928,12 +929,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3.", - "MSRValue": "0x3fbc000100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", + "MSRValue": "0x3FBC000100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -941,12 +942,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", + "MSRValue": "0x083FC00100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote 
cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -954,12 +955,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", + "MSRValue": "0x103FC00100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -967,12 +968,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", + "MSRValue": "0x063FC00100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -980,12 +981,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", + "MSRValue": "0x063B800100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -993,12 +994,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", + "MSRValue": "0x0604000100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1006,12 +1007,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3.", - "MSRValue": "0x3fbc000400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", + "MSRValue": "0x3FBC000400", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1019,12 +1020,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", + "MSRValue": "0x083FC00400", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1032,12 +1033,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", + "MSRValue": "0x103FC00400", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1045,12 +1046,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", + "MSRValue": "0x063FC00400", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1058,12 +1059,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", + "MSRValue": "0x063B800400", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1071,90 +1072,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", + "MSRValue": "0x0604000400", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts any other requests that miss in the L3.", - "MSRValue": "0x3fbc008000 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts any other requests that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc08000 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts any other requests that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc08000 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc08000 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b808000 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604008000 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1162,12 +1085,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that miss in the L3.", - "MSRValue": "0x3fbc000490 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3FBC000490", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1175,12 +1098,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00490 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x083FC00490", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1188,12 +1111,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00490 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x103FC00490", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1201,12 +1124,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00490 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063FC00490", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1214,12 +1137,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800490 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063B800490", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1227,12 +1150,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000490 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x0604000490", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1240,12 +1163,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that miss in the L3.", - "MSRValue": "0x3fbc000120 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3FBC000120", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1253,12 +1176,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00120 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x083FC00120", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1266,12 +1189,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00120 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x103FC00120", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1279,12 +1202,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00120 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063FC00120", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1292,12 +1215,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800120 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063B800120", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1305,12 +1228,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000120 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x0604000120", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1318,12 +1241,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3.", - "MSRValue": "0x3fbc000491 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3FBC000491", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1331,12 +1254,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00491 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x083FC00491", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1344,12 +1267,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00491 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x103FC00491", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1357,12 +1280,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00491 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063FC00491", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1370,12 +1293,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800491 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063B800491", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1383,12 +1306,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000491 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x0604000491", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1396,12 +1319,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3.", - "MSRValue": "0x3fbc000122 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3FBC000122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1409,12 +1332,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00122 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x083FC00122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1422,12 +1345,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00122 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x103FC00122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1435,12 +1358,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00122 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063FC00122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1448,12 +1371,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800122 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063B800122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1461,12 +1384,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000122 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x0604000122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json index f99f7ae27820..369f56c1d1b5 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json @@ -1,6 +1,5 @@ [ { - "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", "Counter": "Fixed counter 0", @@ -10,7 +9,6 @@ "CounterHTOff": "Fixed counter 0" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state", "Counter": "Fixed counter 1", @@ -20,7 +18,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when 
at least one thread on the physical core is not in halt state.", "Counter": "Fixed counter 1", @@ -30,7 +27,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", "Counter": "Fixed counter 2", @@ -99,24 +95,24 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "Invert": "1", "EventCode": "0x0E", "UMask": "0x1", - "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.ANY", - "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "CounterMask": "1", + "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", "EventCode": "0x0E", "UMask": "0x1", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", + "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", + "EventName": "UOPS_ISSUED.ANY", + "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -126,7 +122,7 @@ "BriefDescription": "Uops inserted at issue-stage in order to 
preserve upper bits of vector registers.", "Counter": "0,1,2,3", "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", - "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.", + "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to \u201cMixing Intel AVX and Intel SSE Code\u201d section of the Optimization Guide.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -203,19 +199,19 @@ { "EventCode": "0x3C", "UMask": "0x1", - "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", "SampleAfterValue": "2503", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x3C", "UMask": "0x1", - "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", 
"SampleAfterValue": "2503", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -248,12 +244,12 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x5E", + "EventCode": "0x59", "UMask": "0x1", - "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", + "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.", "Counter": "0,1,2,3", - "EventName": "RS_EVENTS.EMPTY_CYCLES", - "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", + "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD", + "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -271,6 +267,16 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "EventCode": "0x5E", + "UMask": "0x1", + "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", + "Counter": "0,1,2,3", + "EventName": "RS_EVENTS.EMPTY_CYCLES", + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { "EventCode": "0x87", "UMask": "0x1", "BriefDescription": "Stalls caused by changing prefix length of the instruction.", @@ -361,12 +367,12 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xa2", "UMask": "0x1", "BriefDescription": "Resource-related stall cycles", "Counter": "0,1,2,3", "EventName": "RESOURCE_STALLS.ANY", - "PublicDescription": "Counts resource-related stall cycles. Reasons for stalls can be as follows:a. 
*any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots).b. *any* u-arch structure got empty (like INT/SIMD FreeLists).c. FPU control word (FPCW), MXCSR.and others. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", + "PublicDescription": "Counts resource-related stall cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -522,6 +528,17 @@ { "EventCode": "0xA8", "UMask": "0x1", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "Counter": "0,1,2,3", + "EventName": "LSD.CYCLES_4_UOPS", + "CounterMask": "4", + "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA8", + "UMask": "0x1", "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", "EventName": "LSD.CYCLES_ACTIVE", @@ -531,35 +548,35 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "EventCode": "0xB1", "UMask": "0x1", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "BriefDescription": "Cycles where at least 4 uops were executed per-thread", "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_4_UOPS", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", "CounterMask": "4", - "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xB1", "UMask": "0x1", - "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", "Counter": "0,1,2,3", - 
"EventName": "UOPS_EXECUTED.THREAD", - "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "CounterMask": "3", + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", "EventCode": "0xB1", "UMask": "0x1", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", + "CounterMask": "2", + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -575,35 +592,24 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "Invert": "1", "EventCode": "0xB1", "UMask": "0x1", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", - "CounterMask": "2", - "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", - "CounterMask": "3", - "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "CounterMask": "1", + "PublicDescription": "Counts cycles during which no uops 
were dispatched from the Reservation Station (RS) per thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xB1", "UMask": "0x1", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread", + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", - "CounterMask": "4", - "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", + "EventName": "UOPS_EXECUTED.THREAD", + "PublicDescription": "Number of uops to be executed per-thread each cycle.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -618,11 +624,12 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "Invert": "1", "EventCode": "0xB1", "UMask": "0x2", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" @@ -630,10 +637,10 @@ { "EventCode": "0xB1", "UMask": "0x2", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "CounterMask": "2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "CounterMask": "4", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -650,20 +657,19 @@ { "EventCode": "0xB1", "UMask": "0x2", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": 
"UOPS_EXECUTED.CORE_CYCLES_GE_4", - "CounterMask": "4", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "CounterMask": "2", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", "EventCode": "0xB1", "UMask": "0x2", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" @@ -725,12 +731,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "Invert": "1", "EventCode": "0xC2", "UMask": "0x2", - "BriefDescription": "Retirement slots used.", + "BriefDescription": "Cycles with less than 10 actually retired uops.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "PublicDescription": "Counts the retirement slots used.", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "CounterMask": "10", + "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -742,19 +750,17 @@ "Counter": "0,1,2,3", "EventName": "UOPS_RETIRED.STALL_CYCLES", "CounterMask": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.", + "PublicDescription": "This event counts cycles without actually retired uops.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", "EventCode": "0xC2", "UMask": "0x2", - "BriefDescription": "Cycles with less than 10 actually retired uops.", + "BriefDescription": "Retirement slots used.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", - "CounterMask": "10", - "PublicDescription": "Number of cycles using always true 
condition (uops_ret < 16) applied to non PEBS uops retired event.", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "PublicDescription": "Counts the retirement slots used.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -766,6 +772,7 @@ "Counter": "0,1,2,3", "EventName": "MACHINE_CLEARS.COUNT", "CounterMask": "1", + "PublicDescription": "Number of machine clears (nukes) of any type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -841,11 +848,12 @@ { "EventCode": "0xC4", "UMask": "0x10", - "BriefDescription": "Not taken branch instructions retired.", + "BriefDescription": "Counts all not taken macro branch instructions retired.", + "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NOT_TAKEN", "Errata": "SKL091", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts not taken branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -919,7 +927,7 @@ { "EventCode": "0xC5", "UMask": "0x20", - "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken. ", + "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", @@ -938,6 +946,15 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "EventCode": "0xCC", + "UMask": "0x40", + "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). 
This event is not supported on first SKL and KBL products.", + "Counter": "0,1,2,3", + "EventName": "ROB_MISC_EVENTS.PAUSE_INST", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { "EventCode": "0xE6", "UMask": "0x1", "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 71e9737f4614..35b255fa6a79 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -1,164 +1,376 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. 
For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. 
For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. 
For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )", - "MetricGroup": "Frontend", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. 
", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ))", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. 
This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on 
else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address 
Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "Branch_Misprediction_Cost" + }, + { + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts_SMT", + 
"MetricName": "Branch_Misprediction_Cost_SMT" }, { + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" + }, + { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. 
Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB /
sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Access_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + 
FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x35\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )", + "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). 
Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_Lat", + "MetricName": "DRAM_Read_Latency" + }, + { + "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1\\\\\\,config\\=0x40433@", + "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_Parallel_Reads" + }, + { + "MetricExpr": "cha_0@event\\=0x0@", + "BriefDescription": "Socket actual clocks when any core is active on that socket", + "MetricGroup": "", + "MetricName": "Socket_CLKS" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency 
percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 68c92bb599ee..1a91a197cafb 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -235,6 +235,10 @@ static struct map { { "iMPH-U", "uncore_arb" }, { "CPU-M-CF", "cpum_cf" }, { "CPU-M-SF", "cpum_sf" }, + { "UPI LL", "uncore_upi" }, + { "hisi_sccl,ddrc", "hisi_sccl,ddrc" }, + { "hisi_sccl,hha", "hisi_sccl,hha" }, + { "hisi_sccl,l3c", "hisi_sccl,l3c" }, {} }; @@ -403,7 +407,7 @@ static void free_arch_std_events(void) list_for_each_entry_safe(es, next, &arch_std_events, list) { FOR_ALL_EVENT_STRUCT_FIELDS(FREE_EVENT_FIELD); - list_del(&es->list); + list_del_init(&es->list); free(es); } } @@ -414,7 +418,6 @@ static int save_arch_std_events(void *data, char *name, char *event, char *metric_name, char *metric_group) { struct event_struct *es; - struct stat *sb = data; es = malloc(sizeof(*es)); if (!es) @@ -841,7 +844,7 @@ static void create_empty_mapping(const char *output_file) _Exit(1); } - fprintf(outfp, "#include \"../../pmu-events/pmu-events.h\"\n"); + fprintf(outfp, "#include \"pmu-events/pmu-events.h\"\n"); print_mapping_table_prefix(outfp); print_mapping_table_suffix(outfp); fclose(outfp); @@ -1096,7 +1099,7 @@ int main(int argc, char *argv[]) } /* Include pmu-events.h first */ - fprintf(eventsfp, "#include \"../../pmu-events/pmu-events.h\"\n"); + fprintf(eventsfp, "#include \"pmu-events/pmu-events.h\"\n"); /* * The mapfile allows multiple CPUids to point to the same JSON file, diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py index 0a29c5c3079f..ff87ccf5b708 100755 --- a/tools/perf/python/twatch.py +++ b/tools/perf/python/twatch.py 
@@ -1,17 +1,10 @@ #! /usr/bin/python +# SPDX-License-Identifier: GPL-2.0-only # -*- python -*- # -*- coding: utf-8 -*- # twatch - Experimental use of the perf python interface # Copyright (C) 2011 Arnaldo Carvalho de Melo <acme@redhat.com> # -# This application is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; version 2. -# -# This application is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. import perf diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c index 28431d1bbcf5..ead521dd8d79 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c @@ -1,9 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * This file was generated automatically by ExtUtils::ParseXS version 2.18_02 from the * contents of Context.xs. Do not edit this file, edit Context.xs instead. * * ANY CHANGES MADE HERE WILL BE LOST! - * */ #include <stdbool.h> #ifndef HAS_BOOL @@ -14,21 +14,6 @@ * Context.xs. XS interfaces for perf script. * * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * */ #include "EXTERN.h" diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl index 74844ee2be3e..168fa5e94b44 100644 --- a/tools/perf/scripts/perl/rw-by-file.pl +++ b/tools/perf/scripts/perl/rw-by-file.pl @@ -1,6 +1,6 @@ #!/usr/bin/perl -w +# SPDX-License-Identifier: GPL-2.0-only # (c) 2009, Tom Zanussi <tzanussi@gmail.com> -# Licensed under the terms of the GNU GPL License version 2 # Display r/w activity for files read/written to for a given program diff --git a/tools/perf/scripts/perl/rw-by-pid.pl b/tools/perf/scripts/perl/rw-by-pid.pl index 9db23c9daf55..495698250b2f 100644 --- a/tools/perf/scripts/perl/rw-by-pid.pl +++ b/tools/perf/scripts/perl/rw-by-pid.pl @@ -1,6 +1,6 @@ #!/usr/bin/perl -w +# SPDX-License-Identifier: GPL-2.0-only # (c) 2009, Tom Zanussi <tzanussi@gmail.com> -# Licensed under the terms of the GNU GPL License version 2 # Display r/w activity for all processes diff --git a/tools/perf/scripts/perl/rwtop.pl b/tools/perf/scripts/perl/rwtop.pl index 8b20787021c1..6473442568a2 100644 --- a/tools/perf/scripts/perl/rwtop.pl +++ b/tools/perf/scripts/perl/rwtop.pl @@ -1,6 +1,6 @@ #!/usr/bin/perl -w +# SPDX-License-Identifier: GPL-2.0-only # (c) 2010, Tom Zanussi <tzanussi@gmail.com> -# Licensed under the terms of the GNU GPL License version 2 # read/write top # diff --git a/tools/perf/scripts/perl/wakeup-latency.pl b/tools/perf/scripts/perl/wakeup-latency.pl index d9143dcec6c6..efcfec5e347a 100644 --- a/tools/perf/scripts/perl/wakeup-latency.pl +++ b/tools/perf/scripts/perl/wakeup-latency.pl @@ -1,6 +1,6 @@ #!/usr/bin/perl -w +# SPDX-License-Identifier: GPL-2.0-only # (c) 2009, Tom Zanussi <tzanussi@gmail.com> -# Licensed under the terms of the GNU GPL License version 2 # Display avg/min/max wakeup 
latency diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c index 1a0d27757eec..217568bc29ce 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Context.c. Python interfaces for perf script. * * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * */ #include <Python.h> diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index c3eae1d77d36..92713d93e956 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -27,18 +27,31 @@ import datetime # # fedora: # -# $ sudo yum install postgresql postgresql-server python-pyside qt-postgresql +# $ sudo yum install postgresql postgresql-server qt-postgresql # $ sudo su - postgres -c initdb # $ sudo service postgresql start # $ sudo su - postgres -# $ createuser <your user id here> +# $ createuser -s <your user id here> # Older versions may not support -s, in which case answer the prompt below: # Shall the new role be a superuser? 
(y/n) y +# $ sudo yum install python-pyside +# +# Alternately, to use Python3 and/or pyside 2, one of the following: +# $ sudo yum install python3-pyside +# $ pip install --user PySide2 +# $ pip3 install --user PySide2 # # ubuntu: # -# $ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql +# $ sudo apt-get install postgresql # $ sudo su - postgres # $ createuser -s <your user id here> +# $ sudo apt-get install python-pyside.qtsql libqt4-sql-psql +# +# Alternately, to use Python3 and/or pyside 2, one of the following: +# +# $ sudo apt-get install python3-pyside.qtsql libqt4-sql-psql +# $ sudo apt-get install python-pyside2.qtsql libqt5sql5-psql +# $ sudo apt-get install python3-pyside2.qtsql libqt5sql5-psql # # An example of using this script with Intel PT: # @@ -199,7 +212,16 @@ import datetime # print "{0:>6} {1:>10} {2:>9} {3:<30} {4:>6} {5:<30}".format(query.value(0), query.value(1), query.value(2), query.value(3), query.value(4), query.value(5)) # call_path_id = query.value(6) -from PySide.QtSql import * +pyside_version_1 = True +if not "pyside-version-1" in sys.argv: + try: + from PySide2.QtSql import * + pyside_version_1 = False + except: + pass + +if pyside_version_1: + from PySide.QtSql import * if sys.version_info < (3, 0): def toserverstr(str): @@ -255,11 +277,12 @@ def printdate(*args, **kw_args): print(datetime.datetime.today(), *args, sep=' ', **kw_args) def usage(): - printerr("Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]") - printerr("where: columns 'all' or 'branches'") - printerr(" calls 'calls' => create calls and call_paths table") - printerr(" callchains 'callchains' => create call_paths table") - raise Exception("Too few arguments") + printerr("Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>] [<pyside-version-1>]"); + printerr("where: columns 'all' or 'branches'"); + printerr(" calls 'calls' => create calls and call_paths table"); + printerr(" 
callchains 'callchains' => create call_paths table"); + printerr(" pyside-version-1 'pyside-version-1' => use pyside version 1"); + raise Exception("Too few or bad arguments") if (len(sys.argv) < 2): usage() @@ -281,6 +304,8 @@ for i in range(3,len(sys.argv)): perf_db_export_calls = True elif (sys.argv[i] == "callchains"): perf_db_export_callchains = True + elif (sys.argv[i] == "pyside-version-1"): + pass else: usage() @@ -369,7 +394,9 @@ if branches: 'to_ip bigint,' 'branch_type integer,' 'in_tx boolean,' - 'call_path_id bigint)') + 'call_path_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') else: do_query(query, 'CREATE TABLE samples (' 'id bigint NOT NULL,' @@ -393,7 +420,9 @@ else: 'data_src bigint,' 'branch_type integer,' 'in_tx boolean,' - 'call_path_id bigint)') + 'call_path_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') if perf_db_export_calls or perf_db_export_callchains: do_query(query, 'CREATE TABLE call_paths (' @@ -414,7 +443,41 @@ if perf_db_export_calls: 'return_id bigint,' 'parent_call_path_id bigint,' 'flags integer,' - 'parent_id bigint)') + 'parent_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') + +do_query(query, 'CREATE TABLE ptwrite (' + 'id bigint NOT NULL,' + 'payload bigint,' + 'exact_ip boolean)') + +do_query(query, 'CREATE TABLE cbr (' + 'id bigint NOT NULL,' + 'cbr integer,' + 'mhz integer,' + 'percent integer)') + +do_query(query, 'CREATE TABLE mwait (' + 'id bigint NOT NULL,' + 'hints integer,' + 'extensions integer)') + +do_query(query, 'CREATE TABLE pwre (' + 'id bigint NOT NULL,' + 'cstate integer,' + 'subcstate integer,' + 'hw boolean)') + +do_query(query, 'CREATE TABLE exstop (' + 'id bigint NOT NULL,' + 'exact_ip boolean)') + +do_query(query, 'CREATE TABLE pwrx (' + 'id bigint NOT NULL,' + 'deepest_cstate integer,' + 'last_cstate integer,' + 'wake_reason integer)') do_query(query, 'CREATE VIEW machines_view AS ' 'SELECT ' @@ -496,6 +559,9 @@ if perf_db_export_calls: 'return_time,' 'return_time - 
call_time AS elapsed_time,' 'branch_count,' + 'insn_count,' + 'cyc_count,' + 'CASE WHEN cyc_count=0 THEN CAST(0 AS NUMERIC(20, 2)) ELSE CAST((CAST(insn_count AS FLOAT) / cyc_count) AS NUMERIC(20, 2)) END AS IPC,' 'call_id,' 'return_id,' 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE CAST ( flags AS VARCHAR(6) ) END AS flags,' @@ -521,9 +587,110 @@ do_query(query, 'CREATE VIEW samples_view AS ' 'to_sym_offset,' '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,' '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,' - 'in_tx' + 'in_tx,' + 'insn_count,' + 'cyc_count,' + 'CASE WHEN cyc_count=0 THEN CAST(0 AS NUMERIC(20, 2)) ELSE CAST((CAST(insn_count AS FLOAT) / cyc_count) AS NUMERIC(20, 2)) END AS IPC' ' FROM samples') +do_query(query, 'CREATE VIEW ptwrite_view AS ' + 'SELECT ' + 'ptwrite.id,' + 'time,' + 'cpu,' + 'to_hex(payload) AS payload_hex,' + 'CASE WHEN exact_ip=FALSE THEN \'False\' ELSE \'True\' END AS exact_ip' + ' FROM ptwrite' + ' INNER JOIN samples ON samples.id = ptwrite.id') + +do_query(query, 'CREATE VIEW cbr_view AS ' + 'SELECT ' + 'cbr.id,' + 'time,' + 'cpu,' + 'cbr,' + 'mhz,' + 'percent' + ' FROM cbr' + ' INNER JOIN samples ON samples.id = cbr.id') + +do_query(query, 'CREATE VIEW mwait_view AS ' + 'SELECT ' + 'mwait.id,' + 'time,' + 'cpu,' + 'to_hex(hints) AS hints_hex,' + 'to_hex(extensions) AS extensions_hex' + ' FROM mwait' + ' INNER JOIN samples ON samples.id = mwait.id') + +do_query(query, 'CREATE VIEW pwre_view AS ' + 'SELECT ' + 'pwre.id,' + 'time,' + 'cpu,' + 'cstate,' + 'subcstate,' + 'CASE WHEN hw=FALSE THEN \'False\' ELSE \'True\' END AS hw' + ' FROM pwre' + ' INNER JOIN samples ON samples.id = pwre.id') + +do_query(query, 'CREATE VIEW exstop_view AS ' + 'SELECT ' + 'exstop.id,' + 'time,' + 'cpu,' + 'CASE WHEN exact_ip=FALSE THEN \'False\' ELSE \'True\' END AS exact_ip' + ' FROM 
exstop' + ' INNER JOIN samples ON samples.id = exstop.id') + +do_query(query, 'CREATE VIEW pwrx_view AS ' + 'SELECT ' + 'pwrx.id,' + 'time,' + 'cpu,' + 'deepest_cstate,' + 'last_cstate,' + 'CASE WHEN wake_reason=1 THEN \'Interrupt\'' + ' WHEN wake_reason=2 THEN \'Timer Deadline\'' + ' WHEN wake_reason=4 THEN \'Monitored Address\'' + ' WHEN wake_reason=8 THEN \'HW\'' + ' ELSE CAST ( wake_reason AS VARCHAR(2) )' + 'END AS wake_reason' + ' FROM pwrx' + ' INNER JOIN samples ON samples.id = pwrx.id') + +do_query(query, 'CREATE VIEW power_events_view AS ' + 'SELECT ' + 'samples.id,' + 'samples.time,' + 'samples.cpu,' + 'selected_events.name AS event,' + 'FORMAT(\'%6s\', cbr.cbr) AS cbr,' + 'FORMAT(\'%6s\', cbr.mhz) AS MHz,' + 'FORMAT(\'%5s\', cbr.percent) AS percent,' + 'to_hex(mwait.hints) AS hints_hex,' + 'to_hex(mwait.extensions) AS extensions_hex,' + 'FORMAT(\'%3s\', pwre.cstate) AS cstate,' + 'FORMAT(\'%3s\', pwre.subcstate) AS subcstate,' + 'CASE WHEN pwre.hw=FALSE THEN \'False\' WHEN pwre.hw=TRUE THEN \'True\' ELSE NULL END AS hw,' + 'CASE WHEN exstop.exact_ip=FALSE THEN \'False\' WHEN exstop.exact_ip=TRUE THEN \'True\' ELSE NULL END AS exact_ip,' + 'FORMAT(\'%3s\', pwrx.deepest_cstate) AS deepest_cstate,' + 'FORMAT(\'%3s\', pwrx.last_cstate) AS last_cstate,' + 'CASE WHEN pwrx.wake_reason=1 THEN \'Interrupt\'' + ' WHEN pwrx.wake_reason=2 THEN \'Timer Deadline\'' + ' WHEN pwrx.wake_reason=4 THEN \'Monitored Address\'' + ' WHEN pwrx.wake_reason=8 THEN \'HW\'' + ' ELSE FORMAT(\'%2s\', pwrx.wake_reason)' + 'END AS wake_reason' + ' FROM cbr' + ' FULL JOIN mwait ON mwait.id = cbr.id' + ' FULL JOIN pwre ON pwre.id = cbr.id' + ' FULL JOIN exstop ON exstop.id = cbr.id' + ' FULL JOIN pwrx ON pwrx.id = cbr.id' + ' INNER JOIN samples ON samples.id = coalesce(cbr.id, mwait.id, pwre.id, exstop.id, pwrx.id)' + ' INNER JOIN selected_events ON selected_events.id = samples.evsel_id' + ' ORDER BY samples.id') file_header = struct.pack("!11sii", b"PGCOPY\n\377\r\n\0", 0, 0) 
file_trailer = b"\377\377" @@ -583,6 +750,12 @@ if perf_db_export_calls or perf_db_export_callchains: call_path_file = open_output_file("call_path_table.bin") if perf_db_export_calls: call_file = open_output_file("call_table.bin") +ptwrite_file = open_output_file("ptwrite_table.bin") +cbr_file = open_output_file("cbr_table.bin") +mwait_file = open_output_file("mwait_table.bin") +pwre_file = open_output_file("pwre_table.bin") +exstop_file = open_output_file("exstop_table.bin") +pwrx_file = open_output_file("pwrx_table.bin") def trace_begin(): printdate("Writing to intermediate files...") @@ -593,13 +766,23 @@ def trace_begin(): comm_table(0, "unknown") dso_table(0, 0, "unknown", "unknown", "") symbol_table(0, 0, 0, 0, 0, "unknown") - sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) if perf_db_export_calls or perf_db_export_callchains: call_path_table(0, 0, 0, 0) - call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) unhandled_count = 0 +def is_table_empty(table_name): + do_query(query, 'SELECT * FROM ' + table_name + ' LIMIT 1'); + if query.next(): + return False + return True + +def drop(table_name): + do_query(query, 'DROP VIEW ' + table_name + '_view'); + do_query(query, 'DROP TABLE ' + table_name); + def trace_end(): printdate("Copying to database...") copy_output_file(evsel_file, "selected_events") @@ -615,6 +798,12 @@ def trace_end(): copy_output_file(call_path_file, "call_paths") if perf_db_export_calls: copy_output_file(call_file, "calls") + copy_output_file(ptwrite_file, "ptwrite") + copy_output_file(cbr_file, "cbr") + copy_output_file(mwait_file, "mwait") + copy_output_file(pwre_file, "pwre") + copy_output_file(exstop_file, "exstop") + copy_output_file(pwrx_file, "pwrx") printdate("Removing intermediate files...") remove_output_file(evsel_file) @@ -630,6 +819,12 @@ def 
trace_end(): remove_output_file(call_path_file) if perf_db_export_calls: remove_output_file(call_file) + remove_output_file(ptwrite_file) + remove_output_file(cbr_file) + remove_output_file(mwait_file) + remove_output_file(pwre_file) + remove_output_file(exstop_file) + remove_output_file(pwrx_file) os.rmdir(output_dir_name) printdate("Adding primary keys") do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)') @@ -645,6 +840,12 @@ def trace_end(): do_query(query, 'ALTER TABLE call_paths ADD PRIMARY KEY (id)') if perf_db_export_calls: do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)') + do_query(query, 'ALTER TABLE ptwrite ADD PRIMARY KEY (id)') + do_query(query, 'ALTER TABLE cbr ADD PRIMARY KEY (id)') + do_query(query, 'ALTER TABLE mwait ADD PRIMARY KEY (id)') + do_query(query, 'ALTER TABLE pwre ADD PRIMARY KEY (id)') + do_query(query, 'ALTER TABLE exstop ADD PRIMARY KEY (id)') + do_query(query, 'ALTER TABLE pwrx ADD PRIMARY KEY (id)') printdate("Adding foreign keys") do_query(query, 'ALTER TABLE threads ' @@ -680,6 +881,30 @@ def trace_end(): 'ADD CONSTRAINT parent_call_pathfk FOREIGN KEY (parent_call_path_id) REFERENCES call_paths (id)') do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') + do_query(query, 'ALTER TABLE ptwrite ' + 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') + do_query(query, 'ALTER TABLE cbr ' + 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') + do_query(query, 'ALTER TABLE mwait ' + 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') + do_query(query, 'ALTER TABLE pwre ' + 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') + do_query(query, 'ALTER TABLE exstop ' + 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') + do_query(query, 'ALTER TABLE pwrx ' + 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') + + printdate("Dropping unused tables") + if 
is_table_empty("ptwrite"): + drop("ptwrite") + if is_table_empty("mwait") and is_table_empty("pwre") and is_table_empty("exstop") and is_table_empty("pwrx"): + do_query(query, 'DROP VIEW power_events_view'); + drop("mwait") + drop("pwre") + drop("exstop") + drop("pwrx") + if is_table_empty("cbr"): + drop("cbr") if (unhandled_count): printdate("Warning: ", unhandled_count, " unhandled events") @@ -747,11 +972,11 @@ def branch_type_table(branch_type, name, *x): value = struct.pack(fmt, 2, 4, branch_type, n, name) branch_type_file.write(value) -def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x): +def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, insn_cnt, cyc_cnt, *x): if branches: - value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id) + value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiqiqiq", 20, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id, 8, insn_cnt, 8, cyc_cnt) else: - value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, 
transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id) + value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiqiqiq", 24, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id, 8, insn_cnt, 8, cyc_cnt) sample_file.write(value) def call_path_table(cp_id, parent_id, symbol_id, ip, *x): @@ -759,7 +984,70 @@ def call_path_table(cp_id, parent_id, symbol_id, ip, *x): value = struct.pack(fmt, 4, 8, cp_id, 8, parent_id, 8, symbol_id, 8, ip) call_path_file.write(value) -def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, parent_id, *x): - fmt = "!hiqiqiqiqiqiqiqiqiqiqiiiq" - value = struct.pack(fmt, 12, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags, 8, parent_id) +def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, parent_id, insn_cnt, cyc_cnt, *x): + fmt = "!hiqiqiqiqiqiqiqiqiqiqiiiqiqiq" + value = struct.pack(fmt, 14, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags, 8, parent_id, 8, insn_cnt, 8, cyc_cnt) call_file.write(value) + +def ptwrite(id, raw_buf): + data = struct.unpack_from("<IQ", raw_buf) + flags = data[0] + payload = data[1] + exact_ip = flags & 1 + value = struct.pack("!hiqiqiB", 3, 8, id, 8, payload, 1, exact_ip) + ptwrite_file.write(value) + +def cbr(id, raw_buf): + data = struct.unpack_from("<BBBBII", raw_buf) + cbr = data[0] + MHz = (data[4] + 500) / 1000 + percent = ((cbr * 1000 / data[2]) + 
5) / 10 + value = struct.pack("!hiqiiiiii", 4, 8, id, 4, cbr, 4, MHz, 4, percent) + cbr_file.write(value) + +def mwait(id, raw_buf): + data = struct.unpack_from("<IQ", raw_buf) + payload = data[1] + hints = payload & 0xff + extensions = (payload >> 32) & 0x3 + value = struct.pack("!hiqiiii", 3, 8, id, 4, hints, 4, extensions) + mwait_file.write(value) + +def pwre(id, raw_buf): + data = struct.unpack_from("<IQ", raw_buf) + payload = data[1] + hw = (payload >> 7) & 1 + cstate = (payload >> 12) & 0xf + subcstate = (payload >> 8) & 0xf + value = struct.pack("!hiqiiiiiB", 4, 8, id, 4, cstate, 4, subcstate, 1, hw) + pwre_file.write(value) + +def exstop(id, raw_buf): + data = struct.unpack_from("<I", raw_buf) + flags = data[0] + exact_ip = flags & 1 + value = struct.pack("!hiqiB", 2, 8, id, 1, exact_ip) + exstop_file.write(value) + +def pwrx(id, raw_buf): + data = struct.unpack_from("<IQ", raw_buf) + payload = data[1] + deepest_cstate = payload & 0xf + last_cstate = (payload >> 4) & 0xf + wake_reason = (payload >> 8) & 0xf + value = struct.pack("!hiqiiiiii", 4, 8, id, 4, deepest_cstate, 4, last_cstate, 4, wake_reason) + pwrx_file.write(value) + +def synth_data(id, config, raw_buf, *x): + if config == 0: + ptwrite(id, raw_buf) + elif config == 1: + mwait(id, raw_buf) + elif config == 2: + pwre(id, raw_buf) + elif config == 3: + exstop(id, raw_buf) + elif config == 4: + pwrx(id, raw_buf) + elif config == 5: + cbr(id, raw_buf) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index bf271fbc3a88..021326c46285 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -21,6 +21,26 @@ import datetime # provides LGPL-licensed Python bindings for Qt. You will also need the package # libqt4-sql-sqlite for Qt sqlite3 support. 
# +# Examples of installing pyside: +# +# ubuntu: +# +# $ sudo apt-get install python-pyside.qtsql libqt4-sql-psql +# +# Alternately, to use Python3 and/or pyside 2, one of the following: +# +# $ sudo apt-get install python3-pyside.qtsql libqt4-sql-psql +# $ sudo apt-get install python-pyside2.qtsql libqt5sql5-psql +# $ sudo apt-get install python3-pyside2.qtsql libqt5sql5-psql +# fedora: +# +# $ sudo yum install python-pyside +# +# Alternately, to use Python3 and/or pyside 2, one of the following: +# $ sudo yum install python3-pyside +# $ pip install --user PySide2 +# $ pip3 install --user PySide2 +# # An example of using this script with Intel PT: # # $ perf record -e intel_pt//u ls @@ -49,7 +69,16 @@ import datetime # difference is the 'transaction' column of the 'samples' table which is # renamed 'transaction_' in sqlite because 'transaction' is a reserved word. -from PySide.QtSql import * +pyside_version_1 = True +if not "pyside-version-1" in sys.argv: + try: + from PySide2.QtSql import * + pyside_version_1 = False + except: + pass + +if pyside_version_1: + from PySide.QtSql import * sys.path.append(os.environ['PERF_EXEC_PATH'] + \ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') @@ -69,11 +98,12 @@ def printdate(*args, **kw_args): print(datetime.datetime.today(), *args, sep=' ', **kw_args) def usage(): - printerr("Usage is: export-to-sqlite.py <database name> [<columns>] [<calls>] [<callchains>]"); - printerr("where: columns 'all' or 'branches'"); - printerr(" calls 'calls' => create calls and call_paths table"); - printerr(" callchains 'callchains' => create call_paths table"); - raise Exception("Too few arguments") + printerr("Usage is: export-to-sqlite.py <database name> [<columns>] [<calls>] [<callchains>] [<pyside-version-1>]"); + printerr("where: columns 'all' or 'branches'"); + printerr(" calls 'calls' => create calls and call_paths table"); + printerr(" callchains 'callchains' => create call_paths table"); + printerr(" pyside-version-1 
'pyside-version-1' => use pyside version 1"); + raise Exception("Too few or bad arguments") if (len(sys.argv) < 2): usage() @@ -95,6 +125,8 @@ for i in range(3,len(sys.argv)): perf_db_export_calls = True elif (sys.argv[i] == "callchains"): perf_db_export_callchains = True + elif (sys.argv[i] == "pyside-version-1"): + pass else: usage() @@ -186,7 +218,9 @@ if branches: 'to_ip bigint,' 'branch_type integer,' 'in_tx boolean,' - 'call_path_id bigint)') + 'call_path_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') else: do_query(query, 'CREATE TABLE samples (' 'id integer NOT NULL PRIMARY KEY,' @@ -210,7 +244,9 @@ else: 'data_src bigint,' 'branch_type integer,' 'in_tx boolean,' - 'call_path_id bigint)') + 'call_path_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') if perf_db_export_calls or perf_db_export_callchains: do_query(query, 'CREATE TABLE call_paths (' @@ -231,7 +267,41 @@ if perf_db_export_calls: 'return_id bigint,' 'parent_call_path_id bigint,' 'flags integer,' - 'parent_id bigint)') + 'parent_id bigint,' + 'insn_count bigint,' + 'cyc_count bigint)') + +do_query(query, 'CREATE TABLE ptwrite (' + 'id integer NOT NULL PRIMARY KEY,' + 'payload bigint,' + 'exact_ip integer)') + +do_query(query, 'CREATE TABLE cbr (' + 'id integer NOT NULL PRIMARY KEY,' + 'cbr integer,' + 'mhz integer,' + 'percent integer)') + +do_query(query, 'CREATE TABLE mwait (' + 'id integer NOT NULL PRIMARY KEY,' + 'hints integer,' + 'extensions integer)') + +do_query(query, 'CREATE TABLE pwre (' + 'id integer NOT NULL PRIMARY KEY,' + 'cstate integer,' + 'subcstate integer,' + 'hw integer)') + +do_query(query, 'CREATE TABLE exstop (' + 'id integer NOT NULL PRIMARY KEY,' + 'exact_ip integer)') + +do_query(query, 'CREATE TABLE pwrx (' + 'id integer NOT NULL PRIMARY KEY,' + 'deepest_cstate integer,' + 'last_cstate integer,' + 'wake_reason integer)') # printf was added to sqlite in version 3.8.3 sqlite_has_printf = False @@ -327,6 +397,9 @@ if perf_db_export_calls: 'return_time,' 
'return_time - call_time AS elapsed_time,' 'branch_count,' + 'insn_count,' + 'cyc_count,' + 'CASE WHEN cyc_count=0 THEN CAST(0 AS FLOAT) ELSE ROUND(CAST(insn_count AS FLOAT) / cyc_count, 2) END AS IPC,' 'call_id,' 'return_id,' 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' @@ -352,9 +425,108 @@ do_query(query, 'CREATE VIEW samples_view AS ' 'to_sym_offset,' '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,' '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,' - 'in_tx' + 'in_tx,' + 'insn_count,' + 'cyc_count,' + 'CASE WHEN cyc_count=0 THEN CAST(0 AS FLOAT) ELSE ROUND(CAST(insn_count AS FLOAT) / cyc_count, 2) END AS IPC' ' FROM samples') +do_query(query, 'CREATE VIEW ptwrite_view AS ' + 'SELECT ' + 'ptwrite.id,' + 'time,' + 'cpu,' + + emit_to_hex('payload') + ' AS payload_hex,' + 'CASE WHEN exact_ip=0 THEN \'False\' ELSE \'True\' END AS exact_ip' + ' FROM ptwrite' + ' INNER JOIN samples ON samples.id = ptwrite.id') + +do_query(query, 'CREATE VIEW cbr_view AS ' + 'SELECT ' + 'cbr.id,' + 'time,' + 'cpu,' + 'cbr,' + 'mhz,' + 'percent' + ' FROM cbr' + ' INNER JOIN samples ON samples.id = cbr.id') + +do_query(query, 'CREATE VIEW mwait_view AS ' + 'SELECT ' + 'mwait.id,' + 'time,' + 'cpu,' + + emit_to_hex('hints') + ' AS hints_hex,' + + emit_to_hex('extensions') + ' AS extensions_hex' + ' FROM mwait' + ' INNER JOIN samples ON samples.id = mwait.id') + +do_query(query, 'CREATE VIEW pwre_view AS ' + 'SELECT ' + 'pwre.id,' + 'time,' + 'cpu,' + 'cstate,' + 'subcstate,' + 'CASE WHEN hw=0 THEN \'False\' ELSE \'True\' END AS hw' + ' FROM pwre' + ' INNER JOIN samples ON samples.id = pwre.id') + +do_query(query, 'CREATE VIEW exstop_view AS ' + 'SELECT ' + 'exstop.id,' + 'time,' + 'cpu,' + 'CASE WHEN exact_ip=0 THEN \'False\' ELSE \'True\' END AS exact_ip' + ' FROM exstop' + ' INNER JOIN samples ON 
samples.id = exstop.id') + +do_query(query, 'CREATE VIEW pwrx_view AS ' + 'SELECT ' + 'pwrx.id,' + 'time,' + 'cpu,' + 'deepest_cstate,' + 'last_cstate,' + 'CASE WHEN wake_reason=1 THEN \'Interrupt\'' + ' WHEN wake_reason=2 THEN \'Timer Deadline\'' + ' WHEN wake_reason=4 THEN \'Monitored Address\'' + ' WHEN wake_reason=8 THEN \'HW\'' + ' ELSE wake_reason ' + 'END AS wake_reason' + ' FROM pwrx' + ' INNER JOIN samples ON samples.id = pwrx.id') + +do_query(query, 'CREATE VIEW power_events_view AS ' + 'SELECT ' + 'samples.id,' + 'time,' + 'cpu,' + 'selected_events.name AS event,' + 'CASE WHEN selected_events.name=\'cbr\' THEN (SELECT cbr FROM cbr WHERE cbr.id = samples.id) ELSE "" END AS cbr,' + 'CASE WHEN selected_events.name=\'cbr\' THEN (SELECT mhz FROM cbr WHERE cbr.id = samples.id) ELSE "" END AS mhz,' + 'CASE WHEN selected_events.name=\'cbr\' THEN (SELECT percent FROM cbr WHERE cbr.id = samples.id) ELSE "" END AS percent,' + 'CASE WHEN selected_events.name=\'mwait\' THEN (SELECT ' + emit_to_hex('hints') + ' FROM mwait WHERE mwait.id = samples.id) ELSE "" END AS hints_hex,' + 'CASE WHEN selected_events.name=\'mwait\' THEN (SELECT ' + emit_to_hex('extensions') + ' FROM mwait WHERE mwait.id = samples.id) ELSE "" END AS extensions_hex,' + 'CASE WHEN selected_events.name=\'pwre\' THEN (SELECT cstate FROM pwre WHERE pwre.id = samples.id) ELSE "" END AS cstate,' + 'CASE WHEN selected_events.name=\'pwre\' THEN (SELECT subcstate FROM pwre WHERE pwre.id = samples.id) ELSE "" END AS subcstate,' + 'CASE WHEN selected_events.name=\'pwre\' THEN (SELECT hw FROM pwre WHERE pwre.id = samples.id) ELSE "" END AS hw,' + 'CASE WHEN selected_events.name=\'exstop\' THEN (SELECT exact_ip FROM exstop WHERE exstop.id = samples.id) ELSE "" END AS exact_ip,' + 'CASE WHEN selected_events.name=\'pwrx\' THEN (SELECT deepest_cstate FROM pwrx WHERE pwrx.id = samples.id) ELSE "" END AS deepest_cstate,' + 'CASE WHEN selected_events.name=\'pwrx\' THEN (SELECT last_cstate FROM pwrx WHERE pwrx.id = 
samples.id) ELSE "" END AS last_cstate,' + 'CASE WHEN selected_events.name=\'pwrx\' THEN (SELECT ' + 'CASE WHEN wake_reason=1 THEN \'Interrupt\'' + ' WHEN wake_reason=2 THEN \'Timer Deadline\'' + ' WHEN wake_reason=4 THEN \'Monitored Address\'' + ' WHEN wake_reason=8 THEN \'HW\'' + ' ELSE wake_reason ' + 'END' + ' FROM pwrx WHERE pwrx.id = samples.id) ELSE "" END AS wake_reason' + ' FROM samples' + ' INNER JOIN selected_events ON selected_events.id = evsel_id' + ' WHERE selected_events.name IN (\'cbr\',\'mwait\',\'exstop\',\'pwre\',\'pwrx\')') + do_query(query, 'END TRANSACTION') evsel_query = QSqlQuery(db) @@ -375,15 +547,27 @@ branch_type_query = QSqlQuery(db) branch_type_query.prepare("INSERT INTO branch_types VALUES (?, ?)") sample_query = QSqlQuery(db) if branches: - sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") + sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") else: - sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") + sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") if perf_db_export_calls or perf_db_export_callchains: call_path_query = QSqlQuery(db) call_path_query.prepare("INSERT INTO call_paths VALUES (?, ?, ?, ?)") if perf_db_export_calls: call_query = QSqlQuery(db) - call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") + call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") +ptwrite_query = QSqlQuery(db) +ptwrite_query.prepare("INSERT INTO ptwrite VALUES (?, ?, ?)") +cbr_query = QSqlQuery(db) +cbr_query.prepare("INSERT INTO cbr VALUES (?, ?, ?, ?)") +mwait_query = QSqlQuery(db) +mwait_query.prepare("INSERT INTO mwait VALUES (?, ?, ?)") +pwre_query = QSqlQuery(db) +pwre_query.prepare("INSERT INTO pwre VALUES (?, ?, ?, ?)") 
+exstop_query = QSqlQuery(db) +exstop_query.prepare("INSERT INTO exstop VALUES (?, ?)") +pwrx_query = QSqlQuery(db) +pwrx_query.prepare("INSERT INTO pwrx VALUES (?, ?, ?, ?)") def trace_begin(): printdate("Writing records...") @@ -395,13 +579,23 @@ def trace_begin(): comm_table(0, "unknown") dso_table(0, 0, "unknown", "unknown", "") symbol_table(0, 0, 0, 0, 0, "unknown") - sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) if perf_db_export_calls or perf_db_export_callchains: call_path_table(0, 0, 0, 0) - call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) unhandled_count = 0 +def is_table_empty(table_name): + do_query(query, 'SELECT * FROM ' + table_name + ' LIMIT 1'); + if query.next(): + return False + return True + +def drop(table_name): + do_query(query, 'DROP VIEW ' + table_name + '_view'); + do_query(query, 'DROP TABLE ' + table_name); + def trace_end(): do_query(query, 'END TRANSACTION') @@ -410,6 +604,18 @@ def trace_end(): do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') + printdate("Dropping unused tables") + if is_table_empty("ptwrite"): + drop("ptwrite") + if is_table_empty("mwait") and is_table_empty("pwre") and is_table_empty("exstop") and is_table_empty("pwrx"): + do_query(query, 'DROP VIEW power_events_view'); + drop("mwait") + drop("pwre") + drop("exstop") + drop("pwrx") + if is_table_empty("cbr"): + drop("cbr") + if (unhandled_count): printdate("Warning: ", unhandled_count, " unhandled events") printdate("Done") @@ -454,14 +660,91 @@ def sample_table(*x): if branches: for xx in x[0:15]: sample_query.addBindValue(str(xx)) - for xx in x[19:22]: + for xx in x[19:24]: sample_query.addBindValue(str(xx)) do_query_(sample_query) else: - bind_exec(sample_query, 22, x) + bind_exec(sample_query, 
24, x) def call_path_table(*x): bind_exec(call_path_query, 4, x) def call_return_table(*x): - bind_exec(call_query, 12, x) + bind_exec(call_query, 14, x) + +def ptwrite(id, raw_buf): + data = struct.unpack_from("<IQ", raw_buf) + flags = data[0] + payload = data[1] + exact_ip = flags & 1 + ptwrite_query.addBindValue(str(id)) + ptwrite_query.addBindValue(str(payload)) + ptwrite_query.addBindValue(str(exact_ip)) + do_query_(ptwrite_query) + +def cbr(id, raw_buf): + data = struct.unpack_from("<BBBBII", raw_buf) + cbr = data[0] + MHz = (data[4] + 500) / 1000 + percent = ((cbr * 1000 / data[2]) + 5) / 10 + cbr_query.addBindValue(str(id)) + cbr_query.addBindValue(str(cbr)) + cbr_query.addBindValue(str(MHz)) + cbr_query.addBindValue(str(percent)) + do_query_(cbr_query) + +def mwait(id, raw_buf): + data = struct.unpack_from("<IQ", raw_buf) + payload = data[1] + hints = payload & 0xff + extensions = (payload >> 32) & 0x3 + mwait_query.addBindValue(str(id)) + mwait_query.addBindValue(str(hints)) + mwait_query.addBindValue(str(extensions)) + do_query_(mwait_query) + +def pwre(id, raw_buf): + data = struct.unpack_from("<IQ", raw_buf) + payload = data[1] + hw = (payload >> 7) & 1 + cstate = (payload >> 12) & 0xf + subcstate = (payload >> 8) & 0xf + pwre_query.addBindValue(str(id)) + pwre_query.addBindValue(str(cstate)) + pwre_query.addBindValue(str(subcstate)) + pwre_query.addBindValue(str(hw)) + do_query_(pwre_query) + +def exstop(id, raw_buf): + data = struct.unpack_from("<I", raw_buf) + flags = data[0] + exact_ip = flags & 1 + exstop_query.addBindValue(str(id)) + exstop_query.addBindValue(str(exact_ip)) + do_query_(exstop_query) + +def pwrx(id, raw_buf): + data = struct.unpack_from("<IQ", raw_buf) + payload = data[1] + deepest_cstate = payload & 0xf + last_cstate = (payload >> 4) & 0xf + wake_reason = (payload >> 8) & 0xf + pwrx_query.addBindValue(str(id)) + pwrx_query.addBindValue(str(deepest_cstate)) + pwrx_query.addBindValue(str(last_cstate)) + 
pwrx_query.addBindValue(str(wake_reason)) + do_query_(pwrx_query) + +def synth_data(id, config, raw_buf, *x): + if config == 0: + ptwrite(id, raw_buf) + elif config == 1: + mwait(id, raw_buf) + elif config == 2: + pwre(id, raw_buf) + elif config == 3: + exstop(id, raw_buf) + elif config == 4: + pwrx(id, raw_buf) + elif config == 5: + cbr(id, raw_buf) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 74ef92f1d19a..6e7934f2ac9a 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0 # exported-sql-viewer.py: view data from sql database # Copyright (c) 2014-2018, Intel Corporation. @@ -91,6 +91,7 @@ from __future__ import print_function import sys +import argparse import weakref import threading import string @@ -104,10 +105,23 @@ except ImportError: glb_nsz = 16 import re import os -from PySide.QtCore import * -from PySide.QtGui import * -from PySide.QtSql import * + pyside_version_1 = True +if not "--pyside-version-1" in sys.argv: + try: + from PySide2.QtCore import * + from PySide2.QtGui import * + from PySide2.QtSql import * + from PySide2.QtWidgets import * + pyside_version_1 = False + except: + pass + +if pyside_version_1: + from PySide.QtCore import * + from PySide.QtGui import * + from PySide.QtSql import * + from decimal import * from ctypes import * from multiprocessing import Process, Array, Value, Event @@ -186,9 +200,10 @@ class Thread(QThread): class TreeModel(QAbstractItemModel): - def __init__(self, glb, parent=None): + def __init__(self, glb, params, parent=None): super(TreeModel, self).__init__(parent) self.glb = glb + self.params = params self.root = self.GetRoot() self.last_row_read = 0 @@ -385,6 +400,7 @@ class FindBar(): def Activate(self): self.bar.show() + self.textbox.lineEdit().selectAll() self.textbox.setFocus() def 
Deactivate(self): @@ -449,13 +465,18 @@ class FindBar(): class CallGraphLevelItemBase(object): - def __init__(self, glb, row, parent_item): + def __init__(self, glb, params, row, parent_item): self.glb = glb + self.params = params self.row = row self.parent_item = parent_item self.query_done = False; self.child_count = 0 self.child_items = [] + if parent_item: + self.level = parent_item.level + 1 + else: + self.level = 0 def getChildItem(self, row): return self.child_items[row] @@ -485,18 +506,24 @@ class CallGraphLevelItemBase(object): class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): - def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item): - super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item) + def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, time, insn_cnt, cyc_cnt, branch_count, parent_item): + super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item) self.comm_id = comm_id self.thread_id = thread_id self.call_path_id = call_path_id + self.insn_cnt = insn_cnt + self.cyc_cnt = cyc_cnt self.branch_count = branch_count self.time = time def Select(self): self.query_done = True; query = QSqlQuery(self.glb.db) - QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)" + if self.params.have_ipc: + ipc_str = ", SUM(insn_count), SUM(cyc_count)" + else: + ipc_str = "" + QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time)" + ipc_str + ", SUM(branch_count)" " FROM calls" " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" @@ -507,7 +534,15 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): " GROUP BY call_path_id, name, short_name" " ORDER BY call_path_id") while query.next(): - child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, 
self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) + if self.params.have_ipc: + insn_cnt = int(query.value(5)) + cyc_cnt = int(query.value(6)) + branch_count = int(query.value(7)) + else: + insn_cnt = 0 + cyc_cnt = 0 + branch_count = int(query.value(5)) + child_item = CallGraphLevelThreeItem(self.glb, self.params, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), insn_cnt, cyc_cnt, branch_count, self) self.child_items.append(child_item) self.child_count += 1 @@ -515,37 +550,57 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase): - def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item): - super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item) + def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, name, dso, count, time, insn_cnt, cyc_cnt, branch_count, parent_item): + super(CallGraphLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, call_path_id, time, insn_cnt, cyc_cnt, branch_count, parent_item) dso = dsoname(dso) - self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] + if self.params.have_ipc: + insn_pcnt = PercentToOneDP(insn_cnt, parent_item.insn_cnt) + cyc_pcnt = PercentToOneDP(cyc_cnt, parent_item.cyc_cnt) + br_pcnt = PercentToOneDP(branch_count, parent_item.branch_count) + ipc = CalcIPC(cyc_cnt, insn_cnt) + self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ] + else: + self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, 
parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] self.dbid = call_path_id # Context-sensitive call graph data model level two item class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase): - def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item): - super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 0, parent_item) - self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] + def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item): + super(CallGraphLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 1, 0, 0, 0, 0, parent_item) + if self.params.have_ipc: + self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", "", "", "", "", "", ""] + else: + self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] self.dbid = thread_id def Select(self): super(CallGraphLevelTwoItem, self).Select() for child_item in self.child_items: self.time += child_item.time + self.insn_cnt += child_item.insn_cnt + self.cyc_cnt += child_item.cyc_cnt self.branch_count += child_item.branch_count for child_item in self.child_items: child_item.data[4] = PercentToOneDP(child_item.time, self.time) - child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) + if self.params.have_ipc: + child_item.data[6] = PercentToOneDP(child_item.insn_cnt, self.insn_cnt) + child_item.data[8] = PercentToOneDP(child_item.cyc_cnt, self.cyc_cnt) + child_item.data[11] = PercentToOneDP(child_item.branch_count, self.branch_count) + else: + child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) # Context-sensitive call graph data model level one item class CallGraphLevelOneItem(CallGraphLevelItemBase): - def __init__(self, glb, row, comm_id, comm, parent_item): - super(CallGraphLevelOneItem, self).__init__(glb, row, parent_item) - self.data = [comm, "", "", "", "", "", ""] + def __init__(self, glb, params, row, comm_id, comm, 
parent_item): + super(CallGraphLevelOneItem, self).__init__(glb, params, row, parent_item) + if self.params.have_ipc: + self.data = [comm, "", "", "", "", "", "", "", "", "", "", ""] + else: + self.data = [comm, "", "", "", "", "", ""] self.dbid = comm_id def Select(self): @@ -556,7 +611,7 @@ class CallGraphLevelOneItem(CallGraphLevelItemBase): " INNER JOIN threads ON thread_id = threads.id" " WHERE comm_id = " + str(self.dbid)) while query.next(): - child_item = CallGraphLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) + child_item = CallGraphLevelTwoItem(self.glb, self.params, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) self.child_items.append(child_item) self.child_count += 1 @@ -564,8 +619,8 @@ class CallGraphLevelOneItem(CallGraphLevelItemBase): class CallGraphRootItem(CallGraphLevelItemBase): - def __init__(self, glb): - super(CallGraphRootItem, self).__init__(glb, 0, None) + def __init__(self, glb, params): + super(CallGraphRootItem, self).__init__(glb, params, 0, None) self.dbid = 0 self.query_done = True; query = QSqlQuery(glb.db) @@ -573,16 +628,23 @@ class CallGraphRootItem(CallGraphLevelItemBase): while query.next(): if not query.value(0): continue - child_item = CallGraphLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self) + child_item = CallGraphLevelOneItem(glb, params, self.child_count, query.value(0), query.value(1), self) self.child_items.append(child_item) self.child_count += 1 +# Call graph model parameters + +class CallGraphModelParams(): + + def __init__(self, glb, parent=None): + self.have_ipc = IsSelectable(glb.db, "calls", columns = "insn_count, cyc_count") + # Context-sensitive call graph data model base class CallGraphModelBase(TreeModel): def __init__(self, glb, parent=None): - super(CallGraphModelBase, self).__init__(glb, parent) + super(CallGraphModelBase, self).__init__(glb, CallGraphModelParams(glb), parent) def 
FindSelect(self, value, pattern, query): if pattern: @@ -664,17 +726,26 @@ class CallGraphModel(CallGraphModelBase): super(CallGraphModel, self).__init__(glb, parent) def GetRoot(self): - return CallGraphRootItem(self.glb) + return CallGraphRootItem(self.glb, self.params) def columnCount(self, parent=None): - return 7 + if self.params.have_ipc: + return 12 + else: + return 7 def columnHeader(self, column): - headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] + if self.params.have_ipc: + headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Insn Cnt", "Insn Cnt (%)", "Cyc Cnt", "Cyc Cnt (%)", "IPC", "Branch Count ", "Branch Count (%) "] + else: + headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] return headers[column] def columnAlignment(self, column): - alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] + if self.params.have_ipc: + alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] + else: + alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] return alignment[column] def DoFindSelect(self, query, match): @@ -711,11 +782,13 @@ class CallGraphModel(CallGraphModelBase): class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): - def __init__(self, glb, row, comm_id, thread_id, calls_id, time, branch_count, parent_item): - super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, row, parent_item) + def __init__(self, glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item): + super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item) self.comm_id = comm_id self.thread_id = thread_id self.calls_id = 
calls_id + self.insn_cnt = insn_cnt + self.cyc_cnt = cyc_cnt self.branch_count = branch_count self.time = time @@ -725,8 +798,12 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): comm_thread = " AND comm_id = " + str(self.comm_id) + " AND thread_id = " + str(self.thread_id) else: comm_thread = "" + if self.params.have_ipc: + ipc_str = ", insn_count, cyc_count" + else: + ipc_str = "" query = QSqlQuery(self.glb.db) - QueryExec(query, "SELECT calls.id, name, short_name, call_time, return_time - call_time, branch_count" + QueryExec(query, "SELECT calls.id, name, short_name, call_time, return_time - call_time" + ipc_str + ", branch_count" " FROM calls" " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" @@ -734,7 +811,15 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): " WHERE calls.parent_id = " + str(self.calls_id) + comm_thread + " ORDER BY call_time, calls.id") while query.next(): - child_item = CallTreeLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) + if self.params.have_ipc: + insn_cnt = int(query.value(5)) + cyc_cnt = int(query.value(6)) + branch_count = int(query.value(7)) + else: + insn_cnt = 0 + cyc_cnt = 0 + branch_count = int(query.value(5)) + child_item = CallTreeLevelThreeItem(self.glb, self.params, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), insn_cnt, cyc_cnt, branch_count, self) self.child_items.append(child_item) self.child_count += 1 @@ -742,37 +827,57 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase): - def __init__(self, glb, row, comm_id, thread_id, calls_id, name, dso, count, time, branch_count, parent_item): - super(CallTreeLevelThreeItem, self).__init__(glb, 
row, comm_id, thread_id, calls_id, time, branch_count, parent_item) + def __init__(self, glb, params, row, comm_id, thread_id, calls_id, name, dso, count, time, insn_cnt, cyc_cnt, branch_count, parent_item): + super(CallTreeLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item) dso = dsoname(dso) - self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] + if self.params.have_ipc: + insn_pcnt = PercentToOneDP(insn_cnt, parent_item.insn_cnt) + cyc_pcnt = PercentToOneDP(cyc_cnt, parent_item.cyc_cnt) + br_pcnt = PercentToOneDP(branch_count, parent_item.branch_count) + ipc = CalcIPC(cyc_cnt, insn_cnt) + self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ] + else: + self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] self.dbid = calls_id # Call tree data model level two item class CallTreeLevelTwoItem(CallTreeLevelTwoPlusItemBase): - def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item): - super(CallTreeLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 0, 0, 0, parent_item) - self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] + def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item): + super(CallTreeLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 0, 0, 0, 0, 0, parent_item) + if self.params.have_ipc: + self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", "", "", "", "", "", ""] + else: + self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] self.dbid = thread_id def Select(self): super(CallTreeLevelTwoItem, self).Select() for child_item in self.child_items: 
self.time += child_item.time + self.insn_cnt += child_item.insn_cnt + self.cyc_cnt += child_item.cyc_cnt self.branch_count += child_item.branch_count for child_item in self.child_items: child_item.data[4] = PercentToOneDP(child_item.time, self.time) - child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) + if self.params.have_ipc: + child_item.data[6] = PercentToOneDP(child_item.insn_cnt, self.insn_cnt) + child_item.data[8] = PercentToOneDP(child_item.cyc_cnt, self.cyc_cnt) + child_item.data[11] = PercentToOneDP(child_item.branch_count, self.branch_count) + else: + child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) # Call tree data model level one item class CallTreeLevelOneItem(CallGraphLevelItemBase): - def __init__(self, glb, row, comm_id, comm, parent_item): - super(CallTreeLevelOneItem, self).__init__(glb, row, parent_item) - self.data = [comm, "", "", "", "", "", ""] + def __init__(self, glb, params, row, comm_id, comm, parent_item): + super(CallTreeLevelOneItem, self).__init__(glb, params, row, parent_item) + if self.params.have_ipc: + self.data = [comm, "", "", "", "", "", "", "", "", "", "", ""] + else: + self.data = [comm, "", "", "", "", "", ""] self.dbid = comm_id def Select(self): @@ -783,7 +888,7 @@ class CallTreeLevelOneItem(CallGraphLevelItemBase): " INNER JOIN threads ON thread_id = threads.id" " WHERE comm_id = " + str(self.dbid)) while query.next(): - child_item = CallTreeLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) + child_item = CallTreeLevelTwoItem(self.glb, self.params, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) self.child_items.append(child_item) self.child_count += 1 @@ -791,8 +896,8 @@ class CallTreeLevelOneItem(CallGraphLevelItemBase): class CallTreeRootItem(CallGraphLevelItemBase): - def __init__(self, glb): - super(CallTreeRootItem, self).__init__(glb, 0, None) + def __init__(self, 
glb, params): + super(CallTreeRootItem, self).__init__(glb, params, 0, None) self.dbid = 0 self.query_done = True; query = QSqlQuery(glb.db) @@ -800,7 +905,7 @@ class CallTreeRootItem(CallGraphLevelItemBase): while query.next(): if not query.value(0): continue - child_item = CallTreeLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self) + child_item = CallTreeLevelOneItem(glb, params, self.child_count, query.value(0), query.value(1), self) self.child_items.append(child_item) self.child_count += 1 @@ -812,17 +917,26 @@ class CallTreeModel(CallGraphModelBase): super(CallTreeModel, self).__init__(glb, parent) def GetRoot(self): - return CallTreeRootItem(self.glb) + return CallTreeRootItem(self.glb, self.params) def columnCount(self, parent=None): - return 7 + if self.params.have_ipc: + return 12 + else: + return 7 def columnHeader(self, column): - headers = ["Call Path", "Object", "Call Time", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] + if self.params.have_ipc: + headers = ["Call Path", "Object", "Call Time", "Time (ns) ", "Time (%) ", "Insn Cnt", "Insn Cnt (%)", "Cyc Cnt", "Cyc Cnt (%)", "IPC", "Branch Count ", "Branch Count (%) "] + else: + headers = ["Call Path", "Object", "Call Time", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] return headers[column] def columnAlignment(self, column): - alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] + if self.params.have_ipc: + alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] + else: + alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] return alignment[column] def DoFindSelect(self, query, match): @@ -877,9 +991,14 @@ class TreeWindowBase(QMdiSubWindow): super(TreeWindowBase, 
self).__init__(parent) self.model = None - self.view = None self.find_bar = None + self.view = QTreeView() + self.view.setSelectionMode(QAbstractItemView.ContiguousSelection) + self.view.CopyCellsToClipboard = CopyTreeCellsToClipboard + + self.context_menu = TreeContextMenu(self.view) + def DisplayFound(self, ids): if not len(ids): return False @@ -921,7 +1040,6 @@ class CallGraphWindow(TreeWindowBase): self.model = LookupCreateModel("Context-Sensitive Call Graph", lambda x=glb: CallGraphModel(x)) - self.view = QTreeView() self.view.setModel(self.model) for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)): @@ -944,7 +1062,6 @@ class CallTreeWindow(TreeWindowBase): self.model = LookupCreateModel("Call Tree", lambda x=glb: CallTreeModel(x)) - self.view = QTreeView() self.view.setModel(self.model) for c, w in ((0, 230), (1, 100), (2, 100), (3, 70), (4, 70), (5, 100)): @@ -1348,11 +1465,11 @@ class FetchMoreRecordsBar(): class BranchLevelTwoItem(): - def __init__(self, row, text, parent_item): + def __init__(self, row, col, text, parent_item): self.row = row self.parent_item = parent_item - self.data = [""] * 8 - self.data[7] = text + self.data = [""] * (col + 1) + self.data[col] = text self.level = 2 def getParentItem(self): @@ -1384,6 +1501,7 @@ class BranchLevelOneItem(): self.dbid = data[0] self.level = 1 self.query_done = False + self.br_col = len(self.data) - 1 def getChildItem(self, row): return self.child_items[row] @@ -1464,7 +1582,7 @@ class BranchLevelOneItem(): while k < 15: byte_str += " " k += 1 - self.child_items.append(BranchLevelTwoItem(0, byte_str + " " + text, self)) + self.child_items.append(BranchLevelTwoItem(0, self.br_col, byte_str + " " + text, self)) self.child_count += 1 else: return @@ -1515,16 +1633,37 @@ class BranchRootItem(): def getData(self, column): return "" +# Calculate instructions per cycle + +def CalcIPC(cyc_cnt, insn_cnt): + if cyc_cnt and insn_cnt: + ipc = Decimal(float(insn_cnt) / cyc_cnt) + ipc = 
str(ipc.quantize(Decimal(".01"), rounding=ROUND_HALF_UP)) + else: + ipc = "0" + return ipc + # Branch data preparation -def BranchDataPrep(query): - data = [] - for i in xrange(0, 8): - data.append(query.value(i)) +def BranchDataPrepBr(query, data): data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) + " (" + dsoname(query.value(11)) + ")" + " -> " + tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) + " (" + dsoname(query.value(15)) + ")") + +def BranchDataPrepIPC(query, data): + insn_cnt = query.value(16) + cyc_cnt = query.value(17) + ipc = CalcIPC(cyc_cnt, insn_cnt) + data.append(insn_cnt) + data.append(cyc_cnt) + data.append(ipc) + +def BranchDataPrep(query): + data = [] + for i in xrange(0, 8): + data.append(query.value(i)) + BranchDataPrepBr(query, data) return data def BranchDataPrepWA(query): @@ -1534,10 +1673,26 @@ def BranchDataPrepWA(query): data.append("{:>19}".format(query.value(1))) for i in xrange(2, 8): data.append(query.value(i)) - data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) + - " (" + dsoname(query.value(11)) + ")" + " -> " + - tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) + - " (" + dsoname(query.value(15)) + ")") + BranchDataPrepBr(query, data) + return data + +def BranchDataWithIPCPrep(query): + data = [] + for i in xrange(0, 8): + data.append(query.value(i)) + BranchDataPrepIPC(query, data) + BranchDataPrepBr(query, data) + return data + +def BranchDataWithIPCPrepWA(query): + data = [] + data.append(query.value(0)) + # Workaround pyside failing to handle large integers (i.e. 
time) in python3 by converting to a string + data.append("{:>19}".format(query.value(1))) + for i in xrange(2, 8): + data.append(query.value(i)) + BranchDataPrepIPC(query, data) + BranchDataPrepBr(query, data) return data # Branch data model @@ -1547,14 +1702,24 @@ class BranchModel(TreeModel): progress = Signal(object) def __init__(self, glb, event_id, where_clause, parent=None): - super(BranchModel, self).__init__(glb, parent) + super(BranchModel, self).__init__(glb, None, parent) self.event_id = event_id self.more = True self.populated = 0 + self.have_ipc = IsSelectable(glb.db, "samples", columns = "insn_count, cyc_count") + if self.have_ipc: + select_ipc = ", insn_count, cyc_count" + prep_fn = BranchDataWithIPCPrep + prep_wa_fn = BranchDataWithIPCPrepWA + else: + select_ipc = "" + prep_fn = BranchDataPrep + prep_wa_fn = BranchDataPrepWA sql = ("SELECT samples.id, time, cpu, comm, pid, tid, branch_types.name," " CASE WHEN in_tx = '0' THEN 'No' ELSE 'Yes' END," " ip, symbols.name, sym_offset, dsos.short_name," " to_ip, to_symbols.name, to_sym_offset, to_dsos.short_name" + + select_ipc + " FROM samples" " INNER JOIN comms ON comm_id = comms.id" " INNER JOIN threads ON thread_id = threads.id" @@ -1568,9 +1733,9 @@ class BranchModel(TreeModel): " ORDER BY samples.id" " LIMIT " + str(glb_chunk_sz)) if pyside_version_1 and sys.version_info[0] == 3: - prep = BranchDataPrepWA + prep = prep_fn else: - prep = BranchDataPrep + prep = prep_wa_fn self.fetcher = SQLFetcher(glb, sql, prep, self.AddSample) self.fetcher.done.connect(self.Update) self.fetcher.Fetch(glb_chunk_sz) @@ -1579,13 +1744,23 @@ class BranchModel(TreeModel): return BranchRootItem() def columnCount(self, parent=None): - return 8 + if self.have_ipc: + return 11 + else: + return 8 def columnHeader(self, column): - return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column] + if self.have_ipc: + return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Insn Cnt", 
"Cyc Cnt", "IPC", "Branch")[column] + else: + return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column] def columnFont(self, column): - if column != 7: + if self.have_ipc: + br_col = 10 + else: + br_col = 7 + if column != br_col: return None return QFont("Monospace") @@ -1649,10 +1824,14 @@ class BranchWindow(QMdiSubWindow): self.view = QTreeView() self.view.setUniformRowHeights(True) + self.view.setSelectionMode(QAbstractItemView.ContiguousSelection) + self.view.CopyCellsToClipboard = CopyTreeCellsToClipboard self.view.setModel(self.model) self.ResizeColumnsToContents() + self.context_menu = TreeContextMenu(self.view) + self.find_bar = FindBar(self, self, True) self.finder = ChildDataItemFinder(self.model.root) @@ -2089,10 +2268,10 @@ def GetEventList(db): # Is a table selectable -def IsSelectable(db, table, sql = ""): +def IsSelectable(db, table, sql = "", columns = "*"): query = QSqlQuery(db) try: - QueryExec(query, "SELECT * FROM " + table + " " + sql + " LIMIT 1") + QueryExec(query, "SELECT " + columns + " FROM " + table + " " + sql + " LIMIT 1") except: return False return True @@ -2261,6 +2440,240 @@ class ResizeColumnsToContentsBase(QObject): self.data_model.rowsInserted.disconnect(self.UpdateColumnWidths) self.ResizeColumnsToContents() +# Convert value to CSV + +def ToCSValue(val): + if '"' in val: + val = val.replace('"', '""') + if "," in val or '"' in val: + val = '"' + val + '"' + return val + +# Key to sort table model indexes by row / column, assuming fewer than 1000 columns + +glb_max_cols = 1000 + +def RowColumnKey(a): + return a.row() * glb_max_cols + a.column() + +# Copy selected table cells to clipboard + +def CopyTableCellsToClipboard(view, as_csv=False, with_hdr=False): + indexes = sorted(view.selectedIndexes(), key=RowColumnKey) + idx_cnt = len(indexes) + if not idx_cnt: + return + if idx_cnt == 1: + with_hdr=False + min_row = indexes[0].row() + max_row = indexes[0].row() + min_col = indexes[0].column() + 
max_col = indexes[0].column() + for i in indexes: + min_row = min(min_row, i.row()) + max_row = max(max_row, i.row()) + min_col = min(min_col, i.column()) + max_col = max(max_col, i.column()) + if max_col > glb_max_cols: + raise RuntimeError("glb_max_cols is too low") + max_width = [0] * (1 + max_col - min_col) + for i in indexes: + c = i.column() - min_col + max_width[c] = max(max_width[c], len(str(i.data()))) + text = "" + pad = "" + sep = "" + if with_hdr: + model = indexes[0].model() + for col in range(min_col, max_col + 1): + val = model.headerData(col, Qt.Horizontal) + if as_csv: + text += sep + ToCSValue(val) + sep = "," + else: + c = col - min_col + max_width[c] = max(max_width[c], len(val)) + width = max_width[c] + align = model.headerData(col, Qt.Horizontal, Qt.TextAlignmentRole) + if align & Qt.AlignRight: + val = val.rjust(width) + text += pad + sep + val + pad = " " * (width - len(val)) + sep = " " + text += "\n" + pad = "" + sep = "" + last_row = min_row + for i in indexes: + if i.row() > last_row: + last_row = i.row() + text += "\n" + pad = "" + sep = "" + if as_csv: + text += sep + ToCSValue(str(i.data())) + sep = "," + else: + width = max_width[i.column() - min_col] + if i.data(Qt.TextAlignmentRole) & Qt.AlignRight: + val = str(i.data()).rjust(width) + else: + val = str(i.data()) + text += pad + sep + val + pad = " " * (width - len(val)) + sep = " " + QApplication.clipboard().setText(text) + +def CopyTreeCellsToClipboard(view, as_csv=False, with_hdr=False): + indexes = view.selectedIndexes() + if not len(indexes): + return + + selection = view.selectionModel() + + first = None + for i in indexes: + above = view.indexAbove(i) + if not selection.isSelected(above): + first = i + break + + if first is None: + raise RuntimeError("CopyTreeCellsToClipboard internal error") + + model = first.model() + row_cnt = 0 + col_cnt = model.columnCount(first) + max_width = [0] * col_cnt + + indent_sz = 2 + indent_str = " " * indent_sz + + expanded_mark_sz = 2 + if 
sys.version_info[0] == 3: + expanded_mark = "\u25BC " + not_expanded_mark = "\u25B6 " + else: + expanded_mark = unicode(chr(0xE2) + chr(0x96) + chr(0xBC) + " ", "utf-8") + not_expanded_mark = unicode(chr(0xE2) + chr(0x96) + chr(0xB6) + " ", "utf-8") + leaf_mark = " " + + if not as_csv: + pos = first + while True: + row_cnt += 1 + row = pos.row() + for c in range(col_cnt): + i = pos.sibling(row, c) + if c: + n = len(str(i.data())) + else: + n = len(str(i.data()).strip()) + n += (i.internalPointer().level - 1) * indent_sz + n += expanded_mark_sz + max_width[c] = max(max_width[c], n) + pos = view.indexBelow(pos) + if not selection.isSelected(pos): + break + + text = "" + pad = "" + sep = "" + if with_hdr: + for c in range(col_cnt): + val = model.headerData(c, Qt.Horizontal, Qt.DisplayRole).strip() + if as_csv: + text += sep + ToCSValue(val) + sep = "," + else: + max_width[c] = max(max_width[c], len(val)) + width = max_width[c] + align = model.headerData(c, Qt.Horizontal, Qt.TextAlignmentRole) + if align & Qt.AlignRight: + val = val.rjust(width) + text += pad + sep + val + pad = " " * (width - len(val)) + sep = " " + text += "\n" + pad = "" + sep = "" + + pos = first + while True: + row = pos.row() + for c in range(col_cnt): + i = pos.sibling(row, c) + val = str(i.data()) + if not c: + if model.hasChildren(i): + if view.isExpanded(i): + mark = expanded_mark + else: + mark = not_expanded_mark + else: + mark = leaf_mark + val = indent_str * (i.internalPointer().level - 1) + mark + val.strip() + if as_csv: + text += sep + ToCSValue(val) + sep = "," + else: + width = max_width[c] + if c and i.data(Qt.TextAlignmentRole) & Qt.AlignRight: + val = val.rjust(width) + text += pad + sep + val + pad = " " * (width - len(val)) + sep = " " + pos = view.indexBelow(pos) + if not selection.isSelected(pos): + break + text = text.rstrip() + "\n" + pad = "" + sep = "" + + QApplication.clipboard().setText(text) + +def CopyCellsToClipboard(view, as_csv=False, with_hdr=False): + 
view.CopyCellsToClipboard(view, as_csv, with_hdr) + +def CopyCellsToClipboardHdr(view): + CopyCellsToClipboard(view, False, True) + +def CopyCellsToClipboardCSV(view): + CopyCellsToClipboard(view, True, True) + +# Context menu + +class ContextMenu(object): + + def __init__(self, view): + self.view = view + self.view.setContextMenuPolicy(Qt.CustomContextMenu) + self.view.customContextMenuRequested.connect(self.ShowContextMenu) + + def ShowContextMenu(self, pos): + menu = QMenu(self.view) + self.AddActions(menu) + menu.exec_(self.view.mapToGlobal(pos)) + + def AddCopy(self, menu): + menu.addAction(CreateAction("&Copy selection", "Copy to clipboard", lambda: CopyCellsToClipboardHdr(self.view), self.view)) + menu.addAction(CreateAction("Copy selection as CS&V", "Copy to clipboard as CSV", lambda: CopyCellsToClipboardCSV(self.view), self.view)) + + def AddActions(self, menu): + self.AddCopy(menu) + +class TreeContextMenu(ContextMenu): + + def __init__(self, view): + super(TreeContextMenu, self).__init__(view) + + def AddActions(self, menu): + i = self.view.currentIndex() + text = str(i.data()).strip() + if len(text): + menu.addAction(CreateAction('Copy "' + text + '"', "Copy to clipboard", lambda: QApplication.clipboard().setText(text), self.view)) + self.AddCopy(menu) + # Table window class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase): @@ -2279,9 +2692,13 @@ class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase): self.view.verticalHeader().setVisible(False) self.view.sortByColumn(-1, Qt.AscendingOrder) self.view.setSortingEnabled(True) + self.view.setSelectionMode(QAbstractItemView.ContiguousSelection) + self.view.CopyCellsToClipboard = CopyTableCellsToClipboard self.ResizeColumnsToContents() + self.context_menu = ContextMenu(self.view) + self.find_bar = FindBar(self, self, True) self.finder = ChildDataItemFinder(self.data_model) @@ -2395,6 +2812,10 @@ class TopCallsWindow(QMdiSubWindow, ResizeColumnsToContentsBase): self.view.setModel(self.model) 
self.view.setEditTriggers(QAbstractItemView.NoEditTriggers) self.view.verticalHeader().setVisible(False) + self.view.setSelectionMode(QAbstractItemView.ContiguousSelection) + self.view.CopyCellsToClipboard = CopyTableCellsToClipboard + + self.context_menu = ContextMenu(self.view) self.ResizeColumnsToContents() @@ -2501,7 +2922,7 @@ class WindowMenu(): action = self.window_menu.addAction(label) action.setCheckable(True) action.setChecked(sub_window == self.mdi_area.activeSubWindow()) - action.triggered.connect(lambda x=nr: self.setActiveSubWindow(x)) + action.triggered.connect(lambda a=None,x=nr: self.setActiveSubWindow(x)) self.window_menu.addAction(action) nr += 1 @@ -2587,6 +3008,12 @@ cd xed sudo ./mfile.py --prefix=/usr/local install sudo ldconfig </pre> +<h3>Instructions per Cycle (IPC)</h3> +If available, IPC information is displayed in columns 'insn_cnt', 'cyc_cnt' and 'IPC'. +<p><b>Intel PT note:</b> The information applies to the blocks of code ending with, and including, that branch. +Due to the granularity of timing information, the number of cycles for some code blocks will not be known. +In that case, 'insn_cnt', 'cyc_cnt' and 'IPC' are zero, but when 'IPC' is displayed it covers the period +since the previous displayed 'IPC'. <h3>Find</h3> Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match. Refer to Python documentation for the regular expression syntax. 
@@ -2660,6 +3087,60 @@ class HelpOnlyWindow(QMainWindow): self.setCentralWidget(self.text) +# PostqreSQL server version + +def PostqreSQLServerVersion(db): + query = QSqlQuery(db) + QueryExec(query, "SELECT VERSION()") + if query.next(): + v_str = query.value(0) + v_list = v_str.strip().split(" ") + if v_list[0] == "PostgreSQL" and v_list[2] == "on": + return v_list[1] + return v_str + return "Unknown" + +# SQLite version + +def SQLiteVersion(db): + query = QSqlQuery(db) + QueryExec(query, "SELECT sqlite_version()") + if query.next(): + return query.value(0) + return "Unknown" + +# About dialog + +class AboutDialog(QDialog): + + def __init__(self, glb, parent=None): + super(AboutDialog, self).__init__(parent) + + self.setWindowTitle("About Exported SQL Viewer") + self.setMinimumWidth(300) + + pyside_version = "1" if pyside_version_1 else "2" + + text = "<pre>" + text += "Python version: " + sys.version.split(" ")[0] + "\n" + text += "PySide version: " + pyside_version + "\n" + text += "Qt version: " + qVersion() + "\n" + if glb.dbref.is_sqlite3: + text += "SQLite version: " + SQLiteVersion(glb.db) + "\n" + else: + text += "PostqreSQL version: " + PostqreSQLServerVersion(glb.db) + "\n" + text += "</pre>" + + self.text = QTextBrowser() + self.text.setHtml(text) + self.text.setReadOnly(True) + self.text.setOpenExternalLinks(True) + + self.vbox = QVBoxLayout() + self.vbox.addWidget(self.text) + + self.setLayout(self.vbox); + # Font resize def ResizeFont(widget, diff): @@ -2732,6 +3213,8 @@ class MainWindow(QMainWindow): file_menu.addAction(CreateExitAction(glb.app, self)) edit_menu = menu.addMenu("&Edit") + edit_menu.addAction(CreateAction("&Copy", "Copy to clipboard", self.CopyToClipboard, self, QKeySequence.Copy)) + edit_menu.addAction(CreateAction("Copy as CS&V", "Copy to clipboard as CSV", self.CopyToClipboardCSV, self)) edit_menu.addAction(CreateAction("&Find...", "Find items", self.Find, self, QKeySequence.Find)) edit_menu.addAction(CreateAction("Fetch &more 
records...", "Fetch more records", self.FetchMoreRecords, self, [QKeySequence(Qt.Key_F8)])) edit_menu.addAction(CreateAction("&Shrink Font", "Make text smaller", self.ShrinkFont, self, [QKeySequence("Ctrl+-")])) @@ -2755,6 +3238,21 @@ class MainWindow(QMainWindow): help_menu = menu.addMenu("&Help") help_menu.addAction(CreateAction("&Exported SQL Viewer Help", "Helpful information", self.Help, self, QKeySequence.HelpContents)) + help_menu.addAction(CreateAction("&About Exported SQL Viewer", "About this application", self.About, self)) + + def Try(self, fn): + win = self.mdi_area.activeSubWindow() + if win: + try: + fn(win.view) + except: + pass + + def CopyToClipboard(self): + self.Try(CopyCellsToClipboardHdr) + + def CopyToClipboardCSV(self): + self.Try(CopyCellsToClipboardCSV) def Find(self): win = self.mdi_area.activeSubWindow() @@ -2773,12 +3271,10 @@ class MainWindow(QMainWindow): pass def ShrinkFont(self): - win = self.mdi_area.activeSubWindow() - ShrinkFont(win.view) + self.Try(ShrinkFont) def EnlargeFont(self): - win = self.mdi_area.activeSubWindow() - EnlargeFont(win.view) + self.Try(EnlargeFont) def EventMenu(self, events, reports_menu): branches_events = 0 @@ -2792,14 +3288,14 @@ class MainWindow(QMainWindow): event = event.split(":")[0] if event == "branches": label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")" - reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self)) + reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewBranchView(x), self)) label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")" - reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewSelectedBranchView(x), self)) + reports_menu.addAction(CreateAction(label, "Create a new window displaying branch 
events", lambda a=None,x=dbid: self.NewSelectedBranchView(x), self)) def TableMenu(self, tables, menu): table_menu = menu.addMenu("&Tables") for table in tables: - table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda t=table: self.NewTableView(t), self)) + table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda a=None,t=table: self.NewTableView(t), self)) def NewCallGraph(self): CallGraphWindow(self.glb, self) @@ -2828,6 +3324,10 @@ class MainWindow(QMainWindow): def Help(self): HelpWindow(self.glb, self) + def About(self): + dialog = AboutDialog(self.glb, self) + dialog.exec_() + # XED Disassembler class xed_state_t(Structure): @@ -3035,18 +3535,27 @@ class DBRef(): # Main def Main(): - if (len(sys.argv) < 2): - printerr("Usage is: exported-sql-viewer.py {<database name> | --help-only}"); - raise Exception("Too few arguments") - - dbname = sys.argv[1] - if dbname == "--help-only": + usage_str = "exported-sql-viewer.py [--pyside-version-1] <database name>\n" \ + " or: exported-sql-viewer.py --help-only" + ap = argparse.ArgumentParser(usage = usage_str, add_help = False) + ap.add_argument("--pyside-version-1", action='store_true') + ap.add_argument("dbname", nargs="?") + ap.add_argument("--help-only", action='store_true') + args = ap.parse_args() + + if args.help_only: app = QApplication(sys.argv) mainwindow = HelpOnlyWindow() mainwindow.show() err = app.exec_() sys.exit(err) + dbname = args.dbname + if dbname is None: + ap.print_usage() + print("Too few arguments") + sys.exit(1) + is_sqlite3 = False try: f = open(dbname, "rb") diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 0b2b8305c965..e72accefd669 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + perf-y += builtin-test.o perf-y += parse-events.o perf-y += dso-data.o @@ -50,6 +52,8 @@ perf-y += perf-hooks.o perf-y += clang.o perf-y += 
unit_number__scnprintf.o perf-y += mem2node.o +perf-y += map_groups.o +perf-y += time-utils-test.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c index 57fc544aedb0..153624e2d0f5 100644 --- a/tools/perf/tests/bp_account.c +++ b/tools/perf/tests/bp_account.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu. diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c index 1ca5106df5f1..ab4b98b3165d 100644 --- a/tools/perf/tests/bpf-script-example.c +++ b/tools/perf/tests/bpf-script-example.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * bpf-script-example.c * Test basic LLVM building diff --git a/tools/perf/tests/bpf-script-test-kbuild.c b/tools/perf/tests/bpf-script-test-kbuild.c index ff3ec8337f0a..219673aa278f 100644 --- a/tools/perf/tests/bpf-script-test-kbuild.c +++ b/tools/perf/tests/bpf-script-test-kbuild.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * bpf-script-test-kbuild.c * Test include from kernel header diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c index 43f1e16486f4..bd83d364cf30 100644 --- a/tools/perf/tests/bpf-script-test-prologue.c +++ b/tools/perf/tests/bpf-script-test-prologue.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * bpf-script-test-prologue.c * Test BPF prologue diff --git a/tools/perf/tests/bpf-script-test-relocation.c b/tools/perf/tests/bpf-script-test-relocation.c index 93af77421816..74006e4b2d24 100644 --- a/tools/perf/tests/bpf-script-test-relocation.c +++ b/tools/perf/tests/bpf-script-test-relocation.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * bpf-script-test-relocation.c * Test BPF loader checking relocation diff --git 
a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 79b54f8ddebf..c9e4cdc4c9c8 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include <stdio.h> #include <sys/epoll.h> diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 9852b5d624a5..66a82badc1d1 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -22,6 +22,7 @@ #include "string2.h" #include "symbol.h" #include <linux/kernel.h> +#include <linux/string.h> #include <subcmd/exec-cmd.h> static bool dont_fork; @@ -290,6 +291,14 @@ static struct test generic_tests[] = { .func = test__mem2node, }, { + .desc = "time utils", + .func = test__time_utils, + }, + { + .desc = "map_groups__merge_in", + .func = test__map_groups__merge_in, + }, + { .func = NULL, }, }; @@ -430,7 +439,7 @@ static const char *shell_test__description(char *description, size_t size, description = fgets(description, size, fp); fclose(fp); - return description ? trim(description + 1) : NULL; + return description ? strim(description + 1) : NULL; } #define for_each_shell_test(dir, base, ent) \ diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 4ebd2681e760..aa6df122b175 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -22,7 +22,7 @@ #include "tests.h" -#include "sane_ctype.h" +#include <linux/ctype.h> #define BUFSZ 1024 #define READLEN 128 diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 7f6c52021e41..946ab4b63acd 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -304,7 +304,7 @@ int test__dso_data_cache(struct test *test __maybe_unused, int subtest __maybe_u /* Make sure we did not leak any file descriptor. 
*/ nr_end = open_files_cnt(); pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end); - TEST_ASSERT_VAL("failed leadking files", nr == nr_end); + TEST_ASSERT_VAL("failed leaking files", nr == nr_end); return 0; } @@ -380,6 +380,6 @@ int test__dso_data_reopen(struct test *test __maybe_unused, int subtest __maybe_ /* Make sure we did not leak any file descriptor. */ nr_end = open_files_cnt(); pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end); - TEST_ASSERT_VAL("failed leadking files", nr == nr_end); + TEST_ASSERT_VAL("failed leaking files", nr == nr_end); return 0; } diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 077c306c1cae..f33709a79335 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> #include <linux/types.h> +#include <linux/zalloc.h> #include <inttypes.h> #include <unistd.h> #include "tests.h" @@ -115,8 +116,8 @@ noinline int test_dwarf_unwind__thread(struct thread *thread) } out: - free(sample.user_stack.data); - free(sample.user_regs.regs); + zfree(&sample.user_stack.data); + zfree(&sample.user_regs.regs); return err; } diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 9acc1e80b936..ee1d88650e69 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -3,6 +3,7 @@ #include "util/expr.h" #include "tests.h" #include <stdlib.h> +#include <linux/zalloc.h> static int test(struct parse_ctx *ctx, const char *e, double val2) { @@ -58,7 +59,7 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) TEST_ASSERT_VAL("find other", other[3] == NULL); for (i = 0; i < num_other; i++) - free((void *)other[i]); + zfree(&other[i]); free((void *)other); return 0; diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index a039f93199e5..ca5a5f94ce79 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 
#include <stdio.h> +#include <stdlib.h> #include <bpf/libbpf.h> #include <util/llvm-utils.h> #include <util/cache.h> diff --git a/tools/perf/tests/make b/tools/perf/tests/make index e46723568516..5363a12a8b9b 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -107,7 +107,7 @@ make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 -make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 +make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1 # $(run) contains all available tests run := make_pure diff --git a/tools/perf/tests/map_groups.c b/tools/perf/tests/map_groups.c new file mode 100644 index 000000000000..594fdaca4f71 --- /dev/null +++ b/tools/perf/tests/map_groups.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/compiler.h> +#include <linux/kernel.h> +#include "tests.h" +#include "map.h" +#include "map_groups.h" +#include "dso.h" +#include "debug.h" + +struct map_def { + const char *name; + u64 start; + u64 end; +}; + +static int check_maps(struct map_def *merged, unsigned int size, struct map_groups *mg) +{ + struct map *map; + unsigned int i = 0; + + map = map_groups__first(mg); + while (map) { + TEST_ASSERT_VAL("wrong map start", map->start == merged[i].start); + TEST_ASSERT_VAL("wrong map end", map->end == merged[i].end); + TEST_ASSERT_VAL("wrong map name", !strcmp(map->dso->name, merged[i].name)); + TEST_ASSERT_VAL("wrong map refcnt", refcount_read(&map->refcnt) == 2); + + i++; + map = map_groups__next(map); + + TEST_ASSERT_VAL("less maps expected", (map && i < size) || (!map && i == size)); + } + + return TEST_OK; +} + +int test__map_groups__merge_in(struct test *t __maybe_unused, int subtest __maybe_unused) +{ + struct map_groups mg; + unsigned int i; + struct map_def bpf_progs[] = { + { "bpf_prog_1", 200, 300 
}, + { "bpf_prog_2", 500, 600 }, + { "bpf_prog_3", 800, 900 }, + }; + struct map_def merged12[] = { + { "kcore1", 100, 200 }, + { "bpf_prog_1", 200, 300 }, + { "kcore1", 300, 500 }, + { "bpf_prog_2", 500, 600 }, + { "kcore1", 600, 800 }, + { "bpf_prog_3", 800, 900 }, + { "kcore1", 900, 1000 }, + }; + struct map_def merged3[] = { + { "kcore1", 100, 200 }, + { "bpf_prog_1", 200, 300 }, + { "kcore1", 300, 500 }, + { "bpf_prog_2", 500, 600 }, + { "kcore1", 600, 800 }, + { "bpf_prog_3", 800, 900 }, + { "kcore1", 900, 1000 }, + { "kcore3", 1000, 1100 }, + }; + struct map *map_kcore1, *map_kcore2, *map_kcore3; + int ret; + + map_groups__init(&mg, NULL); + + for (i = 0; i < ARRAY_SIZE(bpf_progs); i++) { + struct map *map; + + map = dso__new_map(bpf_progs[i].name); + TEST_ASSERT_VAL("failed to create map", map); + + map->start = bpf_progs[i].start; + map->end = bpf_progs[i].end; + map_groups__insert(&mg, map); + map__put(map); + } + + map_kcore1 = dso__new_map("kcore1"); + TEST_ASSERT_VAL("failed to create map", map_kcore1); + + map_kcore2 = dso__new_map("kcore2"); + TEST_ASSERT_VAL("failed to create map", map_kcore2); + + map_kcore3 = dso__new_map("kcore3"); + TEST_ASSERT_VAL("failed to create map", map_kcore3); + + /* kcore1 map overlaps over all bpf maps */ + map_kcore1->start = 100; + map_kcore1->end = 1000; + + /* kcore2 map hides behind bpf_prog_2 */ + map_kcore2->start = 550; + map_kcore2->end = 570; + + /* kcore3 map hides behind bpf_prog_3, kcore1 and adds new map */ + map_kcore3->start = 880; + map_kcore3->end = 1100; + + ret = map_groups__merge_in(&mg, map_kcore1); + TEST_ASSERT_VAL("failed to merge map", !ret); + + ret = check_maps(merged12, ARRAY_SIZE(merged12), &mg); + TEST_ASSERT_VAL("merge check failed", !ret); + + ret = map_groups__merge_in(&mg, map_kcore2); + TEST_ASSERT_VAL("failed to merge map", !ret); + + ret = check_maps(merged12, ARRAY_SIZE(merged12), &mg); + TEST_ASSERT_VAL("merge check failed", !ret); + + ret = map_groups__merge_in(&mg, map_kcore3); 
+ TEST_ASSERT_VAL("failed to merge map", !ret); + + ret = check_maps(merged3, ARRAY_SIZE(merged3), &mg); + TEST_ASSERT_VAL("merge check failed", !ret); + return TEST_OK; +} diff --git a/tools/perf/tests/mem.c b/tools/perf/tests/mem.c index 0f82ee9fd3f7..efe3397824d2 100644 --- a/tools/perf/tests/mem.c +++ b/tools/perf/tests/mem.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "util/mem-events.h" #include "util/symbol.h" #include "linux/perf_event.h" diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c index 9e9e4d37cc77..520cc91af256 100644 --- a/tools/perf/tests/mem2node.c +++ b/tools/perf/tests/mem2node.c @@ -1,5 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> #include <linux/bitmap.h> +#include <linux/zalloc.h> #include "cpumap.h" #include "mem2node.h" #include "tests.h" @@ -66,7 +68,7 @@ int test__mem2node(struct test *t __maybe_unused, int subtest __maybe_unused) T("failed: mem2node__node", -1 == mem2node__node(&map, 0x1050)); for (i = 0; i < ARRAY_SIZE(nodes); i++) - free(nodes[i].set); + zfree(&nodes[i].set); mem2node__exit(&map); return 0; diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index ba87e6e8d18c..0a4301a5155c 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -53,7 +53,7 @@ static void *thread_fn(void *arg) { struct thread_data *td = arg; ssize_t ret; - int go; + int go = 0; if (thread_init(td)) return NULL; diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 4a69c07f4101..8f3c80e13584 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -18,6 +18,32 @@ #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD) +#if defined(__s390x__) +/* Return true if kvm module is available and loaded. Test this + * and return success when trace point kvm_s390_create_vm + * exists. 
Otherwise this test always fails. + */ +static bool kvm_s390_create_vm_valid(void) +{ + char *eventfile; + bool rc = false; + + eventfile = get_events_file("kvm-s390"); + + if (eventfile) { + DIR *mydir = opendir(eventfile); + + if (mydir) { + rc = true; + closedir(mydir); + } + put_events_file(eventfile); + } + + return rc; +} +#endif + static int test__checkevent_tracepoint(struct perf_evlist *evlist) { struct perf_evsel *evsel = perf_evlist__first(evlist); @@ -1642,6 +1668,7 @@ static struct evlist_test test__events[] = { { .name = "kvm-s390:kvm_s390_create_vm", .check = test__checkevent_tracepoint, + .valid = kvm_s390_create_vm_valid, .id = 100, }, #endif diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 236ce0d6c826..361714e2583c 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdbool.h> #include <inttypes.h> +#include <stdlib.h> #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/types.h> diff --git a/tools/perf/tests/shell/lib/probe.sh b/tools/perf/tests/shell/lib/probe.sh index e37787be672b..51e3f60baba0 100644 --- a/tools/perf/tests/shell/lib/probe.sh +++ b/tools/perf/tests/shell/lib/probe.sh @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 skip_if_no_perf_probe() { diff --git a/tools/perf/tests/shell/probe_vfs_getname.sh b/tools/perf/tests/shell/probe_vfs_getname.sh index 46e076e3c537..5d1b63d3f3e1 100755 --- a/tools/perf/tests/shell/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/probe_vfs_getname.sh @@ -1,6 +1,7 @@ #!/bin/sh # Add vfs_getname probe to get syscall args filenames -# + +# SPDX-License-Identifier: GPL-2.0 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 . 
$(dirname $0)/lib/probe.sh diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index 61c9f8fc6fa1..f12a4e217968 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -7,6 +7,7 @@ # This needs no debuginfo package, all is done using the libc ELF symtab # and the CFI info in the binaries. +# SPDX-License-Identifier: GPL-2.0 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 . $(dirname $0)/lib/probe.sh @@ -44,7 +45,7 @@ trace_libc_inet_pton_backtrace() { eventattr='max-stack=4' echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected - echo ".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected + echo ".*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$" >> $expected ;; *) eventattr='max-stack=3' diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh index 9b073e7fa88c..54030c18bfc2 100755 --- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh +++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh @@ -6,6 +6,7 @@ # checks that that was captured by the vfs_getname probe in the generated # perf.data file, with the temp file name as the pathname argument. +# SPDX-License-Identifier: GPL-2.0 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 . 
$(dirname $0)/lib/probe.sh diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh new file mode 100755 index 000000000000..899604d17b85 --- /dev/null +++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# Zstd perf.data compression/decompression + +# SPDX-License-Identifier: GPL-2.0 + +trace_file=$(mktemp /tmp/perf.data.XXX) +perf_tool=perf + +skip_if_no_z_record() { + $perf_tool record -h 2>&1 | grep -q '\-z, \-\-compression\-level' +} + +collect_z_record() { + echo "Collecting compressed record file:" + $perf_tool record -o $trace_file -g -z -F 5000 -- \ + dd count=500 if=/dev/random of=/dev/null +} + +check_compressed_stats() { + echo "Checking compressed events stats:" + $perf_tool report -i $trace_file --header --stats | \ + grep -E "(# compressed : Zstd,)|(COMPRESSED events:)" +} + +check_compressed_output() { + $perf_tool inject -i $trace_file -o $trace_file.decomp && + $perf_tool report -i $trace_file --stdio | head -n -3 > $trace_file.comp.output && + $perf_tool report -i $trace_file.decomp --stdio | head -n -3 > $trace_file.decomp.output && + diff $trace_file.comp.output $trace_file.decomp.output +} + +skip_if_no_z_record || exit 2 +collect_z_record && check_compressed_stats && check_compressed_output +err=$? +rm -f $trace_file* +exit $err diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 147efeb6b195..45d269b0157e 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -7,6 +7,7 @@ # that already handles "probe:vfs_getname" if present, and used in the # "open" syscall "filename" argument beautifier. +# SPDX-License-Identifier: GPL-2.0 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 . 
$(dirname $0)/lib/probe.sh diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 9b5be51e5e7b..6cdab5f4812a 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -4,6 +4,7 @@ #include <errno.h> #include <time.h> #include <stdlib.h> +#include <linux/zalloc.h> #include "parse-events.h" #include "evlist.h" @@ -237,7 +238,7 @@ static void free_event_nodes(struct list_head *events) while (!list_empty(events)) { node = list_entry(events->next, struct event_node, list); - list_del(&node->list); + list_del_init(&node->list); free(node); } } diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 399f18ca71a3..72912eb473cb 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -107,6 +107,8 @@ const char *test__clang_subtest_get_desc(int subtest); int test__clang_subtest_get_nr(void); int test__unit_number__scnprint(struct test *test, int subtest); int test__mem2node(struct test *t, int subtest); +int test__map_groups__merge_in(struct test *t, int subtest); +int test__time_utils(struct test *t, int subtest); bool test__bp_signal_is_supported(void); bool test__wp_is_supported(void); diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index 4de1939b58ba..ccc17aced49e 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -6,6 +6,7 @@ #include "tests.h" #include "thread_map.h" #include "debug.h" +#include <linux/zalloc.h> #define NAME (const char *) "perf" #define NAMEUL (unsigned long) NAME @@ -133,7 +134,7 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb thread_map__remove(threads, 0)); for (i = 0; i < threads->nr; i++) - free(threads->map[i].comm); + zfree(&threads->map[i].comm); free(threads); return 0; diff --git a/tools/perf/tests/time-utils-test.c b/tools/perf/tests/time-utils-test.c new file mode 100644 index 000000000000..4f53006233a1 --- /dev/null +++ 
b/tools/perf/tests/time-utils-test.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/compiler.h> +#include <linux/time64.h> +#include <inttypes.h> +#include <string.h> +#include "time-utils.h" +#include "evlist.h" +#include "session.h" +#include "debug.h" +#include "tests.h" + +static bool test__parse_nsec_time(const char *str, u64 expected) +{ + u64 ptime; + int err; + + pr_debug("\nparse_nsec_time(\"%s\")\n", str); + + err = parse_nsec_time(str, &ptime); + if (err) { + pr_debug("error %d\n", err); + return false; + } + + if (ptime != expected) { + pr_debug("Failed. ptime %" PRIu64 " expected %" PRIu64 "\n", + ptime, expected); + return false; + } + + pr_debug("%" PRIu64 "\n", ptime); + + return true; +} + +static bool test__perf_time__parse_str(const char *ostr, u64 start, u64 end) +{ + struct perf_time_interval ptime; + int err; + + pr_debug("\nperf_time__parse_str(\"%s\")\n", ostr); + + err = perf_time__parse_str(&ptime, ostr); + if (err) { + pr_debug("Error %d\n", err); + return false; + } + + if (ptime.start != start || ptime.end != end) { + pr_debug("Failed. 
Expected %" PRIu64 " to %" PRIu64 "\n", + start, end); + return false; + } + + return true; +} + +#define TEST_MAX 64 + +struct test_data { + const char *str; + u64 first; + u64 last; + struct perf_time_interval ptime[TEST_MAX]; + int num; + u64 skip[TEST_MAX]; + u64 noskip[TEST_MAX]; +}; + +static bool test__perf_time__parse_for_ranges(struct test_data *d) +{ + struct perf_evlist evlist = { + .first_sample_time = d->first, + .last_sample_time = d->last, + }; + struct perf_session session = { .evlist = &evlist }; + struct perf_time_interval *ptime = NULL; + int range_size, range_num; + bool pass = false; + int i, err; + + pr_debug("\nperf_time__parse_for_ranges(\"%s\")\n", d->str); + + if (strchr(d->str, '%')) + pr_debug("first_sample_time %" PRIu64 " last_sample_time %" PRIu64 "\n", + d->first, d->last); + + err = perf_time__parse_for_ranges(d->str, &session, &ptime, &range_size, + &range_num); + if (err) { + pr_debug("error %d\n", err); + goto out; + } + + if (range_size < d->num || range_num != d->num) { + pr_debug("bad size: range_size %d range_num %d expected num %d\n", + range_size, range_num, d->num); + goto out; + } + + for (i = 0; i < d->num; i++) { + if (ptime[i].start != d->ptime[i].start || + ptime[i].end != d->ptime[i].end) { + pr_debug("bad range %d expected %" PRIu64 " to %" PRIu64 "\n", + i, d->ptime[i].start, d->ptime[i].end); + goto out; + } + } + + if (perf_time__ranges_skip_sample(ptime, d->num, 0)) { + pr_debug("failed to keep 0\n"); + goto out; + } + + for (i = 0; i < TEST_MAX; i++) { + if (d->skip[i] && + !perf_time__ranges_skip_sample(ptime, d->num, d->skip[i])) { + pr_debug("failed to skip %" PRIu64 "\n", d->skip[i]); + goto out; + } + if (d->noskip[i] && + perf_time__ranges_skip_sample(ptime, d->num, d->noskip[i])) { + pr_debug("failed to keep %" PRIu64 "\n", d->noskip[i]); + goto out; + } + } + + pass = true; +out: + free(ptime); + return pass; +} + +int test__time_utils(struct test *t __maybe_unused, int subtest __maybe_unused) +{ + bool 
pass = true; + + pass &= test__parse_nsec_time("0", 0); + pass &= test__parse_nsec_time("1", 1000000000ULL); + pass &= test__parse_nsec_time("0.000000001", 1); + pass &= test__parse_nsec_time("1.000000001", 1000000001ULL); + pass &= test__parse_nsec_time("123456.123456", 123456123456000ULL); + pass &= test__parse_nsec_time("1234567.123456789", 1234567123456789ULL); + pass &= test__parse_nsec_time("18446744073.709551615", + 0xFFFFFFFFFFFFFFFFULL); + + pass &= test__perf_time__parse_str("1234567.123456789,1234567.123456789", + 1234567123456789ULL, 1234567123456789ULL); + pass &= test__perf_time__parse_str("1234567.123456789,1234567.123456790", + 1234567123456789ULL, 1234567123456790ULL); + pass &= test__perf_time__parse_str("1234567.123456789,", + 1234567123456789ULL, 0); + pass &= test__perf_time__parse_str(",1234567.123456789", + 0, 1234567123456789ULL); + pass &= test__perf_time__parse_str("0,1234567.123456789", + 0, 1234567123456789ULL); + + { + u64 b = 1234567123456789ULL; + struct test_data d = { + .str = "1234567.123456789,1234567.123456790", + .ptime = { {b, b + 1}, }, + .num = 1, + .skip = { b - 1, b + 2, }, + .noskip = { b, b + 1, }, + }; + + pass &= test__perf_time__parse_for_ranges(&d); + } + + { + u64 b = 1234567123456789ULL; + u64 c = 7654321987654321ULL; + u64 e = 8000000000000000ULL; + struct test_data d = { + .str = "1234567.123456789,1234567.123456790 " + "7654321.987654321,7654321.987654444 " + "8000000,8000000.000000005", + .ptime = { {b, b + 1}, {c, c + 123}, {e, e + 5}, }, + .num = 3, + .skip = { b - 1, b + 2, c - 1, c + 124, e - 1, e + 6 }, + .noskip = { b, b + 1, c, c + 123, e, e + 5 }, + }; + + pass &= test__perf_time__parse_for_ranges(&d); + } + + { + u64 b = 7654321ULL * NSEC_PER_SEC; + struct test_data d = { + .str = "10%/1", + .first = b, + .last = b + 100, + .ptime = { {b, b + 9}, }, + .num = 1, + .skip = { b - 1, b + 10, }, + .noskip = { b, b + 9, }, + }; + + pass &= test__perf_time__parse_for_ranges(&d); + } + + { + u64 b = 7654321ULL 
* NSEC_PER_SEC; + struct test_data d = { + .str = "10%/2", + .first = b, + .last = b + 100, + .ptime = { {b + 10, b + 19}, }, + .num = 1, + .skip = { b + 9, b + 20, }, + .noskip = { b + 10, b + 19, }, + }; + + pass &= test__perf_time__parse_for_ranges(&d); + } + + { + u64 b = 11223344ULL * NSEC_PER_SEC; + struct test_data d = { + .str = "10%/1,10%/2", + .first = b, + .last = b + 100, + .ptime = { {b, b + 9}, {b + 10, b + 19}, }, + .num = 2, + .skip = { b - 1, b + 20, }, + .noskip = { b, b + 8, b + 9, b + 10, b + 11, b + 12, b + 19, }, + }; + + pass &= test__perf_time__parse_for_ranges(&d); + } + + { + u64 b = 11223344ULL * NSEC_PER_SEC; + struct test_data d = { + .str = "10%/1,10%/3,10%/10", + .first = b, + .last = b + 100, + .ptime = { {b, b + 9}, {b + 20, b + 29}, { b + 90, b + 100}, }, + .num = 3, + .skip = { b - 1, b + 10, b + 19, b + 30, b + 89, b + 101 }, + .noskip = { b, b + 9, b + 20, b + 29, b + 90, b + 100}, + }; + + pass &= test__perf_time__parse_for_ranges(&d); + } + + pr_debug("\n"); + + return pass ? 0 : TEST_FAIL; +} diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 7691980b7df1..5e8834fc7dec 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -3,6 +3,7 @@ #include <linux/rbtree.h> #include <inttypes.h> #include <string.h> +#include <stdlib.h> #include "map.h" #include "symbol.h" #include "util.h" @@ -161,9 +162,16 @@ next_pair: continue; } - } else + } else if (mem_start == kallsyms.vmlinux_map->end) { + /* + * Ignore aliases to _etext, i.e. to the end of the kernel text area, + * such as __indirect_thunk_end. 
+ */ + continue; + } else { pr_debug("ERR : %#" PRIx64 ": %s not on kallsyms\n", mem_start, sym->name); + } err = -1; } diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 85f328ddf897..afa75a76f6b8 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -1,11 +1,14 @@ perf-y += clone.o perf-y += fcntl.o perf-y += flock.o +perf-y += fsmount.o +perf-y += fspick.o ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) perf-y += ioctl.o endif perf-y += kcmp.o perf-y += mount_flags.o +perf-y += move_mount.o perf-y += pkey_alloc.o perf-y += arch_prctl.o perf-y += prctl.o @@ -13,3 +16,4 @@ perf-y += renameat.o perf-y += sockaddr.o perf-y += socket.o perf-y += statx.o +perf-y += sync_file_range.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 139d485a6f16..7e06605f7c76 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -108,6 +108,9 @@ struct syscall_arg { unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx); +size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_STRARRAY_FLAGS syscall_arg__scnprintf_strarray_flags + size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STRARRAYS syscall_arg__scnprintf_strarrays @@ -141,6 +144,12 @@ size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_ar size_t syscall_arg__scnprintf_flock(char *bf, size_t size, struct syscall_arg *arg); #define SCA_FLOCK syscall_arg__scnprintf_flock +size_t syscall_arg__scnprintf_fsmount_attr_flags(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_FSMOUNT_ATTR_FLAGS syscall_arg__scnprintf_fsmount_attr_flags + +size_t syscall_arg__scnprintf_fspick_flags(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_FSPICK_FLAGS syscall_arg__scnprintf_fspick_flags + size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct 
syscall_arg *arg); #define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd @@ -156,6 +165,9 @@ unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg, unsigne size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_MOUNT_FLAGS syscall_arg__scnprintf_mount_flags +size_t syscall_arg__scnprintf_move_mount_flags(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_MOVE_MOUNT_FLAGS syscall_arg__scnprintf_move_mount_flags + size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg); #define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights @@ -189,6 +201,9 @@ size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_ size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STATX_MASK syscall_arg__scnprintf_statx_mask +size_t syscall_arg__scnprintf_sync_file_range_flags(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_SYNC_FILE_RANGE_FLAGS syscall_arg__scnprintf_sync_file_range_flags + size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix); void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c index 6eb9a6636171..1a8d3be2030e 100644 --- a/tools/perf/trace/beauty/clone.c +++ b/tools/perf/trace/beauty/clone.c @@ -25,6 +25,7 @@ static size_t clone__scnprintf_flags(unsigned long flags, char *bf, size_t size, P_FLAG(FS); P_FLAG(FILES); P_FLAG(SIGHAND); + P_FLAG(PIDFD); P_FLAG(PTRACE); P_FLAG(VFORK); P_FLAG(PARENT); diff --git a/tools/perf/trace/beauty/fsconfig.sh b/tools/perf/trace/beauty/fsconfig.sh new file mode 100755 index 000000000000..83fb24df05c9 --- /dev/null +++ b/tools/perf/trace/beauty/fsconfig.sh @@ -0,0 +1,17 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +if [ $# -ne 1 ] ; then + 
linux_header_dir=tools/include/uapi/linux +else + linux_header_dir=$1 +fi + +linux_mount=${linux_header_dir}/mount.h + +printf "static const char *fsconfig_cmds[] = {\n" +regex='^[[:space:]]*+FSCONFIG_([[:alnum:]_]+)[[:space:]]*=[[:space:]]*([[:digit:]]+)[[:space:]]*,[[:space:]]*.*' +egrep $regex ${linux_mount} | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[%s] = \"%s\",\n" +printf "};\n" diff --git a/tools/perf/trace/beauty/fsmount.c b/tools/perf/trace/beauty/fsmount.c new file mode 100644 index 000000000000..30c8c082a3c3 --- /dev/null +++ b/tools/perf/trace/beauty/fsmount.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * trace/beauty/fsmount.c + * + * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + */ + +#include "trace/beauty/beauty.h" +#include <linux/log2.h> +#include <uapi/linux/mount.h> + +static size_t fsmount__scnprintf_attr_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) +{ +#include "trace/beauty/generated/fsmount_arrays.c" + static DEFINE_STRARRAY(fsmount_attr_flags, "MOUNT_ATTR_"); + size_t printed = 0; + + if ((flags & ~MOUNT_ATTR__ATIME) != 0) + printed += strarray__scnprintf_flags(&strarray__fsmount_attr_flags, bf, size, show_prefix, flags); + + if ((flags & MOUNT_ATTR__ATIME) == MOUNT_ATTR_RELATIME) { + printed += scnprintf(bf + printed, size - printed, "%s%s%s", + printed ? "|" : "", show_prefix ? 
"MOUNT_ATTR_" : "", "RELATIME"); + } + + return printed; +} + +size_t syscall_arg__scnprintf_fsmount_attr_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long flags = arg->val; + + return fsmount__scnprintf_attr_flags(flags, bf, size, arg->show_string_prefix); +} diff --git a/tools/perf/trace/beauty/fsmount.sh b/tools/perf/trace/beauty/fsmount.sh new file mode 100755 index 000000000000..615cc0fcf4f9 --- /dev/null +++ b/tools/perf/trace/beauty/fsmount.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +if [ $# -ne 1 ] ; then + linux_header_dir=tools/include/uapi/linux +else + linux_header_dir=$1 +fi + +linux_mount=${linux_header_dir}/mount.h + +# Remove MOUNT_ATTR_RELATIME as it is zeros, handle it a special way in the beautifier +# Only handle MOUNT_ATTR_ followed by a capital letter/num as __ is special case +# for things like MOUNT_ATTR__ATIME that is a mask for the possible ATIME handling +# bits. Special case it as well in the beautifier + +printf "static const char *fsmount_attr_flags[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOUNT_ATTR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' +egrep $regex ${linux_mount} | grep -v MOUNT_ATTR_RELATIME | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n" +printf "};\n" diff --git a/tools/perf/trace/beauty/fspick.c b/tools/perf/trace/beauty/fspick.c new file mode 100644 index 000000000000..c402479c96f0 --- /dev/null +++ b/tools/perf/trace/beauty/fspick.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * trace/beauty/fspick.c + * + * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + */ + +#include "trace/beauty/beauty.h" +#include <linux/log2.h> + +static size_t fspick__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) +{ +#include "trace/beauty/generated/fspick_arrays.c" + static DEFINE_STRARRAY(fspick_flags, "FSPICK_"); + + return 
strarray__scnprintf_flags(&strarray__fspick_flags, bf, size, show_prefix, flags); +} + +size_t syscall_arg__scnprintf_fspick_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long flags = arg->val; + + return fspick__scnprintf_flags(flags, bf, size, arg->show_string_prefix); +} diff --git a/tools/perf/trace/beauty/fspick.sh b/tools/perf/trace/beauty/fspick.sh new file mode 100755 index 000000000000..b220e07ef452 --- /dev/null +++ b/tools/perf/trace/beauty/fspick.sh @@ -0,0 +1,17 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +if [ $# -ne 1 ] ; then + linux_header_dir=tools/include/uapi/linux +else + linux_header_dir=$1 +fi + +linux_mount=${linux_header_dir}/mount.h + +printf "static const char *fspick_flags[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+FSPICK_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' +egrep $regex ${linux_mount} | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n" +printf "};\n" diff --git a/tools/perf/trace/beauty/move_mount.c b/tools/perf/trace/beauty/move_mount.c new file mode 100644 index 000000000000..78ed80395406 --- /dev/null +++ b/tools/perf/trace/beauty/move_mount.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * trace/beauty/move_mount.c + * + * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + */ + +#include "trace/beauty/beauty.h" +#include <linux/log2.h> + +static size_t move_mount__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) +{ +#include "trace/beauty/generated/move_mount_flags_array.c" + static DEFINE_STRARRAY(move_mount_flags, "MOVE_MOUNT_"); + + return strarray__scnprintf_flags(&strarray__move_mount_flags, bf, size, show_prefix, flags); +} + +size_t syscall_arg__scnprintf_move_mount_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long flags = arg->val; + + return move_mount__scnprintf_flags(flags, bf, size, arg->show_string_prefix); +} diff --git 
a/tools/perf/trace/beauty/move_mount_flags.sh b/tools/perf/trace/beauty/move_mount_flags.sh new file mode 100755 index 000000000000..55e59241daa4 --- /dev/null +++ b/tools/perf/trace/beauty/move_mount_flags.sh @@ -0,0 +1,17 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +if [ $# -ne 1 ] ; then + linux_header_dir=tools/include/uapi/linux +else + linux_header_dir=$1 +fi + +linux_mount=${linux_header_dir}/mount.h + +printf "static const char *move_mount_flags[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([FT]_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' +egrep $regex ${linux_mount} | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n" +printf "};\n" diff --git a/tools/perf/trace/beauty/renameat.c b/tools/perf/trace/beauty/renameat.c index 6dab340cc506..852d2e271833 100644 --- a/tools/perf/trace/beauty/renameat.c +++ b/tools/perf/trace/beauty/renameat.c @@ -2,7 +2,6 @@ // Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> #include "trace/beauty/beauty.h" -#include <uapi/linux/fs.h> static size_t renameat2__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) { diff --git a/tools/perf/trace/beauty/sync_file_range.c b/tools/perf/trace/beauty/sync_file_range.c new file mode 100644 index 000000000000..1c425f04047d --- /dev/null +++ b/tools/perf/trace/beauty/sync_file_range.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * trace/beauty/sync_file_range.c + * + * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + */ + +#include "trace/beauty/beauty.h" +#include <linux/log2.h> +#include <uapi/linux/fs.h> + +static size_t sync_file_range__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) +{ +#include "trace/beauty/generated/sync_file_range_arrays.c" + static DEFINE_STRARRAY(sync_file_range_flags, "SYNC_FILE_RANGE_"); + size_t printed = 0; + + if ((flags & 
SYNC_FILE_RANGE_WRITE_AND_WAIT) == SYNC_FILE_RANGE_WRITE_AND_WAIT) { + printed += scnprintf(bf + printed, size - printed, "%s%s", show_prefix ? "SYNC_FILE_RANGE_" : "", "WRITE_AND_WAIT"); + flags &= ~SYNC_FILE_RANGE_WRITE_AND_WAIT; + } + + return printed + strarray__scnprintf_flags(&strarray__sync_file_range_flags, bf + printed, size - printed, show_prefix, flags); +} + +size_t syscall_arg__scnprintf_sync_file_range_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long flags = arg->val; + + return sync_file_range__scnprintf_flags(flags, bf, size, arg->show_string_prefix); +} diff --git a/tools/perf/trace/beauty/sync_file_range.sh b/tools/perf/trace/beauty/sync_file_range.sh new file mode 100755 index 000000000000..7a9282d04e44 --- /dev/null +++ b/tools/perf/trace/beauty/sync_file_range.sh @@ -0,0 +1,17 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +if [ $# -ne 1 ] ; then + linux_header_dir=tools/include/uapi/linux +else + linux_header_dir=$1 +fi + +linux_fs=${linux_header_dir}/fs.h + +printf "static const char *sync_file_range_flags[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+SYNC_FILE_RANGE_([[:alnum:]_]+)[[:space:]]+([[:xdigit:]]+)[[:space:]]*.*' +egrep $regex ${linux_fs} | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n" +printf "};\n" diff --git a/tools/perf/trace/strace/groups/string b/tools/perf/trace/strace/groups/string new file mode 100644 index 000000000000..c87129a3e3c4 --- /dev/null +++ b/tools/perf/trace/strace/groups/string @@ -0,0 +1,65 @@ +access +acct +add_key +chdir +chmod +chown +chroot +creat +delete_module +execve +execveat +faccessat +fchmodat +fchownat +fgetxattr +finit_module +fremovexattr +fsetxattr +futimesat +getxattr +inotify_add_watch +lchown +lgetxattr +link +linkat +listxattr +llistxattr +lremovexattr +lsetxattr +lstat +memfd_create +mkdir +mkdirat +mknod +mknodat +mq_open +mq_timedsend +mq_unlink +name_to_handle_at +newfstatat +open +openat +pivot_root 
+pwrite64 +quotactl +readlink +readlinkat +removexattr +rename +renameat +renameat2 +request_key +rmdir +setxattr +stat +statfs +statx +swapoff +swapon +symlink +symlinkat +truncate +unlink +unlinkat +utimensat diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 4ad37d8c7d6a..f80c51d53565 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include "../util.h" #include "../string2.h" #include "../config.h" #include "../../perf.h" @@ -16,7 +15,8 @@ #include "helpline.h" #include "keysyms.h" #include "../color.h" -#include "sane_ctype.h" +#include <linux/ctype.h> +#include <linux/zalloc.h> static int ui_browser__percent_color(struct ui_browser *browser, double percent, bool current) @@ -594,7 +594,7 @@ static int ui_browser__color_config(const char *var, const char *value, break; *bg = '\0'; - bg = ltrim(++bg); + bg = skip_spaces(bg + 1); ui_browser__colorsets[i].bg = bg; ui_browser__colorsets[i].fg = fg; return 0; diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index aa5932e1d62e..dc1444136658 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <stdarg.h> +#include <sys/types.h> #define HE_COLORSET_TOP 50 #define HE_COLORSET_MEDIUM 51 diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 98d934a36d86..e67880bf1efe 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include "../../util/util.h" #include "../browser.h" #include "../helpline.h" #include "../ui.h" @@ -15,6 +14,7 @@ #include <pthread.h> #include <linux/kernel.h> #include <linux/string.h> +#include <linux/zalloc.h> #include <sys/ttydefaults.h> #include <asm/bug.h> @@ -97,11 +97,12 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int struct annotate_browser *ab = container_of(browser, struct 
annotate_browser, b); struct annotation *notes = browser__annotation(browser); struct annotation_line *al = list_entry(entry, struct annotation_line, node); + const bool is_current_entry = ui_browser__is_current_entry(browser, row); struct annotation_write_ops ops = { .first_line = row == 0, - .current_entry = ui_browser__is_current_entry(browser, row), + .current_entry = is_current_entry, .change_color = (!notes->options->hide_src_code && - (!ops.current_entry || + (!is_current_entry || (browser->use_navkeypressed && !browser->navkeypressed))), .width = browser->width, diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 3421ecbdd3f0..a94eb0755e8b 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -6,8 +6,10 @@ #include <stdlib.h> #include <string.h> #include <linux/rbtree.h> +#include <linux/string.h> #include <sys/ttydefaults.h> #include <linux/time64.h> +#include <linux/zalloc.h> #include "../../util/callchain.h" #include "../../util/evsel.h" @@ -17,7 +19,6 @@ #include "../../util/symbol.h" #include "../../util/pstack.h" #include "../../util/sort.h" -#include "../../util/util.h" #include "../../util/top.h" #include "../../util/thread.h" #include "../../arch/common.h" @@ -33,7 +34,7 @@ #include "units.h" #include "time-utils.h" -#include "sane_ctype.h" +#include <linux/ctype.h> extern void hist_browser__init_hpp(void); @@ -638,7 +639,11 @@ int hist_browser__run(struct hist_browser *browser, const char *help, switch (key) { case K_TIMER: { u64 nr_entries; - hbt->timer(hbt->arg); + + WARN_ON_ONCE(!hbt); + + if (hbt) + hbt->timer(hbt->arg); if (hist_browser__has_filter(browser) || symbol_conf.report_hierarchy) @@ -1470,7 +1475,7 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, int i = 0; width -= fmt->entry(fmt, &hpp, entry); - ui_browser__printf(&browser->b, "%s", ltrim(s)); + ui_browser__printf(&browser->b, "%s", skip_spaces(s)); while (isspace(s[i++])) width++; @@ 
-1686,7 +1691,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows ret = fmt->header(fmt, &dummy_hpp, hists, 0, NULL); dummy_hpp.buf[ret] = '\0'; - start = trim(dummy_hpp.buf); + start = strim(dummy_hpp.buf); ret = strlen(start); if (start != dummy_hpp.buf) @@ -2070,7 +2075,8 @@ static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser, advance_hpp(&hpp, ret); } - printed += fprintf(fp, "%s\n", rtrim(s)); + strim(s); + printed += fprintf(fp, "%s\n", s); if (he->leaf && folded_sign == '-') { printed += hist_browser__fprintf_callchain(browser, he, fp, @@ -2819,7 +2825,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, { struct hists *hists = evsel__hists(evsel); struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts); - struct branch_info *bi; + struct branch_info *bi = NULL; #define MAX_OPTIONS 16 char *options[MAX_OPTIONS]; struct popup_action actions[MAX_OPTIONS]; @@ -3085,7 +3091,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, goto skip_annotation; if (sort__mode == SORT_MODE__BRANCH) { - bi = browser->he_selection->branch_info; + + if (browser->he_selection) + bi = browser->he_selection->branch_info; if (bi == NULL) goto skip_annotation; @@ -3269,7 +3277,8 @@ static int perf_evsel_menu__run(struct perf_evsel_menu *menu, switch (key) { case K_TIMER: - hbt->timer(hbt->arg); + if (hbt) + hbt->timer(hbt->arg); if (!menu->lost_events_warned && menu->lost_events && diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index c70d9337405b..4c545b92e20d 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c @@ -2,6 +2,7 @@ #include <elf.h> #include <inttypes.h> #include <sys/ttydefaults.h> +#include <stdlib.h> #include <string.h> #include <linux/bitops.h> #include "../../util/util.h" @@ -13,7 +14,7 @@ #include "../keysyms.h" #include "map.h" -#include "sane_ctype.h" +#include 
<linux/ctype.h> struct map_browser { struct ui_browser b; diff --git a/tools/perf/ui/browsers/res_sample.c b/tools/perf/ui/browsers/res_sample.c index c0dd73176d42..8aa3547bb9ff 100644 --- a/tools/perf/ui/browsers/res_sample.c +++ b/tools/perf/ui/browsers/res_sample.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 /* Display a menu with individual samples to browse with perf script */ -#include "util.h" #include "hist.h" #include "evsel.h" #include "hists.h" @@ -8,6 +7,7 @@ #include "config.h" #include "time-utils.h" #include <linux/time64.h> +#include <linux/zalloc.h> static u64 context_len = 10 * NSEC_PER_MSEC; @@ -46,14 +46,14 @@ int res_sample_browse(struct res_sample *res_samples, int num_res, if (asprintf(&names[i], "%s: CPU %d tid %d", tbuf, res_samples[i].cpu, res_samples[i].tid) < 0) { while (--i >= 0) - free(names[i]); + zfree(&names[i]); free(names); return -1; } } choice = ui__popup_menu(num_res, names); for (i = 0; i < num_res; i++) - free(names[i]); + zfree(&names[i]); free(names); if (choice < 0 || choice >= num_res) diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 27cf3ab88d13..4d565cc14076 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -1,12 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include "../../util/sort.h" -#include "../../util/util.h" #include "../../util/hist.h" #include "../../util/debug.h" #include "../../util/symbol.h" #include "../browser.h" #include "../libslang.h" #include "config.h" +#include <linux/zalloc.h> #define SCRIPT_NAMELEN 128 #define SCRIPT_MAX_NO 64 @@ -142,7 +142,7 @@ static int list_scripts(char *script_name, bool *custom, out: free(buf); for (i = 0; i < max_std; i++) - free(paths[i]); + zfree(&paths[i]); return ret; } diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index df49c9ba1785..3af87c18a914 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -152,7 +152,7 @@ static int 
perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym, gtk_container_add(GTK_CONTAINER(window), view); list_for_each_entry_safe(pos, n, &notes->src->source, al.node) { - list_del(&pos->al.node); + list_del_init(&pos->al.node); disasm_line__free(pos); } diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 0c08890f006a..3955ed1d1bd9 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -9,6 +9,7 @@ #include "../string2.h" #include "gtk.h" #include <signal.h> +#include <linux/string.h> #define MAX_COLUMNS 32 @@ -459,7 +460,7 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists, advance_hpp(hpp, ret + 2); } - gtk_tree_store_set(store, &iter, col_idx, ltrim(rtrim(bf)), -1); + gtk_tree_store_set(store, &iter, col_idx, strim(bf), -1); if (!he->leaf) { hpp->buf = bf; @@ -555,7 +556,7 @@ static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists, first_col = false; fmt->header(fmt, &hpp, hists, 0, NULL); - strcat(buf, ltrim(rtrim(hpp.buf))); + strcat(buf, strim(hpp.buf)); } } diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c index 7250d8101c8f..c28bdb7517ac 100644 --- a/tools/perf/ui/gtk/util.c +++ b/tools/perf/ui/gtk/util.c @@ -1,11 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include "../util.h" -#include "../../util/util.h" #include "../../util/debug.h" #include "gtk.h" #include <string.h> - +#include <linux/zalloc.h> struct perf_gtk_context *pgctx; diff --git a/tools/perf/ui/libslang.h b/tools/perf/ui/libslang.h index c0686cda39a5..991e692b9b46 100644 --- a/tools/perf/ui/libslang.h +++ b/tools/perf/ui/libslang.h @@ -10,7 +10,12 @@ #ifndef HAVE_LONG_LONG #define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG #endif + +#ifdef HAVE_SLANG_INCLUDE_SUBDIR +#include <slang/slang.h> +#else #include <slang.h> +#endif #if SLANG_VERSION < 20104 #define slsmg_printf(msg, args...) 
\ diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c index bbfbc91a0fa4..8cd3b64c6893 100644 --- a/tools/perf/ui/progress.c +++ b/tools/perf/ui/progress.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> -#include "../cache.h" +#include "../util/cache.h" #include "progress.h" static void null_progress__update(struct ui_progress *p __maybe_unused) diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index a60f2993d390..ee7ea6deed21 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -3,7 +3,6 @@ #include <linux/string.h> #include "../../util/callchain.h" -#include "../../util/util.h" #include "../../util/hist.h" #include "../../util/map.h" #include "../../util/map_groups.h" @@ -13,7 +12,8 @@ #include "../../util/srcline.h" #include "../../util/string2.h" #include "../../util/thread.h" -#include "../../util/sane_ctype.h" +#include <linux/ctype.h> +#include <linux/zalloc.h> static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) { @@ -516,7 +516,7 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he, * dynamic entries are right-aligned but we want left-aligned * in the hierarchy mode */ - printed += fprintf(fp, "%s%s", sep ?: " ", ltrim(buf)); + printed += fprintf(fp, "%s%s", sep ?: " ", skip_spaces(buf)); } printed += putc('\n', fp); @@ -531,6 +531,30 @@ out: return printed; } +static int hist_entry__block_fprintf(struct hist_entry *he, + char *bf, size_t size, + FILE *fp) +{ + struct block_hist *bh = container_of(he, struct block_hist, he); + int ret = 0; + + for (unsigned int i = 0; i < bh->block_hists.nr_entries; i++) { + struct perf_hpp hpp = { + .buf = bf, + .size = size, + .skip = false, + }; + + bh->block_idx = i; + hist_entry__snprintf(he, &hpp); + + if (!hpp.skip) + ret += fprintf(fp, "%s\n", bf); + } + + return ret; +} + static int hist_entry__fprintf(struct hist_entry *he, size_t size, char *bf, size_t bfsz, FILE *fp, bool ignore_callchains) @@ 
-550,6 +574,9 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, if (symbol_conf.report_hierarchy) return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp); + if (symbol_conf.report_block) + return hist_entry__block_fprintf(he, bf, size, fp); + hist_entry__snprintf(he, &hpp); ret = fprintf(fp, "%s\n", bf); @@ -566,10 +593,14 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, static int print_hierarchy_indent(const char *sep, int indent, const char *line, FILE *fp) { + int width; + if (sep != NULL || indent < 2) return 0; - return fprintf(fp, "%-.*s", (indent - 2) * HIERARCHY_INDENT, line); + width = (indent - 2) * HIERARCHY_INDENT; + + return fprintf(fp, "%-*.*s", width, width, line); } static int hists__fprintf_hierarchy_headers(struct hists *hists, @@ -587,7 +618,7 @@ static int hists__fprintf_hierarchy_headers(struct hists *hists, indent = hists->nr_hpp_node; /* preserve max indent depth for column headers */ - print_hierarchy_indent(sep, indent, spaces, fp); + print_hierarchy_indent(sep, indent, " ", fp); /* the first hpp_list_node is for overhead columns */ fmt_node = list_first_entry(&hists->hpp_formats, @@ -616,7 +647,7 @@ static int hists__fprintf_hierarchy_headers(struct hists *hists, fmt->header(fmt, hpp, hists, 0, NULL); - header_width += fprintf(fp, "%s", trim(hpp->buf)); + header_width += fprintf(fp, "%s", strim(hpp->buf)); } } @@ -816,7 +847,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) { int depth = hists->nr_hpp_node + h->depth + 1; - print_hierarchy_indent(sep, depth, spaces, fp); + print_hierarchy_indent(sep, depth, " ", fp); fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt); if (max_rows && ++nr_rows >= max_rows) diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index d4ac41679721..3ad0d3363ac6 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -2,6 +2,7 @@ 
#include <errno.h> #include <signal.h> #include <stdbool.h> +#include <stdlib.h> #include <linux/kernel.h> #ifdef HAVE_BACKTRACE_SUPPORT #include <execinfo.h> diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c index b9794d6185af..fe5e571816fc 100644 --- a/tools/perf/ui/tui/util.c +++ b/tools/perf/ui/tui/util.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -#include "../../util/util.h" #include <signal.h> #include <stdbool.h> #include <string.h> +#include <stdlib.h> #include <sys/ttydefaults.h> #include "../../util/cache.h" diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8dd3102301ea..d7e3b008a613 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -20,10 +20,12 @@ perf-y += parse-events.o perf-y += perf_regs.o perf-y += path.o perf-y += print_binary.o +perf-y += argv_split.o perf-y += rbtree.o perf-y += libstring.o perf-y += bitmap.o perf-y += hweight.o +perf-y += zalloc.o perf-y += smt.o perf-y += strbuf.o perf-y += string.o @@ -145,6 +147,8 @@ perf-y += scripting-engines/ perf-$(CONFIG_ZLIB) += zlib.o perf-$(CONFIG_LZMA) += lzma.o +perf-$(CONFIG_ZSTD) += zstd.o + perf-y += demangle-java.o perf-y += demangle-rust.o @@ -207,10 +211,18 @@ $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) +$(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + $(OUTPUT)util/bitmap.o: ../lib/bitmap.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) +$(OUTPUT)util/ctype.o: ../lib/ctype.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + $(OUTPUT)util/find_bit.o: ../lib/find_bit.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) @@ -230,3 +242,7 @@ $(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE $(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/zalloc.o: ../lib/zalloc.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git 
a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index 3802cee5e188..59241ff342be 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -19,7 +19,7 @@ TAG= if test -d ../../.git -o -f ../../.git then TAG=$(git describe --abbrev=0 --match "v[0-9].[0-9]*" 2>/dev/null ) - CID=$(git log -1 --abbrev=4 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID" + CID=$(git log -1 --abbrev=12 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID" elif test -f ../../PERF-VERSION-FILE then TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 09762985c713..ac9ad2330f93 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> * * Parts came from builtin-annotate.c, see those files for further * copyright notes. - * - * Released under the GPL v2. 
(and only v2, not any later version) */ #include <errno.h> @@ -36,6 +35,7 @@ #include <pthread.h> #include <linux/bitops.h> #include <linux/kernel.h> +#include <linux/string.h> #include <bpf/libbpf.h> /* FIXME: For the HE_COLORSET */ @@ -50,7 +50,7 @@ #define DARROW_CHAR ((unsigned char)'.') #define UARROW_CHAR ((unsigned char)'-') -#include "sane_ctype.h" +#include <linux/ctype.h> struct annotation_options annotation__default_options = { .use_offset = true, @@ -145,6 +145,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/arc/annotate/instructions.c" #include "arch/arm/annotate/instructions.c" #include "arch/arm64/annotate/instructions.c" +#include "arch/csky/annotate/instructions.c" #include "arch/x86/annotate/instructions.c" #include "arch/powerpc/annotate/instructions.c" #include "arch/s390/annotate/instructions.c" @@ -164,6 +165,10 @@ static struct arch architectures[] = { .init = arm64__annotate_init, }, { + .name = "csky", + .init = csky__annotate_init, + }, + { .name = "x86", .init = x86__annotate_init, .instructions = x86__instructions, @@ -558,7 +563,7 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy if (comment == NULL) return 0; - comment = ltrim(comment); + comment = skip_spaces(comment); comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); @@ -603,7 +608,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops if (comment == NULL) return 0; - comment = ltrim(comment); + comment = skip_spaces(comment); comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); return 0; @@ -932,9 +937,8 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, if (sym == NULL) return 0; src = symbol__hists(sym, evsel->evlist->nr_entries); - if (src == NULL) - return -ENOMEM; - return 
__symbol__inc_addr_samples(sym, map, src, evsel->idx, addr, sample); + return (src) ? __symbol__inc_addr_samples(sym, map, src, evsel->idx, + addr, sample) : 0; } static int symbol__account_cycles(u64 addr, u64 start, @@ -1021,7 +1025,7 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 float ipc = n_insn / ((double)ch->cycles / (double)ch->num); /* Hide data when there are too many overlaps. */ - if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2) + if (ch->reset >= 0x7fff) return; for (offset = start; offset <= end; offset++) { @@ -1100,7 +1104,7 @@ static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, str static int disasm_line__parse(char *line, const char **namep, char **rawp) { - char tmp, *name = ltrim(line); + char tmp, *name = skip_spaces(line); if (name[0] == '\0') return -1; @@ -1115,16 +1119,14 @@ static int disasm_line__parse(char *line, const char **namep, char **rawp) *namep = strdup(name); if (*namep == NULL) - goto out_free_name; + goto out; (*rawp)[0] = tmp; - *rawp = ltrim(*rawp); + *rawp = skip_spaces(*rawp); return 0; -out_free_name: - free((void *)namep); - *namep = NULL; +out: return -1; } @@ -1233,8 +1235,7 @@ void disasm_line__free(struct disasm_line *dl) dl->ins.ops->free(&dl->ops); else ins__delete(&dl->ops); - free((void *)dl->ins.name); - dl->ins.name = NULL; + zfree(&dl->ins.name); annotation_line__delete(&dl->al); } @@ -1497,7 +1498,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, return -1; line_ip = -1; - parsed_line = rtrim(line); + parsed_line = strim(line); /* /filename:linenr ? Save line number and ignore. 
*/ if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { @@ -1505,7 +1506,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, return 0; } - tmp = ltrim(parsed_line); + tmp = skip_spaces(parsed_line); if (*tmp) { /* * Parse hexa addresses followed by ':' @@ -1585,7 +1586,7 @@ static void delete_last_nop(struct symbol *sym) return; } - list_del(&dl->al.node); + list_del_init(&dl->al.node); disasm_line__free(dl); } } @@ -2462,7 +2463,7 @@ void annotated_source__purge(struct annotated_source *as) struct annotation_line *al, *n; list_for_each_entry_safe(al, n, &as->source, node) { - list_del(&al->node); + list_del_init(&al->node); disasm_line__free(disasm_line(al)); } } diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 6067267cc76c..a314e5b26e9d 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -12,6 +12,7 @@ #include <linux/types.h> #include <linux/bitops.h> #include <linux/log2.h> +#include <linux/zalloc.h> #include "cpumap.h" #include "color.h" @@ -19,7 +20,6 @@ #include "evlist.h" #include "machine.h" #include "session.h" -#include "util.h" #include "thread.h" #include "debug.h" #include "auxtrace.h" diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index fb76b6b232d4..ec0af36697c4 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1,16 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * auxtrace.c: AUX area trace support * Copyright (c) 2013-2015, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * */ #include <inttypes.h> @@ -33,9 +24,9 @@ #include <stdlib.h> #include <stdio.h> #include <linux/list.h> +#include <linux/zalloc.h> #include "../perf.h" -#include "util.h" #include "evlist.h" #include "dso.h" #include "map.h" @@ -60,7 +51,7 @@ #include "arm-spe.h" #include "s390-cpumsf.h" -#include "sane_ctype.h" +#include <linux/ctype.h> #include "symbol/kallsyms.h" static bool auxtrace__dont_decode(struct perf_session *session) @@ -417,7 +408,7 @@ void auxtrace_queues__free(struct auxtrace_queues *queues) buffer = list_entry(queues->queue_array[i].head.next, struct auxtrace_buffer, list); - list_del(&buffer->list); + list_del_init(&buffer->list); auxtrace_buffer__free(buffer); } } @@ -621,7 +612,7 @@ void auxtrace_index__free(struct list_head *head) struct auxtrace_index *auxtrace_index, *n; list_for_each_entry_safe(auxtrace_index, n, head, list) { - list_del(&auxtrace_index->list); + list_del_init(&auxtrace_index->list); free(auxtrace_index); } } @@ -1010,7 +1001,8 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, } if (!str) { - itrace_synth_opts__set_default(synth_opts, false); + itrace_synth_opts__set_default(synth_opts, + synth_opts->default_no_sample); return 0; } @@ -1421,7 +1413,7 @@ void auxtrace_cache__free(struct auxtrace_cache *c) return; auxtrace_cache__drop(c); - free(c->hashtable); + zfree(&c->hashtable); free(c); } @@ -1467,12 +1459,11 @@ void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key) static void addr_filter__free_str(struct addr_filter *filt) { - free(filt->str); + zfree(&filt->str); filt->action = NULL; filt->sym_from = NULL; filt->sym_to = NULL; filt->filename = NULL; - filt->str = NULL; } static struct addr_filter *addr_filter__new(void) diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index c69bcd9a3091..e9b4c5edf78b 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -1,16 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * auxtrace.h: AUX area 
trace support * Copyright (c) 2013-2015, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #ifndef __PERF_AUXTRACE_H @@ -83,6 +74,8 @@ enum itrace_period_type { * @period_type: 'instructions' events period type * @initial_skip: skip N events at the beginning. * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all + * @ptime_range: time intervals to trace or NULL + * @range_num: number of time intervals to trace */ struct itrace_synth_opts { bool set; @@ -107,6 +100,8 @@ struct itrace_synth_opts { enum itrace_period_type period_type; unsigned long initial_skip; unsigned long *cpu_bitmap; + struct perf_time_interval *ptime_range; + int range_num; }; /** @@ -599,6 +594,21 @@ static inline void auxtrace__free(struct perf_session *session) " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ " concatenate multiple options. 
Default is ibxwpe or cewp\n" +static inline +void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts, + struct perf_time_interval *ptime_range, + int range_num) +{ + opts->ptime_range = ptime_range; + opts->range_num = range_num; +} + +static inline +void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts) +{ + opts->ptime_range = NULL; + opts->range_num = 0; +} #else @@ -742,6 +752,21 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, #define ITRACE_HELP "" +static inline +void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts + __maybe_unused, + struct perf_time_interval *ptime_range + __maybe_unused, + int range_num __maybe_unused) +{ +} + +static inline +void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts + __maybe_unused) +{ +} + #endif #endif diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 251d9ea6252f..c61974a50aa5 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -12,6 +12,7 @@ #include <linux/err.h> #include <linux/kernel.h> #include <linux/string.h> +#include <linux/zalloc.h> #include <errno.h> #include "perf.h" #include "debug.h" @@ -828,7 +829,7 @@ static void bpf_map_op__delete(struct bpf_map_op *op) { if (!list_empty(&op->list)) - list_del(&op->list); + list_del_init(&op->list); if (op->key_type == BPF_MAP_KEY_RANGES) parse_events__clear_array(&op->k.array); free(op); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 0c5517a8d0b7..f1abfab7aa8c 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -29,7 +29,8 @@ #include "probe-file.h" #include "strlist.h" -#include "sane_ctype.h" +#include <linux/ctype.h> +#include <linux/zalloc.h> static bool no_buildid_cache; diff --git a/tools/perf/util/call-path.c b/tools/perf/util/call-path.c index 904a17052e38..5c60b8be1cf6 100644 --- a/tools/perf/util/call-path.c +++ b/tools/perf/util/call-path.c @@ -1,22 +1,14 @@ +// 
SPDX-License-Identifier: GPL-2.0-only /* * call-path.h: Manipulate a tree data structure containing function call paths * Copyright (c) 2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #include <linux/rbtree.h> #include <linux/list.h> +#include <linux/zalloc.h> +#include <stdlib.h> -#include "util.h" #include "call-path.h" static void call_path__init(struct call_path *cp, struct call_path *parent, @@ -48,7 +40,7 @@ void call_path_root__free(struct call_path_root *cpr) struct call_path_block *pos, *n; list_for_each_entry_safe(pos, n, &cpr->blocks, node) { - list_del(&pos->node); + list_del_init(&pos->node); free(pos); } free(cpr); diff --git a/tools/perf/util/call-path.h b/tools/perf/util/call-path.h index 477f6d03b659..6b3229106f16 100644 --- a/tools/perf/util/call-path.h +++ b/tools/perf/util/call-path.h @@ -1,16 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * call-path.h: Manipulate a tree data structure containing function call paths * Copyright (c) 2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * */ #ifndef __PERF_CALL_PATH_H diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index abb608b09269..8d7d8f62fcca 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -16,11 +16,11 @@ #include <stdbool.h> #include <errno.h> #include <math.h> +#include <linux/zalloc.h> #include "asm/bug.h" #include "hist.h" -#include "util.h" #include "sort.h" #include "machine.h" #include "map.h" @@ -636,7 +636,7 @@ add_child(struct callchain_node *parent, struct callchain_list *call, *tmp; list_for_each_entry_safe(call, tmp, &new->val, list) { - list_del(&call->list); + list_del_init(&call->list); map__zput(call->ms.map); free(call); } @@ -1002,7 +1002,7 @@ merge_chain_branch(struct callchain_cursor *cursor, callchain_cursor_append(cursor, list->ip, list->ms.map, list->ms.sym, false, NULL, 0, 0, 0, list->srcline); - list_del(&list->list); + list_del_init(&list->list); map__zput(list->ms.map); free(list); } @@ -1453,13 +1453,13 @@ static void free_callchain_node(struct callchain_node *node) struct rb_node *n; list_for_each_entry_safe(list, tmp, &node->parent_val, list) { - list_del(&list->list); + list_del_init(&list->list); map__zput(list->ms.map); free(list); } list_for_each_entry_safe(list, tmp, &node->val, list) { - list_del(&list->list); + list_del_init(&list->list); map__zput(list->ms.map); free(list); } @@ -1544,7 +1544,7 @@ int callchain_node__make_parent_list(struct callchain_node *node) out: list_for_each_entry_safe(chain, new, &head, list) { - list_del(&chain->list); + list_del_init(&chain->list); map__zput(chain->ms.map); free(chain); } diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index ccd02634a616..484c29830a81 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 -#include "util.h" #include "../perf.h" #include <subcmd/parse-options.h> #include "evsel.h" #include "cgroup.h" #include "evlist.h" #include <linux/stringify.h> +#include 
<linux/zalloc.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> @@ -124,7 +124,7 @@ static struct cgroup *cgroup__new(const char *name) return cgroup; out_free_name: - free(cgroup->name); + zfree(&cgroup->name); out_err: free(cgroup); return NULL; diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 1066de92af12..afb8d4fd2644 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -1,12 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include "comm.h" -#include "util.h" #include <errno.h> #include <stdlib.h> #include <stdio.h> #include <string.h> #include <linux/refcount.h> #include <linux/rbtree.h> +#include <linux/zalloc.h> #include "rwsem.h" struct comm_str { diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h index 892e92e7e7fc..0cd3369af2a4 100644 --- a/tools/perf/util/compress.h +++ b/tools/perf/util/compress.h @@ -2,6 +2,11 @@ #ifndef PERF_COMPRESS_H #define PERF_COMPRESS_H +#include <stdbool.h> +#ifdef HAVE_ZSTD_SUPPORT +#include <zstd.h> +#endif + #ifdef HAVE_ZLIB_SUPPORT int gzip_decompress_to_file(const char *input, int output_fd); bool gzip_is_compressed(const char *input); @@ -12,4 +17,52 @@ int lzma_decompress_to_file(const char *input, int output_fd); bool lzma_is_compressed(const char *input); #endif +struct zstd_data { +#ifdef HAVE_ZSTD_SUPPORT + ZSTD_CStream *cstream; + ZSTD_DStream *dstream; +#endif +}; + +#ifdef HAVE_ZSTD_SUPPORT + +int zstd_init(struct zstd_data *data, int level); +int zstd_fini(struct zstd_data *data); + +size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, + void *src, size_t src_size, size_t max_record_size, + size_t process_header(void *record, size_t increment)); + +size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size, + void *dst, size_t dst_size); +#else /* !HAVE_ZSTD_SUPPORT */ + +static inline int zstd_init(struct zstd_data *data __maybe_unused, int level __maybe_unused) +{ + return 0; +} + +static 
inline int zstd_fini(struct zstd_data *data __maybe_unused) +{ + return 0; +} + +static inline +size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, + void *dst __maybe_unused, size_t dst_size __maybe_unused, + void *src __maybe_unused, size_t src_size __maybe_unused, + size_t max_record_size __maybe_unused, + size_t process_header(void *record, size_t increment) __maybe_unused) +{ + return 0; +} + +static inline size_t zstd_decompress_stream(struct zstd_data *data __maybe_unused, void *src __maybe_unused, + size_t src_size __maybe_unused, void *dst __maybe_unused, + size_t dst_size __maybe_unused) +{ + return 0; +} +#endif + #endif /* PERF_COMPRESS_H */ diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 7e3c1b60120c..042ffbc8c53f 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -11,7 +11,6 @@ */ #include <errno.h> #include <sys/param.h> -#include "util.h" #include "cache.h" #include "callchain.h" #include <subcmd/exec-cmd.h> @@ -23,8 +22,8 @@ #include <sys/stat.h> #include <unistd.h> #include <linux/string.h> - -#include "sane_ctype.h" +#include <linux/zalloc.h> +#include <linux/ctype.h> #define MAXNAME (256) @@ -739,11 +738,15 @@ int perf_config(config_fn_t fn, void *data) if (ret < 0) { pr_err("Error: wrong config key-value pair %s=%s\n", key, value); - break; + /* + * Can't be just a 'break', as perf_config_set__for_each_entry() + * expands to two nested for() loops. 
+ */ + goto out; } } } - +out: return ret; } diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c index 03032b410c29..88be9c4365e0 100644 --- a/tools/perf/util/counts.c +++ b/tools/perf/util/counts.c @@ -3,7 +3,7 @@ #include <stdlib.h> #include "evsel.h" #include "counts.h" -#include "util.h" +#include <linux/zalloc.h> struct perf_counts *perf_counts__new(int ncpus, int nthreads) { diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 0b599229bc7e..3acfbe34ebaf 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include "util.h" #include <api/fs/fs.h> #include "../perf.h" #include "cpumap.h" @@ -10,7 +9,8 @@ #include <linux/bitmap.h> #include "asm/bug.h" -#include "sane_ctype.h" +#include <linux/ctype.h> +#include <linux/zalloc.h> static int max_cpu_num; static int max_present_cpu_num; @@ -373,6 +373,46 @@ int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, return 0; } +int cpu_map__get_die_id(int cpu) +{ + int value, ret = cpu__get_topology_int(cpu, "die_id", &value); + + return ret ?: value; +} + +int cpu_map__get_die(struct cpu_map *map, int idx, void *data) +{ + int cpu, die_id, s; + + if (idx > map->nr) + return -1; + + cpu = map->map[idx]; + + die_id = cpu_map__get_die_id(cpu); + /* There is no die_id on legacy system. */ + if (die_id == -1) + die_id = 0; + + s = cpu_map__get_socket(map, idx, data); + if (s == -1) + return -1; + + /* + * Encode socket in bit range 15:8 + * die_id is relative to socket, and + * we need a global id. 
So we combine + * socket + die id + */ + if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n")) + return -1; + + if (WARN_ONCE(s >> 8, "The socket id number is too big.\n")) + return -1; + + return (s << 8) | (die_id & 0xff); +} + int cpu_map__get_core_id(int cpu) { int value, ret = cpu__get_topology_int(cpu, "core_id", &value); @@ -381,7 +421,7 @@ int cpu_map__get_core_id(int cpu) int cpu_map__get_core(struct cpu_map *map, int idx, void *data) { - int cpu, s; + int cpu, s_die; if (idx > map->nr) return -1; @@ -390,17 +430,22 @@ int cpu_map__get_core(struct cpu_map *map, int idx, void *data) cpu = cpu_map__get_core_id(cpu); - s = cpu_map__get_socket(map, idx, data); - if (s == -1) + /* s_die is the combination of socket + die id */ + s_die = cpu_map__get_die(map, idx, data); + if (s_die == -1) return -1; /* - * encode socket in upper 16 bits - * core_id is relative to socket, and + * encode socket in bit range 31:24 + * encode die id in bit range 23:16 + * core_id is relative to socket and die, * we need a global id. 
So we combine - * socket+ core id + * socket + die id + core id */ - return (s << 16) | (cpu & 0xffff); + if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n")) + return -1; + + return (s_die << 16) | (cpu & 0xffff); } int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) @@ -408,6 +453,11 @@ int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); } +int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep) +{ + return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL); +} + int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep) { return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL); diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index f00ce624b9f7..1265f0e33920 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -25,9 +25,12 @@ size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); int cpu_map__get_socket_id(int cpu); int cpu_map__get_socket(struct cpu_map *map, int idx, void *data); +int cpu_map__get_die_id(int cpu); +int cpu_map__get_die(struct cpu_map *map, int idx, void *data); int cpu_map__get_core_id(int cpu); int cpu_map__get_core(struct cpu_map *map, int idx, void *data); int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); +int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep); int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); const struct cpu_map *cpu_map__online(void); /* thread unsafe */ @@ -43,7 +46,12 @@ static inline int cpu_map__socket(struct cpu_map *sock, int s) static inline int cpu_map__id_to_socket(int id) { - return id >> 16; + return id >> 24; +} + +static inline int cpu_map__id_to_die(int id) +{ + return (id >> 16) & 0xff; } static inline int cpu_map__id_to_cpu(int id) diff --git 
a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index ece0710249d4..64336a280967 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -1,18 +1,23 @@ // SPDX-License-Identifier: GPL-2.0 #include <sys/param.h> +#include <sys/utsname.h> #include <inttypes.h> +#include <stdlib.h> #include <api/fs/fs.h> +#include <linux/zalloc.h> #include "cputopo.h" #include "cpumap.h" -#include "util.h" #include "env.h" - #define CORE_SIB_FMT \ "%s/devices/system/cpu/cpu%d/topology/core_siblings_list" +#define DIE_SIB_FMT \ + "%s/devices/system/cpu/cpu%d/topology/die_cpus_list" #define THRD_SIB_FMT \ "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list" +#define THRD_SIB_FMT_NEW \ + "%s/devices/system/cpu/cpu%d/topology/core_cpus_list" #define NODE_ONLINE_FMT \ "%s/devices/system/node/online" #define NODE_MEMINFO_FMT \ @@ -34,12 +39,12 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu) sysfs__mountpoint(), cpu); fp = fopen(filename, "r"); if (!fp) - goto try_threads; + goto try_dies; sret = getline(&buf, &len, fp); fclose(fp); if (sret <= 0) - goto try_threads; + goto try_dies; p = strchr(buf, '\n'); if (p) @@ -57,9 +62,44 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu) } ret = 0; +try_dies: + if (!tp->die_siblings) + goto try_threads; + + scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT, + sysfs__mountpoint(), cpu); + fp = fopen(filename, "r"); + if (!fp) + goto try_threads; + + sret = getline(&buf, &len, fp); + fclose(fp); + if (sret <= 0) + goto try_threads; + + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + + for (i = 0; i < tp->die_sib; i++) { + if (!strcmp(buf, tp->die_siblings[i])) + break; + } + if (i == tp->die_sib) { + tp->die_siblings[i] = buf; + tp->die_sib++; + buf = NULL; + len = 0; + } + ret = 0; + try_threads: - scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT, + scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT_NEW, sysfs__mountpoint(), cpu); + if (access(filename, F_OK) == -1) { + scnprintf(filename, MAXPATHLEN, 
THRD_SIB_FMT, + sysfs__mountpoint(), cpu); + } fp = fopen(filename, "r"); if (!fp) goto done; @@ -98,21 +138,46 @@ void cpu_topology__delete(struct cpu_topology *tp) for (i = 0 ; i < tp->core_sib; i++) zfree(&tp->core_siblings[i]); + if (tp->die_sib) { + for (i = 0 ; i < tp->die_sib; i++) + zfree(&tp->die_siblings[i]); + } + for (i = 0 ; i < tp->thread_sib; i++) zfree(&tp->thread_siblings[i]); free(tp); } +static bool has_die_topology(void) +{ + char filename[MAXPATHLEN]; + struct utsname uts; + + if (uname(&uts) < 0) + return false; + + if (strncmp(uts.machine, "x86_64", 6)) + return false; + + scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT, + sysfs__mountpoint(), 0); + if (access(filename, F_OK) == -1) + return false; + + return true; +} + struct cpu_topology *cpu_topology__new(void) { struct cpu_topology *tp = NULL; void *addr; - u32 nr, i; + u32 nr, i, nr_addr; size_t sz; long ncpus; int ret = -1; struct cpu_map *map; + bool has_die = has_die_topology(); ncpus = cpu__max_present_cpu(); @@ -126,7 +191,11 @@ struct cpu_topology *cpu_topology__new(void) nr = (u32)(ncpus & UINT_MAX); sz = nr * sizeof(char *); - addr = calloc(1, sizeof(*tp) + 2 * sz); + if (has_die) + nr_addr = 3; + else + nr_addr = 2; + addr = calloc(1, sizeof(*tp) + nr_addr * sz); if (!addr) goto out_free; @@ -134,6 +203,10 @@ struct cpu_topology *cpu_topology__new(void) addr += sizeof(*tp); tp->core_siblings = addr; addr += sz; + if (has_die) { + tp->die_siblings = addr; + addr += sz; + } tp->thread_siblings = addr; for (i = 0; i < nr; i++) { @@ -271,7 +344,7 @@ void numa_topology__delete(struct numa_topology *tp) u32 i; for (i = 0; i < tp->nr; i++) - free(tp->nodes[i].cpus); + zfree(&tp->nodes[i].cpus); free(tp); } diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h index 47a97e71acdf..bae2f1d41856 100644 --- a/tools/perf/util/cputopo.h +++ b/tools/perf/util/cputopo.h @@ -7,8 +7,10 @@ struct cpu_topology { u32 core_sib; + u32 die_sib; u32 thread_sib; char **core_siblings; + char 
**die_siblings; char **thread_siblings; }; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 39fe21e1cf93..37d7c492b155 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -8,6 +8,7 @@ #include <linux/err.h> #include <linux/list.h> +#include <linux/zalloc.h> #include <stdlib.h> #include <opencsd/c_api/opencsd_c_api.h> #include <opencsd/etmv4/trc_pkt_types_etmv4.h> @@ -18,8 +19,6 @@ #include "intlist.h" #include "util.h" -#define MAX_BUFFER 1024 - /* use raw logging */ #ifdef CS_DEBUG_RAW #define CS_LOG_RAW_FRAMES @@ -31,33 +30,26 @@ #endif #endif -#define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL - struct cs_etm_decoder { void *data; void (*packet_printer)(const char *msg); dcd_tree_handle_t dcd_tree; cs_etm_mem_cb_type mem_access; ocsd_datapath_resp_t prev_return; - u32 packet_count; - u32 head; - u32 tail; - struct cs_etm_packet packet_buffer[MAX_BUFFER]; }; static u32 cs_etm_decoder__mem_access(const void *context, const ocsd_vaddr_t address, const ocsd_mem_space_acc_t mem_space __maybe_unused, + const u8 trace_chan_id, const u32 req_size, u8 *buffer) { struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; - return decoder->mem_access(decoder->data, - address, - req_size, - buffer); + return decoder->mem_access(decoder->data, trace_chan_id, + address, req_size, buffer); } int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, @@ -66,9 +58,10 @@ int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, { decoder->mem_access = cb_func; - if (ocsd_dt_add_callback_mem_acc(decoder->dcd_tree, start, end, - OCSD_MEM_SPACE_ANY, - cs_etm_decoder__mem_access, decoder)) + if (ocsd_dt_add_callback_trcid_mem_acc(decoder->dcd_tree, start, end, + OCSD_MEM_SPACE_ANY, + cs_etm_decoder__mem_access, + decoder)) return -1; return 0; @@ -88,14 +81,14 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) 
return 0; } -int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, +int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue, struct cs_etm_packet *packet) { - if (!decoder || !packet) + if (!packet_queue || !packet) return -EINVAL; /* Nothing to do, might as well just return */ - if (decoder->packet_count == 0) + if (packet_queue->packet_count == 0) return 0; /* * The queueing process in function cs_etm_decoder__buffer_packet() @@ -106,11 +99,12 @@ int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, * value. Otherwise the first element of the packet queue is not * used. */ - decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1); + packet_queue->head = (packet_queue->head + 1) & + (CS_ETM_PACKET_MAX_BUFFER - 1); - *packet = decoder->packet_buffer[decoder->head]; + *packet = packet_queue->packet_buffer[packet_queue->head]; - decoder->packet_count--; + packet_queue->packet_count--; return 1; } @@ -276,84 +270,130 @@ cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params, trace_config); } -static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) +static ocsd_datapath_resp_t +cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq, + struct cs_etm_packet_queue *packet_queue, + const uint8_t trace_chan_id) { - int i; - - decoder->head = 0; - decoder->tail = 0; - decoder->packet_count = 0; - for (i = 0; i < MAX_BUFFER; i++) { - decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; - decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; - decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; - decoder->packet_buffer[i].instr_count = 0; - decoder->packet_buffer[i].last_instr_taken_branch = false; - decoder->packet_buffer[i].last_instr_size = 0; - decoder->packet_buffer[i].last_instr_type = 0; - decoder->packet_buffer[i].last_instr_subtype = 0; - decoder->packet_buffer[i].last_instr_cond = 0; - decoder->packet_buffer[i].flags = 0; - decoder->packet_buffer[i].exception_number = UINT32_MAX; - 
decoder->packet_buffer[i].trace_chan_id = UINT8_MAX; - decoder->packet_buffer[i].cpu = INT_MIN; + /* No timestamp packet has been received, nothing to do */ + if (!packet_queue->timestamp) + return OCSD_RESP_CONT; + + packet_queue->timestamp = packet_queue->next_timestamp; + + /* Estimate the timestamp for the next range packet */ + packet_queue->next_timestamp += packet_queue->instr_count; + packet_queue->instr_count = 0; + + /* Tell the front end which traceid_queue needs attention */ + cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id); + + return OCSD_RESP_WAIT; +} + +static ocsd_datapath_resp_t +cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, + const ocsd_generic_trace_elem *elem, + const uint8_t trace_chan_id) +{ + struct cs_etm_packet_queue *packet_queue; + + /* First get the packet queue for this traceID */ + packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id); + if (!packet_queue) + return OCSD_RESP_FATAL_SYS_ERR; + + /* + * We've seen a timestamp packet before - simply record the new value. + * Function do_soft_timestamp() will report the value to the front end, + * hence asking the decoder to keep decoding rather than stopping. + */ + if (packet_queue->timestamp) { + packet_queue->next_timestamp = elem->timestamp; + return OCSD_RESP_CONT; } + + /* + * This is the first timestamp we've seen since the beginning of traces + * or a discontinuity. Since timestamps packets are generated *after* + * range packets have been generated, we need to estimate the time at + * which instructions started by substracting the number of instructions + * executed to the timestamp. 
+ */ + packet_queue->timestamp = elem->timestamp - packet_queue->instr_count; + packet_queue->next_timestamp = elem->timestamp; + packet_queue->instr_count = 0; + + /* Tell the front end which traceid_queue needs attention */ + cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id); + + /* Halt processing until we are being told to proceed */ + return OCSD_RESP_WAIT; +} + +static void +cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue) +{ + packet_queue->timestamp = 0; + packet_queue->next_timestamp = 0; + packet_queue->instr_count = 0; } static ocsd_datapath_resp_t -cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, +cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue, const u8 trace_chan_id, enum cs_etm_sample_type sample_type) { u32 et = 0; int cpu; - if (decoder->packet_count >= MAX_BUFFER - 1) + if (packet_queue->packet_count >= CS_ETM_PACKET_MAX_BUFFER - 1) return OCSD_RESP_FATAL_SYS_ERR; if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0) return OCSD_RESP_FATAL_SYS_ERR; - et = decoder->tail; - et = (et + 1) & (MAX_BUFFER - 1); - decoder->tail = et; - decoder->packet_count++; - - decoder->packet_buffer[et].sample_type = sample_type; - decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; - decoder->packet_buffer[et].cpu = cpu; - decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; - decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; - decoder->packet_buffer[et].instr_count = 0; - decoder->packet_buffer[et].last_instr_taken_branch = false; - decoder->packet_buffer[et].last_instr_size = 0; - decoder->packet_buffer[et].last_instr_type = 0; - decoder->packet_buffer[et].last_instr_subtype = 0; - decoder->packet_buffer[et].last_instr_cond = 0; - decoder->packet_buffer[et].flags = 0; - decoder->packet_buffer[et].exception_number = UINT32_MAX; - decoder->packet_buffer[et].trace_chan_id = trace_chan_id; - - if (decoder->packet_count == MAX_BUFFER - 1) + et = packet_queue->tail; + et = (et + 1) & 
(CS_ETM_PACKET_MAX_BUFFER - 1); + packet_queue->tail = et; + packet_queue->packet_count++; + + packet_queue->packet_buffer[et].sample_type = sample_type; + packet_queue->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; + packet_queue->packet_buffer[et].cpu = cpu; + packet_queue->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; + packet_queue->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; + packet_queue->packet_buffer[et].instr_count = 0; + packet_queue->packet_buffer[et].last_instr_taken_branch = false; + packet_queue->packet_buffer[et].last_instr_size = 0; + packet_queue->packet_buffer[et].last_instr_type = 0; + packet_queue->packet_buffer[et].last_instr_subtype = 0; + packet_queue->packet_buffer[et].last_instr_cond = 0; + packet_queue->packet_buffer[et].flags = 0; + packet_queue->packet_buffer[et].exception_number = UINT32_MAX; + packet_queue->packet_buffer[et].trace_chan_id = trace_chan_id; + + if (packet_queue->packet_count == CS_ETM_PACKET_MAX_BUFFER - 1) return OCSD_RESP_WAIT; return OCSD_RESP_CONT; } static ocsd_datapath_resp_t -cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, +cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq, + struct cs_etm_packet_queue *packet_queue, const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) { int ret = 0; struct cs_etm_packet *packet; - ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + ret = cs_etm_decoder__buffer_packet(packet_queue, trace_chan_id, CS_ETM_RANGE); if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) return ret; - packet = &decoder->packet_buffer[decoder->tail]; + packet = &packet_queue->packet_buffer[packet_queue->tail]; switch (elem->isa) { case ocsd_isa_aarch64: @@ -396,43 +436,90 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, packet->last_instr_size = elem->last_instr_sz; + /* per-thread scenario, no need to generate a timestamp */ + if (cs_etm__etmq_is_timeless(etmq)) + goto out; + + /* + * The packet queue is full and we haven't seen a timestamp 
(had we + * seen one the packet queue wouldn't be full). Let the front end + * deal with it. + */ + if (ret == OCSD_RESP_WAIT) + goto out; + + packet_queue->instr_count += elem->num_instr_range; + /* Tell the front end we have a new timestamp to process */ + ret = cs_etm_decoder__do_soft_timestamp(etmq, packet_queue, + trace_chan_id); +out: return ret; } static ocsd_datapath_resp_t -cs_etm_decoder__buffer_discontinuity(struct cs_etm_decoder *decoder, - const uint8_t trace_chan_id) +cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue, + const uint8_t trace_chan_id) { - return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + /* + * Something happened and who knows when we'll get new traces so + * reset time statistics. + */ + cs_etm_decoder__reset_timestamp(queue); + return cs_etm_decoder__buffer_packet(queue, trace_chan_id, CS_ETM_DISCONTINUITY); } static ocsd_datapath_resp_t -cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder, +cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue, const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) { int ret = 0; struct cs_etm_packet *packet; - ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + ret = cs_etm_decoder__buffer_packet(queue, trace_chan_id, CS_ETM_EXCEPTION); if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) return ret; - packet = &decoder->packet_buffer[decoder->tail]; + packet = &queue->packet_buffer[queue->tail]; packet->exception_number = elem->exception_number; return ret; } static ocsd_datapath_resp_t -cs_etm_decoder__buffer_exception_ret(struct cs_etm_decoder *decoder, +cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue, const uint8_t trace_chan_id) { - return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + return cs_etm_decoder__buffer_packet(queue, trace_chan_id, CS_ETM_EXCEPTION_RET); } +static ocsd_datapath_resp_t +cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, + struct cs_etm_packet_queue 
*packet_queue, + const ocsd_generic_trace_elem *elem, + const uint8_t trace_chan_id) +{ + pid_t tid; + + /* Ignore PE_CONTEXT packets that don't have a valid contextID */ + if (!elem->context.ctxt_id_valid) + return OCSD_RESP_CONT; + + tid = elem->context.context_id; + if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id)) + return OCSD_RESP_FATAL_SYS_ERR; + + /* + * A timestamp is generated after a PE_CONTEXT element so make sure + * to rely on that coming one. + */ + cs_etm_decoder__reset_timestamp(packet_queue); + + return OCSD_RESP_CONT; +} + static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( const void *context, const ocsd_trc_index_t indx __maybe_unused, @@ -441,6 +528,13 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( { ocsd_datapath_resp_t resp = OCSD_RESP_CONT; struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; + struct cs_etm_queue *etmq = decoder->data; + struct cs_etm_packet_queue *packet_queue; + + /* First get the packet queue for this traceID */ + packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id); + if (!packet_queue) + return OCSD_RESP_FATAL_SYS_ERR; switch (elem->elem_type) { case OCSD_GEN_TRC_ELEM_UNKNOWN: @@ -448,24 +542,30 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( case OCSD_GEN_TRC_ELEM_EO_TRACE: case OCSD_GEN_TRC_ELEM_NO_SYNC: case OCSD_GEN_TRC_ELEM_TRACE_ON: - resp = cs_etm_decoder__buffer_discontinuity(decoder, + resp = cs_etm_decoder__buffer_discontinuity(packet_queue, trace_chan_id); break; case OCSD_GEN_TRC_ELEM_INSTR_RANGE: - resp = cs_etm_decoder__buffer_range(decoder, elem, + resp = cs_etm_decoder__buffer_range(etmq, packet_queue, elem, trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION: - resp = cs_etm_decoder__buffer_exception(decoder, elem, + resp = cs_etm_decoder__buffer_exception(packet_queue, elem, trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: - resp = cs_etm_decoder__buffer_exception_ret(decoder, + resp = 
cs_etm_decoder__buffer_exception_ret(packet_queue, trace_chan_id); break; + case OCSD_GEN_TRC_ELEM_TIMESTAMP: + resp = cs_etm_decoder__do_hard_timestamp(etmq, elem, + trace_chan_id); + break; case OCSD_GEN_TRC_ELEM_PE_CONTEXT: + resp = cs_etm_decoder__set_tid(etmq, packet_queue, + elem, trace_chan_id); + break; case OCSD_GEN_TRC_ELEM_ADDR_NACC: - case OCSD_GEN_TRC_ELEM_TIMESTAMP: case OCSD_GEN_TRC_ELEM_CYCLE_COUNT: case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN: case OCSD_GEN_TRC_ELEM_EVENT: @@ -554,7 +654,6 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, decoder->data = d_params->data; decoder->prev_return = OCSD_RESP_CONT; - cs_etm_decoder__clear_buffer(decoder); format = (d_params->formatted ? OCSD_TRC_SRC_FRAME_FORMATTED : OCSD_TRC_SRC_SINGLE); flags = 0; @@ -577,7 +676,7 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, /* init library print logging support */ ret = cs_etm_decoder__init_def_logger_printing(d_params, decoder); if (ret != 0) - goto err_free_decoder_tree; + goto err_free_decoder; /* init raw frame logging if required */ cs_etm_decoder__init_raw_frame_logging(d_params, decoder); @@ -587,15 +686,13 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, &t_params[i], decoder); if (ret != 0) - goto err_free_decoder_tree; + goto err_free_decoder; } return decoder; -err_free_decoder_tree: - ocsd_destroy_dcd_tree(decoder->dcd_tree); err_free_decoder: - free(decoder); + cs_etm_decoder__free(decoder); return NULL; } diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 3ab11dfa92ae..11f3391d06f2 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -14,43 +14,12 @@ #include <stdio.h> struct cs_etm_decoder; - -enum cs_etm_sample_type { - CS_ETM_EMPTY, - CS_ETM_RANGE, - CS_ETM_DISCONTINUITY, - CS_ETM_EXCEPTION, - CS_ETM_EXCEPTION_RET, -}; - -enum cs_etm_isa { - 
CS_ETM_ISA_UNKNOWN, - CS_ETM_ISA_A64, - CS_ETM_ISA_A32, - CS_ETM_ISA_T32, -}; - -struct cs_etm_packet { - enum cs_etm_sample_type sample_type; - enum cs_etm_isa isa; - u64 start_addr; - u64 end_addr; - u32 instr_count; - u32 last_instr_type; - u32 last_instr_subtype; - u32 flags; - u32 exception_number; - u8 last_instr_cond; - u8 last_instr_taken_branch; - u8 last_instr_size; - u8 trace_chan_id; - int cpu; -}; +struct cs_etm_packet; +struct cs_etm_packet_queue; struct cs_etm_queue; -typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u64, - size_t, u8 *); +typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u8, u64, size_t, u8 *); struct cs_etmv3_trace_params { u32 reg_ctrl; @@ -119,7 +88,7 @@ int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, u64 start, u64 end, cs_etm_mem_cb_type cb_func); -int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, +int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue, struct cs_etm_packet *packet); int cs_etm_decoder__reset(struct cs_etm_decoder *decoder); diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index de488b43f440..67b88b599a53 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/log2.h> #include <linux/types.h> +#include <linux/zalloc.h> #include <opencsd/ocsd_if_types.h> #include <stdlib.h> @@ -29,6 +30,7 @@ #include "thread.h" #include "thread_map.h" #include "thread-stack.h" +#include <tools/libc_compat.h> #include "util.h" #define MAX_TIMESTAMP (~0ULL) @@ -60,33 +62,55 @@ struct cs_etm_auxtrace { unsigned int pmu_type; }; -struct cs_etm_queue { - struct cs_etm_auxtrace *etm; - struct thread *thread; - struct cs_etm_decoder *decoder; - struct auxtrace_buffer *buffer; - union perf_event *event_buf; - unsigned int queue_nr; +struct cs_etm_traceid_queue { + u8 trace_chan_id; pid_t pid, tid; - int cpu; - u64 offset; u64 period_instructions; + size_t last_branch_pos; + union 
perf_event *event_buf; + struct thread *thread; struct branch_stack *last_branch; struct branch_stack *last_branch_rb; - size_t last_branch_pos; struct cs_etm_packet *prev_packet; struct cs_etm_packet *packet; + struct cs_etm_packet_queue packet_queue; +}; + +struct cs_etm_queue { + struct cs_etm_auxtrace *etm; + struct cs_etm_decoder *decoder; + struct auxtrace_buffer *buffer; + unsigned int queue_nr; + u8 pending_timestamp; + u64 offset; const unsigned char *buf; size_t buf_len, buf_used; + /* Conversion between traceID and index in traceid_queues array */ + struct intlist *traceid_queues_list; + struct cs_etm_traceid_queue **traceid_queues; }; static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); +static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid); +static int cs_etm__get_data_block(struct cs_etm_queue *etmq); +static int cs_etm__decode_data_block(struct cs_etm_queue *etmq); /* PTMs ETMIDR [11:8] set to b0011 */ #define ETMIDR_PTM_VERSION 0x00000300 +/* + * A struct auxtrace_heap_item only has a queue_nr and a timestamp to + * work with. One option is to modify to auxtrace_heap_XYZ() API or simply + * encode the etm queue number as the upper 16 bit and the channel as + * the lower 16 bit. + */ +#define TO_CS_QUEUE_NR(queue_nr, trace_id_chan) \ + (queue_nr << 16 | trace_chan_id) +#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16) +#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff) + static u32 cs_etm__get_v7_protocol_version(u32 etmidr) { etmidr &= ETMIDR_PTM_VERSION; @@ -125,6 +149,216 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) return 0; } +void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, + u8 trace_chan_id) +{ + /* + * Wnen a timestamp packet is encountered the backend code + * is stopped so that the front end has time to process packets + * that were accumulated in the traceID queue. 
Since there can + * be more than one channel per cs_etm_queue, we need to specify + * what traceID queue needs servicing. + */ + etmq->pending_timestamp = trace_chan_id; +} + +static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, + u8 *trace_chan_id) +{ + struct cs_etm_packet_queue *packet_queue; + + if (!etmq->pending_timestamp) + return 0; + + if (trace_chan_id) + *trace_chan_id = etmq->pending_timestamp; + + packet_queue = cs_etm__etmq_get_packet_queue(etmq, + etmq->pending_timestamp); + if (!packet_queue) + return 0; + + /* Acknowledge pending status */ + etmq->pending_timestamp = 0; + + /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */ + return packet_queue->timestamp; +} + +static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) +{ + int i; + + queue->head = 0; + queue->tail = 0; + queue->packet_count = 0; + for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) { + queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; + queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; + queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; + queue->packet_buffer[i].instr_count = 0; + queue->packet_buffer[i].last_instr_taken_branch = false; + queue->packet_buffer[i].last_instr_size = 0; + queue->packet_buffer[i].last_instr_type = 0; + queue->packet_buffer[i].last_instr_subtype = 0; + queue->packet_buffer[i].last_instr_cond = 0; + queue->packet_buffer[i].flags = 0; + queue->packet_buffer[i].exception_number = UINT32_MAX; + queue->packet_buffer[i].trace_chan_id = UINT8_MAX; + queue->packet_buffer[i].cpu = INT_MIN; + } +} + +static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq) +{ + int idx; + struct int_node *inode; + struct cs_etm_traceid_queue *tidq; + struct intlist *traceid_queues_list = etmq->traceid_queues_list; + + intlist__for_each_entry(inode, traceid_queues_list) { + idx = (int)(intptr_t)inode->priv; + tidq = etmq->traceid_queues[idx]; + cs_etm__clear_packet_queue(&tidq->packet_queue); + } +} + +static int 
cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq, + u8 trace_chan_id) +{ + int rc = -ENOMEM; + struct auxtrace_queue *queue; + struct cs_etm_auxtrace *etm = etmq->etm; + + cs_etm__clear_packet_queue(&tidq->packet_queue); + + queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; + tidq->tid = queue->tid; + tidq->pid = -1; + tidq->trace_chan_id = trace_chan_id; + + tidq->packet = zalloc(sizeof(struct cs_etm_packet)); + if (!tidq->packet) + goto out; + + tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet)); + if (!tidq->prev_packet) + goto out_free; + + if (etm->synth_opts.last_branch) { + size_t sz = sizeof(struct branch_stack); + + sz += etm->synth_opts.last_branch_sz * + sizeof(struct branch_entry); + tidq->last_branch = zalloc(sz); + if (!tidq->last_branch) + goto out_free; + tidq->last_branch_rb = zalloc(sz); + if (!tidq->last_branch_rb) + goto out_free; + } + + tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); + if (!tidq->event_buf) + goto out_free; + + return 0; + +out_free: + zfree(&tidq->last_branch_rb); + zfree(&tidq->last_branch); + zfree(&tidq->prev_packet); + zfree(&tidq->packet); +out: + return rc; +} + +static struct cs_etm_traceid_queue +*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) +{ + int idx; + struct int_node *inode; + struct intlist *traceid_queues_list; + struct cs_etm_traceid_queue *tidq, **traceid_queues; + struct cs_etm_auxtrace *etm = etmq->etm; + + if (etm->timeless_decoding) + trace_chan_id = CS_ETM_PER_THREAD_TRACEID; + + traceid_queues_list = etmq->traceid_queues_list; + + /* + * Check if the traceid_queue exist for this traceID by looking + * in the queue list. 
+ */ + inode = intlist__find(traceid_queues_list, trace_chan_id); + if (inode) { + idx = (int)(intptr_t)inode->priv; + return etmq->traceid_queues[idx]; + } + + /* We couldn't find a traceid_queue for this traceID, allocate one */ + tidq = malloc(sizeof(*tidq)); + if (!tidq) + return NULL; + + memset(tidq, 0, sizeof(*tidq)); + + /* Get a valid index for the new traceid_queue */ + idx = intlist__nr_entries(traceid_queues_list); + /* Memory for the inode is freed in cs_etm__free_traceid_queues() */ + inode = intlist__findnew(traceid_queues_list, trace_chan_id); + if (!inode) + goto out_free; + + /* Associate this traceID with this index */ + inode->priv = (void *)(intptr_t)idx; + + if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id)) + goto out_free; + + /* Grow the traceid_queues array by one unit */ + traceid_queues = etmq->traceid_queues; + traceid_queues = reallocarray(traceid_queues, + idx + 1, + sizeof(*traceid_queues)); + + /* + * On failure reallocarray() returns NULL and the original block of + * memory is left untouched. + */ + if (!traceid_queues) + goto out_free; + + traceid_queues[idx] = tidq; + etmq->traceid_queues = traceid_queues; + + return etmq->traceid_queues[idx]; + +out_free: + /* + * Function intlist__remove() removes the inode from the list + * and deletes the memory associated with it. 
+ */ + intlist__remove(traceid_queues_list, inode); + free(tidq); + + return NULL; +} + +struct cs_etm_packet_queue +*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) +{ + struct cs_etm_traceid_queue *tidq; + + tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); + if (tidq) + return &tidq->packet_queue; + + return NULL; +} + static void cs_etm__packet_dump(const char *pkt_string) { const char *color = PERF_COLOR_BLUE; @@ -276,15 +510,52 @@ static int cs_etm__flush_events(struct perf_session *session, if (!tool->ordered_events) return -EINVAL; - if (!etm->timeless_decoding) - return -EINVAL; - ret = cs_etm__update_queues(etm); if (ret < 0) return ret; - return cs_etm__process_timeless_queues(etm, -1); + if (etm->timeless_decoding) + return cs_etm__process_timeless_queues(etm, -1); + + return cs_etm__process_queues(etm); +} + +static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) +{ + int idx; + uintptr_t priv; + struct int_node *inode, *tmp; + struct cs_etm_traceid_queue *tidq; + struct intlist *traceid_queues_list = etmq->traceid_queues_list; + + intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) { + priv = (uintptr_t)inode->priv; + idx = priv; + + /* Free this traceid_queue from the array */ + tidq = etmq->traceid_queues[idx]; + thread__zput(tidq->thread); + zfree(&tidq->event_buf); + zfree(&tidq->last_branch); + zfree(&tidq->last_branch_rb); + zfree(&tidq->prev_packet); + zfree(&tidq->packet); + zfree(&tidq); + + /* + * Function intlist__remove() removes the inode from the list + * and delete the memory associated to it. 
+ */ + intlist__remove(traceid_queues_list, inode); + } + + /* Then the RB tree itself */ + intlist__delete(traceid_queues_list); + etmq->traceid_queues_list = NULL; + + /* finally free the traceid_queues array */ + zfree(&etmq->traceid_queues); } static void cs_etm__free_queue(void *priv) @@ -294,13 +565,8 @@ static void cs_etm__free_queue(void *priv) if (!etmq) return; - thread__zput(etmq->thread); cs_etm_decoder__free(etmq->decoder); - zfree(&etmq->event_buf); - zfree(&etmq->last_branch); - zfree(&etmq->last_branch_rb); - zfree(&etmq->prev_packet); - zfree(&etmq->packet); + cs_etm__free_traceid_queues(etmq); free(etmq); } @@ -365,23 +631,27 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) } } -static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, - size_t size, u8 *buffer) +static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, + u64 address, size_t size, u8 *buffer) { u8 cpumode; u64 offset; int len; - struct thread *thread; - struct machine *machine; - struct addr_location al; + struct thread *thread; + struct machine *machine; + struct addr_location al; + struct cs_etm_traceid_queue *tidq; if (!etmq) return 0; machine = etmq->etm->machine; cpumode = cs_etm__cpu_mode(etmq, address); + tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); + if (!tidq) + return 0; - thread = etmq->thread; + thread = tidq->thread; if (!thread) { if (cpumode != PERF_RECORD_MISC_KERNEL) return 0; @@ -412,35 +682,13 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params = NULL; struct cs_etm_queue *etmq; - size_t szp = sizeof(struct cs_etm_packet); etmq = zalloc(sizeof(*etmq)); if (!etmq) return NULL; - etmq->packet = zalloc(szp); - if (!etmq->packet) - goto out_free; - - etmq->prev_packet = zalloc(szp); - if (!etmq->prev_packet) - goto out_free; - - if (etm->synth_opts.last_branch) { - size_t sz = sizeof(struct 
branch_stack); - - sz += etm->synth_opts.last_branch_sz * - sizeof(struct branch_entry); - etmq->last_branch = zalloc(sz); - if (!etmq->last_branch) - goto out_free; - etmq->last_branch_rb = zalloc(sz); - if (!etmq->last_branch_rb) - goto out_free; - } - - etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); - if (!etmq->event_buf) + etmq->traceid_queues_list = intlist__new(NULL); + if (!etmq->traceid_queues_list) goto out_free; /* Use metadata to fill in trace parameters for trace decoder */ @@ -477,12 +725,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) out_free_decoder: cs_etm_decoder__free(etmq->decoder); out_free: - zfree(&t_params); - zfree(&etmq->event_buf); - zfree(&etmq->last_branch); - zfree(&etmq->last_branch_rb); - zfree(&etmq->prev_packet); - zfree(&etmq->packet); + intlist__delete(etmq->traceid_queues_list); free(etmq); return NULL; @@ -493,6 +736,9 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, unsigned int queue_nr) { int ret = 0; + unsigned int cs_queue_nr; + u8 trace_chan_id; + u64 timestamp; struct cs_etm_queue *etmq = queue->priv; if (list_empty(&queue->head) || etmq) @@ -508,12 +754,69 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, queue->priv = etmq; etmq->etm = etm; etmq->queue_nr = queue_nr; - etmq->cpu = queue->cpu; - etmq->tid = queue->tid; - etmq->pid = -1; etmq->offset = 0; - etmq->period_instructions = 0; + if (etm->timeless_decoding) + goto out; + + /* + * We are under a CPU-wide trace scenario. As such we need to know + * when the code that generated the traces started to execute so that + * it can be correlated with execution on other CPUs. So we get a + * handle on the beginning of traces and decode until we find a + * timestamp. The timestamp is then added to the auxtrace min heap + * in order to know what nibble (of all the etmqs) to decode first. + */ + while (1) { + /* + * Fetch an aux_buffer from this etmq. Bail if no more + * blocks or an error has been encountered. 
+ */ + ret = cs_etm__get_data_block(etmq); + if (ret <= 0) + goto out; + + /* + * Run decoder on the trace block. The decoder will stop when + * encountering a timestamp, a full packet queue or the end of + * trace for that block. + */ + ret = cs_etm__decode_data_block(etmq); + if (ret) + goto out; + + /* + * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all + * the timestamp calculation for us. + */ + timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); + + /* We found a timestamp, no need to continue. */ + if (timestamp) + break; + + /* + * We didn't find a timestamp so empty all the traceid packet + * queues before looking for another timestamp packet, either + * in the current data block or a new one. Packets that were + * just decoded are useless since no timestamp has been + * associated with them. As such simply discard them. + */ + cs_etm__clear_all_packet_queues(etmq); + } + + /* + * We have a timestamp. Add it to the min heap to reflect when + * instructions conveyed by the range packets of this traceID queue + * started to execute. Once the same has been done for all the traceID + * queues of each etmq, rendering and decoding can start in + * chronological order. + * + * Note that packets decoded above are still in the traceID's packet + * queue and will be processed in cs_etm__process_queues(). 
+ */ + cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_id_chan); + ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp); out: return ret; } @@ -545,10 +848,12 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) return 0; } -static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq) +static inline +void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { - struct branch_stack *bs_src = etmq->last_branch_rb; - struct branch_stack *bs_dst = etmq->last_branch; + struct branch_stack *bs_src = tidq->last_branch_rb; + struct branch_stack *bs_dst = tidq->last_branch; size_t nr = 0; /* @@ -568,9 +873,9 @@ static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq) * two steps. First, copy the branches from the most recently inserted * branch ->last_branch_pos until the end of bs_src->entries buffer. */ - nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos; + nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos; memcpy(&bs_dst->entries[0], - &bs_src->entries[etmq->last_branch_pos], + &bs_src->entries[tidq->last_branch_pos], sizeof(struct branch_entry) * nr); /* @@ -583,21 +888,24 @@ static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq) if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { memcpy(&bs_dst->entries[nr], &bs_src->entries[0], - sizeof(struct branch_entry) * etmq->last_branch_pos); + sizeof(struct branch_entry) * tidq->last_branch_pos); } } -static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq) +static inline +void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq) { - etmq->last_branch_pos = 0; - etmq->last_branch_rb->nr = 0; + tidq->last_branch_pos = 0; + tidq->last_branch_rb->nr = 0; } static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, - u64 addr) { + u8 trace_chan_id, u64 addr) +{ u8 instrBytes[2]; - cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes); + 
cs_etm__mem_access(etmq, trace_chan_id, addr, + ARRAY_SIZE(instrBytes), instrBytes); /* * T32 instruction size is indicated by bits[15:11] of the first * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 @@ -626,6 +934,7 @@ u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet) } static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, + u64 trace_chan_id, const struct cs_etm_packet *packet, u64 offset) { @@ -633,7 +942,8 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, u64 addr = packet->start_addr; while (offset > 0) { - addr += cs_etm__t32_instr_size(etmq, addr); + addr += cs_etm__t32_instr_size(etmq, + trace_chan_id, addr); offset--; } return addr; @@ -643,9 +953,10 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, return packet->start_addr + offset * 4; } -static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq) +static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { - struct branch_stack *bs = etmq->last_branch_rb; + struct branch_stack *bs = tidq->last_branch_rb; struct branch_entry *be; /* @@ -654,14 +965,14 @@ static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq) * buffer down. After writing the first element of the stack, move the * insert position back to the end of the buffer. 
*/ - if (!etmq->last_branch_pos) - etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; + if (!tidq->last_branch_pos) + tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; - etmq->last_branch_pos -= 1; + tidq->last_branch_pos -= 1; - be = &bs->entries[etmq->last_branch_pos]; - be->from = cs_etm__last_executed_instr(etmq->prev_packet); - be->to = cs_etm__first_executed_instr(etmq->packet); + be = &bs->entries[tidq->last_branch_pos]; + be->from = cs_etm__last_executed_instr(tidq->prev_packet); + be->to = cs_etm__first_executed_instr(tidq->packet); /* No support for mispredict */ be->flags.mispred = 0; be->flags.predicted = 1; @@ -725,31 +1036,53 @@ cs_etm__get_trace(struct cs_etm_queue *etmq) } static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, - struct auxtrace_queue *queue) + struct cs_etm_traceid_queue *tidq) { - struct cs_etm_queue *etmq = queue->priv; + if ((!tidq->thread) && (tidq->tid != -1)) + tidq->thread = machine__find_thread(etm->machine, -1, + tidq->tid); - /* CPU-wide tracing isn't supported yet */ - if (queue->tid == -1) - return; + if (tidq->thread) + tidq->pid = tidq->thread->pid_; +} - if ((!etmq->thread) && (etmq->tid != -1)) - etmq->thread = machine__find_thread(etm->machine, -1, - etmq->tid); +int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, + pid_t tid, u8 trace_chan_id) +{ + int cpu, err = -EINVAL; + struct cs_etm_auxtrace *etm = etmq->etm; + struct cs_etm_traceid_queue *tidq; - if (etmq->thread) { - etmq->pid = etmq->thread->pid_; - if (queue->cpu == -1) - etmq->cpu = etmq->thread->cpu; - } + tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); + if (!tidq) + return err; + + if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0) + return err; + + err = machine__set_current_tid(etm->machine, cpu, tid, tid); + if (err) + return err; + + tidq->tid = tid; + thread__zput(tidq->thread); + + cs_etm__set_pid_tid_cpu(etm, tidq); + return 0; +} + +bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq) +{ + return 
!!etmq->etm->timeless_decoding; } static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq, u64 addr, u64 period) { int ret = 0; struct cs_etm_auxtrace *etm = etmq->etm; - union perf_event *event = etmq->event_buf; + union perf_event *event = tidq->event_buf; struct perf_sample sample = {.ip = 0,}; event->sample.header.type = PERF_RECORD_SAMPLE; @@ -757,19 +1090,19 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, event->sample.header.size = sizeof(struct perf_event_header); sample.ip = addr; - sample.pid = etmq->pid; - sample.tid = etmq->tid; + sample.pid = tidq->pid; + sample.tid = tidq->tid; sample.id = etmq->etm->instructions_id; sample.stream_id = etmq->etm->instructions_id; sample.period = period; - sample.cpu = etmq->packet->cpu; - sample.flags = etmq->prev_packet->flags; + sample.cpu = tidq->packet->cpu; + sample.flags = tidq->prev_packet->flags; sample.insn_len = 1; sample.cpumode = event->sample.header.misc; if (etm->synth_opts.last_branch) { - cs_etm__copy_last_branch_rb(etmq); - sample.branch_stack = etmq->last_branch; + cs_etm__copy_last_branch_rb(etmq, tidq); + sample.branch_stack = tidq->last_branch; } if (etm->synth_opts.inject) { @@ -787,7 +1120,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, ret); if (etm->synth_opts.last_branch) - cs_etm__reset_last_branch_rb(etmq); + cs_etm__reset_last_branch_rb(tidq); return ret; } @@ -796,33 +1129,34 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, * The cs etm packet encodes an instruction range between a branch target * and the next taken branch. Generate sample accordingly. 
*/ -static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) +static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { int ret = 0; struct cs_etm_auxtrace *etm = etmq->etm; struct perf_sample sample = {.ip = 0,}; - union perf_event *event = etmq->event_buf; + union perf_event *event = tidq->event_buf; struct dummy_branch_stack { u64 nr; struct branch_entry entries; } dummy_bs; u64 ip; - ip = cs_etm__last_executed_instr(etmq->prev_packet); + ip = cs_etm__last_executed_instr(tidq->prev_packet); event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); event->sample.header.size = sizeof(struct perf_event_header); sample.ip = ip; - sample.pid = etmq->pid; - sample.tid = etmq->tid; - sample.addr = cs_etm__first_executed_instr(etmq->packet); + sample.pid = tidq->pid; + sample.tid = tidq->tid; + sample.addr = cs_etm__first_executed_instr(tidq->packet); sample.id = etmq->etm->branches_id; sample.stream_id = etmq->etm->branches_id; sample.period = 1; - sample.cpu = etmq->packet->cpu; - sample.flags = etmq->prev_packet->flags; + sample.cpu = tidq->packet->cpu; + sample.flags = tidq->prev_packet->flags; sample.cpumode = event->sample.header.misc; /* @@ -965,33 +1299,35 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, return 0; } -static int cs_etm__sample(struct cs_etm_queue *etmq) +static int cs_etm__sample(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { struct cs_etm_auxtrace *etm = etmq->etm; struct cs_etm_packet *tmp; int ret; - u64 instrs_executed = etmq->packet->instr_count; + u8 trace_chan_id = tidq->trace_chan_id; + u64 instrs_executed = tidq->packet->instr_count; - etmq->period_instructions += instrs_executed; + tidq->period_instructions += instrs_executed; /* * Record a branch when the last instruction in * PREV_PACKET is a branch. 
*/ if (etm->synth_opts.last_branch && - etmq->prev_packet->sample_type == CS_ETM_RANGE && - etmq->prev_packet->last_instr_taken_branch) - cs_etm__update_last_branch_rb(etmq); + tidq->prev_packet->sample_type == CS_ETM_RANGE && + tidq->prev_packet->last_instr_taken_branch) + cs_etm__update_last_branch_rb(etmq, tidq); if (etm->sample_instructions && - etmq->period_instructions >= etm->instructions_sample_period) { + tidq->period_instructions >= etm->instructions_sample_period) { /* * Emit instruction sample periodically * TODO: allow period to be defined in cycles and clock time */ /* Get number of instructions executed after the sample point */ - u64 instrs_over = etmq->period_instructions - + u64 instrs_over = tidq->period_instructions - etm->instructions_sample_period; /* @@ -1000,31 +1336,32 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) * executed, but PC has not advanced to next instruction) */ u64 offset = (instrs_executed - instrs_over - 1); - u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset); + u64 addr = cs_etm__instr_addr(etmq, trace_chan_id, + tidq->packet, offset); ret = cs_etm__synth_instruction_sample( - etmq, addr, etm->instructions_sample_period); + etmq, tidq, addr, etm->instructions_sample_period); if (ret) return ret; /* Carry remaining instructions into next sample period */ - etmq->period_instructions = instrs_over; + tidq->period_instructions = instrs_over; } if (etm->sample_branches) { bool generate_sample = false; /* Generate sample for tracing on packet */ - if (etmq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) + if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) generate_sample = true; /* Generate sample for branch taken packet */ - if (etmq->prev_packet->sample_type == CS_ETM_RANGE && - etmq->prev_packet->last_instr_taken_branch) + if (tidq->prev_packet->sample_type == CS_ETM_RANGE && + tidq->prev_packet->last_instr_taken_branch) generate_sample = true; if (generate_sample) { - ret = 
cs_etm__synth_branch_sample(etmq); + ret = cs_etm__synth_branch_sample(etmq, tidq); if (ret) return ret; } @@ -1035,15 +1372,15 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for * the next incoming packet. */ - tmp = etmq->packet; - etmq->packet = etmq->prev_packet; - etmq->prev_packet = tmp; + tmp = tidq->packet; + tidq->packet = tidq->prev_packet; + tidq->prev_packet = tmp; } return 0; } -static int cs_etm__exception(struct cs_etm_queue *etmq) +static int cs_etm__exception(struct cs_etm_traceid_queue *tidq) { /* * When the exception packet is inserted, whether the last instruction @@ -1056,24 +1393,25 @@ static int cs_etm__exception(struct cs_etm_queue *etmq) * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful * for generating instruction and branch samples. */ - if (etmq->prev_packet->sample_type == CS_ETM_RANGE) - etmq->prev_packet->last_instr_taken_branch = true; + if (tidq->prev_packet->sample_type == CS_ETM_RANGE) + tidq->prev_packet->last_instr_taken_branch = true; return 0; } -static int cs_etm__flush(struct cs_etm_queue *etmq) +static int cs_etm__flush(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { int err = 0; struct cs_etm_auxtrace *etm = etmq->etm; struct cs_etm_packet *tmp; /* Handle start tracing packet */ - if (etmq->prev_packet->sample_type == CS_ETM_EMPTY) + if (tidq->prev_packet->sample_type == CS_ETM_EMPTY) goto swap_packet; if (etmq->etm->synth_opts.last_branch && - etmq->prev_packet->sample_type == CS_ETM_RANGE) { + tidq->prev_packet->sample_type == CS_ETM_RANGE) { /* * Generate a last branch event for the branches left in the * circular buffer at the end of the trace. 
@@ -1081,21 +1419,21 @@ static int cs_etm__flush(struct cs_etm_queue *etmq) * Use the address of the end of the last reported execution * range */ - u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); + u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); err = cs_etm__synth_instruction_sample( - etmq, addr, - etmq->period_instructions); + etmq, tidq, addr, + tidq->period_instructions); if (err) return err; - etmq->period_instructions = 0; + tidq->period_instructions = 0; } if (etm->sample_branches && - etmq->prev_packet->sample_type == CS_ETM_RANGE) { - err = cs_etm__synth_branch_sample(etmq); + tidq->prev_packet->sample_type == CS_ETM_RANGE) { + err = cs_etm__synth_branch_sample(etmq, tidq); if (err) return err; } @@ -1106,15 +1444,16 @@ swap_packet: * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for * the next incoming packet. */ - tmp = etmq->packet; - etmq->packet = etmq->prev_packet; - etmq->prev_packet = tmp; + tmp = tidq->packet; + tidq->packet = tidq->prev_packet; + tidq->prev_packet = tmp; } return err; } -static int cs_etm__end_block(struct cs_etm_queue *etmq) +static int cs_etm__end_block(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { int err; @@ -1128,20 +1467,20 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq) * the trace. */ if (etmq->etm->synth_opts.last_branch && - etmq->prev_packet->sample_type == CS_ETM_RANGE) { + tidq->prev_packet->sample_type == CS_ETM_RANGE) { /* * Use the address of the end of the last reported execution * range. 
*/ - u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); + u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); err = cs_etm__synth_instruction_sample( - etmq, addr, - etmq->period_instructions); + etmq, tidq, addr, + tidq->period_instructions); if (err) return err; - etmq->period_instructions = 0; + tidq->period_instructions = 0; } return 0; @@ -1173,12 +1512,13 @@ static int cs_etm__get_data_block(struct cs_etm_queue *etmq) return etmq->buf_len; } -static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, +static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, struct cs_etm_packet *packet, u64 end_addr) { - u16 instr16; - u32 instr32; + /* Initialise to keep compiler happy */ + u16 instr16 = 0; + u32 instr32 = 0; u64 addr; switch (packet->isa) { @@ -1196,7 +1536,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, * so below only read 2 bytes as instruction size for T32. */ addr = end_addr - 2; - cs_etm__mem_access(etmq, addr, sizeof(instr16), (u8 *)&instr16); + cs_etm__mem_access(etmq, trace_chan_id, addr, + sizeof(instr16), (u8 *)&instr16); if ((instr16 & 0xFF00) == 0xDF00) return true; @@ -1211,7 +1552,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, * +---------+---------+-------------------------+ */ addr = end_addr - 4; - cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); + cs_etm__mem_access(etmq, trace_chan_id, addr, + sizeof(instr32), (u8 *)&instr32); if ((instr32 & 0x0F000000) == 0x0F000000 && (instr32 & 0xF0000000) != 0xF0000000) return true; @@ -1227,7 +1569,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, * +-----------------------+---------+-----------+ */ addr = end_addr - 4; - cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); + cs_etm__mem_access(etmq, trace_chan_id, addr, + sizeof(instr32), (u8 *)&instr32); if ((instr32 & 0xFFE0001F) == 0xd4000001) return true; @@ -1240,10 +1583,12 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue 
*etmq, return false; } -static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic) +static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq, u64 magic) { - struct cs_etm_packet *packet = etmq->packet; - struct cs_etm_packet *prev_packet = etmq->prev_packet; + u8 trace_chan_id = tidq->trace_chan_id; + struct cs_etm_packet *packet = tidq->packet; + struct cs_etm_packet *prev_packet = tidq->prev_packet; if (magic == __perf_cs_etmv3_magic) if (packet->exception_number == CS_ETMV3_EXC_SVC) @@ -1256,7 +1601,7 @@ static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic) */ if (magic == __perf_cs_etmv4_magic) { if (packet->exception_number == CS_ETMV4_EXC_CALL && - cs_etm__is_svc_instr(etmq, prev_packet, + cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, prev_packet->end_addr)) return true; } @@ -1264,9 +1609,10 @@ static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic) return false; } -static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic) +static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq, + u64 magic) { - struct cs_etm_packet *packet = etmq->packet; + struct cs_etm_packet *packet = tidq->packet; if (magic == __perf_cs_etmv3_magic) if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || @@ -1289,10 +1635,13 @@ static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic) return false; } -static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic) +static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq, + u64 magic) { - struct cs_etm_packet *packet = etmq->packet; - struct cs_etm_packet *prev_packet = etmq->prev_packet; + u8 trace_chan_id = tidq->trace_chan_id; + struct cs_etm_packet *packet = tidq->packet; + struct cs_etm_packet *prev_packet = tidq->prev_packet; if (magic == __perf_cs_etmv3_magic) if (packet->exception_number == CS_ETMV3_EXC_SMC || @@ -1316,7 +1665,7 @@ 
static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic) * (SMC, HVC) are taken as sync exceptions. */ if (packet->exception_number == CS_ETMV4_EXC_CALL && - !cs_etm__is_svc_instr(etmq, prev_packet, + !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, prev_packet->end_addr)) return true; @@ -1335,10 +1684,12 @@ static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic) return false; } -static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) +static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { - struct cs_etm_packet *packet = etmq->packet; - struct cs_etm_packet *prev_packet = etmq->prev_packet; + struct cs_etm_packet *packet = tidq->packet; + struct cs_etm_packet *prev_packet = tidq->prev_packet; + u8 trace_chan_id = tidq->trace_chan_id; u64 magic; int ret; @@ -1419,7 +1770,8 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT) && - cs_etm__is_svc_instr(etmq, packet, packet->start_addr)) + cs_etm__is_svc_instr(etmq, trace_chan_id, + packet, packet->start_addr)) prev_packet->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET; @@ -1440,7 +1792,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) return ret; /* The exception is for system call. */ - if (cs_etm__is_syscall(etmq, magic)) + if (cs_etm__is_syscall(etmq, tidq, magic)) packet->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET; @@ -1448,7 +1800,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) * The exceptions are triggered by external signals from bus, * interrupt controller, debug module, PE reset or halt. 
*/ - else if (cs_etm__is_async_exception(etmq, magic)) + else if (cs_etm__is_async_exception(tidq, magic)) packet->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | @@ -1457,7 +1809,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) * Otherwise, exception is caused by trap, instruction & * data fault, or alignment errors. */ - else if (cs_etm__is_sync_exception(etmq, magic)) + else if (cs_etm__is_sync_exception(etmq, tidq, magic)) packet->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT; @@ -1539,75 +1891,106 @@ out: return ret; } -static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq) +static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { int ret; + struct cs_etm_packet_queue *packet_queue; - /* Process each packet in this chunk */ - while (1) { - ret = cs_etm_decoder__get_packet(etmq->decoder, - etmq->packet); - if (ret <= 0) - /* - * Stop processing this chunk on - * end of data or error - */ - break; + packet_queue = &tidq->packet_queue; + /* Process each packet in this chunk */ + while (1) { + ret = cs_etm_decoder__get_packet(packet_queue, + tidq->packet); + if (ret <= 0) /* - * Since packet addresses are swapped in packet - * handling within below switch() statements, - * thus setting sample flags must be called - * prior to switch() statement to use address - * information before packets swapping. + * Stop processing this chunk on + * end of data or error */ - ret = cs_etm__set_sample_flags(etmq); - if (ret < 0) - break; - - switch (etmq->packet->sample_type) { - case CS_ETM_RANGE: - /* - * If the packet contains an instruction - * range, generate instruction sequence - * events. - */ - cs_etm__sample(etmq); - break; - case CS_ETM_EXCEPTION: - case CS_ETM_EXCEPTION_RET: - /* - * If the exception packet is coming, - * make sure the previous instruction - * range packet to be handled properly. 
- */ - cs_etm__exception(etmq); - break; - case CS_ETM_DISCONTINUITY: - /* - * Discontinuity in trace, flush - * previous branch stack - */ - cs_etm__flush(etmq); - break; - case CS_ETM_EMPTY: - /* - * Should not receive empty packet, - * report error. - */ - pr_err("CS ETM Trace: empty packet\n"); - return -EINVAL; - default: - break; - } + break; + + /* + * Since packet addresses are swapped in packet + * handling within below switch() statements, + * thus setting sample flags must be called + * prior to switch() statement to use address + * information before packets swapping. + */ + ret = cs_etm__set_sample_flags(etmq, tidq); + if (ret < 0) + break; + + switch (tidq->packet->sample_type) { + case CS_ETM_RANGE: + /* + * If the packet contains an instruction + * range, generate instruction sequence + * events. + */ + cs_etm__sample(etmq, tidq); + break; + case CS_ETM_EXCEPTION: + case CS_ETM_EXCEPTION_RET: + /* + * If the exception packet is coming, + * make sure the previous instruction + * range packet to be handled properly. + */ + cs_etm__exception(tidq); + break; + case CS_ETM_DISCONTINUITY: + /* + * Discontinuity in trace, flush + * previous branch stack + */ + cs_etm__flush(etmq, tidq); + break; + case CS_ETM_EMPTY: + /* + * Should not receive empty packet, + * report error. + */ + pr_err("CS ETM Trace: empty packet\n"); + return -EINVAL; + default: + break; } + } return ret; } +static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq) +{ + int idx; + struct int_node *inode; + struct cs_etm_traceid_queue *tidq; + struct intlist *traceid_queues_list = etmq->traceid_queues_list; + + intlist__for_each_entry(inode, traceid_queues_list) { + idx = (int)(intptr_t)inode->priv; + tidq = etmq->traceid_queues[idx]; + + /* Ignore return value */ + cs_etm__process_traceid_queue(etmq, tidq); + + /* + * Generate an instruction sample with the remaining + * branchstack entries. 
+ */ + cs_etm__flush(etmq, tidq); + } +} + static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { int err = 0; + struct cs_etm_traceid_queue *tidq; + + tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID); + if (!tidq) + return -EINVAL; /* Go through each buffer in the queue and decode them one by one */ while (1) { @@ -1626,13 +2009,13 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) * an error occurs other than hoping the next one will * be better. */ - err = cs_etm__process_decoder_queue(etmq); + err = cs_etm__process_traceid_queue(etmq, tidq); } while (etmq->buf_len); if (err == 0) /* Flush any remaining branch stack entries */ - err = cs_etm__end_block(etmq); + err = cs_etm__end_block(etmq, tidq); } return err; @@ -1647,9 +2030,19 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, for (i = 0; i < queues->nr_queues; i++) { struct auxtrace_queue *queue = &etm->queues.queue_array[i]; struct cs_etm_queue *etmq = queue->priv; + struct cs_etm_traceid_queue *tidq; + + if (!etmq) + continue; + + tidq = cs_etm__etmq_get_traceid_queue(etmq, + CS_ETM_PER_THREAD_TRACEID); + + if (!tidq) + continue; - if (etmq && ((tid == -1) || (etmq->tid == tid))) { - cs_etm__set_pid_tid_cpu(etm, queue); + if ((tid == -1) || (tidq->tid == tid)) { + cs_etm__set_pid_tid_cpu(etm, tidq); cs_etm__run_decoder(etmq); } } @@ -1657,6 +2050,164 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, return 0; } +static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) +{ + int ret = 0; + unsigned int cs_queue_nr, queue_nr; + u8 trace_chan_id; + u64 timestamp; + struct auxtrace_queue *queue; + struct cs_etm_queue *etmq; + struct cs_etm_traceid_queue *tidq; + + while (1) { + if (!etm->heap.heap_cnt) + goto out; + + /* Take the entry at the top of the min heap */ + cs_queue_nr = etm->heap.heap_array[0].queue_nr; + queue_nr = TO_QUEUE_NR(cs_queue_nr); + trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr); + queue = 
&etm->queues.queue_array[queue_nr]; + etmq = queue->priv; + + /* + * Remove the top entry from the heap since we are about + * to process it. + */ + auxtrace_heap__pop(&etm->heap); + + tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); + if (!tidq) { + /* + * No traceID queue has been allocated for this traceID, + * which means something somewhere went very wrong. No + * other choice than simply exit. + */ + ret = -EINVAL; + goto out; + } + + /* + * Packets associated with this timestamp are already in + * the etmq's traceID queue, so process them. + */ + ret = cs_etm__process_traceid_queue(etmq, tidq); + if (ret < 0) + goto out; + + /* + * Packets for this timestamp have been processed, time to + * move on to the next timestamp, fetching a new auxtrace_buffer + * if need be. + */ +refetch: + ret = cs_etm__get_data_block(etmq); + if (ret < 0) + goto out; + + /* + * No more auxtrace_buffers to process in this etmq, simply + * move on to another entry in the auxtrace_heap. + */ + if (!ret) + continue; + + ret = cs_etm__decode_data_block(etmq); + if (ret) + goto out; + + timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); + + if (!timestamp) { + /* + * Function cs_etm__decode_data_block() returns when + * there is no more traces to decode in the current + * auxtrace_buffer OR when a timestamp has been + * encountered on any of the traceID queues. Since we + * did not get a timestamp, there is no more traces to + * process in this auxtrace_buffer. As such empty and + * flush all traceID queues. + */ + cs_etm__clear_all_traceid_queues(etmq); + + /* Fetch another auxtrace_buffer for this etmq */ + goto refetch; + } + + /* + * Add to the min heap the timestamp for packets that have + * just been decoded. They will be processed and synthesized + * during the next call to cs_etm__process_traceid_queue() for + * this queue/traceID. 
+ */ + cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); + ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp); + } + +out: + return ret; +} + +static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm, + union perf_event *event) +{ + struct thread *th; + + if (etm->timeless_decoding) + return 0; + + /* + * Add the tid/pid to the log so that we can get a match when + * we get a contextID from the decoder. + */ + th = machine__findnew_thread(etm->machine, + event->itrace_start.pid, + event->itrace_start.tid); + if (!th) + return -ENOMEM; + + thread__put(th); + + return 0; +} + +static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm, + union perf_event *event) +{ + struct thread *th; + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + + /* + * Context switch in per-thread mode are irrelevant since perf + * will start/stop tracing as the process is scheduled. + */ + if (etm->timeless_decoding) + return 0; + + /* + * SWITCH_IN events carry the next process to be switched out while + * SWITCH_OUT events carry the process to be switched in. As such + * we don't care about IN events. + */ + if (!out) + return 0; + + /* + * Add the tid/pid to the log so that we can get a match when + * we get a contextID from the decoder. 
+ */ + th = machine__findnew_thread(etm->machine, + event->context_switch.next_prev_pid, + event->context_switch.next_prev_tid); + if (!th) + return -ENOMEM; + + thread__put(th); + + return 0; +} + static int cs_etm__process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, @@ -1676,9 +2227,6 @@ static int cs_etm__process_event(struct perf_session *session, return -EINVAL; } - if (!etm->timeless_decoding) - return -EINVAL; - if (sample->time && (sample->time != (u64) -1)) timestamp = sample->time; else @@ -1690,10 +2238,20 @@ static int cs_etm__process_event(struct perf_session *session, return err; } - if (event->header.type == PERF_RECORD_EXIT) + if (etm->timeless_decoding && + event->header.type == PERF_RECORD_EXIT) return cs_etm__process_timeless_queues(etm, event->fork.tid); + if (event->header.type == PERF_RECORD_ITRACE_START) + return cs_etm__process_itrace_start(etm, event); + else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) + return cs_etm__process_switch_cpu_wide(etm, event); + + if (!etm->timeless_decoding && + event->header.type == PERF_RECORD_AUX) + return cs_etm__process_queues(etm); + return 0; } @@ -1980,7 +2538,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, return 0; } - if (session->itrace_synth_opts && session->itrace_synth_opts->set) { + if (session->itrace_synth_opts->set) { etm->synth_opts = *session->itrace_synth_opts; } else { itrace_synth_opts__set_default(&etm->synth_opts, @@ -2010,7 +2568,7 @@ err_free_etm: err_free_metadata: /* No need to check @metadata[j], free(NULL) is supported */ for (j = 0; j < num_cpu; j++) - free(metadata[j]); + zfree(&metadata[j]); zfree(&metadata); err_free_traceid_list: intlist__delete(traceid_list); diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 0e97c196147a..bc848fd095f4 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -9,6 +9,7 @@ #include "util/event.h" #include "util/session.h" +#include 
<linux/bits.h> /* Versionning header in case things need tro change in the future. That way * decoding of old snapshot is still possible. @@ -97,12 +98,72 @@ enum { CS_ETMV4_EXC_END = 31, }; +enum cs_etm_sample_type { + CS_ETM_EMPTY, + CS_ETM_RANGE, + CS_ETM_DISCONTINUITY, + CS_ETM_EXCEPTION, + CS_ETM_EXCEPTION_RET, +}; + +enum cs_etm_isa { + CS_ETM_ISA_UNKNOWN, + CS_ETM_ISA_A64, + CS_ETM_ISA_A32, + CS_ETM_ISA_T32, +}; + /* RB tree for quick conversion between traceID and metadata pointers */ struct intlist *traceid_list; +struct cs_etm_queue; + +struct cs_etm_packet { + enum cs_etm_sample_type sample_type; + enum cs_etm_isa isa; + u64 start_addr; + u64 end_addr; + u32 instr_count; + u32 last_instr_type; + u32 last_instr_subtype; + u32 flags; + u32 exception_number; + u8 last_instr_cond; + u8 last_instr_taken_branch; + u8 last_instr_size; + u8 trace_chan_id; + int cpu; +}; + +#define CS_ETM_PACKET_MAX_BUFFER 1024 + +/* + * When working with per-thread scenarios the process under trace can + * be scheduled on any CPU and as such, more than one traceID may be + * associated with the same process. Since a traceID of '0' is illegal + * as per the CoreSight architecture, use that specific value to + * identify the queue where all packets (with any traceID) are + * aggregated. 
+ */ +#define CS_ETM_PER_THREAD_TRACEID 0 + +struct cs_etm_packet_queue { + u32 packet_count; + u32 head; + u32 tail; + u32 instr_count; + u64 timestamp; + u64 next_timestamp; + struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER]; +}; + #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) +#define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL + +#define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb) + #define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) #define __perf_cs_etmv3_magic 0x3030303030303030ULL @@ -114,6 +175,13 @@ struct intlist *traceid_list; int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); +int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, + pid_t tid, u8 trace_chan_id); +bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq); +void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, + u8 trace_chan_id); +struct cs_etm_packet_queue +*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id); #else static inline int cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, @@ -127,6 +195,32 @@ static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused, { return -1; } + +static inline int cs_etm__etmq_set_tid( + struct cs_etm_queue *etmq __maybe_unused, + pid_t tid __maybe_unused, + u8 trace_chan_id __maybe_unused) +{ + return -1; +} + +static inline bool cs_etm__etmq_is_timeless( + struct cs_etm_queue *etmq __maybe_unused) +{ + /* What else to return? 
*/ + return true; +} + +static inline void cs_etm__etmq_set_traceid_queue_timestamp( + struct cs_etm_queue *etmq __maybe_unused, + u8 trace_chan_id __maybe_unused) {} + +static inline struct cs_etm_packet_queue *cs_etm__etmq_get_packet_queue( + struct cs_etm_queue *etmq __maybe_unused, + u8 trace_chan_id __maybe_unused) +{ + return NULL; +} #endif #endif diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c deleted file mode 100644 index ee4c1e8ed54b..000000000000 --- a/tools/perf/util/ctype.c +++ /dev/null @@ -1,49 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Sane locale-independent, ASCII ctype. - * - * No surprises, and works with signed and unsigned chars. - */ -#include "sane_ctype.h" - -enum { - S = GIT_SPACE, - A = GIT_ALPHA, - D = GIT_DIGIT, - G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ - R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */ - P = GIT_PRINT_EXTRA, /* printable - alpha - digit - glob - regex */ - - PS = GIT_SPACE | GIT_PRINT_EXTRA, -}; - -unsigned char sane_ctype[256] = { -/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ - - 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ - PS,P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */ - D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */ - P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ - A, A, A, A, A, A, A, A, A, A, A, G, G, P, R, P, /* 80.. 95 */ - P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ - A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */ - /* Nothing in the 128.. 
range */ -}; - -const char *graph_line = - "_____________________________________________________________________" - "_____________________________________________________________________" - "_____________________________________________________________________"; -const char *graph_dotted_line = - "---------------------------------------------------------------------" - "---------------------------------------------------------------------" - "---------------------------------------------------------------------"; -const char *spaces = - " " - " " - " "; -const char *dots = - "....................................................................." - "....................................................................." - "....................................................................."; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 26af43ad9ddd..ddbcd59f2d9b 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1,16 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * CTF writing support via babeltrace. * * Copyright (C) 2014, Jiri Olsa <jolsa@redhat.com> * Copyright (C) 2014, Sebastian Andrzej Siewior <bigeasy@linutronix.de> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include <errno.h> #include <inttypes.h> #include <linux/compiler.h> #include <linux/kernel.h> +#include <linux/zalloc.h> #include <babeltrace/ctf-writer/writer.h> #include <babeltrace/ctf-writer/clock.h> #include <babeltrace/ctf-writer/stream.h> @@ -23,14 +23,13 @@ #include "asm/bug.h" #include "data-convert-bt.h" #include "session.h" -#include "util.h" #include "debug.h" #include "tool.h" #include "evlist.h" #include "evsel.h" #include "machine.h" #include "config.h" -#include "sane_ctype.h" +#include <linux/ctype.h> #define pr_N(n, fmt, ...) 
\ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) @@ -271,7 +270,7 @@ static int string_set_value(struct bt_ctf_field *field, const char *string) if (i > 0) strncpy(buffer, string, i); } - strncat(buffer + p, numstr, 4); + memcpy(buffer + p, numstr, 4); p += 3; } } @@ -310,7 +309,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, if (flags & TEP_FIELD_IS_DYNAMIC) { unsigned long long tmp_val; - tmp_val = tep_read_number(fmtf->event->pevent, + tmp_val = tep_read_number(fmtf->event->tep, data + offset, len); offset = tmp_val; len = offset >> 16; @@ -354,7 +353,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, unsigned long long value_int; value_int = tep_read_number( - fmtf->event->pevent, + fmtf->event->tep, data + offset + i * len, len); if (!(flags & TEP_FIELD_IS_SIGNED)) @@ -1354,7 +1353,7 @@ static void free_streams(struct ctf_writer *cw) for (cpu = 0; cpu < cw->stream_cnt; cpu++) ctf_stream__delete(cw->stream[cpu]); - free(cw->stream); + zfree(&cw->stream); } static int ctf_writer__setup_env(struct ctf_writer *cw, diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 6a64f713710d..1d1b97a92c3f 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> #include <linux/kernel.h> +#include <linux/zalloc.h> #include <sys/types.h> #include <sys/stat.h> #include <errno.h> @@ -20,7 +21,7 @@ static void close_dir(struct perf_data_file *files, int nr) { while (--nr >= 1) { close(files[nr].fd); - free(files[nr].path); + zfree(&files[nr].path); } free(files); } diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index d7315a00c731..2394c7506abe 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -1,19 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * db-export.c: Support for exporting data suitable for import to a database * Copyright (c) 2014, Intel Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #include <errno.h> +#include <stdlib.h> #include "evsel.h" #include "machine.h" @@ -22,11 +14,11 @@ #include "symbol.h" #include "map.h" #include "event.h" -#include "util.h" #include "thread-stack.h" #include "callchain.h" #include "call-path.h" #include "db-export.h" +#include <linux/zalloc.h> struct deferred_export { struct list_head node; @@ -42,7 +34,7 @@ static int db_export__deferred(struct db_export *dbe) de = list_entry(dbe->deferred.next, struct deferred_export, node); err = dbe->export_comm(dbe, de->comm); - list_del(&de->node); + list_del_init(&de->node); free(de); if (err) return err; @@ -58,7 +50,7 @@ static void db_export__free_deferred(struct db_export *dbe) while (!list_empty(&dbe->deferred)) { de = list_entry(dbe->deferred.next, struct deferred_export, node); - list_del(&de->node); + list_del_init(&de->node); free(de); } } diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index 4e2424c89df9..e8a64028a386 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -1,16 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * db-export.h: Support for exporting data suitable for import to a database * Copyright (c) 2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #ifndef __PERF_DB_EXPORT_H diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 3d6459626c2a..3780fe42453b 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -7,6 +7,7 @@ #include <string.h> #include <stdarg.h> #include <stdio.h> +#include <stdlib.h> #include <sys/wait.h> #include <api/debug.h> #include <linux/time64.h> @@ -21,7 +22,7 @@ #include "util.h" #include "target.h" -#include "sane_ctype.h" +#include <linux/ctype.h> int verbose; bool dump_trace = false, quiet = false; diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c index e4c486756053..763328c151e9 100644 --- a/tools/perf/util/demangle-java.c +++ b/tools/perf/util/demangle-java.c @@ -1,14 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 #include <sys/types.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> -#include "util.h" #include "debug.h" #include "symbol.h" #include "demangle-java.h" -#include "sane_ctype.h" +#include <linux/ctype.h> +#include <linux/kernel.h> enum { MODE_PREFIX = 0, diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index e059976d9d93..ebc9d46c15a7 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include <asm/bug.h> #include <linux/kernel.h> +#include <linux/string.h> +#include <linux/zalloc.h> #include <sys/time.h> #include <sys/resource.h> #include <sys/types.h> @@ -9,6 +11,8 @@ #include <errno.h> #include <fcntl.h> #include <libgen.h> +#include <bpf/libbpf.h> +#include "bpf-event.h" #include "compress.h" #include "namespaces.h" #include "path.h" @@ -18,7 +22,7 @@ #include "dso.h" #include "machine.h" #include "auxtrace.h" -#include "util.h" +#include "util.h" /* O_CLOEXEC for older 
systems */ #include "debug.h" #include "string2.h" #include "vdso.h" @@ -392,7 +396,7 @@ int __kmod_path__parse(struct kmod_path *m, const char *path, return -ENOMEM; } - strxfrchar(m->name, '-', '_'); + strreplace(m->name, '-', '_'); } return 0; @@ -430,7 +434,7 @@ static void dso__list_add(struct dso *dso) static void dso__list_del(struct dso *dso) { - list_del(&dso->data.open_entry); + list_del_init(&dso->data.open_entry); WARN_ONCE(dso__data_open_cnt <= 0, "DSO data fd counter out of bounds."); dso__data_open_cnt--; @@ -706,6 +710,44 @@ bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by) return false; } +static ssize_t bpf_read(struct dso *dso, u64 offset, char *data) +{ + struct bpf_prog_info_node *node; + ssize_t size = DSO__DATA_CACHE_SIZE; + u64 len; + u8 *buf; + + node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id); + if (!node || !node->info_linear) { + dso->data.status = DSO_DATA_STATUS_ERROR; + return -1; + } + + len = node->info_linear->info.jited_prog_len; + buf = (u8 *)(uintptr_t)node->info_linear->info.jited_prog_insns; + + if (offset >= len) + return -1; + + size = (ssize_t)min(len - offset, (u64)size); + memcpy(data, buf + offset, size); + return size; +} + +static int bpf_size(struct dso *dso) +{ + struct bpf_prog_info_node *node; + + node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id); + if (!node || !node->info_linear) { + dso->data.status = DSO_DATA_STATUS_ERROR; + return -1; + } + + dso->data.file_size = node->info_linear->info.jited_prog_len; + return 0; +} + static void dso_cache__free(struct dso *dso) { @@ -794,48 +836,53 @@ dso_cache__memcpy(struct dso_cache *cache, u64 offset, return cache_size; } -static ssize_t -dso_cache__read(struct dso *dso, struct machine *machine, - u64 offset, u8 *data, ssize_t size) +static ssize_t file_read(struct dso *dso, struct machine *machine, + u64 offset, char *data) { - struct dso_cache *cache; - struct dso_cache *old; ssize_t ret; - do { - 
u64 cache_offset; + pthread_mutex_lock(&dso__data_open_lock); - cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE); - if (!cache) - return -ENOMEM; + /* + * dso->data.fd might be closed if other thread opened another + * file (dso) due to open file limit (RLIMIT_NOFILE). + */ + try_to_open_dso(dso, machine); - pthread_mutex_lock(&dso__data_open_lock); + if (dso->data.fd < 0) { + dso->data.status = DSO_DATA_STATUS_ERROR; + ret = -errno; + goto out; + } - /* - * dso->data.fd might be closed if other thread opened another - * file (dso) due to open file limit (RLIMIT_NOFILE). - */ - try_to_open_dso(dso, machine); + ret = pread(dso->data.fd, data, DSO__DATA_CACHE_SIZE, offset); +out: + pthread_mutex_unlock(&dso__data_open_lock); + return ret; +} - if (dso->data.fd < 0) { - ret = -errno; - dso->data.status = DSO_DATA_STATUS_ERROR; - break; - } +static ssize_t +dso_cache__read(struct dso *dso, struct machine *machine, + u64 offset, u8 *data, ssize_t size) +{ + u64 cache_offset = offset & DSO__DATA_CACHE_MASK; + struct dso_cache *cache; + struct dso_cache *old; + ssize_t ret; - cache_offset = offset & DSO__DATA_CACHE_MASK; + cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE); + if (!cache) + return -ENOMEM; - ret = pread(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE, cache_offset); - if (ret <= 0) - break; + if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) + ret = bpf_read(dso, cache_offset, cache->data); + else + ret = file_read(dso, machine, cache_offset, cache->data); + if (ret > 0) { cache->offset = cache_offset; cache->size = ret; - } while (0); - - pthread_mutex_unlock(&dso__data_open_lock); - if (ret > 0) { old = dso_cache__insert(dso, cache); if (old) { /* we lose the race */ @@ -898,18 +945,12 @@ static ssize_t cached_read(struct dso *dso, struct machine *machine, return r; } -int dso__data_file_size(struct dso *dso, struct machine *machine) +static int file_size(struct dso *dso, struct machine *machine) { int ret = 0; struct stat st; char 
sbuf[STRERR_BUFSIZE]; - if (dso->data.file_size) - return 0; - - if (dso->data.status == DSO_DATA_STATUS_ERROR) - return -1; - pthread_mutex_lock(&dso__data_open_lock); /* @@ -938,6 +979,20 @@ out: return ret; } +int dso__data_file_size(struct dso *dso, struct machine *machine) +{ + if (dso->data.file_size) + return 0; + + if (dso->data.status == DSO_DATA_STATUS_ERROR) + return -1; + + if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) + return bpf_size(dso); + + return file_size(dso, machine); +} + /** * dso__data_size - Return dso data size * @dso: dso object diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 7eb7de5aee44..03b2de1f5a35 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1,26 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * dwarf-aux.c : libdw auxiliary interfaces - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * */ #include <errno.h> #include <inttypes.h> #include <stdbool.h> -#include "util.h" +#include <stdlib.h> #include "debug.h" #include "dwarf-aux.h" #include "string2.h" diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 8ac53bf1ec4e..0489b0cf8e2c 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -1,22 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _DWARF_AUX_H #define _DWARF_AUX_H /* * dwarf-aux.h : libdw auxiliary interfaces - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * */ #include <dwarf.h> diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6a3eaf7d9353..9909ec40c6d2 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -1,12 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include "cpumap.h" #include "env.h" -#include "sane_ctype.h" -#include "util.h" +#include <linux/ctype.h> +#include <linux/zalloc.h> #include "bpf-event.h" #include <errno.h> #include <sys/utsname.h> #include <bpf/libbpf.h> +#include <stdlib.h> struct perf_env perf_env; @@ -186,7 +187,7 @@ void perf_env__exit(struct perf_env *env) zfree(&env->caches); for (i = 0; i < env->nr_memory_nodes; i++) - free(env->memory_nodes[i].set); + zfree(&env->memory_nodes[i].set); zfree(&env->memory_nodes); } @@ -246,6 +247,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) for (cpu = 0; cpu < nr_cpus; ++cpu) { env->cpu[cpu].core_id = cpu_map__get_core_id(cpu); env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu); + env->cpu[cpu].die_id = cpu_map__get_die_id(cpu); } env->nr_cpus_avail = nr_cpus; @@ -285,9 +287,9 @@ int perf_env__nr_cpus_avail(struct perf_env *env) void cpu_cache_level__free(struct cpu_cache_level *cache) { - free(cache->type); - free(cache->map); - free(cache->size); + zfree(&cache->type); + zfree(&cache->map); + zfree(&cache->size); } /* diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 4f8e2b485c01..d5d9865aa812 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -9,6 +9,7 @@ struct cpu_topology_map { int socket_id; + int die_id; int core_id; }; @@ -49,6 +50,7 @@ struct perf_env { int nr_cmdline; int nr_sibling_cores; + int nr_sibling_dies; int nr_sibling_threads; int nr_numa_nodes; int nr_memory_nodes; @@ -57,11 +59,17 @@ struct perf_env { char *cmdline; const char **cmdline_argv; char *sibling_cores; + char *sibling_dies; char *sibling_threads; char *pmu_mappings; struct cpu_topology_map *cpu; struct cpu_cache_level *caches; int caches_cnt; + u32 comp_ratio; + u32 comp_ver; + u32 comp_type; 
+ u32 comp_level; + u32 comp_mmap_len; struct numa_node *numa_nodes; struct memory_node *memory_nodes; unsigned long long memory_bsize; @@ -80,6 +88,12 @@ struct perf_env { } bpf_progs; }; +enum perf_compress_type { + PERF_COMP_NONE = 0, + PERF_COMP_ZSTD, + PERF_COMP_MAX +}; + struct bpf_prog_info_node; struct btf_node; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index ba7be74fad6e..f1f4848947ce 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -11,6 +11,7 @@ #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ #include <api/fs/fs.h> #include <linux/perf_event.h> +#include <linux/zalloc.h> #include "event.h" #include "debug.h" #include "hist.h" @@ -20,7 +21,7 @@ #include "strlist.h" #include "thread.h" #include "thread_map.h" -#include "sane_ctype.h" +#include <linux/ctype.h> #include "map.h" #include "symbol.h" #include "symbol/kallsyms.h" @@ -68,6 +69,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE", [PERF_RECORD_TIME_CONV] = "TIME_CONV", [PERF_RECORD_HEADER_FEATURE] = "FEATURE", + [PERF_RECORD_COMPRESSED] = "COMPRESSED", }; static const char *perf_ns__names[] = { @@ -157,9 +159,7 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, if (name) { char *nl; - name += 5; /* strlen("Name:") */ - name = ltrim(name); - + name = skip_spaces(name + 5); /* strlen("Name:") */ nl = strchr(name, '\n'); if (nl) *nl = '\0'; @@ -856,7 +856,7 @@ free_threads: free(synthesize_threads); free_dirent: for (i = 0; i < n; i++) - free(dirent[i]); + zfree(&dirent[i]); free(dirent); return err; @@ -1485,7 +1485,7 @@ static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp) size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp) { - return fprintf(fp, " ksymbol event with addr %" PRIx64 " len %u type %u flags 0x%x name %s\n", + return fprintf(fp, " addr %" PRIx64 " len %u type %u flags 0x%x name %s\n", 
event->ksymbol_event.addr, event->ksymbol_event.len, event->ksymbol_event.ksym_type, event->ksymbol_event.flags, event->ksymbol_event.name); @@ -1493,7 +1493,7 @@ size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp) size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp) { - return fprintf(fp, " bpf event with type %u, flags %u, id %u\n", + return fprintf(fp, " type %u, flags %u, id %u\n", event->bpf_event.type, event->bpf_event.flags, event->bpf_event.id); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 36ae7e92dab1..1f1da6082806 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -6,6 +6,7 @@ #include <stdio.h> #include <linux/kernel.h> #include <linux/bpf.h> +#include <linux/perf_event.h> #include "../perf.h" #include "build-id.h" @@ -203,6 +204,8 @@ struct perf_sample { u64 period; u64 weight; u64 transaction; + u64 insn_cnt; + u64 cyc_cnt; u32 cpu; u32 raw_size; u64 data_src; @@ -254,6 +257,7 @@ enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_EVENT_UPDATE = 78, PERF_RECORD_TIME_CONV = 79, PERF_RECORD_HEADER_FEATURE = 80, + PERF_RECORD_COMPRESSED = 81, PERF_RECORD_HEADER_MAX }; @@ -626,6 +630,11 @@ struct feature_event { char data[]; }; +struct compressed_event { + struct perf_event_header header; + char data[]; +}; + union perf_event { struct perf_event_header header; struct mmap_event mmap; @@ -659,6 +668,7 @@ union perf_event { struct feature_event feat; struct ksymbol_event ksymbol_event; struct bpf_event bpf_event; + struct compressed_event pack; }; void perf_event__print_totals(void); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 51ead577533f..b0364d923f76 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> * * Parts came from builtin-{top,stat,record}.c, see those files for 
further * copyright notes. - * - * Released under the GPL v2. (and only v2, not any later version) */ -#include "util.h" #include <api/fs/fs.h> #include <errno.h> #include <inttypes.h> @@ -34,6 +32,7 @@ #include <linux/hash.h> #include <linux/log2.h> #include <linux/err.h> +#include <linux/zalloc.h> #ifdef LACKS_SIGQUEUE_PROTOTYPE int sigqueue(pid_t pid, int sig, const union sigval value); @@ -1009,7 +1008,8 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, */ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, - bool auxtrace_overwrite, int nr_cblocks, int affinity) + bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush, + int comp_level) { struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; @@ -1019,7 +1019,8 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, * Its value is decided by evsel's write_backward. * So &mp should not be passed through const pointer. 
*/ - struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity }; + struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush, + .comp_level = comp_level }; if (!evlist->mmap) evlist->mmap = perf_evlist__alloc_mmap(evlist, false); @@ -1051,7 +1052,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) { - return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS); + return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0); } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 6a94785b9100..49354fe24d5f 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -177,7 +177,8 @@ unsigned long perf_event_mlock_kb_in_pages(void); int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, - bool auxtrace_overwrite, int nr_cblocks, int affinity); + bool auxtrace_overwrite, int nr_cblocks, + int affinity, int flush, int comp_level); int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); void perf_evlist__munmap(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 966360844fff..ebb46da4dfe5 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> * * Parts came from builtin-{top,stat,record}.c, see those files for further * copyright notes. - * - * Released under the GPL v2. 
(and only v2, not any later version) */ #include <byteswap.h> @@ -18,6 +17,7 @@ #include <linux/perf_event.h> #include <linux/compiler.h> #include <linux/err.h> +#include <linux/zalloc.h> #include <sys/ioctl.h> #include <sys/resource.h> #include <sys/types.h> @@ -28,7 +28,6 @@ #include "event.h" #include "evsel.h" #include "evlist.h" -#include "util.h" #include "cpumap.h" #include "thread_map.h" #include "target.h" @@ -36,10 +35,11 @@ #include "debug.h" #include "trace-event.h" #include "stat.h" +#include "string2.h" #include "memswap.h" #include "util/parse-branch-options.h" -#include "sane_ctype.h" +#include <linux/ctype.h> struct perf_missing_features perf_missing_features; @@ -580,10 +580,19 @@ static int perf_evsel__raw_name(struct perf_evsel *evsel, char *bf, size_t size) return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); } +static int perf_evsel__tool_name(char *bf, size_t size) +{ + int ret = scnprintf(bf, size, "duration_time"); + return ret; +} + const char *perf_evsel__name(struct perf_evsel *evsel) { char bf[128]; + if (!evsel) + goto out_unknown; + if (evsel->name) return evsel->name; @@ -601,7 +610,10 @@ const char *perf_evsel__name(struct perf_evsel *evsel) break; case PERF_TYPE_SOFTWARE: - perf_evsel__sw_name(evsel, bf, sizeof(bf)); + if (evsel->tool_event) + perf_evsel__tool_name(bf, sizeof(bf)); + else + perf_evsel__sw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_TRACEPOINT: @@ -620,7 +632,10 @@ const char *perf_evsel__name(struct perf_evsel *evsel) evsel->name = strdup(bf); - return evsel->name ?: "unknown"; + if (evsel->name) + return evsel->name; +out_unknown: + return "unknown"; } const char *perf_evsel__group_name(struct perf_evsel *evsel) @@ -671,6 +686,10 @@ static void __perf_evsel__config_callchain(struct perf_evsel *evsel, attr->sample_max_stack = param->max_stack; + if (opts->kernel_callchains) + attr->exclude_callchain_user = 1; + if (opts->user_callchains) + attr->exclude_callchain_kernel = 1; if 
(param->record_mode == CALLCHAIN_LBR) { if (!opts->branch_stack) { if (attr->exclude_user) { @@ -693,7 +712,14 @@ static void __perf_evsel__config_callchain(struct perf_evsel *evsel, if (!function) { perf_evsel__set_sample_bit(evsel, REGS_USER); perf_evsel__set_sample_bit(evsel, STACK_USER); - attr->sample_regs_user |= PERF_REGS_MASK; + if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) { + attr->sample_regs_user |= DWARF_MINIMAL_REGS; + pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, " + "specifying a subset with --user-regs may render DWARF unwinding unreliable, " + "so the minimal registers set (IP, SP) is explicitly forced.\n"); + } else { + attr->sample_regs_user |= PERF_REGS_MASK; + } attr->sample_stack_user = param->dump_size; attr->exclude_callchain_user = 1; } else { @@ -804,6 +830,8 @@ static void apply_config_terms(struct perf_evsel *evsel, break; case PERF_EVSEL__CONFIG_TERM_DRV_CFG: break; + case PERF_EVSEL__CONFIG_TERM_PERCORE: + break; default: break; } @@ -1126,9 +1154,6 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { - if (evsel->system_wide) - nthreads = 1; - evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); if (evsel->fd) { @@ -1273,7 +1298,7 @@ static void perf_evsel__free_config_terms(struct perf_evsel *evsel) struct perf_evsel_config_term *term, *h; list_for_each_entry_safe(term, h, &evsel->config_terms, list) { - list_del(&term->list); + list_del_init(&term->list); free(term); } } @@ -1775,14 +1800,8 @@ static int perf_event_open(struct perf_evsel *evsel, if (fd >= 0) break; - /* - * Do quick precise_ip fallback if: - * - there is precise_ip set in perf_event_attr - * - maximum precise is requested - * - sys_perf_event_open failed with ENOTSUP error, - * which is associated with wrong precise_ip - */ - if (!precise_ip || !evsel->precise_max || (errno != ENOTSUP)) + /* 
Do not try less precise if not requested. */ + if (!evsel->precise_max) break; /* diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 0f2c6c93d721..cad54e8ba522 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -50,6 +50,7 @@ enum term_type { PERF_EVSEL__CONFIG_TERM_OVERWRITE, PERF_EVSEL__CONFIG_TERM_DRV_CFG, PERF_EVSEL__CONFIG_TERM_BRANCH, + PERF_EVSEL__CONFIG_TERM_PERCORE, }; struct perf_evsel_config_term { @@ -67,6 +68,7 @@ struct perf_evsel_config_term { bool overwrite; char *branch; unsigned long max_events; + bool percore; } val; bool weak; }; @@ -75,6 +77,11 @@ struct perf_stat_evsel; typedef int (perf_evsel__sb_cb_t)(union perf_event *event, void *data); +enum perf_tool_event { + PERF_TOOL_NONE = 0, + PERF_TOOL_DURATION_TIME = 1, +}; + /** struct perf_evsel - event selector * * @evlist - evlist this evsel is in, if it is in one. @@ -121,6 +128,7 @@ struct perf_evsel { unsigned int sample_size; int id_pos; int is_pos; + enum perf_tool_event tool_event; bool uniquified_name; bool snapshot; bool supported; @@ -152,6 +160,7 @@ struct perf_evsel { struct perf_evsel **metric_events; bool collect_stat; bool weak_group; + bool percore; const char *pmu_name; struct { perf_evsel__sb_cb_t *cb; diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index aafbe54fd3fa..7001247ebbd6 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * genelf.c * Copyright (C) 2014, Google, Inc * * Contributed by: * Stephane Eranian <eranian@gmail.com> - * - * Released under the GPL v2. 
(and only v2, not any later version) */ #include <sys/types.h> diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c index 40789d8603d0..995e490c17fa 100644 --- a/tools/perf/util/genelf_debug.c +++ b/tools/perf/util/genelf_debug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * genelf_debug.c * Copyright (C) 2015, Google, Inc @@ -5,8 +6,6 @@ * Contributed by: * Stephane Eranian <eranian@google.com> * - * Released under the GPL v2. - * * based on GPLv2 source code from Oprofile * @remark Copyright 2007 OProfile authors * @author Philippe Elie diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c index 267aa609a582..01f32f26552d 100644 --- a/tools/perf/util/get_current_dir_name.c +++ b/tools/perf/util/get_current_dir_name.c @@ -1,8 +1,8 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> +// SPDX-License-Identifier: LGPL-2.1 +// Copyright (C) 2018, 2019 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> // #ifndef HAVE_GET_CURRENT_DIR_NAME -#include "util.h" +#include "get_current_dir_name.h" #include <unistd.h> #include <stdlib.h> #include <stdlib.h> diff --git a/tools/perf/util/get_current_dir_name.h b/tools/perf/util/get_current_dir_name.h new file mode 100644 index 000000000000..69f7d5537d32 --- /dev/null +++ b/tools/perf/util/get_current_dir_name.h @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 +// Copyright (C) 2018, 2019 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> +// +#ifndef __PERF_GET_CURRENT_DIR_NAME_H +#ifndef HAVE_GET_CURRENT_DIR_NAME +char *get_current_dir_name(void); +#endif // HAVE_GET_CURRENT_DIR_NAME +#endif // __PERF_GET_CURRENT_DIR_NAME_H diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 2d2af2ac2b1e..c24db7f4909c 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include 
<inttypes.h> -#include "util.h" #include "string2.h" #include <sys/param.h> #include <sys/types.h> @@ -13,7 +12,9 @@ #include <linux/list.h> #include <linux/kernel.h> #include <linux/bitops.h> +#include <linux/string.h> #include <linux/stringify.h> +#include <linux/zalloc.h> #include <sys/stat.h> #include <sys/utsname.h> #include <linux/time64.h> @@ -43,7 +44,7 @@ #include "cputopo.h" #include "bpf-event.h" -#include "sane_ctype.h" +#include <linux/ctype.h> /* * magic2 = "PERFILE2" @@ -416,10 +417,8 @@ static int __write_cpudesc(struct feat_fd *ff, const char *cpuinfo_proc) while (*p) { if (isspace(*p)) { char *r = p + 1; - char *q = r; + char *q = skip_spaces(r); *p = ' '; - while (*q && isspace(*q)) - q++; if (q != (p+1)) while ((*r++ = *q++)); } @@ -599,6 +598,27 @@ static int write_cpu_topology(struct feat_fd *ff, if (ret < 0) return ret; } + + if (!tp->die_sib) + goto done; + + ret = do_write(ff, &tp->die_sib, sizeof(tp->die_sib)); + if (ret < 0) + goto done; + + for (i = 0; i < tp->die_sib; i++) { + ret = do_write_string(ff, tp->die_siblings[i]); + if (ret < 0) + goto done; + } + + for (j = 0; j < perf_env.nr_cpus_avail; j++) { + ret = do_write(ff, &perf_env.cpu[j].die_id, + sizeof(perf_env.cpu[j].die_id)); + if (ret < 0) + return ret; + } + done: cpu_topology__delete(tp); return ret; @@ -1028,26 +1048,26 @@ static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 lev return -1; cache->type[len] = 0; - cache->type = rtrim(cache->type); + cache->type = strim(cache->type); scnprintf(file, PATH_MAX, "%s/size", path); if (sysfs__read_str(file, &cache->size, &len)) { - free(cache->type); + zfree(&cache->type); return -1; } cache->size[len] = 0; - cache->size = rtrim(cache->size); + cache->size = strim(cache->size); scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path); if (sysfs__read_str(file, &cache->map, &len)) { - free(cache->map); - free(cache->type); + zfree(&cache->map); + zfree(&cache->type); return -1; } cache->map[len] = 0; - 
cache->map = rtrim(cache->map); + cache->map = strim(cache->map); return 0; } @@ -1100,7 +1120,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp) return 0; } -#define MAX_CACHES 2000 +#define MAX_CACHES (MAX_NR_CPUS * 4) static int write_cache(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) @@ -1344,6 +1364,30 @@ out: return ret; } +static int write_compressed(struct feat_fd *ff __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + int ret; + + ret = do_write(ff, &(ff->ph->env.comp_ver), sizeof(ff->ph->env.comp_ver)); + if (ret) + return ret; + + ret = do_write(ff, &(ff->ph->env.comp_type), sizeof(ff->ph->env.comp_type)); + if (ret) + return ret; + + ret = do_write(ff, &(ff->ph->env.comp_level), sizeof(ff->ph->env.comp_level)); + if (ret) + return ret; + + ret = do_write(ff, &(ff->ph->env.comp_ratio), sizeof(ff->ph->env.comp_ratio)); + if (ret) + return ret; + + return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len)); +} + static void print_hostname(struct feat_fd *ff, FILE *fp) { fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); @@ -1415,10 +1459,20 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp) str = ph->env.sibling_cores; for (i = 0; i < nr; i++) { - fprintf(fp, "# sibling cores : %s\n", str); + fprintf(fp, "# sibling sockets : %s\n", str); str += strlen(str) + 1; } + if (ph->env.nr_sibling_dies) { + nr = ph->env.nr_sibling_dies; + str = ph->env.sibling_dies; + + for (i = 0; i < nr; i++) { + fprintf(fp, "# sibling dies : %s\n", str); + str += strlen(str) + 1; + } + } + nr = ph->env.nr_sibling_threads; str = ph->env.sibling_threads; @@ -1427,12 +1481,28 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp) str += strlen(str) + 1; } - if (ph->env.cpu != NULL) { - for (i = 0; i < cpu_nr; i++) - fprintf(fp, "# CPU %d: Core ID %d, Socket ID %d\n", i, - ph->env.cpu[i].core_id, ph->env.cpu[i].socket_id); - } else - fprintf(fp, "# Core ID and Socket 
ID information is not available\n"); + if (ph->env.nr_sibling_dies) { + if (ph->env.cpu != NULL) { + for (i = 0; i < cpu_nr; i++) + fprintf(fp, "# CPU %d: Core ID %d, " + "Die ID %d, Socket ID %d\n", + i, ph->env.cpu[i].core_id, + ph->env.cpu[i].die_id, + ph->env.cpu[i].socket_id); + } else + fprintf(fp, "# Core ID, Die ID and Socket ID " + "information is not available\n"); + } else { + if (ph->env.cpu != NULL) { + for (i = 0; i < cpu_nr; i++) + fprintf(fp, "# CPU %d: Core ID %d, " + "Socket ID %d\n", + i, ph->env.cpu[i].core_id, + ph->env.cpu[i].socket_id); + } else + fprintf(fp, "# Core ID and Socket ID " + "information is not available\n"); + } } static void print_clockid(struct feat_fd *ff, FILE *fp) @@ -1688,6 +1758,13 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused) } } +static void print_compressed(struct feat_fd *ff, FILE *fp) +{ + fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n", + ff->ph->env.comp_type == PERF_COMP_ZSTD ? "Zstd" : "Unknown", + ff->ph->env.comp_level, ff->ph->env.comp_ratio); +} + static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) { const char *delimiter = "# pmu mappings: "; @@ -2183,6 +2260,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) goto free_cpu; ph->env.cpu[i].core_id = nr; + size += sizeof(u32); if (do_read_u32(ff, &nr)) goto free_cpu; @@ -2194,6 +2272,40 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) } ph->env.cpu[i].socket_id = nr; + size += sizeof(u32); + } + + /* + * The header may be from old perf, + * which doesn't include die information. 
+ */ + if (ff->size <= size) + return 0; + + if (do_read_u32(ff, &nr)) + return -1; + + ph->env.nr_sibling_dies = nr; + size += sizeof(u32); + + for (i = 0; i < nr; i++) { + str = do_read_string(ff); + if (!str) + goto error; + + /* include a NULL character at the end */ + if (strbuf_add(&sb, str, strlen(str) + 1) < 0) + goto error; + size += string_size(str); + free(str); + } + ph->env.sibling_dies = strbuf_detach(&sb, NULL); + + for (i = 0; i < (u32)cpu_nr; i++) { + if (do_read_u32(ff, &nr)) + goto free_cpu; + + ph->env.cpu[i].die_id = nr; } return 0; @@ -2667,6 +2779,27 @@ out: return err; } +static int process_compressed(struct feat_fd *ff, + void *data __maybe_unused) +{ + if (do_read_u32(ff, &(ff->ph->env.comp_ver))) + return -1; + + if (do_read_u32(ff, &(ff->ph->env.comp_type))) + return -1; + + if (do_read_u32(ff, &(ff->ph->env.comp_level))) + return -1; + + if (do_read_u32(ff, &(ff->ph->env.comp_ratio))) + return -1; + + if (do_read_u32(ff, &(ff->ph->env.comp_mmap_len))) + return -1; + + return 0; +} + struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2730,6 +2863,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPN(DIR_FORMAT, dir_format, false), FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false), FEAT_OPR(BPF_BTF, bpf_btf, false), + FEAT_OPR(COMPRESSED, compressed, false), }; struct header_print_data { @@ -3549,6 +3683,7 @@ int perf_event__synthesize_features(struct perf_tool *tool, return -ENOMEM; ff.size = sz - sz_hdr; + ff.ph = &session->header; for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) { if (!feat_ops[feat].synthesize) { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 386da49e1bfa..5b3abe4172e2 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -42,6 +42,7 @@ enum { HEADER_DIR_FORMAT, HEADER_BPF_PROG_INFO, HEADER_BPF_BTF, + HEADER_COMPRESSED, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 
256, }; diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c index 4f07a5ba5030..ab9e16123626 100644 --- a/tools/perf/util/help-unknown-cmd.c +++ b/tools/perf/util/help-unknown-cmd.c @@ -3,9 +3,11 @@ #include "config.h" #include <poll.h> #include <stdio.h> +#include <stdlib.h> #include <subcmd/help.h> #include "../builtin.h" #include "levenshtein.h" +#include <linux/zalloc.h> static int autocorrect; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 7ace7a10054d..f24fd1954f6c 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include "callchain.h" -#include "util.h" #include "build-id.h" #include "hist.h" #include "map.h" @@ -20,6 +19,7 @@ #include <inttypes.h> #include <sys/param.h> #include <linux/time64.h> +#include <linux/zalloc.h> static bool hists__filter_entry_by_dso(struct hists *hists, struct hist_entry *he); @@ -376,6 +376,24 @@ void hists__delete_entries(struct hists *hists) } } +struct hist_entry *hists__get_entry(struct hists *hists, int idx) +{ + struct rb_node *next = rb_first_cached(&hists->entries); + struct hist_entry *n; + int i = 0; + + while (next) { + n = rb_entry(next, struct hist_entry, rb_node); + if (i == idx) + return n; + + next = rb_next(&n->rb_node); + i++; + } + + return NULL; +} + /* * histogram, sorted on item, collects periods */ @@ -454,16 +472,16 @@ static int hist_entry__init(struct hist_entry *he, return 0; err_srcline: - free(he->srcline); + zfree(&he->srcline); err_rawdata: - free(he->raw_data); + zfree(&he->raw_data); err_infos: if (he->branch_info) { map__put(he->branch_info->from.map); map__put(he->branch_info->to.map); - free(he->branch_info); + zfree(&he->branch_info); } if (he->mem_info) { map__put(he->mem_info->iaddr.map); @@ -471,7 +489,7 @@ err_infos: } err: map__zput(he->ms.map); - free(he->stat_acc); + zfree(&he->stat_acc); return -ENOMEM; } @@ -574,6 +592,8 @@ static struct hist_entry 
*hists__findnew_entry(struct hists *hists, */ mem_info__zput(entry->mem_info); + block_info__zput(entry->block_info); + /* If the map of an existing hist_entry has * become out-of-date due to an exec() or * similar, update it. Otherwise we will @@ -645,6 +665,7 @@ __hists__add_entry(struct hists *hists, struct symbol *sym_parent, struct branch_info *bi, struct mem_info *mi, + struct block_info *block_info, struct perf_sample *sample, bool sample_self, struct hist_entry_ops *ops) @@ -677,6 +698,7 @@ __hists__add_entry(struct hists *hists, .hists = hists, .branch_info = bi, .mem_info = mi, + .block_info = block_info, .transaction = sample->transaction, .raw_data = sample->raw_data, .raw_size = sample->raw_size, @@ -699,7 +721,7 @@ struct hist_entry *hists__add_entry(struct hists *hists, struct perf_sample *sample, bool sample_self) { - return __hists__add_entry(hists, al, sym_parent, bi, mi, + return __hists__add_entry(hists, al, sym_parent, bi, mi, NULL, sample, sample_self, NULL); } @@ -712,10 +734,22 @@ struct hist_entry *hists__add_entry_ops(struct hists *hists, struct perf_sample *sample, bool sample_self) { - return __hists__add_entry(hists, al, sym_parent, bi, mi, + return __hists__add_entry(hists, al, sym_parent, bi, mi, NULL, sample, sample_self, ops); } +struct hist_entry *hists__add_entry_block(struct hists *hists, + struct addr_location *al, + struct block_info *block_info) +{ + struct hist_entry entry = { + .block_info = block_info, + .hists = hists, + }, *he = hists__findnew_entry(hists, &entry, al, false); + + return he; +} + static int iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused, struct addr_location *al __maybe_unused) @@ -1213,14 +1247,17 @@ void hist_entry__delete(struct hist_entry *he) mem_info__zput(he->mem_info); } + if (he->block_info) + block_info__zput(he->block_info); + zfree(&he->res_samples); zfree(&he->stat_acc); free_srcline(he->srcline); if (he->srcfile && he->srcfile[0]) - free(he->srcfile); + zfree(&he->srcfile); 
free_callchain(he->callchain); - free(he->trace_output); - free(he->raw_data); + zfree(&he->trace_output); + zfree(&he->raw_data); ops->free(he); } @@ -2561,7 +2598,7 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh char unit; int printed; const struct dso *dso = hists->dso_filter; - const struct thread *thread = hists->thread_filter; + struct thread *thread = hists->thread_filter; int socket_id = hists->socket_filter; unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; u64 nr_events = hists->stats.total_period; @@ -2704,10 +2741,10 @@ static void hists_evsel__exit(struct perf_evsel *evsel) list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) { perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) { - list_del(&fmt->list); + list_del_init(&fmt->list); free(fmt); } - list_del(&node->list); + list_del_init(&node->list); free(node); } } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 76ff6c6d03b8..24635f36148d 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -16,6 +16,7 @@ struct addr_location; struct map_symbol; struct mem_info; struct branch_info; +struct block_info; struct symbol; enum hist_filter { @@ -149,6 +150,10 @@ struct hist_entry *hists__add_entry_ops(struct hists *hists, struct perf_sample *sample, bool sample_self); +struct hist_entry *hists__add_entry_block(struct hists *hists, + struct addr_location *al, + struct block_info *bi); + int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, int max_stack_depth, void *arg); @@ -178,6 +183,8 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); void hists__delete_entries(struct hists *hists); void hists__output_recalc_col_len(struct hists *hists, int max_rows); +struct hist_entry *hists__get_entry(struct hists *hists, int idx); + u64 hists__total_period(struct hists *hists); void hists__reset_stats(struct hists *hists); void hists__inc_stats(struct 
hists *hists, struct hist_entry *h); @@ -243,6 +250,7 @@ struct perf_hpp { size_t size; const char *sep; void *ptr; + bool skip; }; struct perf_hpp_fmt { diff --git a/tools/perf/util/include/linux/ctype.h b/tools/perf/util/include/linux/ctype.h deleted file mode 100644 index a53d4ee1e0b7..000000000000 --- a/tools/perf/util/include/linux/ctype.h +++ /dev/null @@ -1 +0,0 @@ -#include "../util.h" diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 47025bc727e1..5560e95afdda 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -1,16 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * intel-bts.c: Intel Processor Trace support * Copyright (c) 2013-2015, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * */ #include <endian.h> @@ -21,6 +12,7 @@ #include <linux/types.h> #include <linux/bitops.h> #include <linux/log2.h> +#include <linux/zalloc.h> #include "cpumap.h" #include "color.h" @@ -30,7 +22,6 @@ #include "map.h" #include "symbol.h" #include "session.h" -#include "util.h" #include "thread.h" #include "thread-stack.h" #include "debug.h" @@ -900,13 +891,12 @@ int intel_bts_process_auxtrace_info(union perf_event *event, if (dump_trace) return 0; - if (session->itrace_synth_opts && session->itrace_synth_opts->set) { + if (session->itrace_synth_opts->set) { bts->synth_opts = *session->itrace_synth_opts; } else { itrace_synth_opts__set_default(&bts->synth_opts, session->itrace_synth_opts->default_no_sample); - if (session->itrace_synth_opts) - bts->synth_opts.thread_stack = + bts->synth_opts.thread_stack = session->itrace_synth_opts->thread_stack; } diff --git a/tools/perf/util/intel-bts.h b/tools/perf/util/intel-bts.h index ca65e21b3e83..53d5aa02766a 100644 --- a/tools/perf/util/intel-bts.h +++ b/tools/perf/util/intel-bts.h @@ -1,16 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * intel-bts.h: Intel Processor Trace support * Copyright (c) 2013-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * */ #ifndef INCLUDE__PERF_INTEL_BTS_H__ diff --git a/tools/perf/util/intel-pt-decoder/inat.c b/tools/perf/util/intel-pt-decoder/inat.c index 906d94aa0a24..446c0413a27c 100644 --- a/tools/perf/util/intel-pt-decoder/inat.c +++ b/tools/perf/util/intel-pt-decoder/inat.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * x86 instruction attribute tables * * Written by Masami Hiramatsu <mhiramat@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ #include "insn.h" diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h index 52dc8d911173..877827b7c2c3 100644 --- a/tools/perf/util/intel-pt-decoder/inat.h +++ b/tools/perf/util/intel-pt-decoder/inat.h @@ -1,24 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_INAT_H #define _ASM_X86_INAT_H /* * x86 instruction attributes * * Written by Masami Hiramatsu <mhiramat@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ #include "inat_types.h" diff --git a/tools/perf/util/intel-pt-decoder/inat_types.h b/tools/perf/util/intel-pt-decoder/inat_types.h index cb3c20ce39cf..b047efa9ddc2 100644 --- a/tools/perf/util/intel-pt-decoder/inat_types.h +++ b/tools/perf/util/intel-pt-decoder/inat_types.h @@ -1,24 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_INAT_TYPES_H #define _ASM_X86_INAT_TYPES_H /* * x86 instruction attributes * * Written by Masami Hiramatsu <mhiramat@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * */ /* Instruction attributes */ diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c index ca983e2bea8b..82783bf43b74 100644 --- a/tools/perf/util/intel-pt-decoder/insn.c +++ b/tools/perf/util/intel-pt-decoder/insn.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * x86 instruction analysis * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright (C) IBM Corporation, 2002, 2004, 2009 */ diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h index 2669c9f748e4..37a4c390750b 100644 --- a/tools/perf/util/intel-pt-decoder/insn.h +++ b/tools/perf/util/intel-pt-decoder/insn.h @@ -1,22 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_INSN_H #define _ASM_X86_INSN_H /* * x86 instruction analysis * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright (C) IBM Corporation, 2009 */ diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 872fab163585..3bfdf2b7a96a 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1,16 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * intel_pt_decoder.c: Intel Processor Trace support * Copyright (c) 2013-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * */ #ifndef _GNU_SOURCE @@ -23,9 +14,9 @@ #include <stdint.h> #include <inttypes.h> #include <linux/compiler.h> +#include <linux/zalloc.h> #include "../cache.h" -#include "../util.h" #include "../auxtrace.h" #include "intel-pt-insn-decoder.h" @@ -58,6 +49,7 @@ enum intel_pt_pkt_state { INTEL_PT_STATE_NO_IP, INTEL_PT_STATE_ERR_RESYNC, INTEL_PT_STATE_IN_SYNC, + INTEL_PT_STATE_TNT_CONT, INTEL_PT_STATE_TNT, INTEL_PT_STATE_TIP, INTEL_PT_STATE_TIP_PGD, @@ -72,8 +64,9 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) case INTEL_PT_STATE_NO_IP: case INTEL_PT_STATE_ERR_RESYNC: case INTEL_PT_STATE_IN_SYNC: - case INTEL_PT_STATE_TNT: + case INTEL_PT_STATE_TNT_CONT: return true; + case INTEL_PT_STATE_TNT: case INTEL_PT_STATE_TIP: case INTEL_PT_STATE_TIP_PGD: case INTEL_PT_STATE_FUP: @@ -102,6 +95,7 @@ struct intel_pt_decoder { uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, void *data); bool (*pgd_ip)(uint64_t ip, void *data); + int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data); void *data; struct intel_pt_state state; const unsigned char *buf; @@ -114,6 +108,7 @@ struct intel_pt_decoder { bool have_cyc; bool fixup_last_mtc; bool have_last_ip; + bool in_psb; enum intel_pt_param_flags flags; uint64_t pos; uint64_t last_ip; @@ -122,6 +117,7 @@ struct intel_pt_decoder { uint64_t timestamp; uint64_t tsc_timestamp; uint64_t ref_timestamp; + uint64_t buf_timestamp; uint64_t sample_timestamp; uint64_t ret_addr; uint64_t ctc_timestamp; @@ -137,6 +133,10 @@ struct intel_pt_decoder { int mtc_shift; struct intel_pt_stack stack; enum intel_pt_pkt_state pkt_state; + enum intel_pt_pkt_ctx pkt_ctx; + enum intel_pt_pkt_ctx prev_pkt_ctx; + enum intel_pt_blk_type blk_type; + int blk_type_pos; struct intel_pt_pkt packet; struct intel_pt_pkt tnt; int pkt_step; @@ -158,6 +158,11 @@ struct intel_pt_decoder { uint64_t period_mask; uint64_t period_ticks; uint64_t last_masked_timestamp; + uint64_t tot_cyc_cnt; + uint64_t 
sample_tot_cyc_cnt; + uint64_t base_cyc_cnt; + uint64_t cyc_cnt_timestamp; + double tsc_to_cyc; bool continuous_period; bool overflow; bool set_fup_tx_flags; @@ -165,6 +170,8 @@ struct intel_pt_decoder { bool set_fup_mwait; bool set_fup_pwre; bool set_fup_exstop; + bool set_fup_bep; + bool sample_cyc; unsigned int fup_tx_flags; unsigned int tx_flags; uint64_t fup_ptw_payload; @@ -224,6 +231,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->get_trace = params->get_trace; decoder->walk_insn = params->walk_insn; decoder->pgd_ip = params->pgd_ip; + decoder->lookahead = params->lookahead; decoder->data = params->data; decoder->return_compression = params->return_compression; decoder->branch_enable = params->branch_enable; @@ -477,7 +485,21 @@ static int intel_pt_bad_packet(struct intel_pt_decoder *decoder) return -EBADMSG; } -static int intel_pt_get_data(struct intel_pt_decoder *decoder) +static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder) +{ + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; +} + +static void intel_pt_reposition(struct intel_pt_decoder *decoder) +{ + decoder->ip = 0; + decoder->pkt_state = INTEL_PT_STATE_NO_PSB; + decoder->timestamp = 0; + decoder->have_tma = false; +} + +static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition) { struct intel_pt_buffer buffer = { .buf = 0, }; int ret; @@ -494,12 +516,10 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder) intel_pt_log("No more data\n"); return -ENODATA; } - if (!buffer.consecutive) { - decoder->ip = 0; - decoder->pkt_state = INTEL_PT_STATE_NO_PSB; + decoder->buf_timestamp = buffer.ref_timestamp; + if (!buffer.consecutive || reposition) { + intel_pt_reposition(decoder); decoder->ref_timestamp = buffer.ref_timestamp; - decoder->timestamp = 0; - decoder->have_tma = false; decoder->state.trace_nr = buffer.trace_nr; intel_pt_log("Reference 
timestamp 0x%" PRIx64 "\n", decoder->ref_timestamp); @@ -509,10 +529,11 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder) return 0; } -static int intel_pt_get_next_data(struct intel_pt_decoder *decoder) +static int intel_pt_get_next_data(struct intel_pt_decoder *decoder, + bool reposition) { if (!decoder->next_buf) - return intel_pt_get_data(decoder); + return intel_pt_get_data(decoder, reposition); decoder->buf = decoder->next_buf; decoder->len = decoder->next_len; @@ -531,7 +552,7 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder) len = decoder->len; memcpy(buf, decoder->buf, len); - ret = intel_pt_get_data(decoder); + ret = intel_pt_get_data(decoder, false); if (ret) { decoder->pos += old_len; return ret < 0 ? ret : -EINVAL; @@ -543,7 +564,8 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder) memcpy(buf + len, decoder->buf, n); len += n; - ret = intel_pt_get_packet(buf, len, &decoder->packet); + decoder->prev_pkt_ctx = decoder->pkt_ctx; + ret = intel_pt_get_packet(buf, len, &decoder->packet, &decoder->pkt_ctx); if (ret < (int)old_len) { decoder->next_buf = decoder->buf; decoder->next_len = decoder->len; @@ -578,6 +600,7 @@ static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder, { struct intel_pt_pkt_info pkt_info; const unsigned char *buf = decoder->buf; + enum intel_pt_pkt_ctx pkt_ctx = decoder->pkt_ctx; size_t len = decoder->len; int ret; @@ -596,7 +619,8 @@ static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder, if (!len) return INTEL_PT_NEED_MORE_BYTES; - ret = intel_pt_get_packet(buf, len, &pkt_info.packet); + ret = intel_pt_get_packet(buf, len, &pkt_info.packet, + &pkt_ctx); if (!ret) return INTEL_PT_NEED_MORE_BYTES; if (ret < 0) @@ -671,6 +695,10 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) case INTEL_PT_MNT: case INTEL_PT_PTWRITE: case INTEL_PT_PTWRITE_IP: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: 
return 0; case INTEL_PT_MTC: @@ -857,13 +885,14 @@ static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder) decoder->len -= decoder->pkt_step; if (!decoder->len) { - ret = intel_pt_get_next_data(decoder); + ret = intel_pt_get_next_data(decoder, false); if (ret) return ret; } + decoder->prev_pkt_ctx = decoder->pkt_ctx; ret = intel_pt_get_packet(decoder->buf, decoder->len, - &decoder->packet); + &decoder->packet, &decoder->pkt_ctx); if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 && decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { ret = intel_pt_get_split_packet(decoder); @@ -888,16 +917,20 @@ static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder) timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; masked_timestamp = timestamp & decoder->period_mask; if (decoder->continuous_period) { - if (masked_timestamp != decoder->last_masked_timestamp) + if (masked_timestamp > decoder->last_masked_timestamp) return 1; } else { timestamp += 1; masked_timestamp = timestamp & decoder->period_mask; - if (masked_timestamp != decoder->last_masked_timestamp) { + if (masked_timestamp > decoder->last_masked_timestamp) { decoder->last_masked_timestamp = masked_timestamp; decoder->continuous_period = true; } } + + if (masked_timestamp < decoder->last_masked_timestamp) + return decoder->period_ticks; + return decoder->period_ticks - (timestamp - masked_timestamp); } @@ -926,7 +959,10 @@ static void intel_pt_sample_insn(struct intel_pt_decoder *decoder) case INTEL_PT_PERIOD_TICKS: timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; masked_timestamp = timestamp & decoder->period_mask; - decoder->last_masked_timestamp = masked_timestamp; + if (masked_timestamp > decoder->last_masked_timestamp) + decoder->last_masked_timestamp = masked_timestamp; + else + decoder->last_masked_timestamp += decoder->period_ticks; break; case INTEL_PT_PERIOD_NONE: case INTEL_PT_PERIOD_MTC: @@ -1094,6 +1130,14 @@ static bool 
intel_pt_fup_event(struct intel_pt_decoder *decoder) decoder->state.to_ip = 0; ret = true; } + if (decoder->set_fup_bep) { + decoder->set_fup_bep = false; + decoder->state.type |= INTEL_PT_BLK_ITEMS; + decoder->state.type &= ~INTEL_PT_BRANCH; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + ret = true; + } return ret; } @@ -1254,7 +1298,9 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) return -ENOENT; } decoder->tnt.count -= 1; - if (!decoder->tnt.count) + if (decoder->tnt.count) + decoder->pkt_state = INTEL_PT_STATE_TNT_CONT; + else decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->tnt.payload <<= 1; decoder->state.from_ip = decoder->ip; @@ -1285,7 +1331,9 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { decoder->tnt.count -= 1; - if (!decoder->tnt.count) + if (decoder->tnt.count) + decoder->pkt_state = INTEL_PT_STATE_TNT_CONT; + else decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; if (decoder->tnt.payload & BIT63) { decoder->tnt.payload <<= 1; @@ -1304,9 +1352,12 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) decoder->ip += intel_pt_insn.length; return 0; } + decoder->sample_cyc = false; decoder->ip += intel_pt_insn.length; - if (!decoder->tnt.count) + if (!decoder->tnt.count) { + intel_pt_update_sample_time(decoder); return -EAGAIN; + } decoder->tnt.payload <<= 1; continue; } @@ -1338,6 +1389,21 @@ static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip) return 0; } +static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp) +{ + timestamp |= (ref_timestamp & (0xffULL << 56)); + + if (timestamp < ref_timestamp) { + if (ref_timestamp - timestamp > (1ULL << 55)) + timestamp += (1ULL << 56); + } else { + if (timestamp - ref_timestamp > (1ULL << 55)) + timestamp -= (1ULL << 56); + } + + return timestamp; +} + static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) { uint64_t 
timestamp; @@ -1345,15 +1411,8 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) decoder->have_tma = false; if (decoder->ref_timestamp) { - timestamp = decoder->packet.payload | - (decoder->ref_timestamp & (0xffULL << 56)); - if (timestamp < decoder->ref_timestamp) { - if (decoder->ref_timestamp - timestamp > (1ULL << 55)) - timestamp += (1ULL << 56); - } else { - if (timestamp - decoder->ref_timestamp > (1ULL << 55)) - timestamp -= (1ULL << 56); - } + timestamp = intel_pt_8b_tsc(decoder->packet.payload, + decoder->ref_timestamp); decoder->tsc_timestamp = timestamp; decoder->timestamp = timestamp; decoder->ref_timestamp = 0; @@ -1397,6 +1456,42 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) return -EOVERFLOW; } +static inline void intel_pt_mtc_cyc_cnt_pge(struct intel_pt_decoder *decoder) +{ + if (decoder->have_cyc) + return; + + decoder->cyc_cnt_timestamp = decoder->timestamp; + decoder->base_cyc_cnt = decoder->tot_cyc_cnt; +} + +static inline void intel_pt_mtc_cyc_cnt_cbr(struct intel_pt_decoder *decoder) +{ + decoder->tsc_to_cyc = decoder->cbr / decoder->max_non_turbo_ratio_fp; + + if (decoder->pge) + intel_pt_mtc_cyc_cnt_pge(decoder); +} + +static inline void intel_pt_mtc_cyc_cnt_upd(struct intel_pt_decoder *decoder) +{ + uint64_t tot_cyc_cnt, tsc_delta; + + if (decoder->have_cyc) + return; + + decoder->sample_cyc = true; + + if (!decoder->pge || decoder->timestamp <= decoder->cyc_cnt_timestamp) + return; + + tsc_delta = decoder->timestamp - decoder->cyc_cnt_timestamp; + tot_cyc_cnt = tsc_delta * decoder->tsc_to_cyc + decoder->base_cyc_cnt; + + if (tot_cyc_cnt > decoder->tot_cyc_cnt) + decoder->tot_cyc_cnt = tot_cyc_cnt; +} + static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) { uint32_t ctc = decoder->packet.payload; @@ -1406,6 +1501,11 @@ static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) if (!decoder->tsc_ctc_ratio_d) return; + if (decoder->pge && !decoder->in_psb) + 
intel_pt_mtc_cyc_cnt_pge(decoder); + else + intel_pt_mtc_cyc_cnt_upd(decoder); + decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; decoder->ctc_timestamp = decoder->tsc_timestamp - fc; if (decoder->tsc_ctc_mult) { @@ -1461,6 +1561,8 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) else decoder->timestamp = timestamp; + intel_pt_mtc_cyc_cnt_upd(decoder); + decoder->timestamp_insn_cnt = 0; decoder->last_mtc = mtc; @@ -1485,6 +1587,8 @@ static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) decoder->cbr = cbr; decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; + + intel_pt_mtc_cyc_cnt_cbr(decoder); } static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) @@ -1494,6 +1598,9 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) decoder->have_cyc = true; decoder->cycle_cnt += decoder->packet.payload; + if (decoder->pge) + decoder->tot_cyc_cnt += decoder->packet.payload; + decoder->sample_cyc = true; if (!decoder->cyc_ref_timestamp) return; @@ -1516,19 +1623,62 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) intel_pt_log_to("Setting timestamp", decoder->timestamp); } +static void intel_pt_bbp(struct intel_pt_decoder *decoder) +{ + if (decoder->prev_pkt_ctx == INTEL_PT_NO_CTX) { + memset(decoder->state.items.mask, 0, sizeof(decoder->state.items.mask)); + decoder->state.items.is_32_bit = false; + } + decoder->blk_type = decoder->packet.payload; + decoder->blk_type_pos = intel_pt_blk_type_pos(decoder->blk_type); + if (decoder->blk_type == INTEL_PT_GP_REGS) + decoder->state.items.is_32_bit = decoder->packet.count; + if (decoder->blk_type_pos < 0) { + intel_pt_log("WARNING: Unknown block type %u\n", + decoder->blk_type); + } else if (decoder->state.items.mask[decoder->blk_type_pos]) { + intel_pt_log("WARNING: Duplicate block type %u\n", + decoder->blk_type); + } +} + +static void intel_pt_bip(struct intel_pt_decoder *decoder) +{ + uint32_t id = 
decoder->packet.count; + uint32_t bit = 1 << id; + int pos = decoder->blk_type_pos; + + if (pos < 0 || id >= INTEL_PT_BLK_ITEM_ID_CNT) { + intel_pt_log("WARNING: Unknown block item %u type %d\n", + id, decoder->blk_type); + return; + } + + if (decoder->state.items.mask[pos] & bit) { + intel_pt_log("WARNING: Duplicate block item %u type %d\n", + id, decoder->blk_type); + } + + decoder->state.items.mask[pos] |= bit; + decoder->state.items.val[pos][id] = decoder->packet.payload; +} + /* Walk PSB+ packets when already in sync. */ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) { int err; + decoder->in_psb = true; + while (1) { err = intel_pt_get_next_packet(decoder); if (err) - return err; + goto out; switch (decoder->packet.type) { case INTEL_PT_PSBEND: - return 0; + err = 0; + goto out; case INTEL_PT_TIP_PGD: case INTEL_PT_TIP_PGE: @@ -1544,12 +1694,18 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_MWAIT: case INTEL_PT_PWRE: case INTEL_PT_PWRX: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: decoder->have_tma = false; intel_pt_log("ERROR: Unexpected packet\n"); - return -EAGAIN; + err = -EAGAIN; + goto out; case INTEL_PT_OVF: - return intel_pt_overflow(decoder); + err = intel_pt_overflow(decoder); + goto out; case INTEL_PT_TSC: intel_pt_calc_tsc_timestamp(decoder); @@ -1595,6 +1751,10 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) break; } } +out: + decoder->in_psb = false; + + return err; } static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) @@ -1631,6 +1791,10 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) case INTEL_PT_MWAIT: case INTEL_PT_PWRE: case INTEL_PT_PWRX: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: intel_pt_log("ERROR: Missing TIP after FUP\n"); decoder->pkt_state = INTEL_PT_STATE_ERR3; decoder->pkt_step = 0; @@ -1668,6 +1832,7 @@ static int 
intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) decoder->state.to_ip = decoder->ip; } decoder->state.type |= INTEL_PT_TRACE_BEGIN; + intel_pt_mtc_cyc_cnt_pge(decoder); return 0; case INTEL_PT_TIP: @@ -1738,6 +1903,7 @@ next: case INTEL_PT_TIP_PGE: { decoder->pge = true; + intel_pt_mtc_cyc_cnt_pge(decoder); if (decoder->packet.count == 0) { intel_pt_log_at("Skipping zero TIP.PGE", decoder->pos); @@ -1809,6 +1975,13 @@ next: goto next; if (err) return err; + /* + * PSB+ CBR will not have changed but cater for the + * possibility of another CBR change that gets caught up + * in the PSB+. + */ + if (decoder->cbr != decoder->cbr_seen) + return 0; break; case INTEL_PT_PIP: @@ -1849,16 +2022,8 @@ next: case INTEL_PT_CBR: intel_pt_calc_cbr(decoder); - if (!decoder->branch_enable && - decoder->cbr != decoder->cbr_seen) { - decoder->cbr_seen = decoder->cbr; - decoder->state.type = INTEL_PT_CBR_CHG; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; - decoder->state.cbr_payload = - decoder->packet.payload; + if (decoder->cbr != decoder->cbr_seen) return 0; - } break; case INTEL_PT_MODE_EXEC: @@ -1950,6 +2115,33 @@ next: decoder->state.pwrx_payload = decoder->packet.payload; return 0; + case INTEL_PT_BBP: + intel_pt_bbp(decoder); + break; + + case INTEL_PT_BIP: + intel_pt_bip(decoder); + break; + + case INTEL_PT_BEP: + decoder->state.type = INTEL_PT_BLK_ITEMS; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return 0; + + case INTEL_PT_BEP_IP: + err = intel_pt_get_next_packet(decoder); + if (err) + return err; + if (decoder->packet.type == INTEL_PT_FUP) { + decoder->set_fup_bep = true; + no_tip = true; + } else { + intel_pt_log_at("ERROR: Missing FUP after BEP", + decoder->pos); + } + goto next; + default: return intel_pt_bug(decoder); } @@ -1968,10 +2160,12 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) { int err; + decoder->in_psb = true; + while (1) { err = intel_pt_get_next_packet(decoder); if (err) - return 
err; + goto out; switch (decoder->packet.type) { case INTEL_PT_TIP_PGD: @@ -1986,8 +2180,13 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) case INTEL_PT_MWAIT: case INTEL_PT_PWRE: case INTEL_PT_PWRX: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: intel_pt_log("ERROR: Unexpected packet\n"); - return -ENOENT; + err = -ENOENT; + goto out; case INTEL_PT_FUP: decoder->pge = true; @@ -2046,16 +2245,20 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) decoder->pkt_state = INTEL_PT_STATE_ERR4; else decoder->pkt_state = INTEL_PT_STATE_ERR3; - return -ENOENT; + err = -ENOENT; + goto out; case INTEL_PT_BAD: /* Does not happen */ - return intel_pt_bug(decoder); + err = intel_pt_bug(decoder); + goto out; case INTEL_PT_OVF: - return intel_pt_overflow(decoder); + err = intel_pt_overflow(decoder); + goto out; case INTEL_PT_PSBEND: - return 0; + err = 0; + goto out; case INTEL_PT_PSB: case INTEL_PT_VMCS: @@ -2065,6 +2268,10 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) break; } } +out: + decoder->in_psb = false; + + return err; } static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) @@ -2079,18 +2286,30 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) switch (decoder->packet.type) { case INTEL_PT_TIP_PGD: decoder->continuous_period = false; - __fallthrough; + decoder->pge = false; + if (intel_pt_have_ip(decoder)) + intel_pt_set_ip(decoder); + if (!decoder->ip) + break; + decoder->state.type |= INTEL_PT_TRACE_END; + return 0; + case INTEL_PT_TIP_PGE: + decoder->pge = true; + intel_pt_mtc_cyc_cnt_pge(decoder); + if (intel_pt_have_ip(decoder)) + intel_pt_set_ip(decoder); + if (!decoder->ip) + break; + decoder->state.type |= INTEL_PT_TRACE_BEGIN; + return 0; + case INTEL_PT_TIP: - decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; + decoder->pge = true; if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); if (!decoder->ip) break; - if 
(decoder->packet.type == INTEL_PT_TIP_PGE) - decoder->state.type |= INTEL_PT_TRACE_BEGIN; - if (decoder->packet.type == INTEL_PT_TIP_PGD) - decoder->state.type |= INTEL_PT_TRACE_END; return 0; case INTEL_PT_FUP: @@ -2171,6 +2390,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_MWAIT: case INTEL_PT_PWRE: case INTEL_PT_PWRX: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: default: break; } @@ -2186,6 +2409,7 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) decoder->set_fup_mwait = false; decoder->set_fup_pwre = false; decoder->set_fup_exstop = false; + decoder->set_fup_bep = false; if (!decoder->branch_enable) { decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; @@ -2243,7 +2467,7 @@ static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder, decoder->pos += decoder->len; decoder->len = 0; - ret = intel_pt_get_next_data(decoder); + ret = intel_pt_get_next_data(decoder, false); if (ret) return ret; @@ -2269,7 +2493,7 @@ static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder) intel_pt_log("Scanning for PSB\n"); while (1) { if (!decoder->len) { - ret = intel_pt_get_next_data(decoder); + ret = intel_pt_get_next_data(decoder, false); if (ret) return ret; } @@ -2365,6 +2589,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) err = intel_pt_walk_trace(decoder); break; case INTEL_PT_STATE_TNT: + case INTEL_PT_STATE_TNT_CONT: err = intel_pt_walk_tnt(decoder); if (err == -EAGAIN) err = intel_pt_walk_trace(decoder); @@ -2396,18 +2621,24 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) if (err) { decoder->state.err = intel_pt_ext_err(err); decoder->state.from_ip = decoder->ip; - decoder->sample_timestamp = decoder->timestamp; - decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + intel_pt_update_sample_time(decoder); + decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; } else { decoder->state.err = 0; - if 
(decoder->cbr != decoder->cbr_seen && decoder->state.type) { + if (decoder->cbr != decoder->cbr_seen) { decoder->cbr_seen = decoder->cbr; + if (!decoder->state.type) { + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + } decoder->state.type |= INTEL_PT_CBR_CHG; decoder->state.cbr_payload = decoder->cbr_payload; + decoder->state.cbr = decoder->cbr; } if (intel_pt_sample_time(decoder->pkt_state)) { - decoder->sample_timestamp = decoder->timestamp; - decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + intel_pt_update_sample_time(decoder); + if (decoder->sample_cyc) + decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; } } @@ -2415,6 +2646,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); decoder->state.cr3 = decoder->cr3; decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; + decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt; return &decoder->state; } @@ -2518,11 +2750,12 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len) static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc, size_t *rem) { + enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX; struct intel_pt_pkt packet; int ret; while (len) { - ret = intel_pt_get_packet(buf, len, &packet); + ret = intel_pt_get_packet(buf, len, &packet, &ctx); if (ret <= 0) return false; if (packet.type == INTEL_PT_TSC) { @@ -2724,3 +2957,131 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, return buf_b; /* No overlap */ } } + +/** + * struct fast_forward_data - data used by intel_pt_ff_cb(). + * @timestamp: timestamp to fast forward towards + * @buf_timestamp: buffer timestamp of last buffer with trace data earlier than + * the fast forward timestamp. + */ +struct fast_forward_data { + uint64_t timestamp; + uint64_t buf_timestamp; +}; + +/** + * intel_pt_ff_cb - fast forward lookahead callback. 
+ * @buffer: Intel PT trace buffer + * @data: opaque pointer to fast forward data (struct fast_forward_data) + * + * Determine if @buffer trace is past the fast forward timestamp. + * + * Return: 1 (stop lookahead) if @buffer trace is past the fast forward + * timestamp, and 0 otherwise. + */ +static int intel_pt_ff_cb(struct intel_pt_buffer *buffer, void *data) +{ + struct fast_forward_data *d = data; + unsigned char *buf; + uint64_t tsc; + size_t rem; + size_t len; + + buf = (unsigned char *)buffer->buf; + len = buffer->len; + + if (!intel_pt_next_psb(&buf, &len) || + !intel_pt_next_tsc(buf, len, &tsc, &rem)) + return 0; + + tsc = intel_pt_8b_tsc(tsc, buffer->ref_timestamp); + + intel_pt_log("Buffer 1st timestamp " x64_fmt " ref timestamp " x64_fmt "\n", + tsc, buffer->ref_timestamp); + + /* + * If the buffer contains a timestamp earlier that the fast forward + * timestamp, then record it, else stop. + */ + if (tsc < d->timestamp) + d->buf_timestamp = buffer->ref_timestamp; + else + return 1; + + return 0; +} + +/** + * intel_pt_fast_forward - reposition decoder forwards. + * @decoder: Intel PT decoder + * @timestamp: timestamp to fast forward towards + * + * Reposition decoder at the last PSB with a timestamp earlier than @timestamp. + * + * Return: 0 on success or negative error code on failure. 
+ */ +int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp) +{ + struct fast_forward_data d = { .timestamp = timestamp }; + unsigned char *buf; + size_t len; + int err; + + intel_pt_log("Fast forward towards timestamp " x64_fmt "\n", timestamp); + + /* Find buffer timestamp of buffer to fast forward to */ + err = decoder->lookahead(decoder->data, intel_pt_ff_cb, &d); + if (err < 0) + return err; + + /* Walk to buffer with same buffer timestamp */ + if (d.buf_timestamp) { + do { + decoder->pos += decoder->len; + decoder->len = 0; + err = intel_pt_get_next_data(decoder, true); + /* -ENOLINK means non-consecutive trace */ + if (err && err != -ENOLINK) + return err; + } while (decoder->buf_timestamp != d.buf_timestamp); + } + + if (!decoder->buf) + return 0; + + buf = (unsigned char *)decoder->buf; + len = decoder->len; + + if (!intel_pt_next_psb(&buf, &len)) + return 0; + + /* + * Walk PSBs while the PSB timestamp is less than the fast forward + * timestamp. + */ + do { + uint64_t tsc; + size_t rem; + + if (!intel_pt_next_tsc(buf, len, &tsc, &rem)) + break; + tsc = intel_pt_8b_tsc(tsc, decoder->buf_timestamp); + /* + * A TSC packet can slip past MTC packets but, after fast + * forward, decoding starts at the TSC timestamp. That means + * the timestamps may not be exactly the same as the timestamps + * that would have been decoded without fast forward. 
+ */ + if (tsc < timestamp) { + intel_pt_log("Fast forward to next PSB timestamp " x64_fmt "\n", tsc); + decoder->pos += decoder->len - len; + decoder->buf = buf; + decoder->len = len; + intel_pt_reposition(decoder); + } else { + break; + } + } while (intel_pt_step_psb(&buf, &len)); + + return 0; +} diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index ed088d4726ba..e289e463d635 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -1,16 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * intel_pt_decoder.h: Intel Processor Trace support * Copyright (c) 2013-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #ifndef INCLUDE__INTEL_PT_DECODER_H__ @@ -39,6 +30,7 @@ enum intel_pt_sample_type { INTEL_PT_CBR_CHG = 1 << 8, INTEL_PT_TRACE_BEGIN = 1 << 9, INTEL_PT_TRACE_END = 1 << 10, + INTEL_PT_BLK_ITEMS = 1 << 11, }; enum intel_pt_period_type { @@ -70,6 +62,141 @@ enum intel_pt_param_flags { INTEL_PT_FUP_WITH_NLIP = 1 << 0, }; +enum intel_pt_blk_type { + INTEL_PT_GP_REGS = 1, + INTEL_PT_PEBS_BASIC = 4, + INTEL_PT_PEBS_MEM = 5, + INTEL_PT_LBR_0 = 8, + INTEL_PT_LBR_1 = 9, + INTEL_PT_LBR_2 = 10, + INTEL_PT_XMM = 16, + INTEL_PT_BLK_TYPE_MAX +}; + +/* + * The block type numbers are not sequential but here they are given sequential + * positions to avoid wasting space for array placement. 
+ */ +enum intel_pt_blk_type_pos { + INTEL_PT_GP_REGS_POS, + INTEL_PT_PEBS_BASIC_POS, + INTEL_PT_PEBS_MEM_POS, + INTEL_PT_LBR_0_POS, + INTEL_PT_LBR_1_POS, + INTEL_PT_LBR_2_POS, + INTEL_PT_XMM_POS, + INTEL_PT_BLK_TYPE_CNT +}; + +/* Get the array position for a block type */ +static inline int intel_pt_blk_type_pos(enum intel_pt_blk_type blk_type) +{ +#define BLK_TYPE(bt) [INTEL_PT_##bt] = INTEL_PT_##bt##_POS + 1 + const int map[INTEL_PT_BLK_TYPE_MAX] = { + BLK_TYPE(GP_REGS), + BLK_TYPE(PEBS_BASIC), + BLK_TYPE(PEBS_MEM), + BLK_TYPE(LBR_0), + BLK_TYPE(LBR_1), + BLK_TYPE(LBR_2), + BLK_TYPE(XMM), + }; +#undef BLK_TYPE + + return blk_type < INTEL_PT_BLK_TYPE_MAX ? map[blk_type] - 1 : -1; +} + +#define INTEL_PT_BLK_ITEM_ID_CNT 32 + +/* + * Use unions so that the block items can be accessed by name or by array index. + * There is an array of 32-bit masks for each block type, which indicate which + * values are present. Then arrays of 32 64-bit values for each block type. + */ +struct intel_pt_blk_items { + union { + uint32_t mask[INTEL_PT_BLK_TYPE_CNT]; + struct { + uint32_t has_rflags:1; + uint32_t has_rip:1; + uint32_t has_rax:1; + uint32_t has_rcx:1; + uint32_t has_rdx:1; + uint32_t has_rbx:1; + uint32_t has_rsp:1; + uint32_t has_rbp:1; + uint32_t has_rsi:1; + uint32_t has_rdi:1; + uint32_t has_r8:1; + uint32_t has_r9:1; + uint32_t has_r10:1; + uint32_t has_r11:1; + uint32_t has_r12:1; + uint32_t has_r13:1; + uint32_t has_r14:1; + uint32_t has_r15:1; + uint32_t has_unused_0:14; + uint32_t has_ip:1; + uint32_t has_applicable_counters:1; + uint32_t has_timestamp:1; + uint32_t has_unused_1:29; + uint32_t has_mem_access_address:1; + uint32_t has_mem_aux_info:1; + uint32_t has_mem_access_latency:1; + uint32_t has_tsx_aux_info:1; + uint32_t has_unused_2:28; + uint32_t has_lbr_0; + uint32_t has_lbr_1; + uint32_t has_lbr_2; + uint32_t has_xmm; + }; + }; + union { + uint64_t val[INTEL_PT_BLK_TYPE_CNT][INTEL_PT_BLK_ITEM_ID_CNT]; + struct { + struct { + uint64_t rflags; + uint64_t 
rip; + uint64_t rax; + uint64_t rcx; + uint64_t rdx; + uint64_t rbx; + uint64_t rsp; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t unused_0[INTEL_PT_BLK_ITEM_ID_CNT - 18]; + }; + struct { + uint64_t ip; + uint64_t applicable_counters; + uint64_t timestamp; + uint64_t unused_1[INTEL_PT_BLK_ITEM_ID_CNT - 3]; + }; + struct { + uint64_t mem_access_address; + uint64_t mem_aux_info; + uint64_t mem_access_latency; + uint64_t tsx_aux_info; + uint64_t unused_2[INTEL_PT_BLK_ITEM_ID_CNT - 4]; + }; + uint64_t lbr_0[INTEL_PT_BLK_ITEM_ID_CNT]; + uint64_t lbr_1[INTEL_PT_BLK_ITEM_ID_CNT]; + uint64_t lbr_2[INTEL_PT_BLK_ITEM_ID_CNT]; + uint64_t xmm[INTEL_PT_BLK_ITEM_ID_CNT]; + }; + }; + bool is_32_bit; +}; + struct intel_pt_state { enum intel_pt_sample_type type; int err; @@ -77,6 +204,7 @@ struct intel_pt_state { uint64_t to_ip; uint64_t cr3; uint64_t tot_insn_cnt; + uint64_t tot_cyc_cnt; uint64_t timestamp; uint64_t est_timestamp; uint64_t trace_nr; @@ -85,10 +213,12 @@ struct intel_pt_state { uint64_t pwre_payload; uint64_t pwrx_payload; uint64_t cbr_payload; + uint32_t cbr; uint32_t flags; enum intel_pt_insn_op insn_op; int insn_len; char insn[INTEL_PT_INSN_BUF_SZ]; + struct intel_pt_blk_items items; }; struct intel_pt_insn; @@ -101,12 +231,15 @@ struct intel_pt_buffer { uint64_t trace_nr; }; +typedef int (*intel_pt_lookahead_cb_t)(struct intel_pt_buffer *, void *); + struct intel_pt_params { int (*get_trace)(struct intel_pt_buffer *buffer, void *data); int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, void *data); bool (*pgd_ip)(uint64_t ip, void *data); + int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data); void *data; bool return_compression; bool branch_enable; @@ -126,6 +259,8 @@ void intel_pt_decoder_free(struct intel_pt_decoder 
*decoder); const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder); +int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp); + unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, unsigned char *buf_b, size_t len_b, bool have_tsc, bool *consecutive); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 1c0e289f01e6..598f56be9f17 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -1,16 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * intel_pt_insn_decoder.c: Intel Processor Trace support * Copyright (c) 2013-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #include <stdio.h> diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h index 37ec5627ae9b..95a1eb0141ff 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h @@ -1,16 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * intel_pt_insn_decoder.h: Intel Processor Trace support * Copyright (c) 2013-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #ifndef INCLUDE__INTEL_PT_INSN_DECODER_H__ diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c index 5e64da270f97..09feb5b07d32 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -1,16 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * intel_pt_log.c: Intel Processor Trace support * Copyright (c) 2013-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #include <stdio.h> diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h index cc084937f701..388661f89c44 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -1,16 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * intel_pt_log.h: Intel Processor Trace support * Copyright (c) 2013-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #ifndef INCLUDE__INTEL_PT_LOG_H__ diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index d426761a549d..0ccf10a0bf44 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -1,16 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * intel_pt_pkt_decoder.c: Intel Processor Trace support * Copyright (c) 2013-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * */ #include <stdio.h> @@ -71,6 +62,10 @@ static const char * const packet_name[] = { [INTEL_PT_MWAIT] = "MWAIT", [INTEL_PT_PWRE] = "PWRE", [INTEL_PT_PWRX] = "PWRX", + [INTEL_PT_BBP] = "BBP", + [INTEL_PT_BIP] = "BIP", + [INTEL_PT_BEP] = "BEP", + [INTEL_PT_BEP_IP] = "BEP", }; const char *intel_pt_pkt_name(enum intel_pt_pkt_type type) @@ -289,6 +284,55 @@ static int intel_pt_get_pwrx(const unsigned char *buf, size_t len, return 7; } +static int intel_pt_get_bbp(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 3) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_BBP; + packet->count = buf[2] >> 7; + packet->payload = buf[2] & 0x1f; + return 3; +} + +static int intel_pt_get_bip_4(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 5) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_BIP; + packet->count = buf[0] >> 3; + memcpy_le64(&packet->payload, buf + 1, 4); + return 5; +} + +static int intel_pt_get_bip_8(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 9) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_BIP; + packet->count = buf[0] >> 3; + memcpy_le64(&packet->payload, buf + 1, 8); + return 9; +} + +static int intel_pt_get_bep(size_t len, struct intel_pt_pkt *packet) +{ + if (len < 2) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_BEP; + return 2; +} + +static int intel_pt_get_bep_ip(size_t len, struct intel_pt_pkt *packet) +{ + if (len < 2) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_BEP_IP; + return 2; +} + static int intel_pt_get_ext(const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) { @@ -329,6 +373,12 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len, return intel_pt_get_pwre(buf, len, packet); case 0xA2: /* PWRX */ return intel_pt_get_pwrx(buf, len, packet); + case 0x63: /* BBP */ + return intel_pt_get_bbp(buf, len, packet); + case 0x33: /* BEP no IP */ + 
return intel_pt_get_bep(len, packet); + case 0xb3: /* BEP with IP */ + return intel_pt_get_bep_ip(len, packet); default: return INTEL_PT_BAD_PACKET; } @@ -477,7 +527,8 @@ static int intel_pt_get_mtc(const unsigned char *buf, size_t len, } static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, - struct intel_pt_pkt *packet) + struct intel_pt_pkt *packet, + enum intel_pt_pkt_ctx ctx) { unsigned int byte; @@ -487,6 +538,22 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, return INTEL_PT_NEED_MORE_BYTES; byte = buf[0]; + + switch (ctx) { + case INTEL_PT_NO_CTX: + break; + case INTEL_PT_BLK_4_CTX: + if ((byte & 0x7) == 4) + return intel_pt_get_bip_4(buf, len, packet); + break; + case INTEL_PT_BLK_8_CTX: + if ((byte & 0x7) == 4) + return intel_pt_get_bip_8(buf, len, packet); + break; + default: + break; + }; + if (!(byte & BIT(0))) { if (byte == 0) return intel_pt_get_pad(packet); @@ -525,15 +592,65 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, } } +void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet, + enum intel_pt_pkt_ctx *ctx) +{ + switch (packet->type) { + case INTEL_PT_BAD: + case INTEL_PT_PAD: + case INTEL_PT_TSC: + case INTEL_PT_TMA: + case INTEL_PT_MTC: + case INTEL_PT_FUP: + case INTEL_PT_CYC: + case INTEL_PT_CBR: + case INTEL_PT_MNT: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: + case INTEL_PT_BIP: + break; + case INTEL_PT_TNT: + case INTEL_PT_TIP: + case INTEL_PT_TIP_PGD: + case INTEL_PT_TIP_PGE: + case INTEL_PT_MODE_EXEC: + case INTEL_PT_MODE_TSX: + case INTEL_PT_PIP: + case INTEL_PT_OVF: + case INTEL_PT_VMCS: + case INTEL_PT_TRACESTOP: + case INTEL_PT_PSB: + case INTEL_PT_PSBEND: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: + *ctx = INTEL_PT_NO_CTX; + break; + case INTEL_PT_BBP: + if (packet->count) + *ctx = INTEL_PT_BLK_4_CTX; + else + *ctx = 
INTEL_PT_BLK_8_CTX; + break; + default: + break; + } +} + int intel_pt_get_packet(const unsigned char *buf, size_t len, - struct intel_pt_pkt *packet) + struct intel_pt_pkt *packet, enum intel_pt_pkt_ctx *ctx) { int ret; - ret = intel_pt_do_get_packet(buf, len, packet); + ret = intel_pt_do_get_packet(buf, len, packet, *ctx); if (ret > 0) { while (ret < 8 && len > (size_t)ret && !buf[ret]) ret += 1; + intel_pt_upd_pkt_ctx(packet, ctx); } return ret; } @@ -611,8 +728,10 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload); case INTEL_PT_PTWRITE_IP: return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload); + case INTEL_PT_BEP: case INTEL_PT_EXSTOP: return snprintf(buf, buf_len, "%s IP:0", name); + case INTEL_PT_BEP_IP: case INTEL_PT_EXSTOP_IP: return snprintf(buf, buf_len, "%s IP:1", name); case INTEL_PT_MWAIT: @@ -630,6 +749,12 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, (unsigned int)((payload >> 4) & 0xf), (unsigned int)(payload & 0xf), (unsigned int)((payload >> 8) & 0xf)); + case INTEL_PT_BBP: + return snprintf(buf, buf_len, "%s SZ %s-byte Type 0x%llx", + name, packet->count ? "4" : "8", payload); + case INTEL_PT_BIP: + return snprintf(buf, buf_len, "%s ID 0x%02x Value 0x%llx", + name, packet->count, payload); default: break; } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h index 73ddc3a88d07..17ca9b56d72f 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h @@ -1,16 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * intel_pt_pkt_decoder.h: Intel Processor Trace support * Copyright (c) 2013-2014, Intel Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #ifndef INCLUDE__INTEL_PT_PKT_DECODER_H__ @@ -59,6 +50,10 @@ enum intel_pt_pkt_type { INTEL_PT_MWAIT, INTEL_PT_PWRE, INTEL_PT_PWRX, + INTEL_PT_BBP, + INTEL_PT_BIP, + INTEL_PT_BEP, + INTEL_PT_BEP_IP, }; struct intel_pt_pkt { @@ -67,10 +62,25 @@ struct intel_pt_pkt { uint64_t payload; }; +/* + * Decoding of BIP packets conflicts with single-byte TNT packets. Since BIP + * packets only occur in the context of a block (i.e. between BBP and BEP), that + * context must be recorded and passed to the packet decoder. + */ +enum intel_pt_pkt_ctx { + INTEL_PT_NO_CTX, /* BIP packets are invalid */ + INTEL_PT_BLK_4_CTX, /* 4-byte BIP packets */ + INTEL_PT_BLK_8_CTX, /* 8-byte BIP packets */ +}; + const char *intel_pt_pkt_name(enum intel_pt_pkt_type); int intel_pt_get_packet(const unsigned char *buf, size_t len, - struct intel_pt_pkt *packet); + struct intel_pt_pkt *packet, + enum intel_pt_pkt_ctx *ctx); + +void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet, + enum intel_pt_pkt_ctx *ctx); int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 6d288237887b..df061599fef4 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1,16 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * intel_pt.c: Intel Processor Trace support * Copyright (c) 2013-2015, Intel Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #include <inttypes.h> @@ -19,6 +10,7 @@ #include <errno.h> #include <linux/kernel.h> #include <linux/types.h> +#include <linux/zalloc.h> #include "../perf.h" #include "session.h" @@ -31,7 +23,6 @@ #include "evsel.h" #include "map.h" #include "color.h" -#include "util.h" #include "thread.h" #include "thread-stack.h" #include "symbol.h" @@ -42,6 +33,9 @@ #include "tsc.h" #include "intel-pt.h" #include "config.h" +#include "time-utils.h" + +#include "../arch/x86/include/uapi/asm/perf_regs.h" #include "intel-pt-decoder/intel-pt-log.h" #include "intel-pt-decoder/intel-pt-decoder.h" @@ -50,6 +44,11 @@ #define MAX_TIMESTAMP (~0ULL) +struct range { + u64 start; + u64 end; +}; + struct intel_pt { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -104,6 +103,9 @@ struct intel_pt { u64 pwrx_id; u64 cbr_id; + bool sample_pebs; + struct perf_evsel *pebs_evsel; + u64 tsc_bit; u64 mtc_bit; u64 mtc_freq_bits; @@ -118,6 +120,9 @@ struct intel_pt { char *filter; struct addr_filters filts; + + struct range *time_ranges; + unsigned int range_cnt; }; enum switch_state { @@ -154,9 +159,19 @@ struct intel_pt_queue { bool have_sample; u64 time; u64 timestamp; + u64 sel_timestamp; + bool sel_start; + unsigned int sel_idx; u32 flags; u16 insn_len; u64 last_insn_cnt; + u64 ipc_insn_cnt; + u64 ipc_cyc_cnt; + u64 last_in_insn_cnt; + u64 last_in_cyc_cnt; + u64 last_br_insn_cnt; + u64 last_br_cyc_cnt; + unsigned int cbr_seen; char insn[INTEL_PT_INSN_BUF_SZ]; }; @@ -168,13 +183,14 @@ static void intel_pt_dump(struct intel_pt *pt 
__maybe_unused, int ret, pkt_len, i; char desc[INTEL_PT_PKT_DESC_MAX]; const char *color = PERF_COLOR_BLUE; + enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX; color_fprintf(stdout, color, ". ... Intel Processor Trace data: size %zu bytes\n", len); while (len) { - ret = intel_pt_get_packet(buf, len, &packet); + ret = intel_pt_get_packet(buf, len, &packet, &ctx); if (ret > 0) pkt_len = ret; else @@ -233,32 +249,13 @@ static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer * return 0; } -/* This function assumes data is processed sequentially only */ -static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) +static int intel_pt_get_buffer(struct intel_pt_queue *ptq, + struct auxtrace_buffer *buffer, + struct auxtrace_buffer *old_buffer, + struct intel_pt_buffer *b) { - struct intel_pt_queue *ptq = data; - struct auxtrace_buffer *buffer = ptq->buffer; - struct auxtrace_buffer *old_buffer = ptq->old_buffer; - struct auxtrace_queue *queue; bool might_overlap; - if (ptq->stop) { - b->len = 0; - return 0; - } - - queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; - - buffer = auxtrace_buffer__next(queue, buffer); - if (!buffer) { - if (old_buffer) - auxtrace_buffer__drop_data(old_buffer); - b->len = 0; - return 0; - } - - ptq->buffer = buffer; - if (!buffer->data) { int fd = perf_data__fd(ptq->pt->session->data); @@ -288,6 +285,95 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) b->consecutive = true; } + return 0; +} + +/* Do not drop buffers with references - refer intel_pt_get_trace() */ +static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq, + struct auxtrace_buffer *buffer) +{ + if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer) + return; + + auxtrace_buffer__drop_data(buffer); +} + +/* Must be serialized with respect to intel_pt_get_trace() */ +static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb, + void *cb_data) +{ + struct intel_pt_queue *ptq = data; + struct 
auxtrace_buffer *buffer = ptq->buffer; + struct auxtrace_buffer *old_buffer = ptq->old_buffer; + struct auxtrace_queue *queue; + int err = 0; + + queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; + + while (1) { + struct intel_pt_buffer b = { .len = 0 }; + + buffer = auxtrace_buffer__next(queue, buffer); + if (!buffer) + break; + + err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b); + if (err) + break; + + if (b.len) { + intel_pt_lookahead_drop_buffer(ptq, old_buffer); + old_buffer = buffer; + } else { + intel_pt_lookahead_drop_buffer(ptq, buffer); + continue; + } + + err = cb(&b, cb_data); + if (err) + break; + } + + if (buffer != old_buffer) + intel_pt_lookahead_drop_buffer(ptq, buffer); + intel_pt_lookahead_drop_buffer(ptq, old_buffer); + + return err; +} + +/* + * This function assumes data is processed sequentially only. + * Must be serialized with respect to intel_pt_lookahead() + */ +static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) +{ + struct intel_pt_queue *ptq = data; + struct auxtrace_buffer *buffer = ptq->buffer; + struct auxtrace_buffer *old_buffer = ptq->old_buffer; + struct auxtrace_queue *queue; + int err; + + if (ptq->stop) { + b->len = 0; + return 0; + } + + queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; + + buffer = auxtrace_buffer__next(queue, buffer); + if (!buffer) { + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + b->len = 0; + return 0; + } + + ptq->buffer = buffer; + + err = intel_pt_get_buffer(ptq, buffer, old_buffer, b); + if (err) + return err; + if (ptq->step_through_buffers) ptq->stop = true; @@ -807,6 +893,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.get_trace = intel_pt_get_trace; params.walk_insn = intel_pt_walk_next_insn; + params.lookahead = intel_pt_lookahead; params.data = ptq; params.return_compression = intel_pt_return_compression(pt); params.branch_enable = intel_pt_branch_enable(pt); @@ -930,6 +1017,23 @@ static void 
intel_pt_sample_flags(struct intel_pt_queue *ptq) ptq->flags |= PERF_IP_FLAG_TRACE_END; } +static void intel_pt_setup_time_range(struct intel_pt *pt, + struct intel_pt_queue *ptq) +{ + if (!pt->range_cnt) + return; + + ptq->sel_timestamp = pt->time_ranges[0].start; + ptq->sel_idx = 0; + + if (ptq->sel_timestamp) { + ptq->sel_start = true; + } else { + ptq->sel_timestamp = pt->time_ranges[0].end; + ptq->sel_start = false; + } +} + static int intel_pt_setup_queue(struct intel_pt *pt, struct auxtrace_queue *queue, unsigned int queue_nr) @@ -949,11 +1053,15 @@ static int intel_pt_setup_queue(struct intel_pt *pt, ptq->cpu = queue->cpu; ptq->tid = queue->tid; + ptq->cbr_seen = UINT_MAX; + if (pt->sampling_mode && !pt->snapshot_mode && pt->timeless_decoding) ptq->step_through_buffers = true; ptq->sync_switch = pt->sync_switch; + + intel_pt_setup_time_range(pt, ptq); } if (!ptq->on_heap && @@ -968,6 +1076,14 @@ static int intel_pt_setup_queue(struct intel_pt *pt, intel_pt_log("queue %u getting timestamp\n", queue_nr); intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", queue_nr, ptq->cpu, ptq->pid, ptq->tid); + + if (ptq->sel_start && ptq->sel_timestamp) { + ret = intel_pt_fast_forward(ptq->decoder, + ptq->sel_timestamp); + if (ret) + return ret; + } + while (1) { state = intel_pt_decode(ptq->decoder); if (state->err) { @@ -987,6 +1103,9 @@ static int intel_pt_setup_queue(struct intel_pt *pt, queue_nr, ptq->timestamp); ptq->state = state; ptq->have_sample = true; + if (ptq->sel_start && ptq->sel_timestamp && + ptq->timestamp < ptq->sel_timestamp) + ptq->have_sample = false; intel_pt_sample_flags(ptq); ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp); if (ret) @@ -1068,28 +1187,48 @@ static inline bool intel_pt_skip_event(struct intel_pt *pt) pt->num_events++ < pt->synth_opts.initial_skip; } +/* + * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen. 
+ * Also ensure CBR is first non-skipped event by allowing for 4 more samples + * from this decoder state. + */ +static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt) +{ + return pt->synth_opts.initial_skip && + pt->num_events + 4 < pt->synth_opts.initial_skip; +} + +static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq, + union perf_event *event, + struct perf_sample *sample) +{ + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.size = sizeof(struct perf_event_header); + + sample->pid = ptq->pid; + sample->tid = ptq->tid; + sample->cpu = ptq->cpu; + sample->insn_len = ptq->insn_len; + memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); +} + static void intel_pt_prep_b_sample(struct intel_pt *pt, struct intel_pt_queue *ptq, union perf_event *event, struct perf_sample *sample) { + intel_pt_prep_a_sample(ptq, event, sample); + if (!pt->timeless_decoding) sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); sample->ip = ptq->state->from_ip; sample->cpumode = intel_pt_cpumode(pt, sample->ip); - sample->pid = ptq->pid; - sample->tid = ptq->tid; sample->addr = ptq->state->to_ip; sample->period = 1; - sample->cpu = ptq->cpu; sample->flags = ptq->flags; - sample->insn_len = ptq->insn_len; - memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); - event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = sample->cpumode; - event->sample.header.size = sizeof(struct perf_event_header); } static int intel_pt_inject_event(union perf_event *event, @@ -1162,6 +1301,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.branch_stack = (struct branch_stack *)&dummy_bs; } + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; + if (sample.cyc_cnt) { + sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; + ptq->last_br_insn_cnt = ptq->ipc_insn_cnt; + ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; + } + return intel_pt_deliver_synth_b_event(pt, event, &sample, pt->branches_sample_type); 
} @@ -1217,6 +1363,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) sample.stream_id = ptq->pt->instructions_id; sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; + if (sample.cyc_cnt) { + sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; + ptq->last_in_insn_cnt = ptq->ipc_insn_cnt; + ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt; + } + ptq->last_insn_cnt = ptq->state->tot_insn_cnt; return intel_pt_deliver_synth_event(pt, ptq, event, &sample, @@ -1290,9 +1443,11 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) struct perf_synth_intel_cbr raw; u32 flags; - if (intel_pt_skip_event(pt)) + if (intel_pt_skip_cbr_event(pt)) return 0; + ptq->cbr_seen = ptq->state->cbr; + intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->cbr_id; @@ -1410,6 +1565,261 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) pt->pwr_events_sample_type); } +/* + * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer + * intel_pt_add_gp_regs(). 
+ */ +static const int pebs_gp_regs[] = { + [PERF_REG_X86_FLAGS] = 1, + [PERF_REG_X86_IP] = 2, + [PERF_REG_X86_AX] = 3, + [PERF_REG_X86_CX] = 4, + [PERF_REG_X86_DX] = 5, + [PERF_REG_X86_BX] = 6, + [PERF_REG_X86_SP] = 7, + [PERF_REG_X86_BP] = 8, + [PERF_REG_X86_SI] = 9, + [PERF_REG_X86_DI] = 10, + [PERF_REG_X86_R8] = 11, + [PERF_REG_X86_R9] = 12, + [PERF_REG_X86_R10] = 13, + [PERF_REG_X86_R11] = 14, + [PERF_REG_X86_R12] = 15, + [PERF_REG_X86_R13] = 16, + [PERF_REG_X86_R14] = 17, + [PERF_REG_X86_R15] = 18, +}; + +static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos, + const struct intel_pt_blk_items *items, + u64 regs_mask) +{ + const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS]; + u32 mask = items->mask[INTEL_PT_GP_REGS_POS]; + u32 bit; + int i; + + for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) { + /* Get the PEBS gp_regs array index */ + int n = pebs_gp_regs[i] - 1; + + if (n < 0) + continue; + /* + * Add only registers that were requested (i.e. 'regs_mask') and + * that were provided (i.e. 'mask'), and update the resulting + * mask (i.e. 'intr_regs->mask') accordingly. + */ + if (mask & 1 << n && regs_mask & bit) { + intr_regs->mask |= bit; + *pos++ = gp_regs[n]; + } + } + + return pos; +} + +#ifndef PERF_REG_X86_XMM0 +#define PERF_REG_X86_XMM0 32 +#endif + +static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos, + const struct intel_pt_blk_items *items, + u64 regs_mask) +{ + u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0); + const u64 *xmm = items->xmm; + + /* + * If there are any XMM registers, then there should be all of them. + * Nevertheless, follow the logic to add only registers that were + * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'), + * and update the resulting mask (i.e. 'intr_regs->mask') accordingly. 
+ */ + intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0; + + for (; mask; mask >>= 1, xmm++) { + if (mask & 1) + *pos++ = *xmm; + } +} + +#define LBR_INFO_MISPRED (1ULL << 63) +#define LBR_INFO_IN_TX (1ULL << 62) +#define LBR_INFO_ABORT (1ULL << 61) +#define LBR_INFO_CYCLES 0xffff + +/* Refer kernel's intel_pmu_store_pebs_lbrs() */ +static u64 intel_pt_lbr_flags(u64 info) +{ + union { + struct branch_flags flags; + u64 result; + } u = { + .flags = { + .mispred = !!(info & LBR_INFO_MISPRED), + .predicted = !(info & LBR_INFO_MISPRED), + .in_tx = !!(info & LBR_INFO_IN_TX), + .abort = !!(info & LBR_INFO_ABORT), + .cycles = info & LBR_INFO_CYCLES, + } + }; + + return u.result; +} + +static void intel_pt_add_lbrs(struct branch_stack *br_stack, + const struct intel_pt_blk_items *items) +{ + u64 *to; + int i; + + br_stack->nr = 0; + + to = &br_stack->entries[0].from; + + for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) { + u32 mask = items->mask[i]; + const u64 *from = items->val[i]; + + for (; mask; mask >>= 3, from += 3) { + if ((mask & 7) == 7) { + *to++ = from[0]; + *to++ = from[1]; + *to++ = intel_pt_lbr_flags(from[2]); + br_stack->nr += 1; + } + } + } +} + +/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */ +#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3) + +static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) +{ + const struct intel_pt_blk_items *items = &ptq->state->items; + struct perf_sample sample = { .ip = 0, }; + union perf_event *event = ptq->event_buf; + struct intel_pt *pt = ptq->pt; + struct perf_evsel *evsel = pt->pebs_evsel; + u64 sample_type = evsel->attr.sample_type; + u64 id = evsel->id[0]; + u8 cpumode; + + if (intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_a_sample(ptq, event, &sample); + + sample.id = id; + sample.stream_id = id; + + if (!evsel->attr.freq) + sample.period = evsel->attr.sample_period; + + /* No support for non-zero CS base */ + if (items->has_ip) + sample.ip = items->ip; + else if 
(items->has_rip) + sample.ip = items->rip; + else + sample.ip = ptq->state->from_ip; + + /* No support for guest mode at this time */ + cpumode = sample.ip < ptq->pt->kernel_start ? + PERF_RECORD_MISC_USER : + PERF_RECORD_MISC_KERNEL; + + event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP; + + sample.cpumode = cpumode; + + if (sample_type & PERF_SAMPLE_TIME) { + u64 timestamp = 0; + + if (items->has_timestamp) + timestamp = items->timestamp; + else if (!pt->timeless_decoding) + timestamp = ptq->timestamp; + if (timestamp) + sample.time = tsc_to_perf_time(timestamp, &pt->tc); + } + + if (sample_type & PERF_SAMPLE_CALLCHAIN && + pt->synth_opts.callchain) { + thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain, + pt->synth_opts.callchain_sz, sample.ip, + pt->kernel_start); + sample.callchain = ptq->chain; + } + + if (sample_type & PERF_SAMPLE_REGS_INTR && + items->mask[INTEL_PT_GP_REGS_POS]) { + u64 regs[sizeof(sample.intr_regs.mask)]; + u64 regs_mask = evsel->attr.sample_regs_intr; + u64 *pos; + + sample.intr_regs.abi = items->is_32_bit ? 
+ PERF_SAMPLE_REGS_ABI_32 : + PERF_SAMPLE_REGS_ABI_64; + sample.intr_regs.regs = regs; + + pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask); + + intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask); + } + + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + struct { + struct branch_stack br_stack; + struct branch_entry entries[LBRS_MAX]; + } br; + + if (items->mask[INTEL_PT_LBR_0_POS] || + items->mask[INTEL_PT_LBR_1_POS] || + items->mask[INTEL_PT_LBR_2_POS]) { + intel_pt_add_lbrs(&br.br_stack, items); + sample.branch_stack = &br.br_stack; + } else if (pt->synth_opts.last_branch) { + intel_pt_copy_last_branch_rb(ptq); + sample.branch_stack = ptq->last_branch; + } else { + br.br_stack.nr = 0; + sample.branch_stack = &br.br_stack; + } + } + + if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address) + sample.addr = items->mem_access_address; + + if (sample_type & PERF_SAMPLE_WEIGHT) { + /* + * Refer kernel's setup_pebs_adaptive_sample_data() and + * intel_hsw_weight(). + */ + if (items->has_mem_access_latency) + sample.weight = items->mem_access_latency; + if (!sample.weight && items->has_tsx_aux_info) { + /* Cycles last block */ + sample.weight = (u32)items->tsx_aux_info; + } + } + + if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) { + u64 ax = items->has_rax ? 
items->rax : 0; + /* Refer kernel's intel_hsw_transaction() */ + u64 txn = (u8)(items->tsx_aux_info >> 32); + + /* For RTM XABORTs also log the abort code from AX */ + if (txn & PERF_TXN_TRANSACTION && ax & 1) + txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; + sample.transaction = txn; + } + + return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type); +} + static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, pid_t pid, pid_t tid, u64 ip, u64 timestamp) { @@ -1474,8 +1884,7 @@ static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip) } #define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \ - INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \ - INTEL_PT_CBR_CHG) + INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT) static int intel_pt_sample(struct intel_pt_queue *ptq) { @@ -1488,31 +1897,52 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; - if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) { - if (state->type & INTEL_PT_CBR_CHG) { + if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) { + /* + * Cycle count and instruction count only go together to create + * a valid IPC ratio when the cycle count changes. + */ + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; + } + + /* + * Do PEBS first to allow for the possibility that the PEBS timestamp + * precedes the current timestamp. 
+ */ + if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) { + err = intel_pt_synth_pebs_sample(ptq); + if (err) + return err; + } + + if (pt->sample_pwr_events) { + if (ptq->state->cbr != ptq->cbr_seen) { err = intel_pt_synth_cbr_sample(ptq); if (err) return err; } - if (state->type & INTEL_PT_MWAIT_OP) { - err = intel_pt_synth_mwait_sample(ptq); - if (err) - return err; - } - if (state->type & INTEL_PT_PWR_ENTRY) { - err = intel_pt_synth_pwre_sample(ptq); - if (err) - return err; - } - if (state->type & INTEL_PT_EX_STOP) { - err = intel_pt_synth_exstop_sample(ptq); - if (err) - return err; - } - if (state->type & INTEL_PT_PWR_EXIT) { - err = intel_pt_synth_pwrx_sample(ptq); - if (err) - return err; + if (state->type & INTEL_PT_PWR_EVT) { + if (state->type & INTEL_PT_MWAIT_OP) { + err = intel_pt_synth_mwait_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_PWR_ENTRY) { + err = intel_pt_synth_pwre_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_EX_STOP) { + err = intel_pt_synth_exstop_sample(ptq); + if (err) + return err; + } + if (state->type & INTEL_PT_PWR_EXIT) { + err = intel_pt_synth_pwrx_sample(ptq); + if (err) + return err; + } } } @@ -1650,10 +2080,83 @@ static void intel_pt_enable_sync_switch(struct intel_pt *pt) } } +/* + * To filter against time ranges, it is only necessary to look at the next start + * or end time. 
+ */ +static bool intel_pt_next_time(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + + if (ptq->sel_start) { + /* Next time is an end time */ + ptq->sel_start = false; + ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end; + return true; + } else if (ptq->sel_idx + 1 < pt->range_cnt) { + /* Next time is a start time */ + ptq->sel_start = true; + ptq->sel_idx += 1; + ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start; + return true; + } + + /* No next time */ + return false; +} + +static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp) +{ + int err; + + while (1) { + if (ptq->sel_start) { + if (ptq->timestamp >= ptq->sel_timestamp) { + /* After start time, so consider next time */ + intel_pt_next_time(ptq); + if (!ptq->sel_timestamp) { + /* No end time */ + return 0; + } + /* Check against end time */ + continue; + } + /* Before start time, so fast forward */ + ptq->have_sample = false; + if (ptq->sel_timestamp > *ff_timestamp) { + if (ptq->sync_switch) { + intel_pt_next_tid(ptq->pt, ptq); + ptq->switch_state = INTEL_PT_SS_UNKNOWN; + } + *ff_timestamp = ptq->sel_timestamp; + err = intel_pt_fast_forward(ptq->decoder, + ptq->sel_timestamp); + if (err) + return err; + } + return 0; + } else if (ptq->timestamp > ptq->sel_timestamp) { + /* After end time, so consider next time */ + if (!intel_pt_next_time(ptq)) { + /* No next time range, so stop decoding */ + ptq->have_sample = false; + ptq->switch_state = INTEL_PT_SS_NOT_TRACING; + return 1; + } + /* Check against next start time */ + continue; + } else { + /* Before end time */ + return 0; + } + } +} + static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) { const struct intel_pt_state *state = ptq->state; struct intel_pt *pt = ptq->pt; + u64 ff_timestamp = 0; int err; if (!pt->kernel_start) { @@ -1718,6 +2221,12 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) ptq->timestamp = state->timestamp; } + if 
(ptq->sel_timestamp) { + err = intel_pt_time_filter(ptq, &ff_timestamp); + if (err) + return err; + } + if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { *timestamp = ptq->timestamp; return 0; @@ -1859,7 +2368,6 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, switch (ptq->switch_state) { case INTEL_PT_SS_NOT_TRACING: - ptq->next_tid = -1; break; case INTEL_PT_SS_UNKNOWN: case INTEL_PT_SS_TRACING: @@ -1879,13 +2387,14 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, ptq->switch_state = INTEL_PT_SS_TRACING; break; case INTEL_PT_SS_EXPECTING_SWITCH_IP: - ptq->next_tid = tid; intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu); break; default: break; } + ptq->next_tid = -1; + return 1; } @@ -1914,6 +2423,44 @@ static int intel_pt_process_switch(struct intel_pt *pt, return machine__set_current_tid(pt->machine, cpu, -1, tid); } +static int intel_pt_context_switch_in(struct intel_pt *pt, + struct perf_sample *sample) +{ + pid_t pid = sample->pid; + pid_t tid = sample->tid; + int cpu = sample->cpu; + + if (pt->sync_switch) { + struct intel_pt_queue *ptq; + + ptq = intel_pt_cpu_to_ptq(pt, cpu); + if (ptq && ptq->sync_switch) { + ptq->next_tid = -1; + switch (ptq->switch_state) { + case INTEL_PT_SS_NOT_TRACING: + case INTEL_PT_SS_UNKNOWN: + case INTEL_PT_SS_TRACING: + break; + case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: + case INTEL_PT_SS_EXPECTING_SWITCH_IP: + ptq->switch_state = INTEL_PT_SS_TRACING; + break; + default: + break; + } + } + } + + /* + * If the current tid has not been updated yet, ensure it is now that + * a "switch in" event has occurred. 
+ */ + if (machine__get_current_tid(pt->machine, cpu) == tid) + return 0; + + return machine__set_current_tid(pt->machine, cpu, pid, tid); +} + static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) { @@ -1925,7 +2472,7 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, if (pt->have_sched_switch == 3) { if (!out) - return 0; + return intel_pt_context_switch_in(pt, sample); if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) { pr_err("Expecting CPU-wide context switch event\n"); return -EINVAL; @@ -2085,6 +2632,7 @@ static void intel_pt_free(struct perf_session *session) thread__put(pt->unknown_thread); addr_filters__exit(&pt->filts); zfree(&pt->filter); + zfree(&pt->time_ranges); free(pt); } @@ -2382,6 +2930,85 @@ static int intel_pt_perf_config(const char *var, const char *value, void *data) return 0; } +/* Find least TSC which converts to ns or later */ +static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt) +{ + u64 tsc, tm; + + tsc = perf_time_to_tsc(ns, &pt->tc); + + while (1) { + tm = tsc_to_perf_time(tsc, &pt->tc); + if (tm < ns) + break; + tsc -= 1; + } + + while (tm < ns) + tm = tsc_to_perf_time(++tsc, &pt->tc); + + return tsc; +} + +/* Find greatest TSC which converts to ns or earlier */ +static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt) +{ + u64 tsc, tm; + + tsc = perf_time_to_tsc(ns, &pt->tc); + + while (1) { + tm = tsc_to_perf_time(tsc, &pt->tc); + if (tm > ns) + break; + tsc += 1; + } + + while (tm > ns) + tm = tsc_to_perf_time(--tsc, &pt->tc); + + return tsc; +} + +static int intel_pt_setup_time_ranges(struct intel_pt *pt, + struct itrace_synth_opts *opts) +{ + struct perf_time_interval *p = opts->ptime_range; + int n = opts->range_num; + int i; + + if (!n || !p || pt->timeless_decoding) + return 0; + + pt->time_ranges = calloc(n, sizeof(struct range)); + if (!pt->time_ranges) + return -ENOMEM; + + pt->range_cnt = n; + + intel_pt_log("%s: %u 
range(s)\n", __func__, n); + + for (i = 0; i < n; i++) { + struct range *r = &pt->time_ranges[i]; + u64 ts = p[i].start; + u64 te = p[i].end; + + /* + * Take care to ensure the TSC range matches the perf-time range + * when converted back to perf-time. + */ + r->start = ts ? intel_pt_tsc_start(ts, pt) : 0; + r->end = te ? intel_pt_tsc_end(te, pt) : 0; + + intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n", + i, ts, te); + intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n", + i, r->start, r->end); + } + + return 0; +} + static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", @@ -2583,17 +3210,17 @@ int intel_pt_process_auxtrace_info(union perf_event *event, goto err_delete_thread; } - if (session->itrace_synth_opts && session->itrace_synth_opts->set) { + if (session->itrace_synth_opts->set) { pt->synth_opts = *session->itrace_synth_opts; } else { itrace_synth_opts__set_default(&pt->synth_opts, session->itrace_synth_opts->default_no_sample); - if (use_browser != -1) { + if (!session->itrace_synth_opts->default_no_sample && + !session->itrace_synth_opts->inject) { pt->synth_opts.branches = false; pt->synth_opts.callchain = true; } - if (session->itrace_synth_opts) - pt->synth_opts.thread_stack = + pt->synth_opts.thread_stack = session->itrace_synth_opts->thread_stack; } @@ -2613,6 +3240,10 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000; } + err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts); + if (err) + goto err_delete_thread; + if (pt->synth_opts.calls) pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_TRACE_END; @@ -2653,6 +3284,7 @@ err_free_queues: err_free: addr_filters__exit(&pt->filts); zfree(&pt->filter); + zfree(&pt->time_ranges); free(pt); return err; } diff --git a/tools/perf/util/intel-pt.h 
b/tools/perf/util/intel-pt.h index e13b14e5a37b..c7d6068e3a6b 100644 --- a/tools/perf/util/intel-pt.h +++ b/tools/perf/util/intel-pt.h @@ -1,16 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * intel_pt.h: Intel Processor Trace support * Copyright (c) 2013-2015, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #ifndef INCLUDE__PERF_INTEL_PT_H__ diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c index 89715b64a315..84e5304e151a 100644 --- a/tools/perf/util/intlist.c +++ b/tools/perf/util/intlist.c @@ -1,8 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Based on intlist.c by: * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Licensed under the GPLv2. 
*/ #include <errno.h> diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index eda28d3570bc..18c34f0c1966 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -28,7 +28,8 @@ #include "genelf.h" #include "../builtin.h" -#include "sane_ctype.h" +#include <linux/ctype.h> +#include <linux/zalloc.h> struct jit_buf_desc { struct perf_data *output; @@ -431,14 +432,12 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) jd->unwinding_data, jd->eh_frame_hdr_size, jd->unwinding_size); if (jd->debug_data && jd->nr_debug_entries) { - free(jd->debug_data); - jd->debug_data = NULL; + zfree(&jd->debug_data); jd->nr_debug_entries = 0; } if (jd->unwinding_data && jd->eh_frame_hdr_size) { - free(jd->unwinding_data); - jd->unwinding_data = NULL; + zfree(&jd->unwinding_data); jd->eh_frame_hdr_size = 0; jd->unwinding_mapped_size = 0; jd->unwinding_size = 0; diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h index c6b9b67f43bf..f2c3823cc81a 100644 --- a/tools/perf/util/jitdump.h +++ b/tools/perf/util/jitdump.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * jitdump.h: jitted code info encapsulation file format * diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 5b0b60f00275..9f0470ecbca9 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -9,6 +9,7 @@ #include <stdio.h> #include <stdlib.h> #include <linux/err.h> +#include <linux/zalloc.h> #include "debug.h" #include "llvm-utils.h" #include "config.h" @@ -352,8 +353,7 @@ void llvm__get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts) " \toption in [llvm] to \"\" to suppress this detection.\n\n", *kbuild_dir); - free(*kbuild_dir); - *kbuild_dir = NULL; + zfree(kbuild_dir); goto errout; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 3c520baa198c..cf826eca3aaf 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -24,9 +24,10 @@ 
#include "asm/bug.h" #include "bpf-event.h" -#include "sane_ctype.h" +#include <linux/ctype.h> #include <symbol/kallsyms.h> #include <linux/mman.h> +#include <linux/zalloc.h> static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock); @@ -209,6 +210,18 @@ void machine__exit(struct machine *machine) for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; + struct thread *thread, *n; + /* + * Forget about the dead, at this point whatever threads were + * left in the dead lists better have a reference count taken + * by who is using them, and then, when they drop those references + * and it finally hits zero, thread__put() will check and see that + * its not in the dead threads list and will not try to remove it + * from there, just calling thread__delete() straight away. + */ + list_for_each_entry_safe(thread, n, &threads->dead, node) + list_del_init(&thread->node); + exit_rwsem(&threads->lock); } } @@ -704,12 +717,12 @@ static int machine__process_ksymbol_register(struct machine *machine, return -ENOMEM; map->start = event->ksymbol_event.addr; - map->pgoff = map->start; map->end = map->start + event->ksymbol_event.len; map_groups__insert(&machine->kmaps, map); } - sym = symbol__new(event->ksymbol_event.addr, event->ksymbol_event.len, + sym = symbol__new(map->map_ip(map, map->start), + event->ksymbol_event.len, 0, 0, event->ksymbol_event.name); if (!sym) return -ENOMEM; @@ -797,7 +810,7 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start, out: /* put the dso here, corresponding to machine__findnew_module_dso */ dso__put(dso); - free(m.name); + zfree(&m.name); return map; } @@ -924,7 +937,8 @@ const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL}; * symbol_name if it's not that important. 
*/ static int machine__get_running_kernel_start(struct machine *machine, - const char **symbol_name, u64 *start) + const char **symbol_name, + u64 *start, u64 *end) { char filename[PATH_MAX]; int i, err = -1; @@ -949,6 +963,11 @@ static int machine__get_running_kernel_start(struct machine *machine, *symbol_name = name; *start = addr; + + err = kallsyms__get_function_start(filename, "_etext", &addr); + if (!err) + *end = addr; + return 0; } @@ -1234,9 +1253,10 @@ static char *get_kernel_version(const char *root_dir) if (!file) return NULL; - version[0] = '\0'; tmp = fgets(version, sizeof(version), file); fclose(file); + if (!tmp) + return NULL; name = strstr(version, prefix); if (!name) @@ -1330,7 +1350,7 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg, if (m.kmod) ret = map_groups__set_module_path(mg, path, &m); - free(m.name); + zfree(&m.name); if (ret) goto out; @@ -1440,7 +1460,7 @@ int machine__create_kernel_maps(struct machine *machine) struct dso *kernel = machine__get_kernel(machine); const char *name = NULL; struct map *map; - u64 addr = 0; + u64 start = 0, end = ~0ULL; int ret; if (kernel == NULL) @@ -1459,9 +1479,9 @@ int machine__create_kernel_maps(struct machine *machine) "continuing anyway...\n", machine->pid); } - if (!machine__get_running_kernel_start(machine, &name, &addr)) { + if (!machine__get_running_kernel_start(machine, &name, &start, &end)) { if (name && - map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, addr)) { + map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, start)) { machine__destroy_kernel_maps(machine); ret = -1; goto out_put; @@ -1471,16 +1491,19 @@ int machine__create_kernel_maps(struct machine *machine) * we have a real start address now, so re-order the kmaps * assume it's the last in the kmaps */ - machine__update_kernel_mmap(machine, addr, ~0ULL); + machine__update_kernel_mmap(machine, start, end); } if (machine__create_extra_kernel_maps(machine, kernel)) pr_debug("Problems creating 
extra kernel maps, continuing anyway...\n"); - /* update end address of the kernel map using adjacent module address */ - map = map__next(machine__kernel_map(machine)); - if (map) - machine__set_kernel_mmap(machine, addr, map->start); + if (end == ~0ULL) { + /* update end address of the kernel map using adjacent module address */ + map = map__next(machine__kernel_map(machine)); + if (map) + machine__set_kernel_mmap(machine, start, map->start); + } + out_put: dso__put(kernel); return ret; @@ -1748,9 +1771,11 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, if (threads->last_match == th) threads__set_last_match(threads, NULL); - BUG_ON(refcount_read(&th->refcnt) == 0); if (lock) down_write(&threads->lock); + + BUG_ON(refcount_read(&th->refcnt) == 0); + rb_erase_cached(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); --threads->nr; @@ -1760,9 +1785,16 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, * will be called and we will remove it from the dead_threads list. */ list_add_tail(&th->node, &threads->dead); + + /* + * We need to do the put here because if this is the last refcount, + * then we will be touching the threads->dead head when removing the + * thread. 
+ */ + thread__put(th); + if (lock) up_write(&threads->lock); - thread__put(th); } void machine__remove_thread(struct machine *machine, struct thread *th) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index ee71efb9db62..668410b1d426 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -12,10 +12,10 @@ #include "thread.h" #include "vdso.h" #include "build-id.h" -#include "util.h" #include "debug.h" #include "machine.h" #include <linux/string.h> +#include <linux/zalloc.h> #include "srcline.h" #include "namespaces.h" #include "unwind.h" @@ -405,6 +405,7 @@ size_t map__fprintf(struct map *map, FILE *fp) size_t map__fprintf_dsoname(struct map *map, FILE *fp) { + char buf[symbol_conf.pad_output_len_dso + 1]; const char *dsoname = "[unknown]"; if (map && map->dso) { @@ -414,6 +415,11 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp) dsoname = map->dso->name; } + if (symbol_conf.pad_output_len_dso) { + scnprintf_pad(buf, symbol_conf.pad_output_len_dso, "%s", dsoname); + dsoname = buf; + } + return fprintf(fp, "%s", dsoname); } @@ -470,8 +476,11 @@ int map__fprintf_srccode(struct map *map, u64 addr, goto out_free_line; ret = fprintf(fp, "|%-8d %.*s", line, len, srccode); - state->srcfile = srcfile; - state->line = line; + + if (state) { + state->srcfile = srcfile; + state->line = line; + } return ret; out_free_line: diff --git a/tools/perf/util/map_groups.h b/tools/perf/util/map_groups.h index 4dcda33e0fdf..5f25efa6d6bc 100644 --- a/tools/perf/util/map_groups.h +++ b/tools/perf/util/map_groups.h @@ -88,4 +88,6 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE struct map *map_groups__find_by_name(struct map_groups *mg, const char *name); +int map_groups__merge_in(struct map_groups *kmaps, struct map *new_map); + #endif // __PERF_MAP_GROUPS_H diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c index c6fd81c02586..cacc2fc4dcbd 100644 --- a/tools/perf/util/mem2node.c +++ 
b/tools/perf/util/mem2node.c @@ -1,8 +1,8 @@ #include <errno.h> #include <inttypes.h> #include <linux/bitmap.h> +#include <linux/zalloc.h> #include "mem2node.h" -#include "util.h" struct phys_entry { struct rb_node rb_node; diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index b8d864ed4afe..416a9015405e 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -1,15 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2017, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ /* Manage metrics and groups of metrics from JSON files */ @@ -26,7 +17,8 @@ #include "pmu-events/pmu-events.h" #include "strlist.h" #include <assert.h> -#include <ctype.h> +#include <linux/ctype.h> +#include <linux/zalloc.h> struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct perf_evsel *evsel, @@ -94,26 +86,49 @@ struct egroup { const char *metric_expr; }; -static struct perf_evsel *find_evsel(struct perf_evlist *perf_evlist, - const char **ids, - int idnum, - struct perf_evsel **metric_events) +static bool record_evsel(int *ind, struct perf_evsel **start, + int idnum, + struct perf_evsel **metric_events, + struct perf_evsel *ev) +{ + metric_events[*ind] = ev; + if (*ind == 0) + *start = ev; + if (++*ind == idnum) { + metric_events[*ind] = NULL; + return true; + } + return false; +} + +static struct perf_evsel *find_evsel_group(struct perf_evlist *perf_evlist, + const char **ids, + int idnum, + struct perf_evsel **metric_events) { struct perf_evsel *ev, *start = NULL; int ind 
= 0; evlist__for_each_entry (perf_evlist, ev) { + if (ev->collect_stat) + continue; if (!strcmp(ev->name, ids[ind])) { - metric_events[ind] = ev; - if (ind == 0) - start = ev; - if (++ind == idnum) { - metric_events[ind] = NULL; + if (record_evsel(&ind, &start, idnum, + metric_events, ev)) return start; - } } else { + /* + * We saw some other event that is not + * in our list of events. Discard + * the whole match and start again. + */ ind = 0; start = NULL; + if (!strcmp(ev->name, ids[ind])) { + if (record_evsel(&ind, &start, idnum, + metric_events, ev)) + return start; + } } } /* @@ -143,8 +158,8 @@ static int metricgroup__setup_events(struct list_head *groups, ret = -ENOMEM; break; } - evsel = find_evsel(perf_evlist, eg->ids, eg->idnum, - metric_events); + evsel = find_evsel_group(perf_evlist, eg->ids, eg->idnum, + metric_events); if (!evsel) { pr_debug("Cannot resolve %s: %s\n", eg->metric_name, eg->metric_expr); @@ -221,7 +236,7 @@ static struct rb_node *mep_new(struct rblist *rl __maybe_unused, goto out_name; return &me->nd; out_name: - free((char *)me->name); + zfree(&me->name); out_me: free(me); return NULL; @@ -249,7 +264,7 @@ static void mep_delete(struct rblist *rl __maybe_unused, struct mep *me = container_of(nd, struct mep, nd); strlist__delete(me->metrics); - free((void *)me->name); + zfree(&me->name); free(me); } @@ -317,10 +332,9 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, struct mep *me; char *s; + g = skip_spaces(g); if (*g == 0) g = "No_group"; - while (isspace(*g)) - g++; if (filter && !strstr(g, filter)) continue; if (raw) @@ -362,7 +376,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, struct mep *me = container_of(node, struct mep, nd); if (metricgroups) - printf("%s%s%s", me->name, metrics ? ":" : "", raw ? " " : "\n"); + printf("%s%s%s", me->name, metrics && !raw ? ":" : "", raw ? 
" " : "\n"); if (metrics) metricgroup__print_strlist(me->metrics, raw); next = rb_next(node); @@ -396,6 +410,7 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, const char **ids; int idnum; struct egroup *eg; + bool no_group = false; pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); @@ -406,11 +421,25 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, strbuf_addf(events, ","); for (j = 0; j < idnum; j++) { pr_debug("found event %s\n", ids[j]); + /* + * Duration time maps to a software event and can make + * groups not count. Always use it outside a + * group. + */ + if (!strcmp(ids[j], "duration_time")) { + if (j > 0) + strbuf_addf(events, "}:W,"); + strbuf_addf(events, "duration_time"); + no_group = true; + continue; + } strbuf_addf(events, "%s%s", - j == 0 ? "{" : ",", + j == 0 || no_group ? "{" : ",", ids[j]); + no_group = false; } - strbuf_addf(events, "}:W"); + if (!no_group) + strbuf_addf(events, "}:W"); eg = malloc(sizeof(struct egroup)); if (!eg) { @@ -461,8 +490,9 @@ static void metricgroup__free_egroups(struct list_head *group_list) list_for_each_entry_safe (eg, egtmp, group_list, nd) { for (i = 0; i < eg->idnum; i++) - free((char *)eg->ids[i]); - free(eg->ids); + zfree(&eg->ids[i]); + zfree(&eg->ids); + list_del_init(&eg->nd); free(eg); } } diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index cdc7740fc181..9f0b6391af33 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -1,15 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011-2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> * * Parts came from evlist.c builtin-{top,stat,record}.c, see those files for further * copyright notes. - * - * Released under the GPL v2. 
(and only v2, not any later version) */ #include <sys/mman.h> #include <inttypes.h> #include <asm/bug.h> +#include <linux/zalloc.h> #ifdef HAVE_LIBNUMA_SUPPORT #include <numaif.h> #endif @@ -157,6 +157,10 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb } #ifdef HAVE_AIO_SUPPORT +static int perf_mmap__aio_enabled(struct perf_mmap *map) +{ + return map->aio.nr_cblocks > 0; +} #ifdef HAVE_LIBNUMA_SUPPORT static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) @@ -198,7 +202,7 @@ static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affi return 0; } -#else +#else /* !HAVE_LIBNUMA_SUPPORT */ static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) { map->aio.data[idx] = malloc(perf_mmap__mmap_len(map)); @@ -285,81 +289,12 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map) zfree(&map->aio.cblocks); zfree(&map->aio.aiocb); } - -int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx, - int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off), - off_t *off) +#else /* !HAVE_AIO_SUPPORT */ +static int perf_mmap__aio_enabled(struct perf_mmap *map __maybe_unused) { - u64 head = perf_mmap__read_head(md); - unsigned char *data = md->base + page_size; - unsigned long size, size0 = 0; - void *buf; - int rc = 0; - - rc = perf_mmap__read_init(md); - if (rc < 0) - return (rc == -EAGAIN) ? 0 : -1; - - /* - * md->base data is copied into md->data[idx] buffer to - * release space in the kernel buffer as fast as possible, - * thru perf_mmap__consume() below. - * - * That lets the kernel to proceed with storing more - * profiling data into the kernel buffer earlier than other - * per-cpu kernel buffers are handled. - * - * Coping can be done in two steps in case the chunk of - * profiling data crosses the upper bound of the kernel buffer. 
- * In this case we first move part of data from md->start - * till the upper bound and then the reminder from the - * beginning of the kernel buffer till the end of - * the data chunk. - */ - - size = md->end - md->start; - - if ((md->start & md->mask) + size != (md->end & md->mask)) { - buf = &data[md->start & md->mask]; - size = md->mask + 1 - (md->start & md->mask); - md->start += size; - memcpy(md->aio.data[idx], buf, size); - size0 = size; - } - - buf = &data[md->start & md->mask]; - size = md->end - md->start; - md->start += size; - memcpy(md->aio.data[idx] + size0, buf, size); - - /* - * Increment md->refcount to guard md->data[idx] buffer - * from premature deallocation because md object can be - * released earlier than aio write request started - * on mmap->data[idx] is complete. - * - * perf_mmap__put() is done at record__aio_complete() - * after started request completion. - */ - perf_mmap__get(md); - - md->prev = head; - perf_mmap__consume(md); - - rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off); - if (!rc) { - *off += size0 + size; - } else { - /* - * Decrement md->refcount back if aio write - * operation failed to start. 
- */ - perf_mmap__put(md); - } - - return rc; + return 0; } -#else + static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused, struct mmap_params *mp __maybe_unused) { @@ -374,6 +309,10 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused) void perf_mmap__munmap(struct perf_mmap *map) { perf_mmap__aio_munmap(map); + if (map->data != NULL) { + munmap(map->data, perf_mmap__mmap_len(map)); + map->data = NULL; + } if (map->base != NULL) { munmap(map->base, perf_mmap__mmap_len(map)); map->base = NULL; @@ -440,6 +379,21 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c perf_mmap__setup_affinity_mask(map, mp); + map->flush = mp->flush; + + map->comp_level = mp->comp_level; + + if (map->comp_level && !perf_mmap__aio_enabled(map)) { + map->data = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); + if (map->data == MAP_FAILED) { + pr_debug2("failed to mmap data buffer, error %d\n", + errno); + map->data = NULL; + return -1; + } + } + if (auxtrace_mmap__mmap(&map->auxtrace_mmap, &mp->auxtrace_mp, map->base, fd)) return -1; @@ -492,7 +446,7 @@ static int __perf_mmap__read_init(struct perf_mmap *md) md->start = md->overwrite ? head : old; md->end = md->overwrite ? old : head; - if (md->start == md->end) + if ((md->end - md->start) < md->flush) return -EAGAIN; size = md->end - md->start; @@ -538,7 +492,7 @@ int perf_mmap__push(struct perf_mmap *md, void *to, rc = perf_mmap__read_init(md); if (rc < 0) - return (rc == -EAGAIN) ? 0 : -1; + return (rc == -EAGAIN) ? 
1 : -1; size = md->end - md->start; diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index e566c19b242b..274ce389cd84 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -39,6 +39,9 @@ struct perf_mmap { } aio; #endif cpu_set_t affinity_mask; + u64 flush; + void *data; + int comp_level; }; /* @@ -70,7 +73,7 @@ enum bkw_mmap_state { }; struct mmap_params { - int prot, mask, nr_cblocks, affinity; + int prot, mask, nr_cblocks, affinity, flush, comp_level; struct auxtrace_mmap_params auxtrace_mp; }; @@ -98,18 +101,6 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map); int perf_mmap__push(struct perf_mmap *md, void *to, int push(struct perf_mmap *map, void *to, void *buf, size_t size)); -#ifdef HAVE_AIO_SUPPORT -int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx, - int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off), - off_t *off); -#else -static inline int perf_mmap__aio_push(struct perf_mmap *md __maybe_unused, void *to __maybe_unused, int idx __maybe_unused, - int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off) __maybe_unused, - off_t *off __maybe_unused) -{ - return 0; -} -#endif size_t perf_mmap__mmap_len(struct perf_mmap *map); diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index aed170bd4384..46d3a7754897 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -1,14 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. 
* * Copyright (C) 2017 Hari Bathini, IBM Corporation */ #include "namespaces.h" -#include "util.h" #include "event.h" +#include "get_current_dir_name.h" #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> @@ -19,6 +17,7 @@ #include <string.h> #include <unistd.h> #include <asm/bug.h> +#include <linux/zalloc.h> struct namespaces *namespaces__new(struct namespaces_event *event) { diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index d5f46c09ea31..004430c0de93 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -1,7 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. * * Copyright (C) 2017 Hari Bathini, IBM Corporation */ @@ -15,6 +13,10 @@ #include <linux/refcount.h> #include <linux/types.h> +#ifndef HAVE_SETNS_SUPPORT +int setns(int fd, int nstype); +#endif + struct namespaces_event; struct namespaces { diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 989fed6f43b5..bb5f34b7ab44 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -138,7 +138,7 @@ static struct ordered_event *alloc_event(struct ordered_events *oe, if (!list_empty(cache)) { new = list_entry(cache->next, struct ordered_event, list); - list_del(&new->list); + list_del_init(&new->list); } else if (oe->buffer) { new = &oe->buffer->event[oe->buffer_idx]; if (++oe->buffer_idx == MAX_SAMPLE_BUFFER) @@ -394,13 +394,13 @@ void ordered_events__free(struct ordered_events *oe) * yet, we need to free only allocated ones ... */ if (oe->buffer) { - list_del(&oe->buffer->list); + list_del_init(&oe->buffer->list); ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe); } /* ... 
and continue with the rest */ list_for_each_entry_safe(buffer, tmp, &oe->to_free, list) { - list_del(&buffer->list); + list_del_init(&buffer->list); ordered_events_buffer__free(buffer, MAX_SAMPLE_BUFFER, oe); } } diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index bd779d9f4d1e..726e8d9e8c54 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include "perf.h" -#include "util/util.h" #include "util/debug.h" #include <subcmd/parse-options.h> #include "util/parse-branch-options.h" +#include <stdlib.h> #define BRANCH_OPT(n, m) \ { .name = n, .mode = (m) } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5ef4939408f2..371ff3aee769 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/hw_breakpoint.h> #include <linux/err.h> +#include <linux/zalloc.h> #include <dirent.h> #include <errno.h> #include <sys/ioctl.h> @@ -317,10 +318,12 @@ static struct perf_evsel * __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, char *name, struct perf_pmu *pmu, - struct list_head *config_terms, bool auto_merge_stats) + struct list_head *config_terms, bool auto_merge_stats, + const char *cpu_list) { struct perf_evsel *evsel; - struct cpu_map *cpus = pmu ? pmu->cpus : NULL; + struct cpu_map *cpus = pmu ? pmu->cpus : + cpu_list ? cpu_map__new(cpu_list) : NULL; event_attr_init(attr); @@ -348,7 +351,25 @@ static int add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, char *name, struct list_head *config_terms) { - return __add_event(list, idx, attr, name, NULL, config_terms, false) ? 0 : -ENOMEM; + return __add_event(list, idx, attr, name, NULL, config_terms, false, NULL) ? 
0 : -ENOMEM; +} + +static int add_event_tool(struct list_head *list, int *idx, + enum perf_tool_event tool_event) +{ + struct perf_evsel *evsel; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_DUMMY, + }; + + evsel = __add_event(list, idx, &attr, NULL, NULL, NULL, false, "0"); + if (!evsel) + return -ENOMEM; + evsel->tool_event = tool_event; + if (tool_event == PERF_TOOL_DURATION_TIME) + evsel->unit = strdup("ns"); + return 0; } static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) @@ -631,7 +652,7 @@ static int add_bpf_event(const char *group, const char *event, int fd, pr_debug("Failed to add BPF event %s:%s\n", group, event); list_for_each_entry_safe(evsel, tmp, &new_evsels, node) { - list_del(&evsel->node); + list_del_init(&evsel->node); perf_evsel__delete(evsel); } return err; @@ -930,6 +951,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite", [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite", [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config", + [PARSE_EVENTS__TERM_TYPE_PERCORE] = "percore", }; static bool config_term_shrinked; @@ -950,6 +972,7 @@ config_term_avail(int term_type, struct parse_events_error *err) case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_NAME: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: + case PARSE_EVENTS__TERM_TYPE_PERCORE: return true; default: if (!err) @@ -1041,6 +1064,14 @@ do { \ case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: CHECK_TYPE_VAL(NUM); break; + case PARSE_EVENTS__TERM_TYPE_PERCORE: + CHECK_TYPE_VAL(NUM); + if ((unsigned int)term->val.num > 1) { + err->str = strdup("expected 0 or 1"); + err->idx = term->err_val; + return -EINVAL; + } + break; default: err->str = strdup("unknown term"); err->idx = term->err_term; @@ -1179,6 +1210,10 @@ do { \ case PARSE_EVENTS__TERM_TYPE_DRV_CFG: ADD_CONFIG_TERM(DRV_CFG, drv_cfg, term->val.str); break; + case 
PARSE_EVENTS__TERM_TYPE_PERCORE: + ADD_CONFIG_TERM(PERCORE, percore, + term->val.num ? true : false); + break; default: break; } @@ -1233,6 +1268,25 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, get_config_name(head_config), &config_terms); } +int parse_events_add_tool(struct parse_events_state *parse_state, + struct list_head *list, + enum perf_tool_event tool_event) +{ + return add_event_tool(list, &parse_state->idx, tool_event); +} + +static bool config_term_percore(struct list_head *config_terms) +{ + struct perf_evsel_config_term *term; + + list_for_each_entry(term, config_terms, list) { + if (term->type == PERF_EVSEL__CONFIG_TERM_PERCORE) + return term->val.percore; + } + + return false; +} + int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, struct list_head *head_config, @@ -1267,7 +1321,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats); + evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, + auto_merge_stats, NULL); if (evsel) { evsel->pmu_name = name; evsel->use_uncore_alias = use_uncore_alias; @@ -1295,7 +1350,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel = __add_event(list, &parse_state->idx, &attr, get_config_name(head_config), pmu, - &config_terms, auto_merge_stats); + &config_terms, auto_merge_stats, NULL); if (evsel) { evsel->unit = info.unit; evsel->scale = info.scale; @@ -1305,6 +1360,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel->metric_name = info.metric_name; evsel->pmu_name = name; evsel->use_uncore_alias = use_uncore_alias; + evsel->percore = config_term_percore(&evsel->config_terms); } return evsel ? 
0 : -ENOMEM; @@ -2429,6 +2485,25 @@ out_enomem: return evt_num; } +static void print_tool_event(const char *name, const char *event_glob, + bool name_only) +{ + if (event_glob && !strglobmatch(name, event_glob)) + return; + if (name_only) + printf("%s ", name); + else + printf(" %-50s [%s]\n", name, "Tool event"); + +} + +void print_tool_events(const char *event_glob, bool name_only) +{ + print_tool_event("duration_time", event_glob, name_only); + if (pager_in_use()) + printf("\n"); +} + void print_symbol_events(const char *event_glob, unsigned type, struct event_symbol *syms, unsigned max, bool name_only) @@ -2512,6 +2587,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_symbol_events(event_glob, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX, name_only); + print_tool_events(event_glob, name_only); print_hwcache_events(event_glob, name_only); diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 5ed035cbcbb7..f7139e1a2fd3 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -75,6 +75,7 @@ enum { PARSE_EVENTS__TERM_TYPE_NOOVERWRITE, PARSE_EVENTS__TERM_TYPE_OVERWRITE, PARSE_EVENTS__TERM_TYPE_DRV_CFG, + PARSE_EVENTS__TERM_TYPE_PERCORE, __PARSE_EVENTS__TERM_TYPE_NR, }; @@ -160,6 +161,10 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, u32 type, u64 config, struct list_head *head_config); +enum perf_tool_event; +int parse_events_add_tool(struct parse_events_state *parse_state, + struct list_head *list, + enum perf_tool_event tool_event); int parse_events_add_cache(struct list_head *list, int *idx, char *type, char *op_result1, char *op_result2, struct parse_events_error *error, @@ -200,6 +205,7 @@ extern struct event_symbol event_symbols_sw[]; void print_symbol_events(const char *event_glob, unsigned type, struct event_symbol *syms, unsigned max, bool name_only); +void print_tool_events(const char *event_glob, bool 
name_only); void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only); int print_hwcache_events(const char *event_glob, bool name_only); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 7805c71aaae2..ca6098874fe2 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -15,6 +15,7 @@ #include "../perf.h" #include "parse-events.h" #include "parse-events-bison.h" +#include "evsel.h" char *parse_events_get_text(yyscan_t yyscanner); YYSTYPE *parse_events_get_lval(yyscan_t yyscanner); @@ -154,6 +155,14 @@ static int sym(yyscan_t scanner, int type, int config) return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW; } +static int tool(yyscan_t scanner, enum perf_tool_event event) +{ + YYSTYPE *yylval = parse_events_get_lval(scanner); + + yylval->num = event; + return PE_VALUE_SYM_TOOL; +} + static int term(yyscan_t scanner, int type) { YYSTYPE *yylval = parse_events_get_lval(scanner); @@ -274,6 +283,7 @@ inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); } no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); } no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } +percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } @@ -322,7 +332,7 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } -duration_time { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } +duration_time { return tool(yyscanner, 
PERF_TOOL_DURATION_TIME); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } /* diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 44819bdb037d..f1c36ed1cf36 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -14,6 +14,7 @@ #include <linux/types.h> #include "util.h" #include "pmu.h" +#include "evsel.h" #include "debug.h" #include "parse-events.h" #include "parse-events-bison.h" @@ -45,6 +46,7 @@ static void inc_group_count(struct list_head *list, %token PE_START_EVENTS PE_START_TERMS %token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM +%token PE_VALUE_SYM_TOOL %token PE_EVENT_NAME %token PE_NAME %token PE_BPF_OBJECT PE_BPF_SOURCE @@ -58,6 +60,7 @@ static void inc_group_count(struct list_head *list, %type <num> PE_VALUE %type <num> PE_VALUE_SYM_HW %type <num> PE_VALUE_SYM_SW +%type <num> PE_VALUE_SYM_TOOL %type <num> PE_RAW %type <num> PE_TERM %type <str> PE_NAME @@ -321,6 +324,15 @@ value_sym sep_slash_slash_dc ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, NULL)); $$ = list; } +| +PE_VALUE_SYM_TOOL sep_slash_slash_dc +{ + struct list_head *list; + + ALLOC_LIST(list); + ABORT_ON(parse_events_add_tool(_parse_state, list, $1)); + $$ = list; +} event_legacy_cache: PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config @@ -468,7 +480,6 @@ event_bpf_file: PE_BPF_OBJECT opt_event_config { struct parse_events_state *parse_state = _parse_state; - struct parse_events_error *error = parse_state->error; struct list_head *list; ALLOC_LIST(list); @@ -614,7 +625,6 @@ PE_TERM PE_NAME array '=' PE_NAME { struct parse_events_term *term; - int i; ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $4, &@1, &@4)); diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c index e6599e290f46..ef46c2848808 100644 --- a/tools/perf/util/parse-regs-options.c +++ 
b/tools/perf/util/parse-regs-options.c @@ -1,17 +1,22 @@ // SPDX-License-Identifier: GPL-2.0 -#include "perf.h" -#include "util/util.h" +#include <stdbool.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <stdio.h> #include "util/debug.h" #include <subcmd/parse-options.h> +#include "util/perf_regs.h" #include "util/parse-regs-options.h" -int -parse_regs(const struct option *opt, const char *str, int unset) +static int +__parse_regs(const struct option *opt, const char *str, int unset, bool intr) { uint64_t *mode = (uint64_t *)opt->value; const struct sample_reg *r; char *s, *os = NULL, *p; int ret = -1; + uint64_t mask; if (unset) return 0; @@ -22,6 +27,11 @@ parse_regs(const struct option *opt, const char *str, int unset) if (*mode) return -1; + if (intr) + mask = arch__intr_reg_mask(); + else + mask = arch__user_reg_mask(); + /* str may be NULL in case no arg is passed to -I */ if (str) { /* because str is read-only */ @@ -37,19 +47,20 @@ parse_regs(const struct option *opt, const char *str, int unset) if (!strcmp(s, "?")) { fprintf(stderr, "available registers: "); for (r = sample_reg_masks; r->name; r++) { - fprintf(stderr, "%s ", r->name); + if (r->mask & mask) + fprintf(stderr, "%s ", r->name); } fputc('\n', stderr); /* just printing available regs */ return -1; } for (r = sample_reg_masks; r->name; r++) { - if (!strcasecmp(s, r->name)) + if ((r->mask & mask) && !strcasecmp(s, r->name)) break; } if (!r->name) { - ui__warning("unknown register %s," - " check man page\n", s); + ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n", + s, intr ? 
"-I" : "--user-regs="); goto error; } @@ -65,8 +76,20 @@ parse_regs(const struct option *opt, const char *str, int unset) /* default to all possible regs */ if (*mode == 0) - *mode = PERF_REGS_MASK; + *mode = mask; error: free(os); return ret; } + +int +parse_user_regs(const struct option *opt, const char *str, int unset) +{ + return __parse_regs(opt, str, unset, false); +} + +int +parse_intr_regs(const struct option *opt, const char *str, int unset) +{ + return __parse_regs(opt, str, unset, true); +} diff --git a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h index cdefb1acf6be..2b23d25c6394 100644 --- a/tools/perf/util/parse-regs-options.h +++ b/tools/perf/util/parse-regs-options.h @@ -2,5 +2,6 @@ #ifndef _PERF_PARSE_REGS_OPTIONS_H #define _PERF_PARSE_REGS_OPTIONS_H 1 struct option; -int parse_regs(const struct option *opt, const char *str, int unset); +int parse_user_regs(const struct option *opt, const char *str, int unset); +int parse_intr_regs(const struct option *opt, const char *str, int unset); #endif /* _PERF_PARSE_REGS_OPTIONS_H */ diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 2acfcc527cac..2774cec1f15f 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -13,6 +13,16 @@ int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused, return SDT_ARG_SKIP; } +uint64_t __weak arch__intr_reg_mask(void) +{ + return PERF_REGS_MASK; +} + +uint64_t __weak arch__user_reg_mask(void) +{ + return PERF_REGS_MASK; +} + #ifdef HAVE_PERF_REGS_SUPPORT int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) { diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index c9319f8d17a6..47fe34e5f7d5 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -12,6 +12,7 @@ struct sample_reg { uint64_t mask; }; #define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) } +#define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) } #define SMPL_REG_END { 
.name = NULL } extern const struct sample_reg sample_reg_masks[]; @@ -22,16 +23,22 @@ enum { }; int arch_sdt_arg_parse_op(char *old_op, char **new_op); +uint64_t arch__intr_reg_mask(void); +uint64_t arch__user_reg_mask(void); #ifdef HAVE_PERF_REGS_SUPPORT #include <perf_regs.h> +#define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP)) + int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); #else #define PERF_REGS_MASK 0 #define PERF_REGS_MAX 0 +#define DWARF_MINIMAL_REGS PERF_REGS_MASK + static inline const char *perf_reg_name(int id __maybe_unused) { return NULL; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index e0429f4ef335..f32b710347db 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/list.h> #include <linux/compiler.h> +#include <linux/string.h> +#include <linux/zalloc.h> #include <sys/types.h> #include <errno.h> #include <fcntl.h> @@ -13,7 +15,6 @@ #include <api/fs/fs.h> #include <locale.h> #include <regex.h> -#include "util.h" #include "pmu.h" #include "parse-events.h" #include "cpumap.h" @@ -394,7 +395,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; /* Remove trailing newline from sysfs file */ - rtrim(buf); + strim(buf); return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL, NULL, NULL, NULL); @@ -700,6 +701,46 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) return map; } +static bool pmu_uncore_alias_match(const char *pmu_name, const char *name) +{ + char *tmp = NULL, *tok, *str; + bool res; + + str = strdup(pmu_name); + if (!str) + return false; + + /* + * uncore alias may be from different PMU with common prefix + */ + tok = strtok_r(str, ",", &tmp); + if (strncmp(pmu_name, tok, strlen(tok))) { + res = false; + goto out; + } + + /* + * Match more complex aliases where the alias name is a comma-delimited + * list of tokens, orderly contained in 
the matching PMU name. + * + * Example: For alias "socket,pmuname" and PMU "socketX_pmunameY", we + * match "socket" in "socketX_pmunameY" and then "pmuname" in + * "pmunameY". + */ + for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) { + name = strstr(name, tok); + if (!name) { + res = false; + goto out; + } + } + + res = true; +out: + free(str); + return res; +} + /* * From the pmu_events_map, find the table of PMU events that corresponds * to the current running CPU. Then, add all PMU events from that table @@ -709,9 +750,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) { int i; struct pmu_events_map *map; - struct pmu_event *pe; const char *name = pmu->name; - const char *pname; map = perf_pmu__find_map(pmu); if (!map) @@ -722,28 +761,22 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) */ i = 0; while (1) { + const char *cpu_name = is_arm_pmu_core(name) ? name : "cpu"; + struct pmu_event *pe = &map->table[i++]; + const char *pname = pe->pmu ? pe->pmu : cpu_name; - pe = &map->table[i++]; if (!pe->name) { if (pe->metric_group || pe->metric_name) continue; break; } - if (!is_arm_pmu_core(name)) { - pname = pe->pmu ? 
pe->pmu : "cpu"; - - /* - * uncore alias may be from different PMU - * with common prefix - */ - if (pmu_is_uncore(name) && - !strncmp(pname, name, strlen(pname))) - goto new_alias; + if (pmu_is_uncore(name) && + pmu_uncore_alias_match(pname, name)) + goto new_alias; - if (strcmp(pname, name)) - continue; - } + if (strcmp(pname, name)) + continue; new_alias: /* need type casts to override 'const' */ @@ -1212,7 +1245,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, info->metric_expr = alias->metric_expr; info->metric_name = alias->metric_name; - list_del(&term->list); + list_del_init(&term->list); free(term); } @@ -1343,7 +1376,7 @@ static void wordwrap(char *s, int start, int max, int corr) break; s += wlen; column += n; - s = ltrim(s); + s = skip_spaces(s); } } diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c index 23e367063446..599a1543871d 100644 --- a/tools/perf/util/print_binary.c +++ b/tools/perf/util/print_binary.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "print_binary.h" #include <linux/log2.h> -#include "sane_ctype.h" +#include <linux/ctype.h> int binary__fprintf(unsigned char *data, size_t len, size_t bytes_per_line, binary__fprintf_t printer, diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 198e09ff611e..0c3b55d0617d 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * probe-event.c : perf-probe definition to probe_events format converter * * Written by Masami Hiramatsu <mhiramat@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ #include <inttypes.h> @@ -33,7 +19,6 @@ #include <limits.h> #include <elf.h> -#include "util.h" #include "event.h" #include "namespaces.h" #include "strlist.h" @@ -53,7 +38,8 @@ #include "session.h" #include "string2.h" -#include "sane_ctype.h" +#include <linux/ctype.h> +#include <linux/zalloc.h> #define PERFPROBE_GROUP "probe" @@ -228,9 +214,9 @@ out: static void clear_perf_probe_point(struct perf_probe_point *pp) { - free(pp->file); - free(pp->function); - free(pp->lazy_line); + zfree(&pp->file); + zfree(&pp->function); + zfree(&pp->lazy_line); } static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) @@ -1189,12 +1175,11 @@ int show_available_vars(struct perf_probe_event *pevs __maybe_unused, void line_range__clear(struct line_range *lr) { - free(lr->function); - free(lr->file); - free(lr->path); - free(lr->comp_dir); + zfree(&lr->function); + zfree(&lr->file); + zfree(&lr->path); + zfree(&lr->comp_dir); intlist__delete(lr->line_list); - memset(lr, 0, sizeof(*lr)); } int line_range__init(struct line_range *lr) @@ -2217,15 +2202,15 @@ void clear_perf_probe_event(struct perf_probe_event *pev) struct perf_probe_arg_field *field, *next; int i; - free(pev->event); - free(pev->group); - free(pev->target); + zfree(&pev->event); + zfree(&pev->group); + zfree(&pev->target); clear_perf_probe_point(&pev->point); for (i = 0; i < pev->nargs; i++) { - free(pev->args[i].name); - free(pev->args[i].var); - free(pev->args[i].type); + zfree(&pev->args[i].name); + zfree(&pev->args[i].var); 
+ zfree(&pev->args[i].type); field = pev->args[i].field; while (field) { next = field->next; @@ -2234,8 +2219,7 @@ void clear_perf_probe_event(struct perf_probe_event *pev) field = next; } } - free(pev->args); - memset(pev, 0, sizeof(*pev)); + zfree(&pev->args); } #define strdup_or_goto(str, label) \ @@ -2316,15 +2300,15 @@ void clear_probe_trace_event(struct probe_trace_event *tev) struct probe_trace_arg_ref *ref, *next; int i; - free(tev->event); - free(tev->group); - free(tev->point.symbol); - free(tev->point.realname); - free(tev->point.module); + zfree(&tev->event); + zfree(&tev->group); + zfree(&tev->point.symbol); + zfree(&tev->point.realname); + zfree(&tev->point.module); for (i = 0; i < tev->nargs; i++) { - free(tev->args[i].name); - free(tev->args[i].value); - free(tev->args[i].type); + zfree(&tev->args[i].name); + zfree(&tev->args[i].value); + zfree(&tev->args[i].type); ref = tev->args[i].ref; while (ref) { next = ref->next; @@ -2332,8 +2316,7 @@ void clear_probe_trace_event(struct probe_trace_event *tev) ref = next; } } - free(tev->args); - memset(tev, 0, sizeof(*tev)); + zfree(&tev->args); } struct kprobe_blacklist_node { @@ -2350,8 +2333,8 @@ static void kprobe_blacklist__delete(struct list_head *blacklist) while (!list_empty(blacklist)) { node = list_first_entry(blacklist, struct kprobe_blacklist_node, list); - list_del(&node->list); - free(node->symbol); + list_del_init(&node->list); + zfree(&node->symbol); free(node); } } diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 4062bc4412a9..c2998f90b23c 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -1,18 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * probe-file.c : operate ftrace k/uprobe events files * * Written by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free 
Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * */ #include <errno.h> #include <fcntl.h> @@ -20,8 +10,8 @@ #include <sys/types.h> #include <sys/uio.h> #include <unistd.h> +#include <linux/zalloc.h> #include "namespaces.h" -#include "util.h" #include "event.h" #include "strlist.h" #include "strfilter.h" diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index c37fbef1711d..7d8c99734928 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * probe-finder.c : C expression to kprobe event converter * * Written by Masami Hiramatsu <mhiramat@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * */ #include <inttypes.h> @@ -33,11 +19,11 @@ #include <dwarf-regs.h> #include <linux/bitops.h> +#include <linux/zalloc.h> #include "event.h" #include "dso.h" #include "debug.h" #include "intlist.h" -#include "util.h" #include "strlist.h" #include "symbol.h" #include "probe-finder.h" diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 16252980ff00..670c477bf8cf 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -5,7 +5,7 @@ #include <stdbool.h> #include "intlist.h" #include "probe-event.h" -#include "sane_ctype.h" +#include <linux/ctype.h> #define MAX_PROBE_BUFFER 1024 #define MAX_PROBES 128 diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c index 797fe1ae2d2e..28de8a4c2ce8 100644 --- a/tools/perf/util/pstack.c +++ b/tools/perf/util/pstack.c @@ -5,10 +5,10 @@ * (c) 2010 Arnaldo Carvalho de Melo <acme@redhat.com> */ -#include "util.h" #include "pstack.h" #include "debug.h" #include <linux/kernel.h> +#include <linux/zalloc.h> #include <stdlib.h> struct pstack { diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 7aa0ea64544e..ceb8afdf9a89 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -6,7 +6,7 @@ # util/python.c -util/ctype.c +../lib/ctype.c util/evlist.c util/evsel.c util/cpumap.c @@ -16,7 +16,9 @@ util/namespaces.c ../lib/bitmap.c ../lib/find_bit.c ../lib/hweight.c +../lib/string.c ../lib/vsprintf.c +../lib/zalloc.c util/thread_map.c util/util.c util/xyarray.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index dda0ac978b1e..1e5b6718dcea 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -12,6 +12,7 @@ #include "print_binary.h" #include "thread_map.h" #include "mmap.h" +#include "util.h" #if PY_MAJOR_VERSION < 3 #define _PyUnicode_FromString(arg) \ @@ -342,7 +343,7 @@ static bool is_tracepoint(struct pyrf_event *pevent) static PyObject* tracepoint_field(struct 
pyrf_event *pe, struct tep_format_field *field) { - struct tep_handle *pevent = field->event->pevent; + struct tep_handle *pevent = field->event->tep; void *data = pe->sample.raw_data; PyObject *ret = NULL; unsigned long long val; diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c index 11e07fab20dc..f399b7ec4d8d 100644 --- a/tools/perf/util/rblist.c +++ b/tools/perf/util/rblist.c @@ -1,8 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Based on strlist.c by: * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Licensed under the GPLv2. */ #include <errno.h> diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index c215704931dc..83d2e149ef19 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -17,8 +17,8 @@ * see Documentation/perf.data-file-format.txt. * PERF_RECORD_AUXTRACE_INFO: * Defines a table of contains for PERF_RECORD_AUXTRACE records. This - * record is generated during 'perf record' command. Each record contains up - * to 256 entries describing offset and size of the AUXTRACE data in the + * record is generated during 'perf record' command. Each record contains + * up to 256 entries describing offset and size of the AUXTRACE data in the * perf.data file. * PERF_RECORD_AUXTRACE_ERROR: * Indicates an error during AUXTRACE collection such as buffer overflow. 
@@ -146,6 +146,7 @@ #include <linux/types.h> #include <linux/bitops.h> #include <linux/log2.h> +#include <linux/zalloc.h> #include <sys/stat.h> #include <sys/types.h> @@ -156,7 +157,6 @@ #include "evlist.h" #include "machine.h" #include "session.h" -#include "util.h" #include "thread.h" #include "debug.h" #include "auxtrace.h" @@ -237,10 +237,33 @@ static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf, return rc; } -/* Display s390 CPU measurement facility basic-sampling data entry */ +/* Display s390 CPU measurement facility basic-sampling data entry + * Data written on s390 in big endian byte order and contains bit + * fields across byte boundaries. + */ static bool s390_cpumsf_basic_show(const char *color, size_t pos, - struct hws_basic_entry *basic) + struct hws_basic_entry *basicp) { + struct hws_basic_entry *basic = basicp; +#if __BYTE_ORDER == __LITTLE_ENDIAN + struct hws_basic_entry local; + unsigned long long word = be64toh(*(unsigned long long *)basicp); + + memset(&local, 0, sizeof(local)); + local.def = be16toh(basicp->def); + local.prim_asn = word & 0xffff; + local.CL = word >> 30 & 0x3; + local.I = word >> 32 & 0x1; + local.AS = word >> 33 & 0x3; + local.P = word >> 35 & 0x1; + local.W = word >> 36 & 0x1; + local.T = word >> 37 & 0x1; + local.U = word >> 40 & 0xf; + local.ia = be64toh(basicp->ia); + local.gpp = be64toh(basicp->gpp); + local.hpp = be64toh(basicp->hpp); + basic = &local; +#endif if (basic->def != 1) { pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos); return false; @@ -258,10 +281,22 @@ static bool s390_cpumsf_basic_show(const char *color, size_t pos, return true; } -/* Display s390 CPU measurement facility diagnostic-sampling data entry */ +/* Display s390 CPU measurement facility diagnostic-sampling data entry. + * Data written on s390 in big endian byte order and contains bit + * fields across byte boundaries. 
+ */ static bool s390_cpumsf_diag_show(const char *color, size_t pos, - struct hws_diag_entry *diag) + struct hws_diag_entry *diagp) { + struct hws_diag_entry *diag = diagp; +#if __BYTE_ORDER == __LITTLE_ENDIAN + struct hws_diag_entry local; + unsigned long long word = be64toh(*(unsigned long long *)diagp); + + local.def = be16toh(diagp->def); + local.I = word >> 32 & 0x1; + diag = &local; +#endif if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) { pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos); return false; @@ -272,35 +307,52 @@ static bool s390_cpumsf_diag_show(const char *color, size_t pos, } /* Return TOD timestamp contained in an trailer entry */ -static unsigned long long trailer_timestamp(struct hws_trailer_entry *te) +static unsigned long long trailer_timestamp(struct hws_trailer_entry *te, + int idx) { /* te->t set: TOD in STCKE format, bytes 8-15 * to->t not set: TOD in STCK format, bytes 0-7 */ unsigned long long ts; - memcpy(&ts, &te->timestamp[te->t], sizeof(ts)); - return ts; + memcpy(&ts, &te->timestamp[idx], sizeof(ts)); + return be64toh(ts); } /* Display s390 CPU measurement facility trailer entry */ static bool s390_cpumsf_trailer_show(const char *color, size_t pos, struct hws_trailer_entry *te) { +#if __BYTE_ORDER == __LITTLE_ENDIAN + struct hws_trailer_entry local; + const unsigned long long flags = be64toh(te->flags); + + memset(&local, 0, sizeof(local)); + local.f = flags >> 63 & 0x1; + local.a = flags >> 62 & 0x1; + local.t = flags >> 61 & 0x1; + local.bsdes = be16toh((flags >> 16 & 0xffff)); + local.dsdes = be16toh((flags & 0xffff)); + memcpy(&local.timestamp, te->timestamp, sizeof(te->timestamp)); + local.overflow = be64toh(te->overflow); + local.clock_base = be64toh(te->progusage[0]) >> 63 & 1; + local.progusage2 = be64toh(te->progusage2); + te = &local; +#endif if (te->bsdes != sizeof(struct hws_basic_entry)) { pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos); return false; } color_fprintf(stdout, color, " [%#08zx] 
Trailer %c%c%c bsdes:%d" " dsdes:%d Overflow:%lld Time:%#llx\n" - "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n", + "\t\tC:%d TOD:%#lx\n", pos, te->f ? 'F' : ' ', te->a ? 'A' : ' ', te->t ? 'T' : ' ', te->bsdes, te->dsdes, te->overflow, - trailer_timestamp(te), te->clock_base, te->progusage2, - te->progusage[0], te->progusage[1]); + trailer_timestamp(te, te->clock_base), + te->clock_base, te->progusage2); return true; } @@ -327,13 +379,13 @@ static bool s390_cpumsf_validate(int machine_type, *dsdes = *bsdes = 0; if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */ return false; - if (basic->def != 1) /* No basic set entry, must be first */ + if (be16toh(basic->def) != 1) /* No basic set entry, must be first */ return false; /* Check for trailer entry at end of SDB */ te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ - sizeof(*te)); - *bsdes = te->bsdes; - *dsdes = te->dsdes; + *bsdes = be16toh(te->bsdes); + *dsdes = be16toh(te->dsdes); if (!te->bsdes && !te->dsdes) { /* Very old hardware, use CPUID */ switch (machine_type) { @@ -495,19 +547,27 @@ static bool s390_cpumsf_make_event(size_t pos, static unsigned long long get_trailer_time(const unsigned char *buf) { struct hws_trailer_entry *te; - unsigned long long aux_time; + unsigned long long aux_time, progusage2; + bool clock_base; te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ - sizeof(*te)); - if (!te->clock_base) /* TOD_CLOCK_BASE value missing */ +#if __BYTE_ORDER == __LITTLE_ENDIAN + clock_base = be64toh(te->progusage[0]) >> 63 & 0x1; + progusage2 = be64toh(te->progusage[1]); +#else + clock_base = te->clock_base; + progusage2 = te->progusage2; +#endif + if (!clock_base) /* TOD_CLOCK_BASE value missing */ return 0; /* Correct calculation to convert time stamp in trailer entry to * nano seconds (taken from arch/s390 function tod_to_ns()). * TOD_CLOCK_BASE is stored in trailer entry member progusage2. 
*/ - aux_time = trailer_timestamp(te) - te->progusage2; + aux_time = trailer_timestamp(te, clock_base) - progusage2; aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9); return aux_time; } @@ -696,7 +756,7 @@ static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq, */ if (err) { sfq->buffer = NULL; - list_del(&buffer->list); + list_del_init(&buffer->list); auxtrace_buffer__free(buffer); if (err > 0) /* Buffer done, no error */ err = 0; @@ -984,7 +1044,7 @@ static void s390_cpumsf_free(struct perf_session *session) auxtrace_heap__free(&sf->heap); s390_cpumsf_free_queues(session); session->auxtrace = NULL; - free(sf->logdir); + zfree(&sf->logdir); free(sf); } @@ -1041,8 +1101,7 @@ static int s390_cpumsf__config(const char *var, const char *value, void *cb) if (rc == -1 || !S_ISDIR(stbuf.st_mode)) { pr_err("Missing auxtrace log directory %s," " continue with current directory...\n", value); - free(sf->logdir); - sf->logdir = NULL; + zfree(&sf->logdir); } return 1; } @@ -1102,7 +1161,7 @@ err_free_queues: auxtrace_queues__free(&sf->queues); session->auxtrace = NULL; err_free: - free(sf->logdir); + zfree(&sf->logdir); free(sf); return err; } diff --git a/tools/perf/util/sane_ctype.h b/tools/perf/util/sane_ctype.h deleted file mode 100644 index c2b42ff9ff32..000000000000 --- a/tools/perf/util/sane_ctype.h +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PERF_SANE_CTYPE_H -#define _PERF_SANE_CTYPE_H - -extern const char *graph_line; -extern const char *graph_dotted_line; -extern const char *spaces; -extern const char *dots; - -/* Sane ctype - no locale, and works with signed chars */ -#undef isascii -#undef isspace -#undef isdigit -#undef isxdigit -#undef isalpha -#undef isprint -#undef isalnum -#undef islower -#undef isupper -#undef tolower -#undef toupper - -extern unsigned char sane_ctype[256]; -#define GIT_SPACE 0x01 -#define GIT_DIGIT 0x02 -#define GIT_ALPHA 0x04 -#define GIT_GLOB_SPECIAL 0x08 -#define 
GIT_REGEX_SPECIAL 0x10 -#define GIT_PRINT_EXTRA 0x20 -#define GIT_PRINT 0x3E -#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) -#define isascii(x) (((x) & ~0x7f) == 0) -#define isspace(x) sane_istest(x,GIT_SPACE) -#define isdigit(x) sane_istest(x,GIT_DIGIT) -#define isxdigit(x) \ - (sane_istest(toupper(x), GIT_ALPHA | GIT_DIGIT) && toupper(x) < 'G') -#define isalpha(x) sane_istest(x,GIT_ALPHA) -#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) -#define isprint(x) sane_istest(x,GIT_PRINT) -#define islower(x) (sane_istest(x,GIT_ALPHA) && (x & 0x20)) -#define isupper(x) (sane_istest(x,GIT_ALPHA) && !(x & 0x20)) -#define tolower(x) sane_case((unsigned char)(x), 0x20) -#define toupper(x) sane_case((unsigned char)(x), 0) - -static inline int sane_case(int x, int high) -{ - if (sane_istest(x, GIT_ALPHA)) - x = (x & ~0x20) | high; - return x; -} - -#endif /* _PERF_SANE_CTYPE_H */ diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 5f06378a482b..61aa7f3df915 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -372,7 +372,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, ns = nsecs - s * NSEC_PER_SEC; scripting_context->event_data = data; - scripting_context->pevent = evsel->tp_format->pevent; + scripting_context->pevent = evsel->tp_format->tep; ENTER; SAVETMPS; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 09604c6508f0..112bed65232f 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -112,6 +112,7 @@ struct tables { PyObject *sample_handler; PyObject *call_path_handler; PyObject *call_return_handler; + PyObject *synth_handler; bool db_export_mode; }; @@ -837,7 +838,7 @@ static void 
python_process_tracepoint(struct perf_sample *sample, ns = nsecs - s * NSEC_PER_SEC; scripting_context->event_data = data; - scripting_context->pevent = evsel->tp_format->pevent; + scripting_context->pevent = evsel->tp_format->tep; context = _PyCapsule_New(scripting_context, NULL, NULL); @@ -947,6 +948,12 @@ static int tuple_set_string(PyObject *t, unsigned int pos, const char *s) return PyTuple_SetItem(t, pos, _PyUnicode_FromString(s)); } +static int tuple_set_bytes(PyObject *t, unsigned int pos, void *bytes, + unsigned int sz) +{ + return PyTuple_SetItem(t, pos, _PyBytes_FromStringAndSize(bytes, sz)); +} + static int python_export_evsel(struct db_export *dbe, struct perf_evsel *evsel) { struct tables *tables = container_of(dbe, struct tables, dbe); @@ -1105,13 +1112,13 @@ static int python_export_branch_type(struct db_export *dbe, u32 branch_type, return 0; } -static int python_export_sample(struct db_export *dbe, - struct export_sample *es) +static void python_export_sample_table(struct db_export *dbe, + struct export_sample *es) { struct tables *tables = container_of(dbe, struct tables, dbe); PyObject *t; - t = tuple_new(22); + t = tuple_new(24); tuple_set_u64(t, 0, es->db_id); tuple_set_u64(t, 1, es->evsel->db_id); @@ -1135,10 +1142,39 @@ static int python_export_sample(struct db_export *dbe, tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK); tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX)); tuple_set_u64(t, 21, es->call_path_id); + tuple_set_u64(t, 22, es->sample->insn_cnt); + tuple_set_u64(t, 23, es->sample->cyc_cnt); call_object(tables->sample_handler, t, "sample_table"); Py_DECREF(t); +} + +static void python_export_synth(struct db_export *dbe, struct export_sample *es) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + PyObject *t; + + t = tuple_new(3); + + tuple_set_u64(t, 0, es->db_id); + tuple_set_u64(t, 1, es->evsel->attr.config); + tuple_set_bytes(t, 2, es->sample->raw_data, es->sample->raw_size); + + 
call_object(tables->synth_handler, t, "synth_data"); + + Py_DECREF(t); +} + +static int python_export_sample(struct db_export *dbe, + struct export_sample *es) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + + python_export_sample_table(dbe, es); + + if (es->evsel->attr.type == PERF_TYPE_SYNTH && tables->synth_handler) + python_export_synth(dbe, es); return 0; } @@ -1173,7 +1209,7 @@ static int python_export_call_return(struct db_export *dbe, u64 comm_db_id = cr->comm ? cr->comm->db_id : 0; PyObject *t; - t = tuple_new(12); + t = tuple_new(14); tuple_set_u64(t, 0, cr->db_id); tuple_set_u64(t, 1, cr->thread->db_id); @@ -1187,6 +1223,8 @@ static int python_export_call_return(struct db_export *dbe, tuple_set_u64(t, 9, cr->cp->parent->db_id); tuple_set_s32(t, 10, cr->flags); tuple_set_u64(t, 11, cr->parent_db_id); + tuple_set_u64(t, 12, cr->insn_count); + tuple_set_u64(t, 13, cr->cyc_count); call_object(tables->call_return_handler, t, "call_return_table"); @@ -1473,6 +1511,14 @@ static void set_table_handlers(struct tables *tables) SET_TABLE_HANDLER(sample); SET_TABLE_HANDLER(call_path); SET_TABLE_HANDLER(call_return); + + /* + * Synthesized events are samples but with architecture-specific data + * stored in sample->raw_data. They are exported via + * python_export_sample() and consequently do not need a separate export + * callback. 
+ */ + tables->synth_handler = get_handler("synth_data"); } #if PY_MAJOR_VERSION < 3 diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index bad5f87ae001..d0fd6c614e68 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2,6 +2,7 @@ #include <errno.h> #include <inttypes.h> #include <linux/kernel.h> +#include <linux/zalloc.h> #include <traceevent/event-parse.h> #include <api/fs/fs.h> @@ -18,7 +19,6 @@ #include "session.h" #include "tool.h" #include "sort.h" -#include "util.h" #include "cpumap.h" #include "perf_regs.h" #include "asm/bug.h" @@ -29,6 +29,61 @@ #include "stat.h" #include "arch/common.h" +#ifdef HAVE_ZSTD_SUPPORT +static int perf_session__process_compressed_event(struct perf_session *session, + union perf_event *event, u64 file_offset) +{ + void *src; + size_t decomp_size, src_size; + u64 decomp_last_rem = 0; + size_t decomp_len = session->header.env.comp_mmap_len; + struct decomp *decomp, *decomp_last = session->decomp_last; + + decomp = mmap(NULL, sizeof(struct decomp) + decomp_len, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (decomp == MAP_FAILED) { + pr_err("Couldn't allocate memory for decompression\n"); + return -1; + } + + decomp->file_pos = file_offset; + decomp->head = 0; + + if (decomp_last) { + decomp_last_rem = decomp_last->size - decomp_last->head; + memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem); + decomp->size = decomp_last_rem; + } + + src = (void *)event + sizeof(struct compressed_event); + src_size = event->pack.header.size - sizeof(struct compressed_event); + + decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size, + &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem); + if (!decomp_size) { + munmap(decomp, sizeof(struct decomp) + decomp_len); + pr_err("Couldn't decompress data\n"); + return -1; + } + + decomp->size += decomp_size; + + if (session->decomp == NULL) { + session->decomp = decomp; + 
session->decomp_last = decomp; + } else { + session->decomp_last->next = decomp; + session->decomp_last = decomp; + } + + pr_debug("decomp (B): %ld to %ld\n", src_size, decomp_size); + + return 0; +} +#else /* !HAVE_ZSTD_SUPPORT */ +#define perf_session__process_compressed_event perf_session__process_compressed_event_stub +#endif + static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool, @@ -197,6 +252,21 @@ static void perf_session__delete_threads(struct perf_session *session) machine__delete_threads(&session->machines.host); } +static void perf_session__release_decomp_events(struct perf_session *session) +{ + struct decomp *next, *decomp; + size_t decomp_len; + next = session->decomp; + decomp_len = session->header.env.comp_mmap_len; + do { + decomp = next; + if (decomp == NULL) + break; + next = decomp->next; + munmap(decomp, decomp_len + sizeof(struct decomp)); + } while (1); +} + void perf_session__delete(struct perf_session *session) { if (session == NULL) @@ -205,6 +275,7 @@ void perf_session__delete(struct perf_session *session) auxtrace_index__free(&session->auxtrace_index); perf_session__destroy_kernel_maps(session); perf_session__delete_threads(session); + perf_session__release_decomp_events(session); perf_env__exit(&session->header.env); machines__exit(&session->machines); if (session->data) @@ -358,6 +429,14 @@ static int process_stat_round_stub(struct perf_session *perf_session __maybe_unu return 0; } +static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused, + u64 file_offset __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + void perf_tool__fill_defaults(struct perf_tool *tool) { if (tool->sample == NULL) @@ -430,6 +509,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->time_conv = process_event_op2_stub; if (tool->feature == NULL) tool->feature = process_event_op2_stub; + if 
(tool->compressed == NULL) + tool->compressed = perf_session__process_compressed_event; } static void swap_sample_id_all(union perf_event *event, void *data) @@ -566,6 +647,26 @@ static void perf_event__throttle_swap(union perf_event *event, swap_sample_id_all(event, &event->throttle + 1); } +static void perf_event__namespaces_swap(union perf_event *event, + bool sample_id_all) +{ + u64 i; + + event->namespaces.pid = bswap_32(event->namespaces.pid); + event->namespaces.tid = bswap_32(event->namespaces.tid); + event->namespaces.nr_namespaces = bswap_64(event->namespaces.nr_namespaces); + + for (i = 0; i < event->namespaces.nr_namespaces; i++) { + struct perf_ns_link_info *ns = &event->namespaces.link_info[i]; + + ns->dev = bswap_64(ns->dev); + ns->ino = bswap_64(ns->ino); + } + + if (sample_id_all) + swap_sample_id_all(event, &event->namespaces.link_info[i]); +} + static u8 revbyte(u8 b) { int rev = (b >> 4) | ((b & 0xf) << 4); @@ -806,6 +907,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap, [PERF_RECORD_SWITCH] = perf_event__switch_swap, [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap, + [PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, @@ -1144,9 +1246,12 @@ static void dump_read(struct perf_evsel *evsel, union perf_event *event) return; printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid, - evsel ? 
perf_evsel__name(evsel) : "FAIL", + perf_evsel__name(evsel), event->read.value); + if (!evsel) + return; + read_format = evsel->attr.read_format; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -1373,7 +1478,9 @@ static s64 perf_session__process_user_event(struct perf_session *session, int fd = perf_data__fd(session->data); int err; - dump_event(session->evlist, event, file_offset, &sample); + if (event->header.type != PERF_RECORD_COMPRESSED || + tool->compressed == perf_session__process_compressed_event_stub) + dump_event(session->evlist, event, file_offset, &sample); /* These events are processed right away */ switch (event->header.type) { @@ -1426,6 +1533,11 @@ static s64 perf_session__process_user_event(struct perf_session *session, return tool->time_conv(session, event); case PERF_RECORD_HEADER_FEATURE: return tool->feature(session, event); + case PERF_RECORD_COMPRESSED: + err = tool->compressed(session, event, file_offset); + if (err) + dump_event(session->evlist, event, file_offset, &sample); + return err; default: return -EINVAL; } @@ -1708,6 +1820,8 @@ static int perf_session__flush_thread_stacks(struct perf_session *session) volatile int session_done; +static int __perf_session__process_decomp_events(struct perf_session *session); + static int __perf_session__process_pipe_events(struct perf_session *session) { struct ordered_events *oe = &session->ordered_events; @@ -1788,6 +1902,10 @@ more: if (skip > 0) head += skip; + err = __perf_session__process_decomp_events(session); + if (err) + goto out_err; + if (!session_done()) goto more; done: @@ -1836,6 +1954,39 @@ fetch_mmaped_event(struct perf_session *session, return event; } +static int __perf_session__process_decomp_events(struct perf_session *session) +{ + s64 skip; + u64 size, file_pos = 0; + struct decomp *decomp = session->decomp_last; + + if (!decomp) + return 0; + + while (decomp->head < decomp->size && !session_done()) { + union perf_event *event = fetch_mmaped_event(session, decomp->head, 
decomp->size, decomp->data); + + if (!event) + break; + + size = event->header.size; + + if (size < sizeof(struct perf_event_header) || + (skip = perf_session__process_event(session, event, file_pos)) < 0) { + pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", + decomp->file_pos + decomp->head, event->header.size, event->header.type); + return -EINVAL; + } + + if (skip) + size += skip; + + decomp->head += size; + } + + return 0; +} + /* * On 64bit we can mmap the data file in one go. No need for tiny mmap * slices. On 32bit we use 32MB. @@ -1945,6 +2096,10 @@ more: head += size; file_pos += size; + err = __perf_session__process_decomp_events(session); + if (err) + goto out; + ui_progress__update(prog, size); if (session_done()) diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index d96eccd7d27f..dd8920b745bc 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -8,6 +8,7 @@ #include "machine.h" #include "data.h" #include "ordered-events.h" +#include "util/compress.h" #include <linux/kernel.h> #include <linux/rbtree.h> #include <linux/perf_event.h> @@ -35,6 +36,19 @@ struct perf_session { struct ordered_events ordered_events; struct perf_data *data; struct perf_tool *tool; + u64 bytes_transferred; + u64 bytes_compressed; + struct zstd_data zstd_data; + struct decomp *decomp; + struct decomp *decomp_last; +}; + +struct decomp { + struct decomp *next; + u64 file_pos; + u64 head; + size_t size; + char data[]; }; struct perf_tool; diff --git a/tools/perf/util/setns.c b/tools/perf/util/setns.c index ce8fc290fce8..48f9c0af63b2 100644 --- a/tools/perf/util/setns.c +++ b/tools/perf/util/setns.c @@ -1,4 +1,6 @@ -#include "util.h" +// SPDX-License-Identifier: LGPL-2.1 + +#include "namespaces.h" #include <unistd.h> #include <sys/syscall.h> diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 5b5a167b43ce..a1a68a2fa917 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -17,6 +17,8 @@ if cc == 
"clang": vars[var] = sub("-fcf-protection", "", vars[var]) if not clang_has_option("-fstack-clash-protection"): vars[var] = sub("-fstack-clash-protection", "", vars[var]) + if not clang_has_option("-fstack-protector-strong"): + vars[var] = sub("-fstack-protector-strong", "", vars[var]) from distutils.core import setup, Extension diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 453f6f6f29f3..3b791ef2cd50 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -23,8 +23,12 @@ int smt_on(void) char fn[256]; snprintf(fn, sizeof fn, - "devices/system/cpu/cpu%d/topology/thread_siblings", - cpu); + "devices/system/cpu/cpu%d/topology/core_cpus", cpu); + if (access(fn, F_OK) == -1) { + snprintf(fn, sizeof fn, + "devices/system/cpu/cpu%d/topology/thread_siblings", + cpu); + } if (sysfs__read_str(fn, &str, &strlen) < 0) continue; /* Entry is hex, but does not have 0x, so need custom parser */ diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index ce376a73f964..a0f232151d6f 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -79,6 +79,9 @@ struct hist_entry_diff { /* HISTC_WEIGHTED_DIFF */ s64 wdiff; + + /* PERF_HPP_DIFF__CYCLES */ + s64 cycles; }; }; @@ -144,6 +147,7 @@ struct hist_entry { long time; struct hists *hists; struct mem_info *mem_info; + struct block_info *block_info; void *raw_data; u32 raw_size; int num_res; @@ -285,6 +289,15 @@ struct sort_entry { u8 se_width_idx; }; +struct block_hist { + struct hists block_hists; + struct perf_hpp_list block_list; + struct perf_hpp_fmt block_fmt; + int block_idx; + bool valid; + struct hist_entry he; +}; + extern struct sort_entry sort_thread; extern struct list_head hist_entry__sort_list; diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c index fcc8630f6dff..adfcf1ff464c 100644 --- a/tools/perf/util/srccode.c +++ b/tools/perf/util/srccode.c @@ -1,18 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Manage printing of source lines * Copyright (c) 
2017, Intel Corporation. * Author: Andi Kleen - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ -#include "linux/list.h" +#include <linux/list.h> +#include <linux/zalloc.h> #include <stdlib.h> #include <sys/mman.h> #include <sys/stat.h> @@ -90,12 +83,12 @@ static void fill_lines(char **lines, int maxline, char *map, int maplen) static void free_srcfile(struct srcfile *sf) { - list_del(&sf->nd); + list_del_init(&sf->nd); hlist_del(&sf->hash_nd); map_total_sz -= sf->maplen; munmap(sf->map, sf->maplen); - free(sf->lines); - free(sf->fn); + zfree(&sf->lines); + zfree(&sf->fn); free(sf); num_srcfiles--; } @@ -161,7 +154,7 @@ static struct srcfile *find_srcfile(char *fn) out_map: munmap(h->map, sz); out_fn: - free(h->fn); + zfree(&h->fn); out_h: free(h); return NULL; diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 10ca1533937e..6ccf6f6d09df 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -5,11 +5,13 @@ #include <string.h> #include <linux/kernel.h> +#include <linux/string.h> +#include <linux/zalloc.h> #include "util/dso.h" -#include "util/util.h" #include "util/debug.h" #include "util/callchain.h" +#include "util/symbol_conf.h" #include "srcline.h" #include "string2.h" #include "symbol.h" @@ -287,7 +289,8 @@ static int addr2line(const char *dso_name, u64 addr, } if (a2l == NULL) { - pr_warning("addr2line_init failed for %s\n", dso_name); + if (!symbol_conf.disable_add2line_warn) + pr_warning("addr2line_init failed for %s\n", dso_name); return 0; } @@ -464,7 +467,7 @@ static struct inline_node *addr2inlines(const char 
*dso_name, u64 addr, char *srcline; struct symbol *inline_sym; - rtrim(funcname); + strim(funcname); if (getline(&filename, &filelen, fp) == -1) goto out; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 6d043c78f3c2..58df6a0dbb9f 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1,5 +1,6 @@ #include <stdio.h> #include <inttypes.h> +#include <linux/string.h> #include <linux/time64.h> #include <math.h> #include "color.h" @@ -10,7 +11,7 @@ #include "thread_map.h" #include "cpumap.h" #include "string2.h" -#include "sane_ctype.h" +#include <linux/ctype.h> #include "cgroup.h" #include <math.h> #include <api/fs/fs.h> @@ -18,11 +19,6 @@ #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" -static bool is_duration_time(struct perf_evsel *evsel) -{ - return !strcmp(evsel->name, "duration_time"); -} - static void print_running(struct perf_stat_config *config, u64 run, u64 ena) { @@ -74,8 +70,9 @@ static void aggr_printout(struct perf_stat_config *config, { switch (config->aggr_mode) { case AGGR_CORE: - fprintf(config->output, "S%d-C%*d%s%*d%s", + fprintf(config->output, "S%d-D%d-C%*d%s%*d%s", cpu_map__id_to_socket(id), + cpu_map__id_to_die(id), config->csv_output ? 0 : -8, cpu_map__id_to_cpu(id), config->csv_sep, @@ -83,6 +80,16 @@ static void aggr_printout(struct perf_stat_config *config, nr, config->csv_sep); break; + case AGGR_DIE: + fprintf(config->output, "S%d-D%*d%s%*d%s", + cpu_map__id_to_socket(id << 16), + config->csv_output ? 0 : -8, + cpu_map__id_to_die(id << 16), + config->csv_sep, + config->csv_output ? 0 : 4, + nr, + config->csv_sep); + break; case AGGR_SOCKET: fprintf(config->output, "S%*d%s%*d%s", config->csv_output ? 0 : -5, @@ -93,9 +100,18 @@ static void aggr_printout(struct perf_stat_config *config, config->csv_sep); break; case AGGR_NONE: - fprintf(config->output, "CPU%*d%s", - config->csv_output ? 
0 : -4, - perf_evsel__cpus(evsel)->map[id], config->csv_sep); + if (evsel->percore) { + fprintf(config->output, "S%d-D%d-C%*d%s", + cpu_map__id_to_socket(id), + cpu_map__id_to_die(id), + config->csv_output ? 0 : -5, + cpu_map__id_to_cpu(id), config->csv_sep); + } else { + fprintf(config->output, "CPU%*d%s ", + config->csv_output ? 0 : -5, + perf_evsel__cpus(evsel)->map[id], + config->csv_sep); + } break; case AGGR_THREAD: fprintf(config->output, "%*s-%*d%s", @@ -196,13 +212,11 @@ static void print_metric_csv(struct perf_stat_config *config __maybe_unused, return; } snprintf(buf, sizeof(buf), fmt, val); - ends = vals = ltrim(buf); + ends = vals = skip_spaces(buf); while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; - while (isspace(*unit)) - unit++; - fprintf(out, "%s%s%s%s", config->csv_sep, vals, config->csv_sep, unit); + fprintf(out, "%s%s%s%s", config->csv_sep, vals, config->csv_sep, skip_spaces(unit)); } /* Filter out some columns that don't work well in metrics only mode */ @@ -266,7 +280,7 @@ static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused return; unit = fixunit(tbuf, os->evsel, unit); snprintf(buf, sizeof buf, fmt, val); - ends = vals = ltrim(buf); + ends = vals = skip_spaces(buf); while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; @@ -404,6 +418,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, [AGGR_THREAD] = 1, [AGGR_NONE] = 1, [AGGR_SOCKET] = 2, + [AGGR_DIE] = 2, [AGGR_CORE] = 2, }; @@ -539,7 +554,8 @@ static void collect_all_aliases(struct perf_stat_config *config, struct perf_evs alias->scale != counter->scale || alias->cgrp != counter->cgrp || strcmp(alias->unit, counter->unit) || - perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter)) + perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter) || + !strcmp(alias->pmu_name, counter->pmu_name)) break; alias->merged_stat = true; cb(config, alias, data, false); @@ -599,6 +615,41 @@ static void aggr_cb(struct 
perf_stat_config *config, } } +static void print_counter_aggrdata(struct perf_stat_config *config, + struct perf_evsel *counter, int s, + char *prefix, bool metric_only, + bool *first) +{ + struct aggr_data ad; + FILE *output = config->output; + u64 ena, run, val; + int id, nr; + double uval; + + ad.id = id = config->aggr_map->map[s]; + ad.val = ad.ena = ad.run = 0; + ad.nr = 0; + if (!collect_data(config, counter, aggr_cb, &ad)) + return; + + nr = ad.nr; + ena = ad.ena; + run = ad.run; + val = ad.val; + if (*first && metric_only) { + *first = false; + aggr_printout(config, counter, id, nr); + } + if (prefix && !metric_only) + fprintf(output, "%s", prefix); + + uval = val * counter->scale; + printout(config, id, nr, counter, uval, prefix, + run, ena, 1.0, &rt_stat); + if (!metric_only) + fputc('\n', output); +} + static void print_aggr(struct perf_stat_config *config, struct perf_evlist *evlist, char *prefix) @@ -606,9 +657,7 @@ static void print_aggr(struct perf_stat_config *config, bool metric_only = config->metric_only; FILE *output = config->output; struct perf_evsel *counter; - int s, id, nr; - double uval; - u64 ena, run, val; + int s; bool first; if (!(config->aggr_map || config->aggr_get_id)) @@ -621,36 +670,14 @@ static void print_aggr(struct perf_stat_config *config, * Without each counter has its own line. 
*/ for (s = 0; s < config->aggr_map->nr; s++) { - struct aggr_data ad; if (prefix && metric_only) fprintf(output, "%s", prefix); - ad.id = id = config->aggr_map->map[s]; first = true; evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; - - ad.val = ad.ena = ad.run = 0; - ad.nr = 0; - if (!collect_data(config, counter, aggr_cb, &ad)) - continue; - nr = ad.nr; - ena = ad.ena; - run = ad.run; - val = ad.val; - if (first && metric_only) { - first = false; - aggr_printout(config, counter, id, nr); - } - if (prefix && !metric_only) - fprintf(output, "%s", prefix); - - uval = val * counter->scale; - printout(config, id, nr, counter, uval, prefix, - run, ena, 1.0, &rt_stat); - if (!metric_only) - fputc('\n', output); + print_counter_aggrdata(config, counter, s, + prefix, metric_only, + &first); } if (metric_only) fputc('\n', output); @@ -848,8 +875,6 @@ static void print_no_aggr_metric(struct perf_stat_config *config, if (prefix) fputs(prefix, config->output); evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; if (first) { aggr_printout(config, counter, cpu, 0); first = false; @@ -867,7 +892,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config, } static int aggr_header_lens[] = { - [AGGR_CORE] = 18, + [AGGR_CORE] = 24, + [AGGR_DIE] = 18, [AGGR_SOCKET] = 12, [AGGR_NONE] = 6, [AGGR_THREAD] = 24, @@ -876,6 +902,7 @@ static int aggr_header_lens[] = { static const char *aggr_header_csv[] = { [AGGR_CORE] = "core,cpus,", + [AGGR_DIE] = "die,cpus", [AGGR_SOCKET] = "socket,cpus", [AGGR_NONE] = "cpu,", [AGGR_THREAD] = "comm-pid,", @@ -906,8 +933,6 @@ static void print_metric_headers(struct perf_stat_config *config, /* Print metrics headers only */ evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; os.evsel = counter; out.ctx = &os; out.print_metric = print_metric_header; @@ -944,8 +969,13 @@ static void print_interval(struct perf_stat_config *config, if 
(!metric_only) fprintf(output, " counts %*s events\n", unit_width, "unit"); break; + case AGGR_DIE: + fprintf(output, "# time die cpus"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + break; case AGGR_CORE: - fprintf(output, "# time core cpus"); + fprintf(output, "# time core cpus"); if (!metric_only) fprintf(output, " counts %*s events\n", unit_width, "unit"); break; @@ -1101,6 +1131,30 @@ static void print_footer(struct perf_stat_config *config) "the same PMU. Try reorganizing the group.\n"); } +static void print_percore(struct perf_stat_config *config, + struct perf_evsel *counter, char *prefix) +{ + bool metric_only = config->metric_only; + FILE *output = config->output; + int s; + bool first = true; + + if (!(config->aggr_map || config->aggr_get_id)) + return; + + for (s = 0; s < config->aggr_map->nr; s++) { + if (prefix && metric_only) + fprintf(output, "%s", prefix); + + print_counter_aggrdata(config, counter, s, + prefix, metric_only, + &first); + } + + if (metric_only) + fputc('\n', output); +} + void perf_evlist__print_counters(struct perf_evlist *evlist, struct perf_stat_config *config, @@ -1131,20 +1185,17 @@ perf_evlist__print_counters(struct perf_evlist *evlist, switch (config->aggr_mode) { case AGGR_CORE: + case AGGR_DIE: case AGGR_SOCKET: print_aggr(config, evlist, prefix); break; case AGGR_THREAD: evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; print_aggr_thread(config, _target, counter, prefix); } break; case AGGR_GLOBAL: evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; print_counter_aggr(config, counter, prefix); } if (metric_only) @@ -1155,9 +1206,10 @@ perf_evlist__print_counters(struct perf_evlist *evlist, print_no_aggr_metric(config, evlist, prefix); else { evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; - print_counter(config, counter, prefix); + if (counter->percore) + 
print_percore(config, counter, prefix); + else + print_counter(config, counter, prefix); } } break; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 83d8094be4fe..656065af4971 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -8,10 +8,12 @@ #include "evlist.h" #include "expr.h" #include "metricgroup.h" +#include <linux/zalloc.h> /* * AGGR_GLOBAL: Use CPU 0 * AGGR_SOCKET: Use first CPU of socket + * AGGR_DIE: Use first CPU of die * AGGR_CORE: Use first CPU of core * AGGR_NONE: Use matching CPU * AGGR_THREAD: Not supported? @@ -303,7 +305,7 @@ static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list, struct perf_evsel *c2; evlist__for_each_entry (evsel_list, c2) { - if (!strcasecmp(c2->name, name)) + if (!strcasecmp(c2->name, name) && !c2->collect_stat) return c2; } return NULL; @@ -342,7 +344,8 @@ void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list) if (leader) { /* Search in group */ for_each_group_member (oc, leader) { - if (!strcasecmp(oc->name, metric_names[i])) { + if (!strcasecmp(oc->name, metric_names[i]) && + !oc->collect_stat) { found = true; break; } @@ -722,6 +725,7 @@ static void generic_metric(struct perf_stat_config *config, double ratio; int i; void *ctxp = out->ctx; + char *n, *pn; expr__ctx_init(&pctx); expr__add_id(&pctx, name, avg); @@ -741,7 +745,19 @@ static void generic_metric(struct perf_stat_config *config, stats = &v->stats; scale = 1.0; } - expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale); + + n = strdup(metric_events[i]->name); + if (!n) + return; + /* + * This display code with --no-merge adds [cpu] postfixes. + * These are not supported by the parser. Remove everything + * after the space. 
+ */ + pn = strchr(n, ' '); + if (pn) + *pn = 0; + expr__add_id(&pctx, n, avg_stats(stats)*scale); } if (!metric_events[i]) { const char *p = metric_expr; @@ -758,6 +774,9 @@ static void generic_metric(struct perf_stat_config *config, (metric_name ? metric_name : name) : "", 0); } else print_metric(config, ctxp, NULL, NULL, "", 0); + + for (i = 1; i < pctx.num_ids; i++) + zfree(&pctx.ids[i].name); } void perf_stat__print_shadow_stats(struct perf_stat_config *config, diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 2856cc9d5a31..db8a6cf336be 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -6,6 +6,7 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" +#include <linux/zalloc.h> void update_stats(struct stats *stats, u64 val) { @@ -132,7 +133,7 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) struct perf_stat_evsel *ps = evsel->stats; if (ps) - free(ps->group_data); + zfree(&ps->group_data); zfree(&evsel->stats); } @@ -272,14 +273,17 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel switch (config->aggr_mode) { case AGGR_THREAD: case AGGR_CORE: + case AGGR_DIE: case AGGR_SOCKET: case AGGR_NONE: if (!evsel->snapshot) perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, config->scale, NULL); - if (config->aggr_mode == AGGR_NONE) - perf_stat__update_shadow_stats(evsel, count->val, cpu, - &rt_stat); + if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) { + perf_stat__update_shadow_stats(evsel, count->val, + cpu, &rt_stat); + } + if (config->aggr_mode == AGGR_THREAD) { if (config->stats) perf_stat__update_shadow_stats(evsel, diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 2f9c9159a364..7032dd1eeac2 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -44,6 +44,7 @@ enum aggr_mode { AGGR_NONE, AGGR_GLOBAL, AGGR_SOCKET, + AGGR_DIE, AGGR_CORE, AGGR_THREAD, AGGR_UNSET, diff --git 
a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 23092fd6451d..2ce0dc887364 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include "debug.h" -#include "util.h" #include <linux/kernel.h> +#include <linux/zalloc.h> #include <errno.h> +#include <stdlib.h> /* * Used as the default ->buf value, so that people can always assume diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c index 7f3253d44afd..78aa4c3b990d 100644 --- a/tools/perf/util/strfilter.c +++ b/tools/perf/util/strfilter.c @@ -1,10 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 -#include "util.h" #include "string2.h" #include "strfilter.h" #include <errno.h> -#include "sane_ctype.h" +#include <stdlib.h> +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/zalloc.h> /* Operators */ static const char *OP_and = "&"; /* Logical AND */ @@ -37,8 +39,7 @@ static const char *get_token(const char *s, const char **e) { const char *p; - while (isspace(*s)) /* Skip spaces */ - s++; + s = skip_spaces(s); if (*s == '\0') { p = s; diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index d8bfd0c4d2cb..52603876c548 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -4,7 +4,16 @@ #include <linux/string.h> #include <stdlib.h> -#include "sane_ctype.h" +#include <linux/ctype.h> + +const char *graph_dotted_line = + "---------------------------------------------------------------------" + "---------------------------------------------------------------------" + "---------------------------------------------------------------------"; +const char *dots = + "....................................................................." + "....................................................................." 
+ "....................................................................."; #define K 1024LL /* @@ -60,109 +69,6 @@ out_err: return -1; } -/* - * Helper function for splitting a string into an argv-like array. - * originally copied from lib/argv_split.c - */ -static const char *skip_sep(const char *cp) -{ - while (*cp && isspace(*cp)) - cp++; - - return cp; -} - -static const char *skip_arg(const char *cp) -{ - while (*cp && !isspace(*cp)) - cp++; - - return cp; -} - -static int count_argc(const char *str) -{ - int count = 0; - - while (*str) { - str = skip_sep(str); - if (*str) { - count++; - str = skip_arg(str); - } - } - - return count; -} - -/** - * argv_free - free an argv - * @argv - the argument vector to be freed - * - * Frees an argv and the strings it points to. - */ -void argv_free(char **argv) -{ - char **p; - for (p = argv; *p; p++) { - free(*p); - *p = NULL; - } - - free(argv); -} - -/** - * argv_split - split a string at whitespace, returning an argv - * @str: the string to be split - * @argcp: returned argument count - * - * Returns an array of pointers to strings which are split out from - * @str. This is performed by strictly splitting on white-space; no - * quote processing is performed. Multiple whitespace characters are - * considered to be a single argument separator. The returned array - * is always NULL-terminated. Returns NULL on memory allocation - * failure. 
- */ -char **argv_split(const char *str, int *argcp) -{ - int argc = count_argc(str); - char **argv = calloc(argc + 1, sizeof(*argv)); - char **argvp; - - if (argv == NULL) - goto out; - - if (argcp) - *argcp = argc; - - argvp = argv; - - while (*str) { - str = skip_sep(str); - - if (*str) { - const char *p = str; - char *t; - - str = skip_arg(str); - - t = strndup(p, str-p); - if (t == NULL) - goto fail; - *argvp++ = t; - } - } - *argvp = NULL; - -out: - return argv; - -fail: - argv_free(argv); - return NULL; -} - /* Character class matching */ static bool __match_charclass(const char *pat, char c, const char **npat) { @@ -303,61 +209,6 @@ int strtailcmp(const char *s1, const char *s2) return 0; } -/** - * strxfrchar - Locate and replace character in @s - * @s: The string to be searched/changed. - * @from: Source character to be replaced. - * @to: Destination character. - * - * Return pointer to the changed string. - */ -char *strxfrchar(char *s, char from, char to) -{ - char *p = s; - - while ((p = strchr(p, from)) != NULL) - *p++ = to; - - return s; -} - -/** - * ltrim - Removes leading whitespace from @s. - * @s: The string to be stripped. - * - * Return pointer to the first non-whitespace character in @s. - */ -char *ltrim(char *s) -{ - while (isspace(*s)) - s++; - - return s; -} - -/** - * rtrim - Removes trailing whitespace from @s. - * @s: The string to be stripped. - * - * Note that the first trailing whitespace is replaced with a %NUL-terminator - * in the given string @s. Returns @s. 
- */ -char *rtrim(char *s) -{ - size_t size = strlen(s); - char *end; - - if (!size) - return s; - - end = s + size - 1; - while (end >= s && isspace(*end)) - end--; - *(end + 1) = '\0'; - - return s; -} - char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints) { /* diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index 4c68a09b97e8..708805f5573e 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -2,13 +2,15 @@ #ifndef PERF_STRING_H #define PERF_STRING_H +#include <linux/string.h> #include <linux/types.h> #include <stddef.h> #include <string.h> +extern const char *graph_dotted_line; +extern const char *dots; + s64 perf_atoll(const char *str); -char **argv_split(const char *str, int *argcp); -void argv_free(char **argv); bool strglobmatch(const char *str, const char *pat); bool strglobmatch_nocase(const char *str, const char *pat); bool strlazymatch(const char *str, const char *pat); @@ -17,15 +19,6 @@ static inline bool strisglob(const char *str) return strpbrk(str, "*?[") != NULL; } int strtailcmp(const char *s1, const char *s2); -char *strxfrchar(char *s, char from, char to); - -char *ltrim(char *s); -char *rtrim(char *s); - -static inline char *trim(char *s) -{ - return ltrim(rtrim(s)); -} char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints); diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index 9de5434bb49e..8a868cbeffae 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c @@ -1,16 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Licensed under the GPLv2. 
*/ #include "strlist.h" -#include "util.h" #include <errno.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <linux/zalloc.h> static struct rb_node *strlist__node_new(struct rblist *rblist, const void *entry) diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index f735ee038713..76cc54000483 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * svghelper.c - helper functions for outputting svg * @@ -5,11 +6,6 @@ * * Authors: * Arjan van de Ven <arjan@linux.intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. */ #include <inttypes.h> @@ -19,10 +15,10 @@ #include <string.h> #include <linux/bitmap.h> #include <linux/time64.h> +#include <linux/zalloc.h> #include "perf.h" #include "svghelper.h" -#include "util.h" #include "cpumap.h" static u64 first_time, last_time; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 4ad106a5f2c0..7d504dc22108 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -2,6 +2,7 @@ #include <fcntl.h> #include <stdio.h> #include <errno.h> +#include <stdlib.h> #include <string.h> #include <unistd.h> #include <inttypes.h> @@ -14,7 +15,9 @@ #include "machine.h" #include "vdso.h" #include "debug.h" -#include "sane_ctype.h" +#include "util.h" +#include <linux/ctype.h> +#include <linux/zalloc.h> #include <symbol/kallsyms.h> #ifndef EM_AARCH64 @@ -699,7 +702,6 @@ bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr) int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, enum dso_binary_type type) { - int err = -1; GElf_Ehdr ehdr; Elf *elf; int fd; @@ -793,7 +795,7 @@ out_elf_end: elf_end(elf); out_close: close(fd); - return err; + return -1; } /** @@ -1476,7 +1478,7 @@ 
static void kcore_copy__free_phdrs(struct kcore_copy_info *kci) struct phdr_data *p, *tmp; list_for_each_entry_safe(p, tmp, &kci->phdrs, node) { - list_del(&p->node); + list_del_init(&p->node); free(p); } } @@ -1499,7 +1501,7 @@ static void kcore_copy__free_syms(struct kcore_copy_info *kci) struct sym_data *s, *tmp; list_for_each_entry_safe(s, tmp, &kci->syms, node) { - list_del(&s->node); + list_del_init(&s->node); free(s); } } @@ -2131,11 +2133,11 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len, return 0; out_free_args: - free(tmp->args); + zfree(&tmp->args); out_free_name: - free(tmp->name); + zfree(&tmp->name); out_free_prov: - free(tmp->provider); + zfree(&tmp->provider); out_free_note: free(tmp); out_err: @@ -2250,9 +2252,9 @@ int cleanup_sdt_note_list(struct list_head *sdt_notes) int nr_free = 0; list_for_each_entry_safe(pos, tmp, sdt_notes, note_list) { - list_del(&pos->note_list); - free(pos->name); - free(pos->provider); + list_del_init(&pos->note_list); + zfree(&pos->name); + zfree(&pos->provider); free(pos); nr_free++; } diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 17edbd4f6f85..3bc8b7e3300e 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -7,9 +7,10 @@ #include <stdio.h> #include <fcntl.h> #include <string.h> +#include <stdlib.h> #include <byteswap.h> #include <sys/stat.h> - +#include <linux/zalloc.h> static bool check_need_swap(int file_endian) { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 5cbad55cd99d..173f3378aaa0 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -25,7 +25,8 @@ #include "namespaces.h" #include "header.h" #include "path.h" -#include "sane_ctype.h" +#include <linux/ctype.h> +#include <linux/zalloc.h> #include <elf.h> #include <limits.h> @@ -1166,6 +1167,85 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) return 0; } +/* + * Merges map into map_groups by splitting 
the new map + * within the existing map regions. + */ +int map_groups__merge_in(struct map_groups *kmaps, struct map *new_map) +{ + struct map *old_map; + LIST_HEAD(merged); + + for (old_map = map_groups__first(kmaps); old_map; + old_map = map_groups__next(old_map)) { + + /* no overload with this one */ + if (new_map->end < old_map->start || + new_map->start >= old_map->end) + continue; + + if (new_map->start < old_map->start) { + /* + * |new...... + * |old.... + */ + if (new_map->end < old_map->end) { + /* + * |new......| -> |new..| + * |old....| -> |old....| + */ + new_map->end = old_map->start; + } else { + /* + * |new.............| -> |new..| |new..| + * |old....| -> |old....| + */ + struct map *m = map__clone(new_map); + + if (!m) + return -ENOMEM; + + m->end = old_map->start; + list_add_tail(&m->node, &merged); + new_map->start = old_map->end; + } + } else { + /* + * |new...... + * |old.... + */ + if (new_map->end < old_map->end) { + /* + * |new..| -> x + * |old.........| -> |old.........| + */ + map__put(new_map); + new_map = NULL; + break; + } else { + /* + * |new......| -> |new...| + * |old....| -> |old....| + */ + new_map->start = old_map->end; + } + } + } + + while (!list_empty(&merged)) { + old_map = list_entry(merged.next, struct map, node); + list_del_init(&old_map->node); + map_groups__insert(kmaps, old_map); + map__put(old_map); + } + + if (new_map) { + map_groups__insert(kmaps, new_map); + map__put(new_map); + } + return 0; +} + static int dso__load_kcore(struct dso *dso, struct map *map, const char *kallsyms_filename) { @@ -1222,7 +1302,12 @@ static int dso__load_kcore(struct dso *dso, struct map *map, while (old_map) { struct map *next = map_groups__next(old_map); - if (old_map != map) + /* + * We need to preserve eBPF maps even if they are + * covered by kcore, because we need to access + * eBPF dso for source data. 
+ */ + if (old_map != map && !__map__is_bpf_prog(old_map)) map_groups__remove(kmaps, old_map); old_map = next; } @@ -1256,11 +1341,16 @@ static int dso__load_kcore(struct dso *dso, struct map *map, map_groups__remove(kmaps, map); map_groups__insert(kmaps, map); map__put(map); + map__put(new_map); } else { - map_groups__insert(kmaps, new_map); + /* + * Merge kcore map into existing maps, + * and ensure that current maps (eBPF) + * stay intact. + */ + if (map_groups__merge_in(kmaps, new_map)) + goto out_err; } - - map__put(new_map); } if (machine__is(machine, "x86_64")) { @@ -2262,3 +2352,25 @@ struct mem_info *mem_info__new(void) refcount_set(&mi->refcnt, 1); return mi; } + +struct block_info *block_info__get(struct block_info *bi) +{ + if (bi) + refcount_inc(&bi->refcnt); + return bi; +} + +void block_info__put(struct block_info *bi) +{ + if (bi && refcount_dec_and_test(&bi->refcnt)) + free(bi); +} + +struct block_info *block_info__new(void) +{ + struct block_info *bi = zalloc(sizeof(*bi)); + + if (bi) + refcount_set(&bi->refcnt, 1); + return bi; +} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 9a8fe012910a..12755b42ea93 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -131,6 +131,17 @@ struct mem_info { refcount_t refcnt; }; +struct block_info { + struct symbol *sym; + u64 start; + u64 end; + u64 cycles; + u64 cycles_aggr; + int num; + int num_aggr; + refcount_t refcnt; +}; + struct addr_location { struct machine *machine; struct thread *thread; @@ -332,4 +343,16 @@ static inline void __mem_info__zput(struct mem_info **mi) #define mem_info__zput(mi) __mem_info__zput(&mi) +struct block_info *block_info__new(void); +struct block_info *block_info__get(struct block_info *bi); +void block_info__put(struct block_info *bi); + +static inline void __block_info__zput(struct block_info **bi) +{ + block_info__put(*bi); + *bi = NULL; +} + +#define block_info__zput(bi) __block_info__zput(&bi) + #endif /* __PERF_SYMBOL */ diff 
--git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index 6c55fa6fccec..e6880789864c 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -39,7 +39,9 @@ struct symbol_conf { hide_unresolved, raw_trace, report_hierarchy, - inline_name; + report_block, + inline_name, + disable_add2line_warn; const char *vmlinux_name, *kallsyms_name, *source_prefix, @@ -69,6 +71,7 @@ struct symbol_conf { *tid_list; const char *symfs; int res_sample; + int pad_output_len_dso; }; extern struct symbol_conf symbol_conf; diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 3393d7ee9401..022a9c670338 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -1,16 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * System call table mapper * * (C) 2016 Arnaldo Carvalho de Melo <acme@redhat.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #include "syscalltbl.h" @@ -18,9 +10,9 @@ #include <linux/compiler.h> #ifdef HAVE_SYSCALL_TABLE_SUPPORT +#include <linux/zalloc.h> #include <string.h> #include "string2.h" -#include "util.h" #if defined(__x86_64__) #include <asm/syscalls_64.c> diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index 21c4d9b23c24..3adc65480349 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Helper functions for handling target threads/cpus * * Copyright (C) 2012, LG Electronics, Namhyung Kim <namhyung.kim@lge.com> - * - * Released under the GPL v2. 
*/ #include "target.h" @@ -11,9 +10,9 @@ #include "debug.h" #include <pwd.h> +#include <stdlib.h> #include <string.h> - enum target_errno target__validate(struct target *target) { enum target_errno ret = TARGET_ERRNO__SUCCESS; diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 41942c2aaa18..15134ac9b8f1 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -1,27 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * thread-stack.c: Synthesize a thread's stack using call / return events * Copyright (c) 2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #include <linux/rbtree.h> #include <linux/list.h> #include <linux/log2.h> +#include <linux/zalloc.h> #include <errno.h> +#include <stdlib.h> #include "thread.h" #include "event.h" #include "machine.h" #include "env.h" -#include "util.h" #include "debug.h" #include "symbol.h" #include "comm.h" @@ -49,6 +41,8 @@ enum retpoline_state_t { * @timestamp: timestamp (if known) * @ref: external reference (e.g. 
db_id of sample) * @branch_count: the branch count when the entry was created + * @insn_count: the instruction count when the entry was created + * @cyc_count the cycle count when the entry was created * @db_id: id used for db-export * @cp: call path * @no_call: a 'call' was not seen @@ -60,6 +54,8 @@ struct thread_stack_entry { u64 timestamp; u64 ref; u64 branch_count; + u64 insn_count; + u64 cyc_count; u64 db_id; struct call_path *cp; bool no_call; @@ -75,6 +71,8 @@ struct thread_stack_entry { * @sz: current maximum stack size * @trace_nr: current trace number * @branch_count: running branch count + * @insn_count: running instruction count + * @cyc_count running cycle count * @kernel_start: kernel start address * @last_time: last timestamp * @crp: call/return processor @@ -88,6 +86,8 @@ struct thread_stack { size_t sz; u64 trace_nr; u64 branch_count; + u64 insn_count; + u64 cyc_count; u64 kernel_start; u64 last_time; struct call_return_processor *crp; @@ -289,6 +289,8 @@ static int thread_stack__call_return(struct thread *thread, cr.call_time = tse->timestamp; cr.return_time = timestamp; cr.branch_count = ts->branch_count - tse->branch_count; + cr.insn_count = ts->insn_count - tse->insn_count; + cr.cyc_count = ts->cyc_count - tse->cyc_count; cr.db_id = tse->db_id; cr.call_ref = tse->ref; cr.return_ref = ref; @@ -544,6 +546,8 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, tse->timestamp = timestamp; tse->ref = ref; tse->branch_count = ts->branch_count; + tse->insn_count = ts->insn_count; + tse->cyc_count = ts->cyc_count; tse->cp = cp; tse->no_call = no_call; tse->trace_end = trace_end; @@ -625,6 +629,23 @@ static int thread_stack__bottom(struct thread_stack *ts, true, false); } +static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts, + struct perf_sample *sample, u64 ref) +{ + u64 tm = sample->time; + int err; + + /* Return to userspace, so pop all kernel addresses */ + while (thread_stack__in_kernel(ts)) { + err 
= thread_stack__call_return(thread, ts, --ts->cnt, + tm, ref, true); + if (err) + return err; + } + + return 0; +} + static int thread_stack__no_call_return(struct thread *thread, struct thread_stack *ts, struct perf_sample *sample, @@ -644,12 +665,9 @@ static int thread_stack__no_call_return(struct thread *thread, if (ip >= ks && addr < ks) { /* Return to userspace, so pop all kernel addresses */ - while (thread_stack__in_kernel(ts)) { - err = thread_stack__call_return(thread, ts, --ts->cnt, - tm, ref, true); - if (err) - return err; - } + err = thread_stack__pop_ks(thread, ts, sample, ref); + if (err) + return err; /* If the stack is empty, push the userspace address */ if (!ts->cnt) { @@ -659,12 +677,9 @@ static int thread_stack__no_call_return(struct thread *thread, } } else if (thread_stack__in_kernel(ts) && ip < ks) { /* Return to userspace, so pop all kernel addresses */ - while (thread_stack__in_kernel(ts)) { - err = thread_stack__call_return(thread, ts, --ts->cnt, - tm, ref, true); - if (err) - return err; - } + err = thread_stack__pop_ks(thread, ts, sample, ref); + if (err) + return err; } if (ts->cnt) @@ -874,6 +889,8 @@ int thread_stack__process(struct thread *thread, struct comm *comm, } ts->branch_count += 1; + ts->insn_count += sample->insn_cnt; + ts->cyc_count += sample->cyc_cnt; ts->last_time = sample->time; if (sample->flags & PERF_IP_FLAG_CALL) { @@ -905,7 +922,18 @@ int thread_stack__process(struct thread *thread, struct comm *comm, ts->rstate = X86_RETPOLINE_DETECTED; } else if (sample->flags & PERF_IP_FLAG_RETURN) { - if (!sample->ip || !sample->addr) + if (!sample->addr) { + u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET | + PERF_IP_FLAG_INTERRUPT; + + if (!(sample->flags & return_from_kernel)) + return 0; + + /* Pop kernel stack */ + return thread_stack__pop_ks(thread, ts, sample, ref); + } + + if (!sample->ip) return 0; /* x86 retpoline 'return' doesn't match the stack */ diff --git a/tools/perf/util/thread-stack.h 
b/tools/perf/util/thread-stack.h index 9c45f947f5a9..e1ec5a58f1b2 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -1,16 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * thread-stack.h: Synthesize a thread's stack using call / return events * Copyright (c) 2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #ifndef __PERF_THREAD_STACK_H @@ -52,6 +43,8 @@ enum { * @call_time: timestamp of call (if known) * @return_time: timestamp of return (if known) * @branch_count: number of branches seen between call and return + * @insn_count: approx. number of instructions between call and return + * @cyc_count: approx. number of cycles between call and return * @call_ref: external reference to 'call' sample (e.g. db_id) * @return_ref: external reference to 'return' sample (e.g. 
db_id) * @db_id: id used for db-export @@ -65,6 +58,8 @@ struct call_return { u64 call_time; u64 return_time; u64 branch_count; + u64 insn_count; + u64 cyc_count; u64 call_ref; u64 return_ref; u64 db_id; diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 50678d318185..873ab505ca80 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -5,16 +5,17 @@ #include <stdio.h> #include <string.h> #include <linux/kernel.h> +#include <linux/zalloc.h> #include "session.h" #include "thread.h" #include "thread-stack.h" -#include "util.h" #include "debug.h" #include "namespaces.h" #include "comm.h" #include "map.h" #include "symbol.h" #include "unwind.h" +#include "callchain.h" #include <api/fs/fs.h> @@ -92,14 +93,14 @@ void thread__delete(struct thread *thread) down_write(&thread->namespaces_lock); list_for_each_entry_safe(namespaces, tmp_namespaces, &thread->namespaces_list, list) { - list_del(&namespaces->list); + list_del_init(&namespaces->list); namespaces__free(namespaces); } up_write(&thread->namespaces_lock); down_write(&thread->comm_lock); list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) { - list_del(&comm->list); + list_del_init(&comm->list); comm__free(comm); } up_write(&thread->comm_lock); @@ -124,15 +125,32 @@ void thread__put(struct thread *thread) { if (thread && refcount_dec_and_test(&thread->refcnt)) { /* - * Remove it from the dead_threads list, as last reference - * is gone. + * Remove it from the dead threads list, as last reference is + * gone, if it is in a dead threads list. + * + * We may not be there anymore if say, the machine where it was + * stored was already deleted, so we already removed it from + * the dead threads and some other piece of code still keeps a + * reference. 
+ * + * This is what 'perf sched' does and finally drops it in + * perf_sched__lat(), where it calls perf_sched__read_events(), + * that processes the events by creating a session and deleting + * it, which ends up destroying the list heads for the dead + * threads, but before it does that it removes all threads from + * it using list_del_init(). + * + * So we need to check here if it is in a dead threads list and + * if so, remove it before finally deleting the thread, to avoid + * an use after free situation. */ - list_del_init(&thread->node); + if (!list_empty(&thread->node)) + list_del_init(&thread->node); thread__delete(thread); } } -struct namespaces *thread__namespaces(const struct thread *thread) +static struct namespaces *__thread__namespaces(const struct thread *thread) { if (list_empty(&thread->namespaces_list)) return NULL; @@ -140,10 +158,21 @@ struct namespaces *thread__namespaces(const struct thread *thread) return list_first_entry(&thread->namespaces_list, struct namespaces, list); } +struct namespaces *thread__namespaces(struct thread *thread) +{ + struct namespaces *ns; + + down_read(&thread->namespaces_lock); + ns = __thread__namespaces(thread); + up_read(&thread->namespaces_lock); + + return ns; +} + static int __thread__set_namespaces(struct thread *thread, u64 timestamp, struct namespaces_event *event) { - struct namespaces *new, *curr = thread__namespaces(thread); + struct namespaces *new, *curr = __thread__namespaces(thread); new = namespaces__new(event); if (!new) @@ -259,13 +288,13 @@ static const char *__thread__comm_str(const struct thread *thread) return comm__str(comm); } -const char *thread__comm_str(const struct thread *thread) +const char *thread__comm_str(struct thread *thread) { const char *str; - down_read((struct rw_semaphore *)&thread->comm_lock); + down_read(&thread->comm_lock); str = __thread__comm_str(thread); - up_read((struct rw_semaphore *)&thread->comm_lock); + up_read(&thread->comm_lock); return str; } @@ -327,7 +356,7 
@@ static int thread__prepare_access(struct thread *thread) { int err = 0; - if (symbol_conf.use_callchain) + if (dwarf_callchain_users) err = __thread__prepare_access(thread); return err; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index cf8375c017a0..e97ef6977eb9 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -76,7 +76,7 @@ static inline void thread__exited(struct thread *thread) thread->dead = true; } -struct namespaces *thread__namespaces(const struct thread *thread); +struct namespaces *thread__namespaces(struct thread *thread); int thread__set_namespaces(struct thread *thread, u64 timestamp, struct namespaces_event *event); @@ -93,7 +93,7 @@ int thread__set_comm_from_proc(struct thread *thread); int thread__comm_len(struct thread *thread); struct comm *thread__comm(const struct thread *thread); struct comm *thread__exec_comm(const struct thread *thread); -const char *thread__comm_str(const struct thread *thread); +const char *thread__comm_str(struct thread *thread); int thread__insert_map(struct thread *thread, struct map *map); int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone); size_t thread__fprintf(struct thread *thread, FILE *fp); diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 5d467d8ae9ab..5b3511f2b6b1 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -12,9 +12,10 @@ #include "strlist.h" #include <string.h> #include <api/fs/fs.h> +#include <linux/string.h> +#include <linux/zalloc.h> #include "asm/bug.h" #include "thread_map.h" -#include "util.h" #include "debug.h" #include "event.h" @@ -392,7 +393,7 @@ static int get_comm(char **comm, pid_t pid) * mark the end of the string. */ (*comm)[size] = 0; - rtrim(*comm); + strim(*comm); } free(path); @@ -479,7 +480,7 @@ int thread_map__remove(struct thread_map *threads, int idx) /* * Free the 'idx' item and shift the rest up. 
*/ - free(threads->map[idx].comm); + zfree(&threads->map[idx].comm); for (i = idx; i < threads->nr - 1; i++) threads->map[i] = threads->map[i + 1]; diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 20663a460df3..c2abc259b51d 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -1,12 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdlib.h> #include <string.h> +#include <linux/string.h> #include <sys/time.h> #include <linux/time64.h> #include <time.h> #include <errno.h> #include <inttypes.h> #include <math.h> +#include <linux/ctype.h> #include "perf.h" #include "debug.h" @@ -116,6 +118,66 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) return rc; } +static int perf_time__parse_strs(struct perf_time_interval *ptime, + const char *ostr, int size) +{ + const char *cp; + char *str, *arg, *p; + int i, num = 0, rc = 0; + + /* Count the commas */ + for (cp = ostr; *cp; cp++) + num += !!(*cp == ','); + + if (!num) + return -EINVAL; + + BUG_ON(num > size); + + str = strdup(ostr); + if (!str) + return -ENOMEM; + + /* Split the string and parse each piece, except the last */ + for (i = 0, p = str; i < num - 1; i++) { + arg = p; + /* Find next comma, there must be one */ + p = skip_spaces(strchr(p, ',') + 1); + /* Skip the value, must not contain space or comma */ + while (*p && !isspace(*p)) { + if (*p++ == ',') { + rc = -EINVAL; + goto out; + } + } + /* Split and parse */ + if (*p) + *p++ = 0; + rc = perf_time__parse_str(ptime + i, arg); + if (rc < 0) + goto out; + } + + /* Parse the last piece */ + rc = perf_time__parse_str(ptime + i, p); + if (rc < 0) + goto out; + + /* Check there is no overlap */ + for (i = 0; i < num - 1; i++) { + if (ptime[i].end >= ptime[i + 1].start) { + rc = -EINVAL; + goto out; + } + } + + rc = num; +out: + free(str); + + return rc; +} + static int parse_percent(double *pcnt, char *str) { char *c, *endptr; @@ -135,12 +197,30 @@ static int 
parse_percent(double *pcnt, char *str) return 0; } +static int set_percent_time(struct perf_time_interval *ptime, double start_pcnt, + double end_pcnt, u64 start, u64 end) +{ + u64 total = end - start; + + if (start_pcnt < 0.0 || start_pcnt > 1.0 || + end_pcnt < 0.0 || end_pcnt > 1.0) { + return -1; + } + + ptime->start = start + round(start_pcnt * total); + ptime->end = start + round(end_pcnt * total); + + if (ptime->end > ptime->start && ptime->end != end) + ptime->end -= 1; + + return 0; +} + static int percent_slash_split(char *str, struct perf_time_interval *ptime, u64 start, u64 end) { char *p, *end_str; double pcnt, start_pcnt, end_pcnt; - u64 total = end - start; int i; /* @@ -168,15 +248,7 @@ static int percent_slash_split(char *str, struct perf_time_interval *ptime, start_pcnt = pcnt * (i - 1); end_pcnt = pcnt * i; - if (start_pcnt < 0.0 || start_pcnt > 1.0 || - end_pcnt < 0.0 || end_pcnt > 1.0) { - return -1; - } - - ptime->start = start + round(start_pcnt * total); - ptime->end = start + round(end_pcnt * total); - - return 0; + return set_percent_time(ptime, start_pcnt, end_pcnt, start, end); } static int percent_dash_split(char *str, struct perf_time_interval *ptime, @@ -184,7 +256,6 @@ static int percent_dash_split(char *str, struct perf_time_interval *ptime, { char *start_str = NULL, *end_str; double start_pcnt, end_pcnt; - u64 total = end - start; int ret; /* @@ -203,16 +274,7 @@ static int percent_dash_split(char *str, struct perf_time_interval *ptime, free(start_str); - if (start_pcnt < 0.0 || start_pcnt > 1.0 || - end_pcnt < 0.0 || end_pcnt > 1.0 || - start_pcnt > end_pcnt) { - return -1; - } - - ptime->start = start + round(start_pcnt * total); - ptime->end = start + round(end_pcnt * total); - - return 0; + return set_percent_time(ptime, start_pcnt, end_pcnt, start, end); } typedef int (*time_pecent_split)(char *, struct perf_time_interval *, @@ -389,13 +451,12 @@ bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, ptime = 
&ptime_buf[i]; if (timestamp >= ptime->start && - ((timestamp < ptime->end && i < num - 1) || - (timestamp <= ptime->end && i == num - 1))) { - break; + (timestamp <= ptime->end || !ptime->end)) { + return false; } } - return (i == num) ? true : false; + return true; } int perf_time__parse_for_ranges(const char *time_str, @@ -403,20 +464,20 @@ int perf_time__parse_for_ranges(const char *time_str, struct perf_time_interval **ranges, int *range_size, int *range_num) { + bool has_percent = strchr(time_str, '%'); struct perf_time_interval *ptime_range; - int size, num, ret; + int size, num, ret = -EINVAL; ptime_range = perf_time__range_alloc(time_str, &size); if (!ptime_range) return -ENOMEM; - if (perf_time__parse_str(ptime_range, time_str) != 0) { + if (has_percent) { if (session->evlist->first_sample_time == 0 && session->evlist->last_sample_time == 0) { pr_err("HINT: no first/last sample time found in perf data.\n" "Please use latest perf binary to execute 'perf record'\n" "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n"); - ret = -EINVAL; goto error; } @@ -425,21 +486,20 @@ int perf_time__parse_for_ranges(const char *time_str, time_str, session->evlist->first_sample_time, session->evlist->last_sample_time); - - if (num < 0) { - pr_err("Invalid time string\n"); - ret = -EINVAL; - goto error; - } } else { - num = 1; + num = perf_time__parse_strs(ptime_range, time_str, size); } + if (num < 0) + goto error_invalid; + *range_size = size; *range_num = num; *ranges = ptime_range; return 0; +error_invalid: + pr_err("Invalid time string\n"); error: free(ptime_range); return ret; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 250391672f9f..9096a6e3de59 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -28,6 +28,7 @@ typedef int (*event_attr_op)(struct perf_tool *tool, typedef int (*event_op2)(struct perf_session *session, union perf_event *event); typedef s64 (*event_op3)(struct perf_session *session, union 
perf_event *event); +typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data); typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event, struct ordered_events *oe); @@ -72,6 +73,7 @@ struct perf_tool { stat, stat_round, feature; + event_op4 compressed; event_op3 auxtrace; bool ordered_events; bool ordering_requires_timestamps; diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 4c8da8c4435f..251bbf124fb0 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> * * Refactored from builtin-top.c, see that files for further copyright notes. - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "cpumap.h" diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 8ad8e755127b..4550015b9d5d 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -1,22 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2008,2009, Steven Rostedt <srostedt@redhat.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include "util.h" #include <dirent.h> @@ -34,6 +18,7 @@ #include <stdbool.h> #include <linux/list.h> #include <linux/kernel.h> +#include <linux/zalloc.h> #include "../perf.h" #include "trace-event.h" diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index ad74be1f0e42..b3982e1bb4c5 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -1,22 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2009, Steven Rostedt <srostedt@redhat.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <stdio.h> #include <stdlib.h> @@ -27,7 +11,7 @@ #include "debug.h" #include "trace-event.h" -#include "sane_ctype.h" +#include <linux/ctype.h> static int get_common_field(struct scripting_context *context, int *offset, int *size, const char *type) @@ -111,7 +95,7 @@ raw_field_value(struct tep_event *event, const char *name, void *data) unsigned long long read_size(struct tep_event *event, void *ptr, int size) { - return tep_read_number(event->pevent, ptr, size); + return tep_read_number(event->tep, ptr, size); } void event_format__fprintf(struct tep_event *event, diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index efe2f58cff4e..13c1cf60d1bc 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -1,22 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2009, Steven Rostedt <srostedt@redhat.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <dirent.h> #include <stdio.h> @@ -442,7 +426,7 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe) tep_set_flag(pevent, TEP_NSEC_OUTPUT); tep_set_file_bigendian(pevent, file_bigendian); - tep_set_host_bigendian(pevent, host_bigendian); + tep_set_local_bigendian(pevent, host_bigendian); if (do_read(buf, 1) < 0) goto out; diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index b749f812ac70..ba58f69777a1 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * trace-event-scripting. Scripting engine common and initialization code. * * Copyright (C) 2009-2010 Tom Zanussi <tzanussi@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * */ #include <stdio.h> @@ -26,8 +12,8 @@ #include "../perf.h" #include "debug.h" -#include "util.h" #include "trace-event.h" +#include <linux/zalloc.h> struct scripting_context *scripting_context; diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index cbe0dd758e3a..01b9d89bf5bf 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -40,7 +40,7 @@ int trace_event__init(struct trace_event *t) static int trace_event__init2(void) { - int be = tep_host_bigendian(); + int be = tep_is_bigendian(); struct tep_handle *pevent; if (trace_event__init(&tevent)) @@ -49,7 +49,7 @@ static int trace_event__init2(void) pevent = tevent.pevent; tep_set_flag(pevent, TEP_NSEC_OUTPUT); tep_set_file_bigendian(pevent, be); - tep_set_host_bigendian(pevent, be); + tep_set_local_bigendian(pevent, be); tevent_initialized = true; return 0; } diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 407d0167b942..28f71ca6ce1c 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -12,6 +12,7 @@ #include "symbol.h" #include "thread.h" #include <linux/types.h> +#include <linux/zalloc.h> #include "event.h" #include "perf_regs.h" #include "callchain.h" diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index f3c666a84e4d..71a788921b62 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -25,6 +25,7 @@ #include <unistd.h> #include <sys/mman.h> #include <linux/list.h> +#include <linux/zalloc.h> #ifndef REMOTE_UNWIND_LIBUNWIND #include <libunwind.h> #include <libunwind-ptrace.h> @@ -345,7 +346,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso, __func__, dso->symsrc_filename, 
debuglink); - free(dso->symsrc_filename); + zfree(&dso->symsrc_filename); } dso->symsrc_filename = debuglink; } else { @@ -617,8 +618,6 @@ static unw_accessors_t accessors = { static int _unwind__prepare_access(struct thread *thread) { - if (!dwarf_callchain_users) - return 0; thread->addr_space = unw_create_addr_space(&accessors, 0); if (!thread->addr_space) { pr_err("unwind: Can't create unwind address space.\n"); @@ -631,15 +630,11 @@ static int _unwind__prepare_access(struct thread *thread) static void _unwind__flush_access(struct thread *thread) { - if (!dwarf_callchain_users) - return; unw_flush_cache(thread->addr_space, 0, 0); } static void _unwind__finish_access(struct thread *thread) { - if (!dwarf_callchain_users) - return; unw_destroy_addr_space(thread->addr_space); } diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 9778b3133b77..c0811977d7d5 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -5,6 +5,7 @@ #include "session.h" #include "debug.h" #include "env.h" +#include "callchain.h" struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops; struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops; @@ -24,6 +25,9 @@ int unwind__prepare_access(struct thread *thread, struct map *map, struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops; int err; + if (!dwarf_callchain_users) + return 0; + if (thread->addr_space) { pr_debug("unwind: thread map already set, dso=%s\n", map->dso->name); @@ -65,12 +69,18 @@ out_register: void unwind__flush_access(struct thread *thread) { + if (!dwarf_callchain_users) + return; + if (thread->unwind_libunwind_ops) thread->unwind_libunwind_ops->flush_access(thread); } void unwind__finish_access(struct thread *thread) { + if (!dwarf_callchain_users) + return; + if (thread->unwind_libunwind_ops) thread->unwind_libunwind_ops->finish_access(thread); } diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index 
070d25ceea6a..3949a60b00ae 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -9,6 +9,9 @@ */ #include "util.h" #include "debug.h" +#include <stdio.h> +#include <stdlib.h> +#include <linux/compiler.h> static __noreturn void usage_builtin(const char *err) { diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index d388f80d8703..a61535cf1bca 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -434,19 +434,6 @@ size_t hex_width(u64 v) return n; } -/* - * While we find nice hex chars, build a long_val. - * Return number of chars processed. - */ -int hex2u64(const char *ptr, u64 *long_val) -{ - char *p; - - *long_val = strtoull(ptr, &p, 16); - - return p - ptr; -} - int perf_event_paranoid(void) { int value; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 09c1b0f91f65..dc7a469921e9 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -9,8 +9,6 @@ #include <fcntl.h> #include <stdbool.h> #include <stddef.h> -#include <stdlib.h> -#include <stdarg.h> #include <linux/compiler.h> #include <sys/types.h> @@ -18,13 +16,6 @@ void usage(const char *err) __noreturn; void die(const char *err, ...) 
__noreturn __printf(1, 2); -static inline void *zalloc(size_t size) -{ - return calloc(1, size); -} - -#define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) - struct dirent; struct nsinfo; struct strlist; @@ -43,7 +34,6 @@ ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, const void *buf, size_t n); size_t hex_width(u64 v); -int hex2u64(const char *ptr, u64 *val); extern unsigned int page_size; int __pure cacheline_size(void); @@ -60,18 +50,10 @@ int fetch_kernel_version(unsigned int *puint, const char *perf_tip(const char *dirpath); -#ifndef HAVE_GET_CURRENT_DIR_NAME -char *get_current_dir_name(void); -#endif - #ifndef HAVE_SCHED_GETCPU_SUPPORT int sched_getcpu(void); #endif -#ifndef HAVE_SETNS_SUPPORT -int setns(int fd, int nstype); -#endif - extern bool perf_singlethreaded; void perf_set_singlethreaded(void); diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 4b7a303e4ba8..c59154e2d124 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -3,8 +3,8 @@ #include <stdio.h> #include <stdlib.h> #include <errno.h> +#include <linux/zalloc.h> -#include "util.h" #include "values.h" #include "debug.h" diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 5031b7b22bbd..7f427bab6c12 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -16,6 +16,7 @@ #include "machine.h" #include "thread.h" #include "linux/string.h" +#include <linux/zalloc.h> #include "debug.h" /* diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c index dc95154f5646..86889ebc3514 100644 --- a/tools/perf/util/xyarray.c +++ b/tools/perf/util/xyarray.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include "xyarray.h" -#include "util.h" #include <stdlib.h> #include <string.h> +#include <linux/zalloc.h> struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size) { diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c new file mode 100644 index 000000000000..23bdb9884576 --- /dev/null +++ 
b/tools/perf/util/zstd.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <string.h> + +#include "util/compress.h" +#include "util/debug.h" + +int zstd_init(struct zstd_data *data, int level) +{ + size_t ret; + + data->dstream = ZSTD_createDStream(); + if (data->dstream == NULL) { + pr_err("Couldn't create decompression stream.\n"); + return -1; + } + + ret = ZSTD_initDStream(data->dstream); + if (ZSTD_isError(ret)) { + pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret)); + return -1; + } + + if (!level) + return 0; + + data->cstream = ZSTD_createCStream(); + if (data->cstream == NULL) { + pr_err("Couldn't create compression stream.\n"); + return -1; + } + + ret = ZSTD_initCStream(data->cstream, level); + if (ZSTD_isError(ret)) { + pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret)); + return -1; + } + + return 0; +} + +int zstd_fini(struct zstd_data *data) +{ + if (data->dstream) { + ZSTD_freeDStream(data->dstream); + data->dstream = NULL; + } + + if (data->cstream) { + ZSTD_freeCStream(data->cstream); + data->cstream = NULL; + } + + return 0; +} + +size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, + void *src, size_t src_size, size_t max_record_size, + size_t process_header(void *record, size_t increment)) +{ + size_t ret, size, compressed = 0; + ZSTD_inBuffer input = { src, src_size, 0 }; + ZSTD_outBuffer output; + void *record; + + while (input.pos < input.size) { + record = dst; + size = process_header(record, 0); + compressed += size; + dst += size; + dst_size -= size; + output = (ZSTD_outBuffer){ dst, (dst_size > max_record_size) ? 
+ max_record_size : dst_size, 0 }; + ret = ZSTD_compressStream(data->cstream, &output, &input); + ZSTD_flushStream(data->cstream, &output); + if (ZSTD_isError(ret)) { + pr_err("failed to compress %ld bytes: %s\n", + (long)src_size, ZSTD_getErrorName(ret)); + memcpy(dst, src, src_size); + return src_size; + } + size = output.pos; + size = process_header(record, size); + compressed += size; + dst += size; + dst_size -= size; + } + + return compressed; +} + +size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size, + void *dst, size_t dst_size) +{ + size_t ret; + ZSTD_inBuffer input = { src, src_size, 0 }; + ZSTD_outBuffer output = { dst, dst_size, 0 }; + + while (input.pos < input.size) { + ret = ZSTD_decompressStream(data->dstream, &output, &input); + if (ZSTD_isError(ret)) { + pr_err("failed to decompress (B): %ld -> %ld : %s\n", + src_size, output.size, ZSTD_getErrorName(ret)); + break; + } + output.dst = dst + output.pos; + output.size = dst_size - output.pos; + } + + return output.pos; +} |