aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/pmu-events/arch/x86/jaketown
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/pmu-events/arch/x86/jaketown')
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/cache.json6
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json150
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/pipeline.json12
3 files changed, 128 insertions, 40 deletions
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/cache.json b/tools/perf/pmu-events/arch/x86/jaketown/cache.json
index ee22e4a5e30d..52dc6ef40e63 100644
--- a/tools/perf/pmu-events/arch/x86/jaketown/cache.json
+++ b/tools/perf/pmu-events/arch/x86/jaketown/cache.json
@@ -31,7 +31,7 @@
},
{
"PEBS": "1",
- "PublicDescription": "This event counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+ "PublicDescription": "This event counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"EventCode": "0xD0",
"Counter": "0,1,2,3",
"UMask": "0x41",
@@ -42,7 +42,7 @@
},
{
"PEBS": "1",
- "PublicDescription": "This event counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+ "PublicDescription": "This event counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
"EventCode": "0xD0",
"Counter": "0,1,2,3",
"UMask": "0x42",
@@ -179,7 +179,7 @@
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "This event counts L1D data line replacements. Replacements occur when a new line is brought into the cache, causing eviction of a line loaded earlier. ",
+ "PublicDescription": "This event counts L1D data line replacements. Replacements occur when a new line is brought into the cache, causing eviction of a line loaded earlier.",
"EventCode": "0x51",
"Counter": "0,1,2,3",
"UMask": "0x1",
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
index fd7d7c438226..98c73e430b05 100644
--- a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
@@ -1,140 +1,232 @@
[
{
- "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)",
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.",
+ "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "Frontend_Bound"
+ },
+ {
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricGroup": "TopdownL1_SMT",
+ "MetricName": "Frontend_Bound_SMT"
+ },
+ {
+ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)",
+ "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
+ "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "Bad_Speculation"
+ },
+ {
+ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricGroup": "TopdownL1_SMT",
+ "MetricName": "Bad_Speculation_SMT"
+ },
+ {
+ "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )",
+ "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
+ "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "Backend_Bound"
+ },
+ {
+ "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )",
+ "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricGroup": "TopdownL1_SMT",
+ "MetricName": "Backend_Bound_SMT"
+ },
+ {
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)",
+ "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ",
+ "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "Retiring"
+ },
+ {
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricGroup": "TopdownL1_SMT",
+ "MetricName": "Retiring_SMT"
+ },
+ {
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
"MetricGroup": "TopDownL1",
"MetricName": "IPC"
},
{
- "BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
- "MetricGroup": "Pipeline",
+ "BriefDescription": "Uops Per Instruction",
+ "MetricGroup": "Pipeline;Retiring",
"MetricName": "UPI"
},
{
- "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
- "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
- "MetricGroup": "Frontend",
+ "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4 ) )",
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions",
+ "MetricGroup": "PGO",
"MetricName": "IFetch_Line_Utilization"
},
{
- "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
- "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
- "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )",
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+ "MetricGroup": "DSB;Frontend_Bandwidth",
"MetricName": "DSB_Coverage"
},
{
- "BriefDescription": "Cycles Per Instruction (threaded)",
"MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+ "BriefDescription": "Cycles Per Instruction (threaded)",
"MetricGroup": "Pipeline;Summary",
"MetricName": "CPI"
},
{
- "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active.",
"MetricGroup": "Summary",
"MetricName": "CLKS"
},
{
- "BriefDescription": "Total issue-pipeline slots",
- "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricExpr": "4 * cycles",
+ "BriefDescription": "Total issue-pipeline slots (per core)",
"MetricGroup": "TopDownL1",
"MetricName": "SLOTS"
},
{
- "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "BriefDescription": "Total issue-pipeline slots (per core)",
+ "MetricGroup": "TopDownL1_SMT",
+ "MetricName": "SLOTS_SMT"
+ },
+ {
"MetricExpr": "INST_RETIRED.ANY",
+ "BriefDescription": "Total number of retired Instructions",
"MetricGroup": "Summary",
"MetricName": "Instructions"
},
{
+ "MetricExpr": "INST_RETIRED.ANY / cycles",
"BriefDescription": "Instructions Per Cycle (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
"MetricGroup": "SMT",
"MetricName": "CoreIPC"
},
{
+ "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC_SMT"
+ },
+ {
+ "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / cycles",
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricGroup": "FLOPS",
+ "MetricName": "FLOPc"
+ },
+ {
+ "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricGroup": "FLOPS_SMT",
+ "MetricName": "FLOPc_SMT"
+ },
+ {
+ "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
- "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
"MetricGroup": "Pipeline;Ports_Utilization",
"MetricName": "ILP"
},
{
+ "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
"BriefDescription": "Core actual clocks when any thread is active on the physical core",
- "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "SMT",
"MetricName": "CORE_CLKS"
},
{
- "BriefDescription": "Average CPU Utilization",
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "BriefDescription": "Average CPU Utilization",
"MetricGroup": "Summary",
"MetricName": "CPU_Utilization"
},
{
+ "MetricExpr": "( (( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / 1000000000 ) / duration_time",
"BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
"MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs"
},
{
- "BriefDescription": "Average Frequency Utilization relative nominal frequency",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
"MetricGroup": "Power",
"MetricName": "Turbo_Utilization"
},
{
- "BriefDescription": "Fraction of cycles where both hardware threads were active",
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
"MetricGroup": "SMT;Summary",
"MetricName": "SMT_2T_Utilization"
},
{
- "BriefDescription": "Fraction of cycles spent in Kernel mode",
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
{
- "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricGroup": "Memory_BW",
+ "MetricName": "DRAM_BW_Use"
+ },
+ {
+ "MetricExpr": "cbox_0@event\\=0x0@",
+ "BriefDescription": "Socket actual clocks when any core is active on that socket",
+ "MetricGroup": "",
+ "MetricName": "Socket_CLKS"
+ },
+ {
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
+ "BriefDescription": "C3 residency percent per core",
"MetricName": "C3_Core_Residency"
},
{
- "BriefDescription": "C6 residency percent per core",
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
+ "BriefDescription": "C6 residency percent per core",
"MetricName": "C6_Core_Residency"
},
{
- "BriefDescription": "C7 residency percent per core",
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
+ "BriefDescription": "C7 residency percent per core",
"MetricName": "C7_Core_Residency"
},
{
- "BriefDescription": "C2 residency percent per package",
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
+ "BriefDescription": "C2 residency percent per package",
"MetricName": "C2_Pkg_Residency"
},
{
- "BriefDescription": "C3 residency percent per package",
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
+ "BriefDescription": "C3 residency percent per package",
"MetricName": "C3_Pkg_Residency"
},
{
- "BriefDescription": "C6 residency percent per package",
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
+ "BriefDescription": "C6 residency percent per package",
"MetricName": "C6_Pkg_Residency"
},
{
- "BriefDescription": "C7 residency percent per package",
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
+ "BriefDescription": "C7 residency percent per package",
"MetricName": "C7_Pkg_Residency"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/pipeline.json b/tools/perf/pmu-events/arch/x86/jaketown/pipeline.json
index 34a519d9bfa0..783a5b4a67b1 100644
--- a/tools/perf/pmu-events/arch/x86/jaketown/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/jaketown/pipeline.json
@@ -1,7 +1,6 @@
[
{
- "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. ",
- "EventCode": "0x00",
+ "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers.",
"Counter": "Fixed counter 1",
"UMask": "0x1",
"EventName": "INST_RETIRED.ANY",
@@ -10,8 +9,7 @@
"CounterHTOff": "Fixed counter 1"
},
{
- "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ",
- "EventCode": "0x00",
+ "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
"Counter": "Fixed counter 2",
"UMask": "0x2",
"EventName": "CPU_CLK_UNHALTED.THREAD",
@@ -20,8 +18,7 @@
"CounterHTOff": "Fixed counter 2"
},
{
- "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ",
- "EventCode": "0x00",
+ "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
"Counter": "Fixed counter 3",
"UMask": "0x3",
"EventName": "CPU_CLK_UNHALTED.REF_TSC",
@@ -778,7 +775,7 @@
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
- "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store. See the table of not supported store forwards in the Intel? 64 and IA-32 Architectures Optimization Reference Manual. The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.",
+ "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceeding smaller uncompleted store. See the table of not supported store forwards in the Intel? 64 and IA-32 Architectures Optimization Reference Manual. The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.",
"EventCode": "0x03",
"Counter": "0,1,2,3",
"UMask": "0x2",
@@ -1098,7 +1095,6 @@
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
- "EventCode": "0x00",
"Counter": "Fixed counter 2",
"UMask": "0x2",
"AnyThread": "1",