aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/events/intel/lbr.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/events/intel/lbr.c')
-rw-r--r--arch/x86/events/intel/lbr.c502
1 files changed, 135 insertions, 367 deletions
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 8043213b75a5..8259d725054d 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -4,18 +4,9 @@
#include <asm/perf_event.h>
#include <asm/msr.h>
-#include <asm/insn.h>
#include "../perf_event.h"
-static const enum {
- LBR_EIP_FLAGS = 1,
- LBR_TSX = 2,
-} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
- [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
- [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
-};
-
/*
* Intel LBR_SELECT bits
* Intel Vol3a, April 2011, Section 16.7 Table 16-10
@@ -74,65 +65,6 @@ static const enum {
#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59))
/*
- * x86control flow change classification
- * x86control flow changes include branches, interrupts, traps, faults
- */
-enum {
- X86_BR_NONE = 0, /* unknown */
-
- X86_BR_USER = 1 << 0, /* branch target is user */
- X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
-
- X86_BR_CALL = 1 << 2, /* call */
- X86_BR_RET = 1 << 3, /* return */
- X86_BR_SYSCALL = 1 << 4, /* syscall */
- X86_BR_SYSRET = 1 << 5, /* syscall return */
- X86_BR_INT = 1 << 6, /* sw interrupt */
- X86_BR_IRET = 1 << 7, /* return from interrupt */
- X86_BR_JCC = 1 << 8, /* conditional */
- X86_BR_JMP = 1 << 9, /* jump */
- X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
- X86_BR_IND_CALL = 1 << 11,/* indirect calls */
- X86_BR_ABORT = 1 << 12,/* transaction abort */
- X86_BR_IN_TX = 1 << 13,/* in transaction */
- X86_BR_NO_TX = 1 << 14,/* not in transaction */
- X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
- X86_BR_CALL_STACK = 1 << 16,/* call stack */
- X86_BR_IND_JMP = 1 << 17,/* indirect jump */
-
- X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */
-
-};
-
-#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
-#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
-
-#define X86_BR_ANY \
- (X86_BR_CALL |\
- X86_BR_RET |\
- X86_BR_SYSCALL |\
- X86_BR_SYSRET |\
- X86_BR_INT |\
- X86_BR_IRET |\
- X86_BR_JCC |\
- X86_BR_JMP |\
- X86_BR_IRQ |\
- X86_BR_ABORT |\
- X86_BR_IND_CALL |\
- X86_BR_IND_JMP |\
- X86_BR_ZERO_CALL)
-
-#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
-
-#define X86_BR_ANY_CALL \
- (X86_BR_CALL |\
- X86_BR_IND_CALL |\
- X86_BR_ZERO_CALL |\
- X86_BR_SYSCALL |\
- X86_BR_IRQ |\
- X86_BR_INT)
-
-/*
* Intel LBR_CTL bits
*
* Hardware branch filter for Arch LBR
@@ -243,7 +175,7 @@ void intel_pmu_lbr_reset_64(void)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
wrmsrl(x86_pmu.lbr_from + i, 0);
wrmsrl(x86_pmu.lbr_to + i, 0);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ if (x86_pmu.lbr_has_info)
wrmsrl(x86_pmu.lbr_info + i, 0);
}
}
@@ -286,9 +218,9 @@ enum {
};
/*
- * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
- * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
- * TSX is not supported they have no consistent behavior:
+ * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x
+ * are the TSX flags when TSX is supported, but when TSX is not supported
+ * they have no consistent behavior:
*
* - For wrmsr(), bits 61:62 are considered part of the sign extension.
* - For HW updates (branch captures) bits 61:62 are always OFF and are not
@@ -296,7 +228,7 @@ enum {
*
* Therefore, if:
*
- * 1) LBR has TSX format
+ * 1) LBR format LBR_FORMAT_EIP_FLAGS2
* 2) CPU has no TSX support enabled
*
* ... then any value passed to wrmsr() must be sign extended to 63 bits and any
@@ -305,11 +237,10 @@ enum {
*/
static inline bool lbr_from_signext_quirk_needed(void)
{
- int lbr_format = x86_pmu.intel_cap.lbr_format;
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
boot_cpu_has(X86_FEATURE_RTM);
- return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
+ return !tsx_support;
}
static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -427,12 +358,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
void intel_pmu_lbr_restore(void *ctx)
{
- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
- int i;
- unsigned lbr_idx, mask;
+ bool need_info = x86_pmu.lbr_has_info;
u64 tos = task_ctx->tos;
+ unsigned lbr_idx, mask;
+ int i;
mask = x86_pmu.lbr_nr - 1;
for (i = 0; i < task_ctx->valid_lbrs; i++) {
@@ -444,7 +375,7 @@ void intel_pmu_lbr_restore(void *ctx)
lbr_idx = (tos - i) & mask;
wrlbr_from(lbr_idx, 0);
wrlbr_to(lbr_idx, 0);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ if (need_info)
wrlbr_info(lbr_idx, 0);
}
@@ -519,9 +450,9 @@ static void __intel_pmu_lbr_restore(void *ctx)
void intel_pmu_lbr_save(void *ctx)
{
- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
+ bool need_info = x86_pmu.lbr_has_info;
unsigned lbr_idx, mask;
u64 tos;
int i;
@@ -778,6 +709,7 @@ void intel_pmu_lbr_disable_all(void)
void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
unsigned long mask = x86_pmu.lbr_nr - 1;
+ struct perf_branch_entry *br = cpuc->lbr_entries;
u64 tos = intel_pmu_lbr_tos();
int i;
@@ -793,15 +725,11 @@ void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
- cpuc->lbr_entries[i].from = msr_lastbranch.from;
- cpuc->lbr_entries[i].to = msr_lastbranch.to;
- cpuc->lbr_entries[i].mispred = 0;
- cpuc->lbr_entries[i].predicted = 0;
- cpuc->lbr_entries[i].in_tx = 0;
- cpuc->lbr_entries[i].abort = 0;
- cpuc->lbr_entries[i].cycles = 0;
- cpuc->lbr_entries[i].type = 0;
- cpuc->lbr_entries[i].reserved = 0;
+ perf_clear_branch_entry_bitfields(br);
+
+ br->from = msr_lastbranch.from;
+ br->to = msr_lastbranch.to;
+ br++;
}
cpuc->lbr_stack.nr = i;
cpuc->lbr_stack.hw_idx = tos;
@@ -816,7 +744,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
bool need_info = false, call_stack = false;
unsigned long mask = x86_pmu.lbr_nr - 1;
- int lbr_format = x86_pmu.intel_cap.lbr_format;
+ struct perf_branch_entry *br = cpuc->lbr_entries;
u64 tos = intel_pmu_lbr_tos();
int i;
int out = 0;
@@ -831,9 +759,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
for (i = 0; i < num; i++) {
unsigned long lbr_idx = (tos - i) & mask;
u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
- int skip = 0;
u16 cycles = 0;
- int lbr_flags = lbr_desc[lbr_format];
from = rdlbr_from(lbr_idx, NULL);
to = rdlbr_to(lbr_idx, NULL);
@@ -845,37 +771,39 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (call_stack && !from)
break;
- if (lbr_format == LBR_FORMAT_INFO && need_info) {
- u64 info;
-
- info = rdlbr_info(lbr_idx, NULL);
- mis = !!(info & LBR_INFO_MISPRED);
- pred = !mis;
- in_tx = !!(info & LBR_INFO_IN_TX);
- abort = !!(info & LBR_INFO_ABORT);
- cycles = (info & LBR_INFO_CYCLES);
- }
-
- if (lbr_format == LBR_FORMAT_TIME) {
- mis = !!(from & LBR_FROM_FLAG_MISPRED);
- pred = !mis;
- skip = 1;
- cycles = ((to >> 48) & LBR_INFO_CYCLES);
-
- to = (u64)((((s64)to) << 16) >> 16);
- }
-
- if (lbr_flags & LBR_EIP_FLAGS) {
- mis = !!(from & LBR_FROM_FLAG_MISPRED);
- pred = !mis;
- skip = 1;
- }
- if (lbr_flags & LBR_TSX) {
- in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
- abort = !!(from & LBR_FROM_FLAG_ABORT);
- skip = 3;
+ if (x86_pmu.lbr_has_info) {
+ if (need_info) {
+ u64 info;
+
+ info = rdlbr_info(lbr_idx, NULL);
+ mis = !!(info & LBR_INFO_MISPRED);
+ pred = !mis;
+ cycles = (info & LBR_INFO_CYCLES);
+ if (x86_pmu.lbr_has_tsx) {
+ in_tx = !!(info & LBR_INFO_IN_TX);
+ abort = !!(info & LBR_INFO_ABORT);
+ }
+ }
+ } else {
+ int skip = 0;
+
+ if (x86_pmu.lbr_from_flags) {
+ mis = !!(from & LBR_FROM_FLAG_MISPRED);
+ pred = !mis;
+ skip = 1;
+ }
+ if (x86_pmu.lbr_has_tsx) {
+ in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+ abort = !!(from & LBR_FROM_FLAG_ABORT);
+ skip = 3;
+ }
+ from = (u64)((((s64)from) << skip) >> skip);
+
+ if (x86_pmu.lbr_to_cycles) {
+ cycles = ((to >> 48) & LBR_INFO_CYCLES);
+ to = (u64)((((s64)to) << 16) >> 16);
+ }
}
- from = (u64)((((s64)from) << skip) >> skip);
/*
* Some CPUs report duplicated abort records,
@@ -888,52 +816,54 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (abort && x86_pmu.lbr_double_abort && out > 0)
out--;
- cpuc->lbr_entries[out].from = from;
- cpuc->lbr_entries[out].to = to;
- cpuc->lbr_entries[out].mispred = mis;
- cpuc->lbr_entries[out].predicted = pred;
- cpuc->lbr_entries[out].in_tx = in_tx;
- cpuc->lbr_entries[out].abort = abort;
- cpuc->lbr_entries[out].cycles = cycles;
- cpuc->lbr_entries[out].type = 0;
- cpuc->lbr_entries[out].reserved = 0;
+ perf_clear_branch_entry_bitfields(br+out);
+ br[out].from = from;
+ br[out].to = to;
+ br[out].mispred = mis;
+ br[out].predicted = pred;
+ br[out].in_tx = in_tx;
+ br[out].abort = abort;
+ br[out].cycles = cycles;
out++;
}
cpuc->lbr_stack.nr = out;
cpuc->lbr_stack.hw_idx = tos;
}
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);
+
static __always_inline int get_lbr_br_type(u64 info)
{
- if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type)
- return 0;
+ int type = 0;
+
+ if (static_branch_likely(&x86_lbr_type))
+ type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
- return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+ return type;
}
static __always_inline bool get_lbr_mispred(u64 info)
{
- if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
- return 0;
+ bool mispred = 0;
- return !!(info & LBR_INFO_MISPRED);
-}
-
-static __always_inline bool get_lbr_predicted(u64 info)
-{
- if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
- return 0;
+ if (static_branch_likely(&x86_lbr_mispred))
+ mispred = !!(info & LBR_INFO_MISPRED);
- return !(info & LBR_INFO_MISPRED);
+ return mispred;
}
static __always_inline u16 get_lbr_cycles(u64 info)
{
+ u16 cycles = info & LBR_INFO_CYCLES;
+
if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
- !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))
- return 0;
+ (!static_branch_likely(&x86_lbr_cycles) ||
+ !(info & LBR_INFO_CYC_CNT_VALID)))
+ cycles = 0;
- return info & LBR_INFO_CYCLES;
+ return cycles;
}
static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
@@ -958,15 +888,16 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
to = rdlbr_to(i, lbr);
info = rdlbr_info(i, lbr);
+ perf_clear_branch_entry_bitfields(e);
+
e->from = from;
e->to = to;
e->mispred = get_lbr_mispred(info);
- e->predicted = get_lbr_predicted(info);
+ e->predicted = !e->mispred;
e->in_tx = !!(info & LBR_INFO_IN_TX);
e->abort = !!(info & LBR_INFO_ABORT);
e->cycles = get_lbr_cycles(info);
e->type = get_lbr_br_type(info);
- e->reserved = 0;
}
cpuc->lbr_stack.nr = i;
@@ -1106,6 +1037,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
reg->config = mask;
+
+ /*
+ * The Arch LBR HW can retrieve the common branch types
+ * from the LBR_INFO. It doesn't require the high overhead
+ * SW disassemble.
+ * Enable the branch type by default for the Arch LBR.
+ */
+ reg->reg |= X86_BR_TYPE_SAVE;
return 0;
}
@@ -1120,7 +1059,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
(br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
- (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+ x86_pmu.lbr_has_info)
reg->config |= LBR_NO_INFO;
return 0;
@@ -1152,219 +1091,6 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
return ret;
}
-/*
- * return the type of control flow change at address "from"
- * instruction is not necessarily a branch (in case of interrupt).
- *
- * The branch type returned also includes the priv level of the
- * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
- *
- * If a branch type is unknown OR the instruction cannot be
- * decoded (e.g., text page not present), then X86_BR_NONE is
- * returned.
- */
-static int branch_type(unsigned long from, unsigned long to, int abort)
-{
- struct insn insn;
- void *addr;
- int bytes_read, bytes_left;
- int ret = X86_BR_NONE;
- int ext, to_plm, from_plm;
- u8 buf[MAX_INSN_SIZE];
- int is64 = 0;
-
- to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
- from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
-
- /*
- * maybe zero if lbr did not fill up after a reset by the time
- * we get a PMU interrupt
- */
- if (from == 0 || to == 0)
- return X86_BR_NONE;
-
- if (abort)
- return X86_BR_ABORT | to_plm;
-
- if (from_plm == X86_BR_USER) {
- /*
- * can happen if measuring at the user level only
- * and we interrupt in a kernel thread, e.g., idle.
- */
- if (!current->mm)
- return X86_BR_NONE;
-
- /* may fail if text not present */
- bytes_left = copy_from_user_nmi(buf, (void __user *)from,
- MAX_INSN_SIZE);
- bytes_read = MAX_INSN_SIZE - bytes_left;
- if (!bytes_read)
- return X86_BR_NONE;
-
- addr = buf;
- } else {
- /*
- * The LBR logs any address in the IP, even if the IP just
- * faulted. This means userspace can control the from address.
- * Ensure we don't blindly read any address by validating it is
- * a known text address.
- */
- if (kernel_text_address(from)) {
- addr = (void *)from;
- /*
- * Assume we can get the maximum possible size
- * when grabbing kernel data. This is not
- * _strictly_ true since we could possibly be
- * executing up next to a memory hole, but
- * it is very unlikely to be a problem.
- */
- bytes_read = MAX_INSN_SIZE;
- } else {
- return X86_BR_NONE;
- }
- }
-
- /*
- * decoder needs to know the ABI especially
- * on 64-bit systems running 32-bit apps
- */
-#ifdef CONFIG_X86_64
- is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs());
-#endif
- insn_init(&insn, addr, bytes_read, is64);
- if (insn_get_opcode(&insn))
- return X86_BR_ABORT;
-
- switch (insn.opcode.bytes[0]) {
- case 0xf:
- switch (insn.opcode.bytes[1]) {
- case 0x05: /* syscall */
- case 0x34: /* sysenter */
- ret = X86_BR_SYSCALL;
- break;
- case 0x07: /* sysret */
- case 0x35: /* sysexit */
- ret = X86_BR_SYSRET;
- break;
- case 0x80 ... 0x8f: /* conditional */
- ret = X86_BR_JCC;
- break;
- default:
- ret = X86_BR_NONE;
- }
- break;
- case 0x70 ... 0x7f: /* conditional */
- ret = X86_BR_JCC;
- break;
- case 0xc2: /* near ret */
- case 0xc3: /* near ret */
- case 0xca: /* far ret */
- case 0xcb: /* far ret */
- ret = X86_BR_RET;
- break;
- case 0xcf: /* iret */
- ret = X86_BR_IRET;
- break;
- case 0xcc ... 0xce: /* int */
- ret = X86_BR_INT;
- break;
- case 0xe8: /* call near rel */
- if (insn_get_immediate(&insn) || insn.immediate1.value == 0) {
- /* zero length call */
- ret = X86_BR_ZERO_CALL;
- break;
- }
- fallthrough;
- case 0x9a: /* call far absolute */
- ret = X86_BR_CALL;
- break;
- case 0xe0 ... 0xe3: /* loop jmp */
- ret = X86_BR_JCC;
- break;
- case 0xe9 ... 0xeb: /* jmp */
- ret = X86_BR_JMP;
- break;
- case 0xff: /* call near absolute, call far absolute ind */
- if (insn_get_modrm(&insn))
- return X86_BR_ABORT;
-
- ext = (insn.modrm.bytes[0] >> 3) & 0x7;
- switch (ext) {
- case 2: /* near ind call */
- case 3: /* far ind call */
- ret = X86_BR_IND_CALL;
- break;
- case 4:
- case 5:
- ret = X86_BR_IND_JMP;
- break;
- }
- break;
- default:
- ret = X86_BR_NONE;
- }
- /*
- * interrupts, traps, faults (and thus ring transition) may
- * occur on any instructions. Thus, to classify them correctly,
- * we need to first look at the from and to priv levels. If they
- * are different and to is in the kernel, then it indicates
- * a ring transition. If the from instruction is not a ring
- * transition instr (syscall, systenter, int), then it means
- * it was a irq, trap or fault.
- *
- * we have no way of detecting kernel to kernel faults.
- */
- if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
- && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
- ret = X86_BR_IRQ;
-
- /*
- * branch priv level determined by target as
- * is done by HW when LBR_SELECT is implemented
- */
- if (ret != X86_BR_NONE)
- ret |= to_plm;
-
- return ret;
-}
-
-#define X86_BR_TYPE_MAP_MAX 16
-
-static int branch_map[X86_BR_TYPE_MAP_MAX] = {
- PERF_BR_CALL, /* X86_BR_CALL */
- PERF_BR_RET, /* X86_BR_RET */
- PERF_BR_SYSCALL, /* X86_BR_SYSCALL */
- PERF_BR_SYSRET, /* X86_BR_SYSRET */
- PERF_BR_UNKNOWN, /* X86_BR_INT */
- PERF_BR_UNKNOWN, /* X86_BR_IRET */
- PERF_BR_COND, /* X86_BR_JCC */
- PERF_BR_UNCOND, /* X86_BR_JMP */
- PERF_BR_UNKNOWN, /* X86_BR_IRQ */
- PERF_BR_IND_CALL, /* X86_BR_IND_CALL */
- PERF_BR_UNKNOWN, /* X86_BR_ABORT */
- PERF_BR_UNKNOWN, /* X86_BR_IN_TX */
- PERF_BR_UNKNOWN, /* X86_BR_NO_TX */
- PERF_BR_CALL, /* X86_BR_ZERO_CALL */
- PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */
- PERF_BR_IND, /* X86_BR_IND_JMP */
-};
-
-static int
-common_branch_type(int type)
-{
- int i;
-
- type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
-
- if (type) {
- i = __ffs(type);
- if (i < X86_BR_TYPE_MAP_MAX)
- return branch_map[i];
- }
-
- return PERF_BR_UNKNOWN;
-}
-
enum {
ARCH_LBR_BR_TYPE_JCC = 0,
ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1,
@@ -1618,9 +1344,6 @@ void intel_pmu_lbr_init_hsw(void)
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
-
- if (lbr_from_signext_quirk_needed())
- static_branch_enable(&lbr_from_quirk_key);
}
/* skylake */
@@ -1706,6 +1429,42 @@ void intel_pmu_lbr_init_knl(void)
x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}
+void intel_pmu_lbr_init(void)
+{
+ switch (x86_pmu.intel_cap.lbr_format) {
+ case LBR_FORMAT_EIP_FLAGS2:
+ x86_pmu.lbr_has_tsx = 1;
+ x86_pmu.lbr_from_flags = 1;
+ if (lbr_from_signext_quirk_needed())
+ static_branch_enable(&lbr_from_quirk_key);
+ break;
+
+ case LBR_FORMAT_EIP_FLAGS:
+ x86_pmu.lbr_from_flags = 1;
+ break;
+
+ case LBR_FORMAT_INFO:
+ x86_pmu.lbr_has_tsx = 1;
+ fallthrough;
+ case LBR_FORMAT_INFO2:
+ x86_pmu.lbr_has_info = 1;
+ break;
+
+ case LBR_FORMAT_TIME:
+ x86_pmu.lbr_from_flags = 1;
+ x86_pmu.lbr_to_cycles = 1;
+ break;
+ }
+
+ if (x86_pmu.lbr_has_info) {
+ /*
+ * Only used in combination with baseline pebs.
+ */
+ static_branch_enable(&x86_lbr_mispred);
+ static_branch_enable(&x86_lbr_cycles);
+ }
+}
+
/*
* LBR state size is variable based on the max number of registers.
* This calculates the expected state size, which should match
@@ -1726,6 +1485,9 @@ static bool is_arch_lbr_xsave_available(void)
* Check the LBR state with the corresponding software structure.
* Disable LBR XSAVES support if the size doesn't match.
*/
+ if (xfeature_size(XFEATURE_LBR) == 0)
+ return false;
+
if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
return false;
@@ -1765,6 +1527,12 @@ void __init intel_pmu_arch_lbr_init(void)
x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
x86_pmu.lbr_nr = lbr_nr;
+ if (x86_pmu.lbr_mispred)
+ static_branch_enable(&x86_lbr_mispred);
+ if (x86_pmu.lbr_timed_lbr)
+ static_branch_enable(&x86_lbr_cycles);
+ if (x86_pmu.lbr_br_type)
+ static_branch_enable(&x86_lbr_type);
arch_lbr_xsave = is_arch_lbr_xsave_available();
if (arch_lbr_xsave) {
@@ -1828,7 +1596,7 @@ void __init intel_pmu_arch_lbr_init(void)
return;
clear_arch_lbr:
- clear_cpu_cap(&boot_cpu_data, X86_FEATURE_ARCH_LBR);
+ setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR);
}
/**