diff options
Diffstat (limited to 'arch/x86/events/amd/brs.c')
-rw-r--r-- | arch/x86/events/amd/brs.c | 84 |
1 files changed, 74 insertions, 10 deletions
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c index bee8765a1e9b..780acd3dff22 100644 --- a/arch/x86/events/amd/brs.c +++ b/arch/x86/events/amd/brs.c @@ -41,18 +41,15 @@ static inline unsigned int brs_to(int idx) return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1; } -static inline void set_debug_extn_cfg(u64 val) +static __always_inline void set_debug_extn_cfg(u64 val) { /* bits[4:3] must always be set to 11b */ - wrmsrl(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3); + __wrmsr(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3, val >> 32); } -static inline u64 get_debug_extn_cfg(void) +static __always_inline u64 get_debug_extn_cfg(void) { - u64 val; - - rdmsrl(MSR_AMD_DBG_EXTN_CFG, val); - return val; + return __rdmsr(MSR_AMD_DBG_EXTN_CFG); } static bool __init amd_brs_detect(void) @@ -81,7 +78,7 @@ static bool __init amd_brs_detect(void) * a br_sel_map. Software filtering is not supported because it would not correlate well * with a sampling period. */ -int amd_brs_setup_filter(struct perf_event *event) +static int amd_brs_setup_filter(struct perf_event *event) { u64 type = event->attr.branch_sample_type; @@ -96,6 +93,73 @@ int amd_brs_setup_filter(struct perf_event *event) return 0; } +static inline int amd_is_brs_event(struct perf_event *e) +{ + return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT; +} + +int amd_brs_hw_config(struct perf_event *event) +{ + int ret = 0; + + /* + * Due to interrupt holding, BRS is not recommended in + * counting mode. + */ + if (!is_sampling_event(event)) + return -EINVAL; + + /* + * Due to the way BRS operates by holding the interrupt until + * lbr_nr entries have been captured, it does not make sense + * to allow sampling on BRS with an event that does not match + * what BRS is capturing, i.e., retired taken branches. + * Otherwise the correlation with the event's period is even + * more loose: + * + * With retired taken branch: + * Effective P = P + 16 + X + * With any other event: + * Effective P = P + Y + X + * + * Where X is the number of taken branches due to interrupt + * skid. Skid is large. + * + * Where Y is the occurrences of the event while BRS is + * capturing the lbr_nr entries. + * + * By using retired taken branches, we limit the impact on the + * Y variable. We know it cannot be more than the depth of + * BRS. + */ + if (!amd_is_brs_event(event)) + return -EINVAL; + + /* + * BRS implementation does not work with frequency mode + * reprogramming of the period. + */ + if (event->attr.freq) + return -EINVAL; + /* + * The kernel subtracts BRS depth from period, so it must + * be big enough. + */ + if (event->attr.sample_period <= x86_pmu.lbr_nr) + return -EINVAL; + + /* + * Check if we can allow PERF_SAMPLE_BRANCH_STACK + */ + ret = amd_brs_setup_filter(event); + + /* only set in case of success */ + if (!ret) + event->hw.flags |= PERF_X86_EVENT_AMD_BRS; + + return ret; +} + /* tos = top of stack, i.e., last valid entry written */ static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg) { @@ -317,7 +381,7 @@ static void amd_brs_poison_buffer(void) * On ctxswin, sched_in = true, called after the PMU has started * On ctxswout, sched_in = false, called before the PMU is stopped */ -void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in) +void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -338,7 +402,7 @@ void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in) * called from ACPI processor_idle.c or acpi_pad.c * with interrupts disabled */ -void perf_amd_brs_lopwr_cb(bool lopwr_in) +void noinstr perf_amd_brs_lopwr_cb(bool lopwr_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); union amd_debug_extn_cfg cfg; |