Diffstat (limited to 'arch/x86/events/perf_event.h')
-rw-r--r-- | arch/x86/events/perf_event.h | 241
1 file changed, 177 insertions, 64 deletions
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ca2f8bfe6ff1..fb56518356ec 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -35,15 +35,17 @@
  * per-core reg tables.
  */
 enum extra_reg_type {
-	EXTRA_REG_NONE  = -1,	/* not used */
+	EXTRA_REG_NONE		= -1, /* not used */
 
-	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
-	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
-	EXTRA_REG_LBR   = 2,	/* lbr_select */
-	EXTRA_REG_LDLAT = 3,	/* ld_lat_threshold */
-	EXTRA_REG_FE    = 4,	/* fe_* */
+	EXTRA_REG_RSP_0		= 0,  /* offcore_response_0 */
+	EXTRA_REG_RSP_1		= 1,  /* offcore_response_1 */
+	EXTRA_REG_LBR		= 2,  /* lbr_select */
+	EXTRA_REG_LDLAT		= 3,  /* ld_lat_threshold */
+	EXTRA_REG_FE		= 4,  /* fe_* */
+	EXTRA_REG_SNOOP_0	= 5,  /* snoop response 0 */
+	EXTRA_REG_SNOOP_1	= 6,  /* snoop response 1 */
 
-	EXTRA_REG_MAX		/* number of entries needed */
+	EXTRA_REG_MAX		      /* number of entries needed */
 };
 
 struct event_constraint {
@@ -64,27 +66,25 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
 	return ((ecode & c->cmask) - c->code) <= (u64)c->size;
 }
 
+#define PERF_ARCH(name, val)	\
+	PERF_X86_EVENT_##name = val,
+
 /*
  * struct hw_perf_event.flags flags
  */
-#define PERF_X86_EVENT_PEBS_LDLAT	0x00001 /* ld+ldlat data address sampling */
-#define PERF_X86_EVENT_PEBS_ST		0x00002 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW	0x00004 /* haswell style datala, store */
-#define PERF_X86_EVENT_PEBS_LD_HSW	0x00008 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW	0x00010 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL		0x00020 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC		0x00040 /* dynamic alloc'd constraint */
-
-#define PERF_X86_EVENT_EXCL_ACCT	0x00100 /* accounted EXCL event */
-#define PERF_X86_EVENT_AUTO_RELOAD	0x00200 /* use PEBS auto-reload */
-#define PERF_X86_EVENT_LARGE_PEBS	0x00400 /* use large PEBS */
-#define PERF_X86_EVENT_PEBS_VIA_PT	0x00800 /* use PT buffer for PEBS */
-#define PERF_X86_EVENT_PAIR		0x01000 /* Large Increment per Cycle */
-#define PERF_X86_EVENT_LBR_SELECT	0x02000 /* Save/Restore MSR_LBR_SELECT */
-#define PERF_X86_EVENT_TOPDOWN		0x04000 /* Count Topdown slots/metrics events */
-#define PERF_X86_EVENT_PEBS_STLAT	0x08000 /* st+stlat data address sampling */
-#define PERF_X86_EVENT_AMD_BRS		0x10000 /* AMD Branch Sampling */
-#define PERF_X86_EVENT_PEBS_LAT_HYBRID	0x20000 /* ld and st lat for hybrid */
+enum {
+#include "perf_event_flags.h"
+};
+
+#undef PERF_ARCH
+
+#define PERF_ARCH(name, val)						\
+	static_assert((PERF_X86_EVENT_##name & PERF_EVENT_FLAG_ARCH) ==	\
+		      PERF_X86_EVENT_##name);
+
+#include "perf_event_flags.h"
+
+#undef PERF_ARCH
 
 static inline bool is_topdown_count(struct perf_event *event)
 {
@@ -110,6 +110,11 @@ static inline bool is_topdown_event(struct perf_event *event)
 	return is_metric_event(event) || is_slots_event(event);
 }
 
+static inline bool is_branch_counters_group(struct perf_event *event)
+{
+	return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS;
+}
+
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
 	int refcnt; /* reference count */
@@ -272,6 +277,10 @@ struct cpu_hw_events {
 	u64			active_pebs_data_cfg;
 	int			pebs_record_size;
 
+	/* Intel Fixed counter configuration */
+	u64			fixed_ctrl_val;
+	u64			active_fixed_ctrl_val;
+
 	/*
 	 * Intel LBR bits
 	 */
@@ -279,6 +288,7 @@ struct cpu_hw_events {
 	int				lbr_pebs_users;
 	struct perf_branch_stack	lbr_stack;
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+	u64				lbr_counters[MAX_LBR_ENTRIES]; /* branch stack extra */
 	union {
 		struct er_account		*lbr_sel;
 		struct er_account		*lbr_ctl;
@@ -604,6 +614,7 @@ union perf_capabilities {
 		u64	pebs_baseline:1;
 		u64	perf_metrics:1;
 		u64	pebs_output_pt_available:1;
+		u64	pebs_timing_info:1;
 		u64	anythread_deprecated:1;
 	};
 	u64	capabilities;
@@ -645,11 +656,31 @@ enum {
 };
 
 #define PERF_PEBS_DATA_SOURCE_MAX	0x10
+#define PERF_PEBS_DATA_SOURCE_MASK	(PERF_PEBS_DATA_SOURCE_MAX - 1)
+
+enum hybrid_cpu_type {
+	HYBRID_INTEL_NONE,
+	HYBRID_INTEL_ATOM	= 0x20,
+	HYBRID_INTEL_CORE	= 0x40,
+};
+
+enum hybrid_pmu_type {
+	not_hybrid,
+	hybrid_small		= BIT(0),
+	hybrid_big		= BIT(1),
+
+	hybrid_big_small	= hybrid_big | hybrid_small, /* only used for matching */
+};
+
+#define X86_HYBRID_PMU_ATOM_IDX		0
+#define X86_HYBRID_PMU_CORE_IDX		1
+
+#define X86_HYBRID_NUM_PMUS		2
 
 struct x86_hybrid_pmu {
 	struct pmu			pmu;
 	const char			*name;
-	u8				cpu_type;
+	enum hybrid_pmu_type		pmu_type;
 	cpumask_t			supported_cpus;
 	union perf_capabilities		intel_cap;
 	u64				intel_ctrl;
@@ -715,18 +746,6 @@ extern struct static_key_false perf_is_hybrid;
 	__Fp;						\
 })
 
-enum hybrid_pmu_type {
-	hybrid_big		= 0x40,
-	hybrid_small		= 0x20,
-
-	hybrid_big_small	= hybrid_big | hybrid_small,
-};
-
-#define X86_HYBRID_PMU_ATOM_IDX		0
-#define X86_HYBRID_PMU_CORE_IDX		1
-
-#define X86_HYBRID_NUM_PMUS		2
-
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -745,6 +764,8 @@ struct x86_pmu {
 	void		(*add)(struct perf_event *);
 	void		(*del)(struct perf_event *);
 	void		(*read)(struct perf_event *event);
+	int		(*set_period)(struct perf_event *event);
+	u64		(*update)(struct perf_event *event);
 	int		(*hw_config)(struct perf_event *event);
 	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
@@ -780,8 +801,7 @@ struct x86_pmu {
 
 	struct event_constraint *event_constraints;
 	struct x86_pmu_quirk	*quirks;
-	int		perfctr_second_write;
-	u64		(*limit_period)(struct perf_event *event, u64 l);
+	void		(*limit_period)(struct perf_event *event, s64 *l);
 
 	/* PMI handler bits */
 	unsigned int	late_ack		:1,
@@ -808,7 +828,7 @@ struct x86_pmu {
 	void		(*cpu_dead)(int cpu);
 
 	void		(*check_microcode)(void);
-	void		(*sched_task)(struct perf_event_context *ctx,
+	void		(*sched_task)(struct perf_event_pmu_context *pmu_ctx,
 				      bool sched_in);
 
 	/*
@@ -828,7 +848,8 @@ struct x86_pmu {
 			pebs_prec_dist		:1,
 			pebs_no_tlb		:1,
 			pebs_no_isolation	:1,
-			pebs_block		:1;
+			pebs_block		:1,
+			pebs_ept		:1;
 	int		pebs_record_size;
 	int		pebs_buffer_size;
 	int		max_pebs_events;
@@ -838,6 +859,7 @@ struct x86_pmu {
 	u64		(*pebs_latency_data)(struct perf_event *event, u64 status);
 	unsigned long	large_pebs_flags;
 	u64		rtm_abort_event;
+	u64		pebs_capable;
 
 	/*
 	 * Intel LBR
@@ -872,6 +894,7 @@ struct x86_pmu {
 	unsigned int	lbr_mispred:1;
 	unsigned int	lbr_timed_lbr:1;
 	unsigned int	lbr_br_type:1;
+	unsigned int	lbr_counters:4;
 
 	void		(*lbr_reset)(void);
 	void		(*lbr_read)(struct cpu_hw_events *cpuc);
@@ -887,16 +910,14 @@ struct x86_pmu {
 	 * Intel perf metrics
	 */
 	int		num_topdown_events;
-	u64		(*update_topdown_event)(struct perf_event *event);
-	int		(*set_topdown_event_period)(struct perf_event *event);
 
 	/*
-	 * perf task context (i.e. struct perf_event_context::task_ctx_data)
+	 * perf task context (i.e. struct perf_event_pmu_context::task_ctx_data)
 	 * switch helper to bridge calls from perf/core to perf/x86.
 	 * See struct pmu::swap_task_ctx() usage for examples;
 	 */
-	void		(*swap_task_ctx)(struct perf_event_context *prev,
-					 struct perf_event_context *next);
+	void		(*swap_task_ctx)(struct perf_event_pmu_context *prev_epc,
+					 struct perf_event_pmu_context *next_epc);
 
 	/*
 	 * AMD bits
@@ -913,7 +934,7 @@ struct x86_pmu {
 	/*
 	 * Intel host/guest support (KVM)
 	 */
-	struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
+	struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr, void *data);
 
 	/*
 	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
@@ -922,7 +943,7 @@
 
 	int (*aux_output_match) (struct perf_event *event);
 
-	int (*filter_match)(struct perf_event *event);
+	void (*filter)(struct pmu *pmu, int cpu, bool *ret);
 	/*
 	 * Hybrid support
 	 *
@@ -933,7 +954,7 @@
 	 */
 	int				num_hybrid_pmus;
 	struct x86_hybrid_pmu		*hybrid_pmu;
-	u8 (*get_hybrid_cpu_type)	(void);
+	enum hybrid_cpu_type (*get_hybrid_cpu_type)	(void);
 };
 
 struct x86_perf_task_context_opt {
@@ -997,6 +1018,8 @@ do {									\
 #define PMU_FL_PAIR		0x40 /* merge counters for large incr. events */
 #define PMU_FL_INSTR_LATENCY	0x80 /* Support Instruction Latency in PEBS Memory Info Record */
 #define PMU_FL_MEM_LOADS_AUX	0x100 /* Require an auxiliary event for the complete memory info */
+#define PMU_FL_RETIRE_LATENCY	0x200 /* Support Retire Latency in PEBS */
+#define PMU_FL_BR_CNTR		0x400 /* Support branch counter logging */
 
 #define EVENT_VAR(_id) event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -1042,6 +1065,9 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\
 struct pmu *x86_get_pmu(unsigned int cpu);
 extern struct x86_pmu x86_pmu __read_mostly;
 
+DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period);
+DECLARE_STATIC_CALL(x86_pmu_update,     *x86_pmu.update);
+
 static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
 {
 	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
@@ -1057,6 +1083,7 @@ static inline bool x86_pmu_has_lbr_callstack(void)
 }
 
 DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+DECLARE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
 
 int x86_perf_event_set_period(struct perf_event *event);
 
@@ -1173,8 +1200,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs);
 void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
 			  u64 intel_ctrl);
 
-void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu);
-
 extern struct event_constraint emptyconstraint;
 
 extern struct event_constraint unconstrained;
@@ -1208,6 +1233,70 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
 	regs->ip = ip;
 }
 
+/*
+ * x86control flow change classification
+ * x86control flow changes include branches, interrupts, traps, faults
+ */
+enum {
+	X86_BR_NONE		= 0,      /* unknown */
+
+	X86_BR_USER		= 1 << 0, /* branch target is user */
+	X86_BR_KERNEL		= 1 << 1, /* branch target is kernel */
+
+	X86_BR_CALL		= 1 << 2, /* call */
+	X86_BR_RET		= 1 << 3, /* return */
+	X86_BR_SYSCALL		= 1 << 4, /* syscall */
+	X86_BR_SYSRET		= 1 << 5, /* syscall return */
+	X86_BR_INT		= 1 << 6, /* sw interrupt */
+	X86_BR_IRET		= 1 << 7, /* return from interrupt */
+	X86_BR_JCC		= 1 << 8, /* conditional */
+	X86_BR_JMP		= 1 << 9, /* jump */
+	X86_BR_IRQ		= 1 << 10,/* hw interrupt or trap or fault */
+	X86_BR_IND_CALL		= 1 << 11,/* indirect calls */
+	X86_BR_ABORT		= 1 << 12,/* transaction abort */
+	X86_BR_IN_TX		= 1 << 13,/* in transaction */
+	X86_BR_NO_TX		= 1 << 14,/* not in transaction */
+	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
+	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
+	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */
+
+	X86_BR_TYPE_SAVE	= 1 << 18,/* indicate to save branch type */
+
+};
+
+#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
+
+#define X86_BR_ANY		\
+	(X86_BR_CALL		|\
+	 X86_BR_RET		|\
+	 X86_BR_SYSCALL		|\
+	 X86_BR_SYSRET		|\
+	 X86_BR_INT		|\
+	 X86_BR_IRET		|\
+	 X86_BR_JCC		|\
+	 X86_BR_JMP		|\
+	 X86_BR_IRQ		|\
+	 X86_BR_ABORT		|\
+	 X86_BR_IND_CALL	|\
+	 X86_BR_IND_JMP		|\
+	 X86_BR_ZERO_CALL)
+
+#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
+
+#define X86_BR_ANY_CALL		\
+	(X86_BR_CALL		|\
+	 X86_BR_IND_CALL	|\
+	 X86_BR_ZERO_CALL	|\
+	 X86_BR_SYSCALL		|\
+	 X86_BR_IRQ		|\
+	 X86_BR_INT)
+
+int common_branch_type(int type);
+int branch_type(unsigned long from, unsigned long to, int abort);
+int branch_type_fused(unsigned long from, unsigned long to, int abort,
+		      int *offset);
+
 ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
 ssize_t intel_event_sysfs_show(char *page, u64 config);
@@ -1230,7 +1319,20 @@ static inline bool fixed_counter_disabled(int i, struct pmu *pmu)
 
 int amd_pmu_init(void);
 
+int amd_pmu_lbr_init(void);
+void amd_pmu_lbr_reset(void);
+void amd_pmu_lbr_read(void);
+void amd_pmu_lbr_add(struct perf_event *event);
+void amd_pmu_lbr_del(struct perf_event *event);
+void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
+void amd_pmu_lbr_enable_all(void);
+void amd_pmu_lbr_disable_all(void);
+int amd_pmu_lbr_hw_config(struct perf_event *event);
+
 #ifdef CONFIG_PERF_EVENTS_AMD_BRS
+
+#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */
+
 int amd_brs_init(void);
 void amd_brs_disable(void);
 void amd_brs_enable(void);
@@ -1238,15 +1340,14 @@ void amd_brs_enable_all(void);
 void amd_brs_disable_all(void);
 void amd_brs_drain(void);
 void amd_brs_lopwr_init(void);
-void amd_brs_disable_all(void);
-int amd_brs_setup_filter(struct perf_event *event);
+int amd_brs_hw_config(struct perf_event *event);
 void amd_brs_reset(void);
 
 static inline void amd_pmu_brs_add(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
-	perf_sched_cb_inc(event->ctx->pmu);
+	perf_sched_cb_inc(event->pmu);
 	cpuc->lbr_users++;
 	/*
 	 * No need to reset BRS because it is reset
@@ -1261,10 +1362,10 @@ static inline void amd_pmu_brs_del(struct perf_event *event)
 	cpuc->lbr_users--;
 	WARN_ON_ONCE(cpuc->lbr_users < 0);
 
-	perf_sched_cb_dec(event->ctx->pmu);
+	perf_sched_cb_dec(event->pmu);
 }
 
-void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in);
+void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
 #else
 static inline int amd_brs_init(void)
 {
@@ -1275,7 +1376,7 @@ static inline void amd_brs_enable(void) {}
 static inline void amd_brs_drain(void) {}
 static inline void amd_brs_lopwr_init(void) {}
 static inline void amd_brs_disable_all(void) {}
-static inline int amd_brs_setup_filter(struct perf_event *event)
+static inline int amd_brs_hw_config(struct perf_event *event)
 {
 	return 0;
 }
@@ -1289,7 +1390,7 @@ static inline void amd_pmu_brs_del(struct perf_event *event)
 {
 }
 
-static inline void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
+static inline void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
 }
 
@@ -1405,6 +1506,8 @@ int intel_pmu_drain_bts_buffer(void);
 
 u64 adl_latency_data_small(struct perf_event *event, u64 status);
 
+u64 mtl_latency_data_small(struct perf_event *event, u64 status);
+
 extern struct event_constraint intel_core2_pebs_event_constraints[];
 
 extern struct event_constraint intel_atom_pebs_event_constraints[];
@@ -1433,7 +1536,7 @@ extern struct event_constraint intel_skl_pebs_event_constraints[];
 
 extern struct event_constraint intel_icl_pebs_event_constraints[];
 
-extern struct event_constraint intel_spr_pebs_event_constraints[];
+extern struct event_constraint intel_glc_pebs_event_constraints[];
 
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
@@ -1449,7 +1552,7 @@ void intel_pmu_pebs_enable_all(void);
 
 void intel_pmu_pebs_disable_all(void);
 
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
+void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
 
 void intel_pmu_auto_reload_read(struct perf_event *event);
 
@@ -1457,10 +1560,14 @@ void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
 
 void intel_ds_init(void);
 
-void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
-				 struct perf_event_context *next);
+void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
+				struct cpu_hw_events *cpuc,
+				struct perf_event *event);
+
+void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
+				 struct perf_event_pmu_context *next_epc);
 
-void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
 
 u64 lbr_from_signext_quirk_wr(u64 val);
 
@@ -1514,6 +1621,12 @@ void intel_pmu_pebs_data_source_skl(bool pmem);
 
 void intel_pmu_pebs_data_source_adl(void);
 
+void intel_pmu_pebs_data_source_grt(void);
+
+void intel_pmu_pebs_data_source_mtl(void);
+
+void intel_pmu_pebs_data_source_cmt(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);
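Note on the flag rework in the hunk at old line 64: the hand-numbered PERF_X86_EVENT_* defines are replaced by a PERF_ARCH() x-macro list that the header expands twice, once inside an anonymous enum to generate the flag values, and once into static_assert()s that verify each flag fits inside PERF_EVENT_FLAG_ARCH. Below is a minimal standalone sketch of that double-expansion pattern; MY_FLAGS, MY_FLAG_MASK and the MY_EVENT_* names are hypothetical stand-ins (the kernel instead re-#includes perf_event_flags.h around two different PERF_ARCH() definitions):

#include <assert.h>

/* Hypothetical stand-in for perf_event_flags.h: one X(name, value) entry per flag. */
#define MY_FLAGS(X)		\
	X(PEBS_LDLAT,  0x00001)	\
	X(PEBS_ST,     0x00002)	\
	X(AUTO_RELOAD, 0x00200)

/* Stand-in for PERF_EVENT_FLAG_ARCH: the bit range reserved for arch flags. */
#define MY_FLAG_MASK 0x3ffff

/* First expansion: generate the enum constants. */
#define DEFINE_FLAG(name, val) MY_EVENT_##name = val,
enum { MY_FLAGS(DEFINE_FLAG) };
#undef DEFINE_FLAG

/* Second expansion: compile-time check that every flag stays within the mask. */
#define CHECK_FLAG(name, val) \
	static_assert((MY_EVENT_##name & MY_FLAG_MASK) == MY_EVENT_##name, #name);
MY_FLAGS(CHECK_FLAG)
#undef CHECK_FLAG

int main(void)
{
	/* The generated values are ordinary enum constants. */
	return !(MY_EVENT_AUTO_RELOAD == 0x200);
}

The point of the second expansion is that a new flag which falls outside the architecture-reserved bit range fails the build instead of silently colliding with generic perf_event flags.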