diff options
49 files changed, 1780 insertions, 712 deletions
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 417c392ddf1c..64ca8c3ab94b 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -3001,13 +3001,6 @@ arch_initcall(init_hw_perf_events); /* * Callchain handling code. */ -static inline void -callchain_store(struct perf_callchain_entry *entry, - u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} /* * The registers we're interested in are at the end of the variable @@ -3039,7 +3032,7 @@ user_backtrace(struct frame_tail *tail, if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail))) return NULL; - callchain_store(entry, buftail.lr); + perf_callchain_store(entry, buftail.lr); /* * Frame pointers should strictly progress back up the stack @@ -3051,16 +3044,11 @@ user_backtrace(struct frame_tail *tail, return buftail.fp - 1; } -static void -perf_callchain_user(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct frame_tail *tail; - callchain_store(entry, PERF_CONTEXT_USER); - - if (!user_mode(regs)) - regs = task_pt_regs(current); tail = (struct frame_tail *)regs->ARM_fp - 1; @@ -3078,56 +3066,18 @@ callchain_trace(struct stackframe *fr, void *data) { struct perf_callchain_entry *entry = data; - callchain_store(entry, fr->pc); + perf_callchain_store(entry, fr->pc); return 0; } -static void -perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stackframe fr; - callchain_store(entry, PERF_CONTEXT_KERNEL); fr.fp = regs->ARM_fp; fr.sp = regs->ARM_sp; fr.lr = regs->ARM_lr; fr.pc = regs->ARM_pc; walk_stackframe(&fr, callchain_trace, entry); } - -static void -perf_do_callchain(struct pt_regs *regs, - struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!current || !current->pid) - return; - - if (is_user && current->state != TASK_RUNNING) - return; - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); - -struct perf_callchain_entry * -perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - perf_do_callchain(regs, entry); - return entry; -} diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index 95ad9dad298e..d05ae4204bbf 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -23,18 +23,6 @@ #include "ppc32.h" #endif -/* - * Store another value in a callchain_entry. - */ -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - unsigned int nr = entry->nr; - - if (nr < PERF_MAX_STACK_DEPTH) { - entry->ip[nr] = ip; - entry->nr = nr + 1; - } -} /* * Is sp valid as the address of the next kernel stack frame after prev_sp? @@ -58,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; } -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -69,8 +57,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->nip); + perf_callchain_store(entry, regs->nip); if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) return; @@ -89,7 +76,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; level = 0; - callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); } else { if (level == 0) @@ -111,7 +98,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, ++level; } - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); if (!valid_next_sp(next_sp, sp)) return; sp = next_sp; @@ -233,8 +220,8 @@ static int sane_signal_64_frame(unsigned long sp) puc == (unsigned long) &sf->uc; } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -246,8 +233,7 @@ static void perf_callchain_user_64(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); for (;;) { fp = (unsigned long __user *) sp; @@ -276,14 +262,14 @@ static void perf_callchain_user_64(struct pt_regs *regs, read_user_stack_64(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } @@ -315,8 +301,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) return __get_user_inatomic(*ret, ptr); } -static inline void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static inline void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { } @@ -435,8 +421,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp, return mctx->mc_gregs; } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned int sp, next_sp; unsigned int next_ip; @@ -447,8 +433,7 @@ static void perf_callchain_user_32(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); while (entry->nr < PERF_MAX_STACK_DEPTH) { fp = (unsigned int __user *) (unsigned long) sp; @@ -470,45 +455,24 @@ static void perf_callchain_user_32(struct pt_regs *regs, read_user_stack_32(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } } -/* - * Since we can't get PMU interrupts inside a PMU interrupt handler, - * we don't need separate irq and nmi entries here. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); - - entry->nr = 0; - - if (!user_mode(regs)) { - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - if (current_is_64bit()) - perf_callchain_user_64(regs, entry); - else - perf_callchain_user_32(regs, entry); - } - - return entry; + if (current_is_64bit()) + perf_callchain_user_64(entry, regs); + else + perf_callchain_user_32(entry, regs); } diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c index a9dd3abde28e..d5ca1ef50fa9 100644 --- a/arch/sh/kernel/perf_callchain.c +++ b/arch/sh/kernel/perf_callchain.c @@ -14,11 +14,6 @@ #include <asm/unwinder.h> #include <asm/ptrace.h> -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} static void callchain_warning(void *data, char *msg) { @@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable) struct perf_callchain_entry *entry = data; if (reliable) - callchain_store(entry, addr); + perf_callchain_store(entry, addr); } static const struct stacktrace_ops callchain_ops = { @@ -49,47 +44,10 @@ static const struct stacktrace_ops callchain_ops = { .address = callchain_address, }; -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->pc); + perf_callchain_store(entry, regs->pc); unwind_stack(NULL, regs, NULL, &callchain_ops, entry); } - -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (is_user && current->state != TASK_RUNNING) - return; - - /* - * Only the kernel side is implemented for now. - */ - if (!is_user) - perf_callchain_kernel(regs, entry); -} - -/* - * No need for separate IRQ and NMI entries. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); - - entry->nr = 0; - - perf_do_callchain(regs, entry); - - return entry; -} diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 357ced3c33ff..4bc402938575 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1283,22 +1283,17 @@ void __init init_hw_perf_events(void) register_die_notifier(&perf_event_nmi_notifier); } -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} - -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ksp, fp; #ifdef CONFIG_FUNCTION_GRAPH_TRACER int graph = 0; #endif - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->tpc); + stack_trace_flush(); + + perf_callchain_store(entry, regs->tpc); ksp = regs->u_regs[UREG_I6]; fp = ksp + STACK_BIAS; @@ -1322,13 +1317,13 @@ static void perf_callchain_kernel(struct pt_regs *regs, pc = sf->callers_pc; fp = (unsigned long)sf->fp + STACK_BIAS; } - callchain_store(entry, pc); + perf_callchain_store(entry, pc); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if ((pc + 8UL) == (unsigned long) &return_to_handler) { int index = current->curr_ret_stack; if (current->ret_stack && index >= graph) { pc = current->ret_stack[index - graph].ret; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); graph++; } } @@ -1336,13 +1331,12 @@ static void perf_callchain_kernel(struct pt_regs *regs, } while (entry->nr < PERF_MAX_STACK_DEPTH); } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ufp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, regs->tpc); ufp = regs->u_regs[UREG_I6] + STACK_BIAS; do { @@ -1355,17 +1349,16 @@ static void perf_callchain_user_64(struct pt_regs *regs, pc = sf.callers_pc; ufp = (unsigned long)sf.fp + STACK_BIAS; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); } while (entry->nr < PERF_MAX_STACK_DEPTH); } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ufp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, regs->tpc); ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; do { @@ -1378,34 +1371,16 @@ static void perf_callchain_user_32(struct pt_regs *regs, pc = sf.callers_pc; ufp = (unsigned long)sf.fp; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); } while (entry->nr < PERF_MAX_STACK_DEPTH); } -/* Like powerpc we can't get PMU interrupts within the PMU handler, - * so no need for separate NMI and IRQ chains as on x86. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); - - entry->nr = 0; - if (!user_mode(regs)) { - stack_trace_flush(); - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - if (regs) { - flushw_user(); - if (test_thread_flag(TIF_32BIT)) - perf_callchain_user_32(regs, entry); - else - perf_callchain_user_64(regs, entry); - } - return entry; + flushw_user(); + if (test_thread_flag(TIF_32BIT)) + perf_callchain_user_32(entry, regs); + else + perf_callchain_user_64(entry, regs); } diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index def500776b16..a70cd216be5d 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -36,19 +36,6 @@ #define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) #define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) -/* Non HT mask */ -#define P4_ESCR_MASK \ - (P4_ESCR_EVENT_MASK | \ - P4_ESCR_EVENTMASK_MASK | \ - P4_ESCR_TAG_MASK | \ - P4_ESCR_TAG_ENABLE | \ - P4_ESCR_T0_OS | \ - P4_ESCR_T0_USR) - -/* HT mask */ -#define P4_ESCR_MASK_HT \ - (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR) - #define P4_CCCR_OVF 0x80000000U #define P4_CCCR_CASCADE 0x40000000U #define P4_CCCR_OVF_PMI_T0 0x04000000U @@ -70,23 +57,6 @@ #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) -/* Non HT mask */ -#define P4_CCCR_MASK \ - (P4_CCCR_OVF | \ - P4_CCCR_CASCADE | \ - P4_CCCR_OVF_PMI_T0 | \ - P4_CCCR_FORCE_OVF | \ - P4_CCCR_EDGE | \ - P4_CCCR_THRESHOLD_MASK | \ - P4_CCCR_COMPLEMENT | \ - P4_CCCR_COMPARE | \ - P4_CCCR_ESCR_SELECT_MASK | \ - P4_CCCR_ENABLE) - -/* HT mask */ -#define P4_CCCR_MASK_HT \ - (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY) - #define P4_GEN_ESCR_EMASK(class, name, bit) \ class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) #define P4_ESCR_EMASK_BIT(class, name) class##__##name @@ -127,6 +97,28 @@ #define P4_CONFIG_HT_SHIFT 63 #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) +/* + * The bits we allow to pass for RAW events + */ +#define P4_CONFIG_MASK_ESCR \ + P4_ESCR_EVENT_MASK | \ + P4_ESCR_EVENTMASK_MASK | \ + P4_ESCR_TAG_MASK | \ + P4_ESCR_TAG_ENABLE + +#define P4_CONFIG_MASK_CCCR \ + P4_CCCR_EDGE | \ + P4_CCCR_THRESHOLD_MASK | \ + P4_CCCR_COMPLEMENT | \ + P4_CCCR_COMPARE | \ + P4_CCCR_THREAD_ANY | \ + P4_CCCR_RESERVED + +/* some dangerous bits are reserved for kernel internals */ +#define P4_CONFIG_MASK \ + (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \ + (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR)) + static inline bool p4_is_event_cascaded(u64 config) { u32 cccr = p4_config_unpack_cccr(config); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 3efdf2870a35..de6569c04cd0 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1604,17 +1604,6 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) * callchain support */ -static inline -void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); - - static void backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) { @@ -1635,7 +1624,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - callchain_store(entry, addr); + perf_callchain_store(entry, addr); } static const struct stacktrace_ops backtrace_ops = { @@ -1646,11 +1635,15 @@ static const struct stacktrace_ops backtrace_ops = { .walk_stack = print_context_stack_bp, }; -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->ip); + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return; + } + + perf_callchain_store(entry, regs->ip); dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); } @@ -1679,7 +1672,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) if (fp < compat_ptr(regs->sp)) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = compat_ptr(frame.next_frame); } return 1; @@ -1692,19 +1685,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) } #endif -static void -perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stack_frame frame; const void __user *fp; - if (!user_mode(regs)) - regs = task_pt_regs(current); + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return; + } fp = (void __user *)regs->bp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->ip); + perf_callchain_store(entry, regs->ip); if (perf_callchain_user32(regs, entry)) return; @@ -1721,52 +1715,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) if ((unsigned long)fp < regs->sp) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = frame.next_frame; } } -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (is_user && current->state != TASK_RUNNING) - return; - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry; - - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* TODO: We don't support guest os callchain now */ - return NULL; - } - - if (in_nmi()) - entry = &__get_cpu_var(pmc_nmi_entry); - else - entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - - perf_do_callchain(regs, entry); - - return entry; -} - unsigned long perf_instruction_pointer(struct pt_regs *regs) { unsigned long ip; diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index b560db3305be..c70c878ee02a 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -18,6 +18,8 @@ struct p4_event_bind { unsigned int opcode; /* Event code and ESCR selector */ unsigned int escr_msr[2]; /* ESCR MSR for this event */ + unsigned int escr_emask; /* valid ESCR EventMask bits */ + unsigned int shared; /* event is shared across threads */ char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ }; @@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = { [P4_EVENT_TC_DELIVER_MODE] = { .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID), + .shared = 1, .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_BPU_FETCH_REQUEST] = { .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_ITLB_REFERENCE] = { .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) | + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_MEMORY_CANCEL] = { .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) | + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_MEMORY_COMPLETE] = { .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) | + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_LOAD_PORT_REPLAY] = { .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_STORE_PORT_REPLAY] = { .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_MOB_LOAD_REPLAY] = { .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_PAGE_WALK_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS), + .shared = 1, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_BSQ_CACHE_REFERENCE] = { .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_IOQ_ALLOCATION] = { .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH), .cntr = { {2, -1, -1}, {3, -1, -1} }, }, [P4_EVENT_FSB_DATA_ACTIVITY] = { .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER), + .shared = 1, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2), .cntr = { {0, -1, -1}, {1, -1, -1} }, }, [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2), .cntr = { {2, -1, -1}, {3, -1, -1} }, }, [P4_EVENT_SSE_INPUT_ASSIST] = { .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_PACKED_SP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_PACKED_DP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_SCALAR_SP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_SCALAR_DP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_64BIT_MMX_UOP] = { .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_128BIT_MMX_UOP] = { .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_X87_FP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_TC_MISC] = { .opcode = P4_OPCODE(P4_EVENT_TC_MISC), .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_GLOBAL_POWER_EVENTS] = { .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_TC_MS_XFER] = { .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_UOP_QUEUE_WRITES] = { .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_RETIRED_BRANCH_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_RESOURCE_STALL] = { .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_WC_BUFFER] = { .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) | + P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_B2B_CYCLES] = { .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_BNR] = { .opcode = P4_OPCODE(P4_EVENT_BNR), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_SNOOP] = { .opcode = P4_OPCODE(P4_EVENT_SNOOP), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_RESPONSE] = { .opcode = P4_OPCODE(P4_EVENT_RESPONSE), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_FRONT_END_EVENT] = { .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_EXECUTION_EVENT] = { .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_REPLAY_EVENT] = { .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_INSTR_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_UOPS_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_UOP_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_BRANCH_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) | + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) | + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_MISPRED_BRANCH_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_X87_ASSIST] = { .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_MACHINE_CLEAR] = { .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) | + P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) | + P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_INSTR_COMPLETED] = { .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, }; @@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event) return config; } +/* check cpu model specifics */ +static bool p4_event_match_cpu_model(unsigned int event_idx) +{ + /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */ + if (event_idx == P4_EVENT_INSTR_COMPLETED) { + if (boot_cpu_data.x86_model != 3 && + boot_cpu_data.x86_model != 4 && + boot_cpu_data.x86_model != 6) + return false; + } + + /* + * For info + * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2 + */ + + return true; +} + static int p4_validate_raw_event(struct perf_event *event) { - unsigned int v; + unsigned int v, emask; - /* user data may have out-of-bound event index */ + /* User data may have out-of-bound event index */ v = p4_config_unpack_event(event->attr.config); - if (v >= ARRAY_SIZE(p4_event_bind_map)) { - pr_warning("P4 PMU: Unknown event code: %d\n", v); + if (v >= ARRAY_SIZE(p4_event_bind_map)) + return -EINVAL; + + /* It may be unsupported: */ + if (!p4_event_match_cpu_model(v)) return -EINVAL; + + /* + * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as + * in Architectural Performance Monitoring, it means not + * on _which_ logical cpu to count but rather _when_, ie it + * depends on logical cpu state -- count event if one cpu active, + * none, both or any, so we just allow user to pass any value + * desired. + * + * In turn we always set Tx_OS/Tx_USR bits bound to logical + * cpu without their propagation to another cpu + */ + + /* + * if an event is shared accross the logical threads + * the user needs special permissions to be able to use it + */ + if (p4_event_bind_map[v].shared) { + if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) + return -EACCES; } + /* ESCR EventMask bits may be invalid */ + emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK; + if (emask & ~p4_event_bind_map[v].escr_emask) + return -EINVAL; + /* - * it may have some screwed PEBS bits + * it may have some invalid PEBS bits */ - if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { - pr_warning("P4 PMU: PEBS are not supported yet\n"); + if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) return -EINVAL; - } + v = p4_config_unpack_metric(event->attr.config); - if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { - pr_warning("P4 PMU: Unknown metric code: %d\n", v); + if (v >= ARRAY_SIZE(p4_pebs_bind_map)) return -EINVAL; - } return 0; } @@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event) if (event->attr.type == PERF_TYPE_RAW) { + /* + * Clear bits we reserve to be managed by kernel itself + * and never allowed from a user space + */ + event->attr.config &= P4_CONFIG_MASK; + rc = p4_validate_raw_event(event); if (rc) goto out; /* - * We don't control raw events so it's up to the caller - * to pass sane values (and we don't count the thread number - * on HT machine but allow HT-compatible specifics to be - * passed on) - * * Note that for RAW events we allow user to use P4_CCCR_RESERVED * bits since we keep additional info here (for cache events and etc) - * - * XXX: HT wide things should check perf_paranoid_cpu() && - * CAP_SYS_ADMIN */ - event->hw.config |= event->attr.config & - (p4_config_pack_escr(P4_ESCR_MASK_HT) | - p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); - - event->hw.config &= ~P4_CCCR_FORCE_OVF; + event->hw.config |= event->attr.config; } rc = x86_setup_perfctr(event); diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 02b8b24f8f51..5f8ad7bec636 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -191,8 +191,8 @@ struct ftrace_event_call { unsigned int flags; #ifdef CONFIG_PERF_EVENTS - int perf_refcount; - struct hlist_head *perf_events; + int perf_refcount; + struct hlist_head __percpu *perf_events; #endif }; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a0384a4d1e6f..531495db1708 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -18,6 +18,7 @@ #include <asm/atomic.h> #include <asm/ptrace.h> #include <asm/system.h> +#include <trace/events/irq.h> /* * These correspond to the IORESOURCE_IRQ_* defines in @@ -407,7 +408,12 @@ asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); -#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) +static inline void __raise_softirq_irqoff(unsigned int nr) +{ + trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL); + or_softirq_pending(1UL << nr); +} + extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); extern void wakeup_softirqd(void); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 716f99b682c1..000610c4de71 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -808,6 +808,12 @@ struct perf_event_context { struct rcu_head rcu_head; }; +/* + * Number of contexts where an event can trigger: + * task, softirq, hardirq, nmi. + */ +#define PERF_NR_CONTEXTS 4 + /** * struct perf_event_cpu_context - per cpu event context structure */ @@ -821,12 +827,8 @@ struct perf_cpu_context { struct mutex hlist_mutex; int hlist_refcount; - /* - * Recursion avoidance: - * - * task, softirq, irq, nmi context - */ - int recursion[4]; + /* Recursion avoidance in each contexts */ + int recursion[PERF_NR_CONTEXTS]; }; struct perf_output_handle { @@ -976,7 +978,21 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_comm(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); -extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); +/* Callchains */ +DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); + +extern void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs); + + +static inline void +perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + if (entry->nr < PERF_MAX_STACK_DEPTH) + entry->ip[entry->nr++] = ip; +} extern int sysctl_perf_event_paranoid; extern int sysctl_perf_event_mlock; diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 0e4cfb694fe7..6fa7cbab7d93 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -5,7 +5,9 @@ #define _TRACE_IRQ_H #include <linux/tracepoint.h> -#include <linux/interrupt.h> + +struct irqaction; +struct softirq_action; #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } #define show_softirq_name(val) \ @@ -93,7 +95,10 @@ DECLARE_EVENT_CLASS(softirq, ), TP_fast_assign( - __entry->vec = (int)(h - vec); + if (vec) + __entry->vec = (int)(h - vec); + else + __entry->vec = (int)(long)h; ), TP_printk("vec=%d [action=%s]", __entry->vec, @@ -136,6 +141,23 @@ DEFINE_EVENT(softirq, softirq_exit, TP_ARGS(h, vec) ); +/** + * softirq_raise - called immediately when a softirq is raised + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter contains a pointer to the softirq vector number which is + * raised. @vec is NULL and it means @h includes vector number not + * softirq_action. When used in combination with the softirq_entry tracepoint + * we can determine the softirq raise latency. + */ +DEFINE_EVENT(softirq, softirq_raise, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec) +); + #endif /* _TRACE_IRQ_H */ /* This part must be outside protection */ diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index 188deca2f3c7..8fe1e93f531d 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -6,10 +6,31 @@ #include <linux/netdevice.h> #include <linux/tracepoint.h> +#include <linux/ftrace.h> + +#define NO_DEV "(no_device)" + +TRACE_EVENT(napi_poll, -DECLARE_TRACE(napi_poll, TP_PROTO(struct napi_struct *napi), - TP_ARGS(napi)); + + TP_ARGS(napi), + + TP_STRUCT__entry( + __field( struct napi_struct *, napi) + __string( dev_name, napi->dev ? napi->dev->name : NO_DEV) + ), + + TP_fast_assign( + __entry->napi = napi; + __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV); + ), + + TP_printk("napi poll on napi struct %p for device %s", + __entry->napi, __get_str(dev_name)) +); + +#undef NO_DEV #endif /* _TRACE_NAPI_H_ */ diff --git a/include/trace/events/net.h b/include/trace/events/net.h new file mode 100644 index 000000000000..5f247f5ffc56 --- /dev/null +++ b/include/trace/events/net.h @@ -0,0 +1,82 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM net + +#if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NET_H + +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/ip.h> +#include <linux/tracepoint.h> + +TRACE_EVENT(net_dev_xmit, + + TP_PROTO(struct sk_buff *skb, + int rc), + + TP_ARGS(skb, rc), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned int, len ) + __field( int, rc ) + __string( name, skb->dev->name ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = skb->len; + __entry->rc = rc; + __assign_str(name, skb->dev->name); + ), + + TP_printk("dev=%s skbaddr=%p len=%u rc=%d", + __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) +); + +DECLARE_EVENT_CLASS(net_dev_template, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned int, len ) + __string( name, skb->dev->name ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = skb->len; + __assign_str(name, skb->dev->name); + ), + + TP_printk("dev=%s skbaddr=%p len=%u", + __get_str(name), __entry->skbaddr, __entry->len) +) + +DEFINE_EVENT(net_dev_template, net_dev_queue, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_template, netif_receive_skb, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_template, netif_rx, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); +#endif /* _TRACE_NET_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 4b2be6dc76f0..75ce9d500d8e 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -35,6 +35,23 @@ TRACE_EVENT(kfree_skb, __entry->skbaddr, __entry->protocol, __entry->location) ); +TRACE_EVENT(consume_skb, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + ), + + TP_printk("skbaddr=%p", __entry->skbaddr) +); + TRACE_EVENT(skb_copy_datagram_iovec, TP_PROTO(const struct sk_buff *skb, int len), diff --git a/kernel/perf_event.c b/kernel/perf_event.c index db5b56064687..2d74f31220ad 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1782,6 +1782,216 @@ static u64 perf_event_read(struct perf_event *event) } /* + * Callchain support + */ + +struct callchain_cpus_entries { + struct rcu_head rcu_head; + struct perf_callchain_entry *cpu_entries[0]; +}; + +static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); +static atomic_t nr_callchain_events; +static DEFINE_MUTEX(callchain_mutex); +struct callchain_cpus_entries *callchain_cpus_entries; + + +__weak void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +__weak void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +static void release_callchain_buffers_rcu(struct rcu_head *head) +{ + struct callchain_cpus_entries *entries; + int cpu; + + entries = container_of(head, struct callchain_cpus_entries, rcu_head); + + for_each_possible_cpu(cpu) + kfree(entries->cpu_entries[cpu]); + + kfree(entries); +} + +static void release_callchain_buffers(void) +{ + struct callchain_cpus_entries *entries; + + entries = callchain_cpus_entries; + rcu_assign_pointer(callchain_cpus_entries, NULL); + call_rcu(&entries->rcu_head, release_callchain_buffers_rcu); +} + +static int alloc_callchain_buffers(void) +{ + int cpu; + int size; + struct callchain_cpus_entries *entries; + + /* + * We can't use the percpu allocation API for data that can be + * accessed from NMI. Use a temporary manual per cpu allocation + * until that gets sorted out. + */ + size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * + num_possible_cpus(); + + entries = kzalloc(size, GFP_KERNEL); + if (!entries) + return -ENOMEM; + + size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS; + + for_each_possible_cpu(cpu) { + entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, + cpu_to_node(cpu)); + if (!entries->cpu_entries[cpu]) + goto fail; + } + + rcu_assign_pointer(callchain_cpus_entries, entries); + + return 0; + +fail: + for_each_possible_cpu(cpu) + kfree(entries->cpu_entries[cpu]); + kfree(entries); + + return -ENOMEM; +} + +static int get_callchain_buffers(void) +{ + int err = 0; + int count; + + mutex_lock(&callchain_mutex); + + count = atomic_inc_return(&nr_callchain_events); + if (WARN_ON_ONCE(count < 1)) { + err = -EINVAL; + goto exit; + } + + if (count > 1) { + /* If the allocation failed, give up */ + if (!callchain_cpus_entries) + err = -ENOMEM; + goto exit; + } + + err = alloc_callchain_buffers(); + if (err) + release_callchain_buffers(); +exit: + mutex_unlock(&callchain_mutex); + + return err; +} + +static void put_callchain_buffers(void) +{ + if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) { + release_callchain_buffers(); + mutex_unlock(&callchain_mutex); + } +} + +static int get_recursion_context(int *recursion) +{ + int rctx; + + if (in_nmi()) + rctx = 3; + else if (in_irq()) + rctx = 2; + else if (in_softirq()) + rctx = 1; + else + rctx = 0; + + if (recursion[rctx]) + return -1; + + recursion[rctx]++; + barrier(); + + return rctx; +} + +static inline void put_recursion_context(int *recursion, int rctx) +{ + barrier(); + recursion[rctx]--; +} + +static struct perf_callchain_entry *get_callchain_entry(int *rctx) +{ + int cpu; + struct callchain_cpus_entries *entries; + + *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); + if (*rctx == -1) + return NULL; + + entries = rcu_dereference(callchain_cpus_entries); + if (!entries) + return NULL; + + cpu = smp_processor_id(); + + return &entries->cpu_entries[cpu][*rctx]; +} + +static void +put_callchain_entry(int rctx) +{ + put_recursion_context(__get_cpu_var(callchain_recursion), rctx); +} + +static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + int rctx; + struct perf_callchain_entry *entry; + + + entry = get_callchain_entry(&rctx); + if (rctx == -1) + return NULL; + + if (!entry) + goto exit_put; + + entry->nr = 0; + + if (!user_mode(regs)) { + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_kernel(entry, regs); + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_user(entry, regs); + } + +exit_put: + put_callchain_entry(rctx); + + return entry; +} + +/* * Initialize the perf_event context in a task_struct: */ static void @@ -1913,6 +2123,8 @@ static void free_event(struct perf_event *event) atomic_dec(&nr_comm_events); if (event->attr.task) atomic_dec(&nr_task_events); + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + put_callchain_buffers(); } if (event->buffer) { @@ -2956,16 +3168,6 @@ void perf_event_do_pending(void) } /* - * Callchain support -- arch specific - */ - -__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - return NULL; -} - - -/* * We assume there is only KVM supporting the callbacks. * Later on, we might change it to a list if there is * another virtualization implementation supporting the callbacks. @@ -3459,14 +3661,20 @@ static void perf_event_output(struct perf_event *event, int nmi, struct perf_output_handle handle; struct perf_event_header header; + /* protect the callchain buffers */ + rcu_read_lock(); + perf_prepare_sample(&header, data, event, regs); if (perf_output_begin(&handle, event, header.size, nmi, 1)) - return; + goto exit; perf_output_sample(&handle, &header, data, event); perf_output_end(&handle); + +exit: + rcu_read_unlock(); } /* @@ -4222,32 +4430,16 @@ end: int perf_swevent_get_recursion_context(void) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - int rctx; - if (in_nmi()) - rctx = 3; - else if (in_irq()) - rctx = 2; - else if (in_softirq()) - rctx = 1; - else - rctx = 0; - - if (cpuctx->recursion[rctx]) - return -1; - - cpuctx->recursion[rctx]++; - barrier(); - - return rctx; + return get_recursion_context(cpuctx->recursion); } EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); void inline perf_swevent_put_recursion_context(int rctx) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - barrier(); - cpuctx->recursion[rctx]--; + + put_recursion_context(cpuctx->recursion, rctx); } void __perf_sw_event(u32 event_id, u64 nr, int nmi, @@ -4947,6 +5139,13 @@ done: atomic_inc(&nr_comm_events); if (event->attr.task) atomic_inc(&nr_task_events); + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { + err = get_callchain_buffers(); + if (err) { + free_event(event); + return ERR_PTR(err); + } + } } return event; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 19cccc3c3028..ef27017caa56 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2606,6 +2606,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) } EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); +/* + * The total entries in the ring buffer is the running counter + * of entries entered into the ring buffer, minus the sum of + * the entries read from the ring buffer and the number of + * entries that were overwritten. + */ +static inline unsigned long +rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) +{ + return local_read(&cpu_buffer->entries) - + (local_read(&cpu_buffer->overrun) + cpu_buffer->read); +} + /** * ring_buffer_entries_cpu - get the number of entries in a cpu buffer * @buffer: The ring buffer @@ -2614,16 +2627,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; - unsigned long ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; cpu_buffer = buffer->buffers[cpu]; - ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun)) - - cpu_buffer->read; - return ret; + return rb_num_of_entries(cpu_buffer); } EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); @@ -2684,8 +2694,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) /* if you care about this being correct, lock the buffer */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; - entries += (local_read(&cpu_buffer->entries) - - local_read(&cpu_buffer->overrun)) - cpu_buffer->read; + entries += rb_num_of_entries(cpu_buffer); } return entries; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 31cc4cb0dbf2..f3bbcd1c90c8 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -9,7 +9,7 @@ #include <linux/kprobes.h> #include "trace.h" -static char *perf_trace_buf[4]; +static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; /* * Force it to be aligned to unsigned long to avoid misaligned accesses @@ -24,7 +24,7 @@ static int total_ref_count; static int perf_trace_event_init(struct ftrace_event_call *tp_event, struct perf_event *p_event) { - struct hlist_head *list; + struct hlist_head __percpu *list; int ret = -ENOMEM; int cpu; @@ -42,11 +42,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, tp_event->perf_events = list; if (!total_ref_count) { - char *buf; + char __percpu *buf; int i; - for (i = 0; i < 4; i++) { - buf = (char *)alloc_percpu(perf_trace_t); + for (i = 0; i < PERF_NR_CONTEXTS; i++) { + buf = (char __percpu *)alloc_percpu(perf_trace_t); if (!buf) goto fail; @@ -65,7 +65,7 @@ fail: if (!total_ref_count) { int i; - for (i = 0; i < 4; i++) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { free_percpu(perf_trace_buf[i]); perf_trace_buf[i] = NULL; } @@ -104,13 +104,14 @@ int perf_trace_init(struct perf_event *p_event) int perf_trace_enable(struct perf_event *p_event) { struct ftrace_event_call *tp_event = p_event->tp_event; + struct hlist_head __percpu *pcpu_list; struct hlist_head *list; - list = tp_event->perf_events; - if (WARN_ON_ONCE(!list)) + pcpu_list = tp_event->perf_events; + if (WARN_ON_ONCE(!pcpu_list)) return -EINVAL; - list = this_cpu_ptr(list); + list = this_cpu_ptr(pcpu_list); hlist_add_head_rcu(&p_event->hlist_entry, list); return 0; @@ -142,7 +143,7 @@ void perf_trace_destroy(struct perf_event *p_event) tp_event->perf_events = NULL; if (!--total_ref_count) { - for (i = 0; i < 4; i++) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { free_percpu(perf_trace_buf[i]); perf_trace_buf[i] = NULL; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 4c758f146328..398c0e8b332c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -600,21 +600,29 @@ out: enum { FORMAT_HEADER = 1, - FORMAT_PRINTFMT = 2, + FORMAT_FIELD_SEPERATOR = 2, + FORMAT_PRINTFMT = 3, }; static void *f_next(struct seq_file *m, void *v, loff_t *pos) { struct ftrace_event_call *call = m->private; struct ftrace_event_field *field; - struct list_head *head; + struct list_head *common_head = &ftrace_common_fields; + struct list_head *head = trace_get_fields(call); (*pos)++; switch ((unsigned long)v) { case FORMAT_HEADER: - head = &ftrace_common_fields; + if (unlikely(list_empty(common_head))) + return NULL; + + field = list_entry(common_head->prev, + struct ftrace_event_field, link); + return field; + case FORMAT_FIELD_SEPERATOR: if (unlikely(list_empty(head))) return NULL; @@ -626,31 +634,10 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos) return NULL; } - head = trace_get_fields(call); - - /* - * To separate common fields from event fields, the - * LSB is set on the first event field. Clear it in case. - */ - v = (void *)((unsigned long)v & ~1L); - field = v; - /* - * If this is a common field, and at the end of the list, then - * continue with main list. - */ - if (field->link.prev == &ftrace_common_fields) { - if (unlikely(list_empty(head))) - return NULL; - field = list_entry(head->prev, struct ftrace_event_field, link); - /* Set the LSB to notify f_show to print an extra newline */ - field = (struct ftrace_event_field *) - ((unsigned long)field | 1); - return field; - } - - /* If we are done tell f_show to print the format */ - if (field->link.prev == head) + if (field->link.prev == common_head) + return (void *)FORMAT_FIELD_SEPERATOR; + else if (field->link.prev == head) return (void *)FORMAT_PRINTFMT; field = list_entry(field->link.prev, struct ftrace_event_field, link); @@ -688,22 +675,16 @@ static int f_show(struct seq_file *m, void *v) seq_printf(m, "format:\n"); return 0; + case FORMAT_FIELD_SEPERATOR: + seq_putc(m, '\n'); + return 0; + case FORMAT_PRINTFMT: seq_printf(m, "\nprint fmt: %s\n", call->print_fmt); return 0; } - /* - * To separate common fields from event fields, the - * LSB is set on the first event field. Clear it and - * print a newline if it is set. - */ - if ((unsigned long)v & 1) { - seq_putc(m, '\n'); - v = (void *)((unsigned long)v & ~1L); - } - field = v; /* diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 6f233698518e..c93bcb248638 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -23,7 +23,7 @@ struct fgraph_cpu_data { }; struct fgraph_data { - struct fgraph_cpu_data *cpu_data; + struct fgraph_cpu_data __percpu *cpu_data; /* Place to preserve last processed entry. */ struct ftrace_graph_ent_entry ent; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7f9c3c52ecc1..fa71aebda4ff 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -43,7 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); #endif -static int __read_mostly did_panic; static int __initdata no_watchdog; @@ -187,18 +186,6 @@ static int is_softlockup(unsigned long touch_ts) return 0; } -static int -watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr) -{ - did_panic = 1; - - return NOTIFY_DONE; -} - -static struct notifier_block panic_block = { - .notifier_call = watchdog_panic, -}; - #ifdef CONFIG_HARDLOCKUP_DETECTOR static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, @@ -378,7 +365,7 @@ static int watchdog_nmi_enable(int cpu) } printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); - return -1; + return PTR_ERR(event); /* success path */ out_save: @@ -422,17 +409,19 @@ static int watchdog_prepare_cpu(int cpu) static int watchdog_enable(int cpu) { struct task_struct *p = per_cpu(softlockup_watchdog, cpu); + int err; /* enable the perf event */ - if (watchdog_nmi_enable(cpu) != 0) - return -1; + err = watchdog_nmi_enable(cpu); + if (err) + return err; /* create the watchdog thread */ if (!p) { p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); if (IS_ERR(p)) { printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); - return -1; + return PTR_ERR(p); } kthread_bind(p, cpu); per_cpu(watchdog_touch_ts, cpu) = 0; @@ -526,17 +515,16 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { int hotcpu = (unsigned long)hcpu; + int err = 0; switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - if (watchdog_prepare_cpu(hotcpu)) - return NOTIFY_BAD; + err = watchdog_prepare_cpu(hotcpu); break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: - if (watchdog_enable(hotcpu)) - return NOTIFY_BAD; + err = watchdog_enable(hotcpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: @@ -549,7 +537,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; #endif /* CONFIG_HOTPLUG_CPU */ } - return NOTIFY_OK; + return notifier_from_errno(err); } static struct notifier_block __cpuinitdata cpu_nfb = { @@ -565,13 +553,11 @@ static int __init spawn_watchdog_task(void) return 0; err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); - WARN_ON(err == NOTIFY_BAD); + WARN_ON(notifier_to_errno(err)); cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); register_cpu_notifier(&cpu_nfb); - atomic_notifier_chain_register(&panic_notifier_list, &panic_block); - return 0; } early_initcall(spawn_watchdog_task); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1b4afd2e6ca0..e85d549b6eac 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -482,6 +482,7 @@ config PROVE_LOCKING select DEBUG_SPINLOCK select DEBUG_MUTEXES select DEBUG_LOCK_ALLOC + select TRACE_IRQFLAGS default n help This feature enables the kernel to prove that all locking @@ -579,11 +580,10 @@ config DEBUG_LOCKDEP of more runtime overhead. config TRACE_IRQFLAGS - depends on DEBUG_KERNEL bool - default y - depends on TRACE_IRQFLAGS_SUPPORT - depends on PROVE_LOCKING + help + Enables hooks to interrupt enabling and disabling for + either tracing or lock debugging. config DEBUG_SPINLOCK_SLEEP bool "Spinlock debugging: sleep-inside-spinlock checking" diff --git a/net/core/datagram.c b/net/core/datagram.c index 251997a95483..282806ba7a57 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -243,6 +243,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) unlock_sock_fast(sk, slow); /* skb is now orphaned, can be freed outside of locked section */ + trace_kfree_skb(skb, skb_free_datagram_locked); __kfree_skb(skb); } EXPORT_SYMBOL(skb_free_datagram_locked); diff --git a/net/core/dev.c b/net/core/dev.c index 3721fbb9a83c..2308cce48048 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -128,6 +128,8 @@ #include <linux/jhash.h> #include <linux/random.h> #include <trace/events/napi.h> +#include <trace/events/net.h> +#include <trace/events/skb.h> #include <linux/pci.h> #include "net-sysfs.h" @@ -1978,6 +1980,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } rc = ops->ndo_start_xmit(skb, dev); + trace_net_dev_xmit(skb, rc); if (rc == NETDEV_TX_OK) txq_trans_update(txq); return rc; @@ -1998,6 +2001,7 @@ gso: skb_dst_drop(nskb); rc = ops->ndo_start_xmit(nskb, dev); + trace_net_dev_xmit(nskb, rc); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) goto out_kfree_gso_skb; @@ -2186,6 +2190,7 @@ int dev_queue_xmit(struct sk_buff *skb) #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); #endif + trace_net_dev_queue(skb); if (q->enqueue) { rc = __dev_xmit_skb(skb, q, dev, txq); goto out; @@ -2512,6 +2517,7 @@ int netif_rx(struct sk_buff *skb) if (netdev_tstamp_prequeue) net_timestamp_check(skb); + trace_netif_rx(skb); #ifdef CONFIG_RPS { struct rps_dev_flow voidflow, *rflow = &voidflow; @@ -2571,6 +2577,7 @@ static void net_tx_action(struct softirq_action *h) clist = clist->next; WARN_ON(atomic_read(&skb->users)); + trace_kfree_skb(skb, net_tx_action); __kfree_skb(skb); } } @@ -2828,6 +2835,7 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!netdev_tstamp_prequeue) net_timestamp_check(skb); + trace_netif_receive_skb(skb); if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) return NET_RX_SUCCESS; diff --git a/net/core/net-traces.c b/net/core/net-traces.c index afa6380ed88a..7f1bb2aba03b 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -26,6 +26,7 @@ #define CREATE_TRACE_POINTS #include <trace/events/skb.h> +#include <trace/events/net.h> #include <trace/events/napi.h> EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3a2513f0d0c3..12e61e351d0e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -466,6 +466,7 @@ void consume_skb(struct sk_buff *skb) smp_rmb(); else if (likely(!atomic_dec_and_test(&skb->users))) return; + trace_consume_skb(skb); __kfree_skb(skb); } EXPORT_SYMBOL(consume_skb); diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 5164a655c39f..b2c63309a651 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -8,7 +8,7 @@ perf-annotate - Read perf.data (created by perf record) and display annotated co SYNOPSIS -------- [verse] -'perf annotate' [-i <file> | --input=file] symbol_name +'perf annotate' [-i <file> | --input=file] [symbol_name] DESCRIPTION ----------- @@ -24,6 +24,13 @@ OPTIONS --input=:: Input file name. (default: perf.data) +--stdio:: Use the stdio interface. + +--tui:: Use the TUI interface Use of --tui requires a tty, if one is not + present, as when piping to other commands, the stdio interface is + used. This interfaces starts by centering on the line with more + samples, TAB/UNTAB cycles thru the lines with more samples. + SEE ALSO -------- -linkperf:perf-record[1] +linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index abfabe9147a4..12052c9ed0ba 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -65,6 +65,13 @@ OPTIONS the tree is considered as a new profiled object. + Default: fractal,0.5. +--stdio:: Use the stdio interface. + +--tui:: Use the TUI interface, that is integrated with annotate and allows + zooming into DSOs or threads, among other features. Use of --tui + requires a tty, if one is not present, as when piping to other + commands, the stdio interface is used. + SEE ALSO -------- linkperf:perf-stat[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 4f1fa77c1feb..fe1e30722f3b 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -313,6 +313,9 @@ TEST_PROGRAMS = SCRIPT_SH += perf-archive.sh +grep-libs = $(filter -l%,$(1)) +strip-libs = $(filter-out -l%,$(1)) + # # No Perl scripts right now: # @@ -588,14 +591,17 @@ endif ifdef NO_LIBPERL BASIC_CFLAGS += -DNO_LIBPERL else - PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null` + PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) + PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) + PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y) BASIC_CFLAGS += -DNO_LIBPERL else - ALL_LDFLAGS += $(PERL_EMBED_LDOPTS) + ALL_LDFLAGS += $(PERL_EMBED_LDFLAGS) + EXTLIBS += $(PERL_EMBED_LIBADD) LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o endif @@ -604,13 +610,16 @@ endif ifdef NO_LIBPYTHON BASIC_CFLAGS += -DNO_LIBPYTHON else - PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null` + PYTHON_EMBED_LDOPTS = $(shell python-config --ldflags 2>/dev/null) + PYTHON_EMBED_LDFLAGS = $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) + PYTHON_EMBED_LIBADD = $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null` FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y) BASIC_CFLAGS += -DNO_LIBPYTHON else - ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS) + ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS) + EXTLIBS += $(PYTHON_EMBED_LIBADD) LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o endif @@ -653,6 +662,15 @@ else endif endif + +ifdef NO_STRLCPY + BASIC_CFLAGS += -DNO_STRLCPY +else + ifneq ($(call try-cc,$(SOURCE_STRLCPY),),y) + BASIC_CFLAGS += -DNO_STRLCPY + endif +endif + ifndef CC_LD_DYNPATH ifdef NO_R_TO_GCC_LINKER # Some gcc does not accept and pass -R to the linker to specify @@ -910,8 +928,8 @@ $(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@ $(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) - $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(OUTPUT)perf.o \ - $(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(OUTPUT)perf.o \ + $(BUILTIN_OBJS) $(LIBS) -o $@ $(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 1478dc64bf15..6d5604d8df95 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -28,7 +28,7 @@ static char const *input_name = "perf.data"; -static bool force; +static bool force, use_tui, use_stdio; static bool full_paths; @@ -321,7 +321,7 @@ static int hist_entry__tty_annotate(struct hist_entry *he) static void hists__find_annotations(struct hists *self) { - struct rb_node *first = rb_first(&self->entries), *nd = first; + struct rb_node *nd = rb_first(&self->entries), *next; int key = KEY_RIGHT; while (nd) { @@ -343,20 +343,19 @@ find_next: if (use_browser > 0) { key = hist_entry__tui_annotate(he); - if (is_exit_key(key)) - break; switch (key) { case KEY_RIGHT: - case '\t': - nd = rb_next(nd); + next = rb_next(nd); break; case KEY_LEFT: - if (nd == first) - continue; - nd = rb_prev(nd); - default: + next = rb_prev(nd); break; + default: + return; } + + if (next != NULL) + nd = next; } else { hist_entry__tty_annotate(he); nd = rb_next(nd); @@ -428,6 +427,8 @@ static const struct option options[] = { "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), + OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), + OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, @@ -443,6 +444,11 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used) { argc = parse_options(argc, argv, options, annotate_usage, 0); + if (use_stdio) + use_browser = 0; + else if (use_tui) + use_browser = 1; + setup_browser(); symbol_conf.priv_size = sizeof(struct sym_priv); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 55fc1f46892a..5de405d45230 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -32,7 +32,7 @@ static char const *input_name = "perf.data"; -static bool force; +static bool force, use_tui, use_stdio; static bool hide_unresolved; static bool dont_use_callchains; @@ -107,7 +107,8 @@ static int perf_session__add_hist_entry(struct perf_session *self, goto out_free_syms; err = 0; if (symbol_conf.use_callchain) { - err = append_chain(he->callchain, data->callchain, syms, data->period); + err = callchain_append(he->callchain, data->callchain, syms, + data->period); if (err) goto out_free_syms; } @@ -450,6 +451,8 @@ static const struct option options[] = { "Show per-thread event counters"), OPT_STRING(0, "pretty", &pretty_printing_style, "key", "pretty printing style key: normal raw"), + OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), + OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, @@ -482,8 +485,15 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) { argc = parse_options(argc, argv, options, report_usage, 0); + if (use_stdio) + use_browser = 0; + else if (use_tui) + use_browser = 1; + if (strcmp(input_name, "-") != 0) setup_browser(); + else + use_browser = 0; /* * Only in the newt browser we are doing integrated annotation, * so don't allocate extra space that won't be used in the stdio diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak index 7a7b60859053..b253db634f04 100644 --- a/tools/perf/feature-tests.mak +++ b/tools/perf/feature-tests.mak @@ -110,6 +110,17 @@ int main(void) } endef +define SOURCE_STRLCPY +#include <stdlib.h> +extern size_t strlcpy(char *dest, const char *src, size_t size); + +int main(void) +{ + strlcpy(NULL, NULL, 0); + return 0; +} +endef + # try-cc # Usage: option = $(call try-cc, source-to-build, cc-options) try-cc = $(shell sh -c \ diff --git a/tools/perf/scripts/python/bin/netdev-times-record b/tools/perf/scripts/python/bin/netdev-times-record new file mode 100644 index 000000000000..d931a828126b --- /dev/null +++ b/tools/perf/scripts/python/bin/netdev-times-record @@ -0,0 +1,8 @@ +#!/bin/bash +perf record -a -e net:net_dev_xmit -e net:net_dev_queue \ + -e net:netif_receive_skb -e net:netif_rx \ + -e skb:consume_skb -e skb:kfree_skb \ + -e skb:skb_copy_datagram_iovec -e napi:napi_poll \ + -e irq:irq_handler_entry -e irq:irq_handler_exit \ + -e irq:softirq_entry -e irq:softirq_exit \ + -e irq:softirq_raise $@ diff --git a/tools/perf/scripts/python/bin/netdev-times-report b/tools/perf/scripts/python/bin/netdev-times-report new file mode 100644 index 000000000000..c3d0a638123d --- /dev/null +++ b/tools/perf/scripts/python/bin/netdev-times-report @@ -0,0 +1,5 @@ +#!/bin/bash +# description: display a process of packet and processing time +# args: [tx] [rx] [dev=] [debug] + +perf trace -s ~/libexec/perf-core/scripts/python/netdev-times.py $@ diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py new file mode 100644 index 000000000000..9aa0a32972e8 --- /dev/null +++ b/tools/perf/scripts/python/netdev-times.py @@ -0,0 +1,464 @@ +# Display a process of packets and processed time. +# It helps us to investigate networking or network device. +# +# options +# tx: show only tx chart +# rx: show only rx chart +# dev=: show only thing related to specified device +# debug: work with debug mode. It shows buffer status. + +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * +from Util import * + +all_event_list = []; # insert all tracepoint event related with this script +irq_dic = {}; # key is cpu and value is a list which stacks irqs + # which raise NET_RX softirq +net_rx_dic = {}; # key is cpu and value include time of NET_RX softirq-entry + # and a list which stacks receive +receive_hunk_list = []; # a list which include a sequence of receive events +rx_skb_list = []; # received packet list for matching + # skb_copy_datagram_iovec + +buffer_budget = 65536; # the budget of rx_skb_list, tx_queue_list and + # tx_xmit_list +of_count_rx_skb_list = 0; # overflow count + +tx_queue_list = []; # list of packets which pass through dev_queue_xmit +of_count_tx_queue_list = 0; # overflow count + +tx_xmit_list = []; # list of packets which pass through dev_hard_start_xmit +of_count_tx_xmit_list = 0; # overflow count + +tx_free_list = []; # list of packets which is freed + +# options +show_tx = 0; +show_rx = 0; +dev = 0; # store a name of device specified by option "dev=" +debug = 0; + +# indices of event_info tuple +EINFO_IDX_NAME= 0 +EINFO_IDX_CONTEXT=1 +EINFO_IDX_CPU= 2 +EINFO_IDX_TIME= 3 +EINFO_IDX_PID= 4 +EINFO_IDX_COMM= 5 + +# Calculate a time interval(msec) from src(nsec) to dst(nsec) +def diff_msec(src, dst): + return (dst - src) / 1000000.0 + +# Display a process of transmitting a packet +def print_transmit(hunk): + if dev != 0 and hunk['dev'].find(dev) < 0: + return + print "%7s %5d %6d.%06dsec %12.3fmsec %12.3fmsec" % \ + (hunk['dev'], hunk['len'], + nsecs_secs(hunk['queue_t']), + nsecs_nsecs(hunk['queue_t'])/1000, + diff_msec(hunk['queue_t'], hunk['xmit_t']), + diff_msec(hunk['xmit_t'], hunk['free_t'])) + +# Format for displaying rx packet processing +PF_IRQ_ENTRY= " irq_entry(+%.3fmsec irq=%d:%s)" +PF_SOFT_ENTRY=" softirq_entry(+%.3fmsec)" +PF_NAPI_POLL= " napi_poll_exit(+%.3fmsec %s)" +PF_JOINT= " |" +PF_WJOINT= " | |" +PF_NET_RECV= " |---netif_receive_skb(+%.3fmsec skb=%x len=%d)" +PF_NET_RX= " |---netif_rx(+%.3fmsec skb=%x)" +PF_CPY_DGRAM= " | skb_copy_datagram_iovec(+%.3fmsec %d:%s)" +PF_KFREE_SKB= " | kfree_skb(+%.3fmsec location=%x)" +PF_CONS_SKB= " | consume_skb(+%.3fmsec)" + +# Display a process of received packets and interrputs associated with +# a NET_RX softirq +def print_receive(hunk): + show_hunk = 0 + irq_list = hunk['irq_list'] + cpu = irq_list[0]['cpu'] + base_t = irq_list[0]['irq_ent_t'] + # check if this hunk should be showed + if dev != 0: + for i in range(len(irq_list)): + if irq_list[i]['name'].find(dev) >= 0: + show_hunk = 1 + break + else: + show_hunk = 1 + if show_hunk == 0: + return + + print "%d.%06dsec cpu=%d" % \ + (nsecs_secs(base_t), nsecs_nsecs(base_t)/1000, cpu) + for i in range(len(irq_list)): + print PF_IRQ_ENTRY % \ + (diff_msec(base_t, irq_list[i]['irq_ent_t']), + irq_list[i]['irq'], irq_list[i]['name']) + print PF_JOINT + irq_event_list = irq_list[i]['event_list'] + for j in range(len(irq_event_list)): + irq_event = irq_event_list[j] + if irq_event['event'] == 'netif_rx': + print PF_NET_RX % \ + (diff_msec(base_t, irq_event['time']), + irq_event['skbaddr']) + print PF_JOINT + print PF_SOFT_ENTRY % \ + diff_msec(base_t, hunk['sirq_ent_t']) + print PF_JOINT + event_list = hunk['event_list'] + for i in range(len(event_list)): + event = event_list[i] + if event['event_name'] == 'napi_poll': + print PF_NAPI_POLL % \ + (diff_msec(base_t, event['event_t']), event['dev']) + if i == len(event_list) - 1: + print "" + else: + print PF_JOINT + else: + print PF_NET_RECV % \ + (diff_msec(base_t, event['event_t']), event['skbaddr'], + event['len']) + if 'comm' in event.keys(): + print PF_WJOINT + print PF_CPY_DGRAM % \ + (diff_msec(base_t, event['comm_t']), + event['pid'], event['comm']) + elif 'handle' in event.keys(): + print PF_WJOINT + if event['handle'] == "kfree_skb": + print PF_KFREE_SKB % \ + (diff_msec(base_t, + event['comm_t']), + event['location']) + elif event['handle'] == "consume_skb": + print PF_CONS_SKB % \ + diff_msec(base_t, + event['comm_t']) + print PF_JOINT + +def trace_begin(): + global show_tx + global show_rx + global dev + global debug + + for i in range(len(sys.argv)): + if i == 0: + continue + arg = sys.argv[i] + if arg == 'tx': + show_tx = 1 + elif arg =='rx': + show_rx = 1 + elif arg.find('dev=',0, 4) >= 0: + dev = arg[4:] + elif arg == 'debug': + debug = 1 + if show_tx == 0 and show_rx == 0: + show_tx = 1 + show_rx = 1 + +def trace_end(): + # order all events in time + all_event_list.sort(lambda a,b :cmp(a[EINFO_IDX_TIME], + b[EINFO_IDX_TIME])) + # process all events + for i in range(len(all_event_list)): + event_info = all_event_list[i] + name = event_info[EINFO_IDX_NAME] + if name == 'irq__softirq_exit': + handle_irq_softirq_exit(event_info) + elif name == 'irq__softirq_entry': + handle_irq_softirq_entry(event_info) + elif name == 'irq__softirq_raise': + handle_irq_softirq_raise(event_info) + elif name == 'irq__irq_handler_entry': + handle_irq_handler_entry(event_info) + elif name == 'irq__irq_handler_exit': + handle_irq_handler_exit(event_info) + elif name == 'napi__napi_poll': + handle_napi_poll(event_info) + elif name == 'net__netif_receive_skb': + handle_netif_receive_skb(event_info) + elif name == 'net__netif_rx': + handle_netif_rx(event_info) + elif name == 'skb__skb_copy_datagram_iovec': + handle_skb_copy_datagram_iovec(event_info) + elif name == 'net__net_dev_queue': + handle_net_dev_queue(event_info) + elif name == 'net__net_dev_xmit': + handle_net_dev_xmit(event_info) + elif name == 'skb__kfree_skb': + handle_kfree_skb(event_info) + elif name == 'skb__consume_skb': + handle_consume_skb(event_info) + # display receive hunks + if show_rx: + for i in range(len(receive_hunk_list)): + print_receive(receive_hunk_list[i]) + # display transmit hunks + if show_tx: + print " dev len Qdisc " \ + " netdevice free" + for i in range(len(tx_free_list)): + print_transmit(tx_free_list[i]) + if debug: + print "debug buffer status" + print "----------------------------" + print "xmit Qdisc:remain:%d overflow:%d" % \ + (len(tx_queue_list), of_count_tx_queue_list) + print "xmit netdevice:remain:%d overflow:%d" % \ + (len(tx_xmit_list), of_count_tx_xmit_list) + print "receive:remain:%d overflow:%d" % \ + (len(rx_skb_list), of_count_rx_skb_list) + +# called from perf, when it finds a correspoinding event +def irq__softirq_entry(name, context, cpu, sec, nsec, pid, comm, vec): + if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX": + return + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec) + all_event_list.append(event_info) + +def irq__softirq_exit(name, context, cpu, sec, nsec, pid, comm, vec): + if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX": + return + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec) + all_event_list.append(event_info) + +def irq__softirq_raise(name, context, cpu, sec, nsec, pid, comm, vec): + if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX": + return + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec) + all_event_list.append(event_info) + +def irq__irq_handler_entry(name, context, cpu, sec, nsec, pid, comm, + irq, irq_name): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + irq, irq_name) + all_event_list.append(event_info) + +def irq__irq_handler_exit(name, context, cpu, sec, nsec, pid, comm, irq, ret): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, irq, ret) + all_event_list.append(event_info) + +def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, napi, dev_name): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + napi, dev_name) + all_event_list.append(event_info) + +def net__netif_receive_skb(name, context, cpu, sec, nsec, pid, comm, skbaddr, + skblen, dev_name): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + skbaddr, skblen, dev_name) + all_event_list.append(event_info) + +def net__netif_rx(name, context, cpu, sec, nsec, pid, comm, skbaddr, + skblen, dev_name): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + skbaddr, skblen, dev_name) + all_event_list.append(event_info) + +def net__net_dev_queue(name, context, cpu, sec, nsec, pid, comm, + skbaddr, skblen, dev_name): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + skbaddr, skblen, dev_name) + all_event_list.append(event_info) + +def net__net_dev_xmit(name, context, cpu, sec, nsec, pid, comm, + skbaddr, skblen, rc, dev_name): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + skbaddr, skblen, rc ,dev_name) + all_event_list.append(event_info) + +def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm, + skbaddr, protocol, location): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + skbaddr, protocol, location) + all_event_list.append(event_info) + +def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, skbaddr): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + skbaddr) + all_event_list.append(event_info) + +def skb__skb_copy_datagram_iovec(name, context, cpu, sec, nsec, pid, comm, + skbaddr, skblen): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + skbaddr, skblen) + all_event_list.append(event_info) + +def handle_irq_handler_entry(event_info): + (name, context, cpu, time, pid, comm, irq, irq_name) = event_info + if cpu not in irq_dic.keys(): + irq_dic[cpu] = [] + irq_record = {'irq':irq, 'name':irq_name, 'cpu':cpu, 'irq_ent_t':time} + irq_dic[cpu].append(irq_record) + +def handle_irq_handler_exit(event_info): + (name, context, cpu, time, pid, comm, irq, ret) = event_info + if cpu not in irq_dic.keys(): + return + irq_record = irq_dic[cpu].pop() + if irq != irq_record['irq']: + return + irq_record.update({'irq_ext_t':time}) + # if an irq doesn't include NET_RX softirq, drop. + if 'event_list' in irq_record.keys(): + irq_dic[cpu].append(irq_record) + +def handle_irq_softirq_raise(event_info): + (name, context, cpu, time, pid, comm, vec) = event_info + if cpu not in irq_dic.keys() \ + or len(irq_dic[cpu]) == 0: + return + irq_record = irq_dic[cpu].pop() + if 'event_list' in irq_record.keys(): + irq_event_list = irq_record['event_list'] + else: + irq_event_list = [] + irq_event_list.append({'time':time, 'event':'sirq_raise'}) + irq_record.update({'event_list':irq_event_list}) + irq_dic[cpu].append(irq_record) + +def handle_irq_softirq_entry(event_info): + (name, context, cpu, time, pid, comm, vec) = event_info + net_rx_dic[cpu] = {'sirq_ent_t':time, 'event_list':[]} + +def handle_irq_softirq_exit(event_info): + (name, context, cpu, time, pid, comm, vec) = event_info + irq_list = [] + event_list = 0 + if cpu in irq_dic.keys(): + irq_list = irq_dic[cpu] + del irq_dic[cpu] + if cpu in net_rx_dic.keys(): + sirq_ent_t = net_rx_dic[cpu]['sirq_ent_t'] + event_list = net_rx_dic[cpu]['event_list'] + del net_rx_dic[cpu] + if irq_list == [] or event_list == 0: + return + rec_data = {'sirq_ent_t':sirq_ent_t, 'sirq_ext_t':time, + 'irq_list':irq_list, 'event_list':event_list} + # merge information realted to a NET_RX softirq + receive_hunk_list.append(rec_data) + +def handle_napi_poll(event_info): + (name, context, cpu, time, pid, comm, napi, dev_name) = event_info + if cpu in net_rx_dic.keys(): + event_list = net_rx_dic[cpu]['event_list'] + rec_data = {'event_name':'napi_poll', + 'dev':dev_name, 'event_t':time} + event_list.append(rec_data) + +def handle_netif_rx(event_info): + (name, context, cpu, time, pid, comm, + skbaddr, skblen, dev_name) = event_info + if cpu not in irq_dic.keys() \ + or len(irq_dic[cpu]) == 0: + return + irq_record = irq_dic[cpu].pop() + if 'event_list' in irq_record.keys(): + irq_event_list = irq_record['event_list'] + else: + irq_event_list = [] + irq_event_list.append({'time':time, 'event':'netif_rx', + 'skbaddr':skbaddr, 'skblen':skblen, 'dev_name':dev_name}) + irq_record.update({'event_list':irq_event_list}) + irq_dic[cpu].append(irq_record) + +def handle_netif_receive_skb(event_info): + global of_count_rx_skb_list + + (name, context, cpu, time, pid, comm, + skbaddr, skblen, dev_name) = event_info + if cpu in net_rx_dic.keys(): + rec_data = {'event_name':'netif_receive_skb', + 'event_t':time, 'skbaddr':skbaddr, 'len':skblen} + event_list = net_rx_dic[cpu]['event_list'] + event_list.append(rec_data) + rx_skb_list.insert(0, rec_data) + if len(rx_skb_list) > buffer_budget: + rx_skb_list.pop() + of_count_rx_skb_list += 1 + +def handle_net_dev_queue(event_info): + global of_count_tx_queue_list + + (name, context, cpu, time, pid, comm, + skbaddr, skblen, dev_name) = event_info + skb = {'dev':dev_name, 'skbaddr':skbaddr, 'len':skblen, 'queue_t':time} + tx_queue_list.insert(0, skb) + if len(tx_queue_list) > buffer_budget: + tx_queue_list.pop() + of_count_tx_queue_list += 1 + +def handle_net_dev_xmit(event_info): + global of_count_tx_xmit_list + + (name, context, cpu, time, pid, comm, + skbaddr, skblen, rc, dev_name) = event_info + if rc == 0: # NETDEV_TX_OK + for i in range(len(tx_queue_list)): + skb = tx_queue_list[i] + if skb['skbaddr'] == skbaddr: + skb['xmit_t'] = time + tx_xmit_list.insert(0, skb) + del tx_queue_list[i] + if len(tx_xmit_list) > buffer_budget: + tx_xmit_list.pop() + of_count_tx_xmit_list += 1 + return + +def handle_kfree_skb(event_info): + (name, context, cpu, time, pid, comm, + skbaddr, protocol, location) = event_info + for i in range(len(tx_queue_list)): + skb = tx_queue_list[i] + if skb['skbaddr'] == skbaddr: + del tx_queue_list[i] + return + for i in range(len(tx_xmit_list)): + skb = tx_xmit_list[i] + if skb['skbaddr'] == skbaddr: + skb['free_t'] = time + tx_free_list.append(skb) + del tx_xmit_list[i] + return + for i in range(len(rx_skb_list)): + rec_data = rx_skb_list[i] + if rec_data['skbaddr'] == skbaddr: + rec_data.update({'handle':"kfree_skb", + 'comm':comm, 'pid':pid, 'comm_t':time}) + del rx_skb_list[i] + return + +def handle_consume_skb(event_info): + (name, context, cpu, time, pid, comm, skbaddr) = event_info + for i in range(len(tx_xmit_list)): + skb = tx_xmit_list[i] + if skb['skbaddr'] == skbaddr: + skb['free_t'] = time + tx_free_list.append(skb) + del tx_xmit_list[i] + return + +def handle_skb_copy_datagram_iovec(event_info): + (name, context, cpu, time, pid, comm, skbaddr, skblen) = event_info + for i in range(len(rx_skb_list)): + rec_data = rx_skb_list[i] + if skbaddr == rec_data['skbaddr']: + rec_data.update({'handle':"skb_copy_datagram_iovec", + 'comm':comm, 'pid':pid, 'comm_t':time}) + del rx_skb_list[i] + return diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 27e9ebe4076e..a7729797fd96 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -82,6 +82,8 @@ extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2 extern char *perf_pathdup(const char *fmt, ...) __attribute__((format (printf, 1, 2))); +#ifdef NO_STRLCPY extern size_t strlcpy(char *dest, const char *src, size_t size); +#endif #endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index f231f43424d2..e12d539417b2 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -28,6 +28,9 @@ bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event) #define chain_for_each_child(child, parent) \ list_for_each_entry(child, &parent->children, brothers) +#define chain_for_each_child_safe(child, next, parent) \ + list_for_each_entry_safe(child, next, &parent->children, brothers) + static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, enum chain_mode mode) @@ -86,10 +89,10 @@ __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, * sort them by hit */ static void -sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, +sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root, u64 min_hit, struct callchain_param *param __used) { - __sort_chain_flat(rb_root, node, min_hit); + __sort_chain_flat(rb_root, &root->node, min_hit); } static void __sort_chain_graph_abs(struct callchain_node *node, @@ -108,11 +111,11 @@ static void __sort_chain_graph_abs(struct callchain_node *node, } static void -sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_node *chain_root, +sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root, u64 min_hit, struct callchain_param *param __used) { - __sort_chain_graph_abs(chain_root, min_hit); - rb_root->rb_node = chain_root->rb_root.rb_node; + __sort_chain_graph_abs(&chain_root->node, min_hit); + rb_root->rb_node = chain_root->node.rb_root.rb_node; } static void __sort_chain_graph_rel(struct callchain_node *node, @@ -133,11 +136,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node, } static void -sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, +sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root, u64 min_hit __used, struct callchain_param *param) { - __sort_chain_graph_rel(chain_root, param->min_percent / 100.0); - rb_root->rb_node = chain_root->rb_root.rb_node; + __sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0); + rb_root->rb_node = chain_root->node.rb_root.rb_node; } int register_callchain_param(struct callchain_param *param) @@ -284,19 +287,18 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain, } static int -__append_chain(struct callchain_node *root, struct resolved_chain *chain, - unsigned int start, u64 period); +append_chain(struct callchain_node *root, struct resolved_chain *chain, + unsigned int start, u64 period); static void -__append_chain_children(struct callchain_node *root, - struct resolved_chain *chain, - unsigned int start, u64 period) +append_chain_children(struct callchain_node *root, struct resolved_chain *chain, + unsigned int start, u64 period) { struct callchain_node *rnode; /* lookup in childrens */ chain_for_each_child(rnode, root) { - unsigned int ret = __append_chain(rnode, chain, start, period); + unsigned int ret = append_chain(rnode, chain, start, period); if (!ret) goto inc_children_hit; @@ -309,8 +311,8 @@ inc_children_hit: } static int -__append_chain(struct callchain_node *root, struct resolved_chain *chain, - unsigned int start, u64 period) +append_chain(struct callchain_node *root, struct resolved_chain *chain, + unsigned int start, u64 period) { struct callchain_list *cnode; unsigned int i = start; @@ -357,7 +359,7 @@ __append_chain(struct callchain_node *root, struct resolved_chain *chain, } /* We match the node and still have a part remaining */ - __append_chain_children(root, chain, i, period); + append_chain_children(root, chain, i, period); return 0; } @@ -380,8 +382,8 @@ static void filter_context(struct ip_callchain *old, struct resolved_chain *new, } -int append_chain(struct callchain_node *root, struct ip_callchain *chain, - struct map_symbol *syms, u64 period) +int callchain_append(struct callchain_root *root, struct ip_callchain *chain, + struct map_symbol *syms, u64 period) { struct resolved_chain *filtered; @@ -398,9 +400,65 @@ int append_chain(struct callchain_node *root, struct ip_callchain *chain, if (!filtered->nr) goto end; - __append_chain_children(root, filtered, 0, period); + append_chain_children(&root->node, filtered, 0, period); + + if (filtered->nr > root->max_depth) + root->max_depth = filtered->nr; end: free(filtered); return 0; } + +static int +merge_chain_branch(struct callchain_node *dst, struct callchain_node *src, + struct resolved_chain *chain) +{ + struct callchain_node *child, *next_child; + struct callchain_list *list, *next_list; + int old_pos = chain->nr; + int err = 0; + + list_for_each_entry_safe(list, next_list, &src->val, list) { + chain->ips[chain->nr].ip = list->ip; + chain->ips[chain->nr].ms = list->ms; + chain->nr++; + list_del(&list->list); + free(list); + } + + if (src->hit) + append_chain_children(dst, chain, 0, src->hit); + + chain_for_each_child_safe(child, next_child, src) { + err = merge_chain_branch(dst, child, chain); + if (err) + break; + + list_del(&child->brothers); + free(child); + } + + chain->nr = old_pos; + + return err; +} + +int callchain_merge(struct callchain_root *dst, struct callchain_root *src) +{ + struct resolved_chain *chain; + int err; + + chain = malloc(sizeof(*chain) + + src->max_depth * sizeof(struct resolved_ip)); + if (!chain) + return -ENOMEM; + + chain->nr = 0; + + err = merge_chain_branch(&dst->node, &src->node, chain); + + free(chain); + + return err; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 6de4313924fb..c15fb8c24ad2 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -26,9 +26,14 @@ struct callchain_node { u64 children_hit; }; +struct callchain_root { + u64 max_depth; + struct callchain_node node; +}; + struct callchain_param; -typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_node *, +typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *, u64, struct callchain_param *); struct callchain_param { @@ -44,15 +49,16 @@ struct callchain_list { struct list_head list; }; -static inline void callchain_init(struct callchain_node *node) +static inline void callchain_init(struct callchain_root *root) { - INIT_LIST_HEAD(&node->brothers); - INIT_LIST_HEAD(&node->children); - INIT_LIST_HEAD(&node->val); + INIT_LIST_HEAD(&root->node.brothers); + INIT_LIST_HEAD(&root->node.children); + INIT_LIST_HEAD(&root->node.val); - node->children_hit = 0; - node->parent = NULL; - node->hit = 0; + root->node.parent = NULL; + root->node.hit = 0; + root->node.children_hit = 0; + root->max_depth = 0; } static inline u64 cumul_hits(struct callchain_node *node) @@ -61,8 +67,9 @@ static inline u64 cumul_hits(struct callchain_node *node) } int register_callchain_param(struct callchain_param *param); -int append_chain(struct callchain_node *root, struct ip_callchain *chain, - struct map_symbol *syms, u64 period); +int callchain_append(struct callchain_root *root, struct ip_callchain *chain, + struct map_symbol *syms, u64 period); +int callchain_merge(struct callchain_root *dst, struct callchain_root *src); bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event); #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index be22ae6ef055..2022e8740994 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -87,7 +87,7 @@ static void hist_entry__add_cpumode_period(struct hist_entry *self, static struct hist_entry *hist_entry__new(struct hist_entry *template) { - size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_node) : 0; + size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0; struct hist_entry *self = malloc(sizeof(*self) + callchain_size); if (self != NULL) { @@ -226,6 +226,8 @@ static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he) if (!cmp) { iter->period += he->period; + if (symbol_conf.use_callchain) + callchain_merge(iter->callchain, he->callchain); hist_entry__free(he); return false; } diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 58a470d036dd..bd7497711424 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -22,6 +22,7 @@ static const char *get_perf_dir(void) return "."; } +#ifdef NO_STRLCPY size_t strlcpy(char *dest, const char *src, size_t size) { size_t ret = strlen(src); @@ -33,7 +34,7 @@ size_t strlcpy(char *dest, const char *src, size_t size) } return ret; } - +#endif static char *get_pathname(void) { diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 46e531d09e8b..0b91053a7d11 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -70,7 +70,7 @@ struct hist_entry { struct hist_entry *pair; struct rb_root sorted_chain; }; - struct callchain_node callchain[0]; + struct callchain_root callchain[0]; }; enum sort_type { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1a367734e016..a08e1cbcbbbd 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -388,6 +388,20 @@ size_t dso__fprintf_buildid(struct dso *self, FILE *fp) return fprintf(fp, "%s", sbuild_id); } +size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *fp) +{ + size_t ret = 0; + struct rb_node *nd; + struct symbol_name_rb_node *pos; + + for (nd = rb_first(&self->symbol_names[type]); nd; nd = rb_next(nd)) { + pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); + fprintf(fp, "%s\n", pos->sym.name); + } + + return ret; +} + size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp) { struct rb_node *nd; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index b7a8da4af5a0..0a2c460b6d81 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -181,6 +181,7 @@ size_t machines__fprintf_dsos(struct rb_root *self, FILE *fp); size_t machines__fprintf_dsos_buildid(struct rb_root *self, FILE *fp, bool with_hits); size_t dso__fprintf_buildid(struct dso *self, FILE *fp); +size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *fp); size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); enum dso_origin { diff --git a/tools/perf/util/ui/browser.c b/tools/perf/util/ui/browser.c index 66f2d583d8c4..930c4acaf56a 100644 --- a/tools/perf/util/ui/browser.c +++ b/tools/perf/util/ui/browser.c @@ -1,16 +1,6 @@ -#define _GNU_SOURCE -#include <stdio.h> -#undef _GNU_SOURCE -/* - * slang versions <= 2.0.6 have a "#if HAVE_LONG_LONG" that breaks - * the build if it isn't defined. Use the equivalent one that glibc - * has on features.h. - */ -#include <features.h> -#ifndef HAVE_LONG_LONG -#define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG -#endif #include <slang.h> +#include "libslang.h" +#include <linux/compiler.h> #include <linux/list.h> #include <linux/rbtree.h> #include <stdlib.h> @@ -19,17 +9,9 @@ #include "helpline.h" #include "../color.h" #include "../util.h" +#include <stdio.h> -#if SLANG_VERSION < 20104 -#define sltt_set_color(obj, name, fg, bg) \ - SLtt_set_color(obj,(char *)name, (char *)fg, (char *)bg) -#else -#define sltt_set_color SLtt_set_color -#endif - -newtComponent newt_form__new(void); - -int ui_browser__percent_color(double percent, bool current) +static int ui_browser__percent_color(double percent, bool current) { if (current) return HE_COLORSET_SELECTED; @@ -40,6 +22,23 @@ int ui_browser__percent_color(double percent, bool current) return HE_COLORSET_NORMAL; } +void ui_browser__set_color(struct ui_browser *self __used, int color) +{ + SLsmg_set_color(color); +} + +void ui_browser__set_percent_color(struct ui_browser *self, + double percent, bool current) +{ + int color = ui_browser__percent_color(percent, current); + ui_browser__set_color(self, color); +} + +void ui_browser__gotorc(struct ui_browser *self, int y, int x) +{ + SLsmg_gotorc(self->y + y, self->x + x); +} + void ui_browser__list_head_seek(struct ui_browser *self, off_t offset, int whence) { struct list_head *head = self->entries; @@ -111,7 +110,7 @@ unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self) nd = self->top; while (nd != NULL) { - SLsmg_gotorc(self->y + row, self->x); + ui_browser__gotorc(self, row, 0); self->write(self, nd, row); if (++row == self->height) break; @@ -146,10 +145,28 @@ void ui_browser__reset_index(struct ui_browser *self) self->seek(self, 0, SEEK_SET); } +void ui_browser__add_exit_key(struct ui_browser *self, int key) +{ + newtFormAddHotKey(self->form, key); +} + +void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]) +{ + int i = 0; + + while (keys[i] && i < 64) { + ui_browser__add_exit_key(self, keys[i]); + ++i; + } +} + int ui_browser__show(struct ui_browser *self, const char *title, const char *helpline, ...) { va_list ap; + int keys[] = { NEWT_KEY_UP, NEWT_KEY_DOWN, NEWT_KEY_PGUP, + NEWT_KEY_PGDN, NEWT_KEY_HOME, NEWT_KEY_END, ' ', + NEWT_KEY_LEFT, NEWT_KEY_ESCAPE, 'q', CTRL('c'), 0 }; if (self->form != NULL) { newtFormDestroy(self->form); @@ -157,7 +174,7 @@ int ui_browser__show(struct ui_browser *self, const char *title, } ui_browser__refresh_dimensions(self); newtCenteredWindow(self->width, self->height, title); - self->form = newt_form__new(); + self->form = newtForm(NULL, NULL, 0); if (self->form == NULL) return -1; @@ -167,13 +184,7 @@ int ui_browser__show(struct ui_browser *self, const char *title, if (self->sb == NULL) return -1; - newtFormAddHotKey(self->form, NEWT_KEY_UP); - newtFormAddHotKey(self->form, NEWT_KEY_DOWN); - newtFormAddHotKey(self->form, NEWT_KEY_PGUP); - newtFormAddHotKey(self->form, NEWT_KEY_PGDN); - newtFormAddHotKey(self->form, NEWT_KEY_HOME); - newtFormAddHotKey(self->form, NEWT_KEY_END); - newtFormAddHotKey(self->form, ' '); + ui_browser__add_exit_keys(self, keys); newtFormAddComponent(self->form, self->sb); va_start(ap, helpline); @@ -196,28 +207,28 @@ int ui_browser__refresh(struct ui_browser *self) newtScrollbarSet(self->sb, self->index, self->nr_entries - 1); row = self->refresh(self); - SLsmg_set_color(HE_COLORSET_NORMAL); + ui_browser__set_color(self, HE_COLORSET_NORMAL); SLsmg_fill_region(self->y + row, self->x, self->height - row, self->width, ' '); return 0; } -int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es) +int ui_browser__run(struct ui_browser *self) { + struct newtExitStruct es; + if (ui_browser__refresh(self) < 0) return -1; while (1) { off_t offset; - newtFormRun(self->form, es); + newtFormRun(self->form, &es); - if (es->reason != NEWT_EXIT_HOTKEY) + if (es.reason != NEWT_EXIT_HOTKEY) break; - if (is_exit_key(es->u.key)) - return es->u.key; - switch (es->u.key) { + switch (es.u.key) { case NEWT_KEY_DOWN: if (self->index == self->nr_entries - 1) break; @@ -274,12 +285,12 @@ int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es) self->seek(self, -offset, SEEK_END); break; default: - return es->u.key; + return es.u.key; } if (ui_browser__refresh(self) < 0) return -1; } - return 0; + return -1; } unsigned int ui_browser__list_head_refresh(struct ui_browser *self) @@ -294,7 +305,7 @@ unsigned int ui_browser__list_head_refresh(struct ui_browser *self) pos = self->top; list_for_each_from(pos, head) { - SLsmg_gotorc(self->y + row, self->x); + ui_browser__gotorc(self, row, 0); self->write(self, pos, row); if (++row == self->height) break; diff --git a/tools/perf/util/ui/browser.h b/tools/perf/util/ui/browser.h index 0b9f829214f7..0dc7e4da36f5 100644 --- a/tools/perf/util/ui/browser.h +++ b/tools/perf/util/ui/browser.h @@ -25,16 +25,21 @@ struct ui_browser { }; -int ui_browser__percent_color(double percent, bool current); +void ui_browser__set_color(struct ui_browser *self, int color); +void ui_browser__set_percent_color(struct ui_browser *self, + double percent, bool current); bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row); void ui_browser__refresh_dimensions(struct ui_browser *self); void ui_browser__reset_index(struct ui_browser *self); +void ui_browser__gotorc(struct ui_browser *self, int y, int x); +void ui_browser__add_exit_key(struct ui_browser *self, int key); +void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]); int ui_browser__show(struct ui_browser *self, const char *title, const char *helpline, ...); void ui_browser__hide(struct ui_browser *self); int ui_browser__refresh(struct ui_browser *self); -int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es); +int ui_browser__run(struct ui_browser *self); void ui_browser__rb_tree_seek(struct ui_browser *self, off_t offset, int whence); unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self); diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c index a90273e63f4f..82b78f99251b 100644 --- a/tools/perf/util/ui/browsers/annotate.c +++ b/tools/perf/util/ui/browsers/annotate.c @@ -40,14 +40,12 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro if (ol->offset != -1) { struct objdump_line_rb_node *olrb = objdump_line__rb(ol); - int color = ui_browser__percent_color(olrb->percent, current_entry); - SLsmg_set_color(color); + ui_browser__set_percent_color(self, olrb->percent, current_entry); slsmg_printf(" %7.2f ", olrb->percent); if (!current_entry) - SLsmg_set_color(HE_COLORSET_CODE); + ui_browser__set_color(self, HE_COLORSET_CODE); } else { - int color = ui_browser__percent_color(0, current_entry); - SLsmg_set_color(color); + ui_browser__set_percent_color(self, 0, current_entry); slsmg_write_nstring(" ", 9); } @@ -135,32 +133,31 @@ static void annotate_browser__set_top(struct annotate_browser *self, self->curr_hot = nd; } -static int annotate_browser__run(struct annotate_browser *self, - struct newtExitStruct *es) +static int annotate_browser__run(struct annotate_browser *self) { struct rb_node *nd; struct hist_entry *he = self->b.priv; + int key; if (ui_browser__show(&self->b, he->ms.sym->name, - "<- or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0) + "<-, -> or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0) return -1; - - newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT); - newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT); + /* + * To allow builtin-annotate to cycle thru multiple symbols by + * examining the exit key for this function. + */ + ui_browser__add_exit_key(&self->b, NEWT_KEY_RIGHT); nd = self->curr_hot; if (nd) { - newtFormAddHotKey(self->b.form, NEWT_KEY_TAB); - newtFormAddHotKey(self->b.form, NEWT_KEY_UNTAB); + int tabs[] = { NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0 }; + ui_browser__add_exit_keys(&self->b, tabs); } while (1) { - ui_browser__run(&self->b, es); - - if (es->reason != NEWT_EXIT_HOTKEY) - break; + key = ui_browser__run(&self->b); - switch (es->u.key) { + switch (key) { case NEWT_KEY_TAB: nd = rb_prev(nd); if (nd == NULL) @@ -179,12 +176,11 @@ static int annotate_browser__run(struct annotate_browser *self, } out: ui_browser__hide(&self->b); - return es->u.key; + return key; } int hist_entry__tui_annotate(struct hist_entry *self) { - struct newtExitStruct es; struct objdump_line *pos, *n; struct objdump_line_rb_node *rbpos; LIST_HEAD(head); @@ -232,7 +228,7 @@ int hist_entry__tui_annotate(struct hist_entry *self) annotate_browser__set_top(&browser, browser.curr_hot); browser.b.width += 18; /* Percentage */ - ret = annotate_browser__run(&browser, &es); + ret = annotate_browser__run(&browser); list_for_each_entry_safe(pos, n, &head, node) { list_del(&pos->node); objdump_line__free(pos); diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index dafdf6775d77..2fc1ba3a4680 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -58,6 +58,11 @@ static char callchain_list__folded(const struct callchain_list *self) return map_symbol__folded(&self->ms); } +static void map_symbol__set_folding(struct map_symbol *self, bool unfold) +{ + self->unfolded = unfold ? self->has_children : false; +} + static int callchain_node__count_rows_rb_tree(struct callchain_node *self) { int n = 0; @@ -129,16 +134,16 @@ static void callchain_node__init_have_children_rb_tree(struct callchain_node *se for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) { struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node); struct callchain_list *chain; - int first = true; + bool first = true; list_for_each_entry(chain, &child->val, list) { if (first) { first = false; chain->ms.has_children = chain->list.next != &child->val || - rb_first(&child->rb_root) != NULL; + !RB_EMPTY_ROOT(&child->rb_root); } else chain->ms.has_children = chain->list.next == &child->val && - rb_first(&child->rb_root) != NULL; + !RB_EMPTY_ROOT(&child->rb_root); } callchain_node__init_have_children_rb_tree(child); @@ -150,7 +155,7 @@ static void callchain_node__init_have_children(struct callchain_node *self) struct callchain_list *chain; list_for_each_entry(chain, &self->val, list) - chain->ms.has_children = rb_first(&self->rb_root) != NULL; + chain->ms.has_children = !RB_EMPTY_ROOT(&self->rb_root); callchain_node__init_have_children_rb_tree(self); } @@ -168,6 +173,7 @@ static void callchain__init_have_children(struct rb_root *self) static void hist_entry__init_have_children(struct hist_entry *self) { if (!self->init_have_children) { + self->ms.has_children = !RB_EMPTY_ROOT(&self->sorted_chain); callchain__init_have_children(&self->sorted_chain); self->init_have_children = true; } @@ -195,9 +201,98 @@ static bool hist_browser__toggle_fold(struct hist_browser *self) return false; } -static int hist_browser__run(struct hist_browser *self, const char *title, - struct newtExitStruct *es) +static int callchain_node__set_folding_rb_tree(struct callchain_node *self, bool unfold) +{ + int n = 0; + struct rb_node *nd; + + for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) { + struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node); + struct callchain_list *chain; + bool has_children = false; + + list_for_each_entry(chain, &child->val, list) { + ++n; + map_symbol__set_folding(&chain->ms, unfold); + has_children = chain->ms.has_children; + } + + if (has_children) + n += callchain_node__set_folding_rb_tree(child, unfold); + } + + return n; +} + +static int callchain_node__set_folding(struct callchain_node *node, bool unfold) +{ + struct callchain_list *chain; + bool has_children = false; + int n = 0; + + list_for_each_entry(chain, &node->val, list) { + ++n; + map_symbol__set_folding(&chain->ms, unfold); + has_children = chain->ms.has_children; + } + + if (has_children) + n += callchain_node__set_folding_rb_tree(node, unfold); + + return n; +} + +static int callchain__set_folding(struct rb_root *chain, bool unfold) { + struct rb_node *nd; + int n = 0; + + for (nd = rb_first(chain); nd; nd = rb_next(nd)) { + struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); + n += callchain_node__set_folding(node, unfold); + } + + return n; +} + +static void hist_entry__set_folding(struct hist_entry *self, bool unfold) +{ + hist_entry__init_have_children(self); + map_symbol__set_folding(&self->ms, unfold); + + if (self->ms.has_children) { + int n = callchain__set_folding(&self->sorted_chain, unfold); + self->nr_rows = unfold ? n : 0; + } else + self->nr_rows = 0; +} + +static void hists__set_folding(struct hists *self, bool unfold) +{ + struct rb_node *nd; + + self->nr_entries = 0; + + for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { + struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); + hist_entry__set_folding(he, unfold); + self->nr_entries += 1 + he->nr_rows; + } +} + +static void hist_browser__set_folding(struct hist_browser *self, bool unfold) +{ + hists__set_folding(self->hists, unfold); + self->b.nr_entries = self->hists->nr_entries; + /* Go to the start, we may be way after valid entries after a collapse */ + ui_browser__reset_index(&self->b); +} + +static int hist_browser__run(struct hist_browser *self, const char *title) +{ + int key; + int exit_keys[] = { 'a', '?', 'h', 'C', 'd', 'D', 'E', 't', + NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT, 0, }; char str[256], unit; unsigned long nr_events = self->hists->stats.nr_events[PERF_RECORD_SAMPLE]; @@ -215,23 +310,12 @@ static int hist_browser__run(struct hist_browser *self, const char *title, "Press '?' for help on key bindings") < 0) return -1; - newtFormAddHotKey(self->b.form, 'a'); - newtFormAddHotKey(self->b.form, '?'); - newtFormAddHotKey(self->b.form, 'h'); - newtFormAddHotKey(self->b.form, 'd'); - newtFormAddHotKey(self->b.form, 'D'); - newtFormAddHotKey(self->b.form, 't'); - - newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT); - newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT); - newtFormAddHotKey(self->b.form, NEWT_KEY_ENTER); + ui_browser__add_exit_keys(&self->b, exit_keys); while (1) { - ui_browser__run(&self->b, es); + key = ui_browser__run(&self->b); - if (es->reason != NEWT_EXIT_HOTKEY) - break; - switch (es->u.key) { + switch (key) { case 'D': { /* Debug */ static int seq; struct hist_entry *h = rb_entry(self->b.top, @@ -245,18 +329,26 @@ static int hist_browser__run(struct hist_browser *self, const char *title, self->b.top_idx, h->row_offset, h->nr_rows); } - continue; + break; + case 'C': + /* Collapse the whole world. */ + hist_browser__set_folding(self, false); + break; + case 'E': + /* Expand the whole world. */ + hist_browser__set_folding(self, true); + break; case NEWT_KEY_ENTER: if (hist_browser__toggle_fold(self)) break; /* fall thru */ default: - return 0; + goto out; } } - +out: ui_browser__hide(&self->b); - return 0; + return key; } static char *callchain_list__sym_name(struct callchain_list *self, @@ -306,15 +398,10 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self, int color; bool was_first = first; - if (first) { + if (first) first = false; - chain->ms.has_children = chain->list.next != &child->val || - rb_first(&child->rb_root) != NULL; - } else { + else extra_offset = LEVEL_OFFSET_STEP; - chain->ms.has_children = chain->list.next == &child->val && - rb_first(&child->rb_root) != NULL; - } folded_sign = callchain_list__folded(chain); if (*row_offset != 0) { @@ -341,8 +428,8 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self, *is_current_entry = true; } - SLsmg_set_color(color); - SLsmg_gotorc(self->b.y + row, self->b.x); + ui_browser__set_color(&self->b, color); + ui_browser__gotorc(&self->b, row, 0); slsmg_write_nstring(" ", offset + extra_offset); slsmg_printf("%c ", folded_sign); slsmg_write_nstring(str, width); @@ -384,12 +471,7 @@ static int hist_browser__show_callchain_node(struct hist_browser *self, list_for_each_entry(chain, &node->val, list) { char ipstr[BITS_PER_LONG / 4 + 1], *s; int color; - /* - * FIXME: This should be moved to somewhere else, - * probably when the callchain is created, so as not to - * traverse it all over again - */ - chain->ms.has_children = rb_first(&node->rb_root) != NULL; + folded_sign = callchain_list__folded(chain); if (*row_offset != 0) { @@ -405,8 +487,8 @@ static int hist_browser__show_callchain_node(struct hist_browser *self, } s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); - SLsmg_gotorc(self->b.y + row, self->b.x); - SLsmg_set_color(color); + ui_browser__gotorc(&self->b, row, 0); + ui_browser__set_color(&self->b, color); slsmg_write_nstring(" ", offset); slsmg_printf("%c ", folded_sign); slsmg_write_nstring(s, width - 2); @@ -465,7 +547,7 @@ static int hist_browser__show_entry(struct hist_browser *self, } if (symbol_conf.use_callchain) { - entry->ms.has_children = !RB_EMPTY_ROOT(&entry->sorted_chain); + hist_entry__init_have_children(entry); folded_sign = hist_entry__folded(entry); } @@ -484,8 +566,8 @@ static int hist_browser__show_entry(struct hist_browser *self, color = HE_COLORSET_NORMAL; } - SLsmg_set_color(color); - SLsmg_gotorc(self->b.y + row, self->b.x); + ui_browser__set_color(&self->b, color); + ui_browser__gotorc(&self->b, row, 0); if (symbol_conf.use_callchain) { slsmg_printf("%c ", folded_sign); width -= 2; @@ -687,8 +769,6 @@ static struct hist_browser *hist_browser__new(struct hists *hists) static void hist_browser__delete(struct hist_browser *self) { - newtFormDestroy(self->b.form); - newtPopWindow(); free(self); } @@ -725,7 +805,6 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name) struct pstack *fstack; const struct thread *thread_filter = NULL; const struct dso *dso_filter = NULL; - struct newtExitStruct es; char msg[160]; int key = -1; @@ -749,70 +828,63 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name) annotate = -2, zoom_dso = -2, zoom_thread = -2, browse_map = -2; - if (hist_browser__run(browser, msg, &es)) - break; + key = hist_browser__run(browser, msg); thread = hist_browser__selected_thread(browser); dso = browser->selection->map ? browser->selection->map->dso : NULL; - if (es.reason == NEWT_EXIT_HOTKEY) { - key = es.u.key; - - switch (key) { - case NEWT_KEY_F1: - goto do_help; - case NEWT_KEY_TAB: - case NEWT_KEY_UNTAB: - /* - * Exit the browser, let hists__browser_tree - * go to the next or previous - */ - goto out_free_stack; - default:; - } - - switch (key) { - case 'a': - if (browser->selection->map == NULL && - browser->selection->map->dso->annotate_warned) - continue; - goto do_annotate; - case 'd': - goto zoom_dso; - case 't': - goto zoom_thread; - case 'h': - case '?': -do_help: - ui__help_window("-> Zoom into DSO/Threads & Annotate current symbol\n" - "<- Zoom out\n" - "a Annotate current symbol\n" - "h/?/F1 Show this window\n" - "d Zoom into current DSO\n" - "t Zoom into current Thread\n" - "q/CTRL+C Exit browser"); + switch (key) { + case NEWT_KEY_TAB: + case NEWT_KEY_UNTAB: + /* + * Exit the browser, let hists__browser_tree + * go to the next or previous + */ + goto out_free_stack; + case 'a': + if (browser->selection->map == NULL && + browser->selection->map->dso->annotate_warned) continue; - default:; - } - if (is_exit_key(key)) { - if (key == NEWT_KEY_ESCAPE && - !ui__dialog_yesno("Do you really want to exit?")) - continue; - break; - } - - if (es.u.key == NEWT_KEY_LEFT) { - const void *top; + goto do_annotate; + case 'd': + goto zoom_dso; + case 't': + goto zoom_thread; + case NEWT_KEY_F1: + case 'h': + case '?': + ui__help_window("-> Zoom into DSO/Threads & Annotate current symbol\n" + "<- Zoom out\n" + "a Annotate current symbol\n" + "h/?/F1 Show this window\n" + "C Collapse all callchains\n" + "E Expand all callchains\n" + "d Zoom into current DSO\n" + "t Zoom into current Thread\n" + "q/CTRL+C Exit browser"); + continue; + case NEWT_KEY_ENTER: + case NEWT_KEY_RIGHT: + /* menu */ + break; + case NEWT_KEY_LEFT: { + const void *top; - if (pstack__empty(fstack)) - continue; - top = pstack__pop(fstack); - if (top == &dso_filter) - goto zoom_out_dso; - if (top == &thread_filter) - goto zoom_out_thread; + if (pstack__empty(fstack)) continue; - } + top = pstack__pop(fstack); + if (top == &dso_filter) + goto zoom_out_dso; + if (top == &thread_filter) + goto zoom_out_thread; + continue; + } + case NEWT_KEY_ESCAPE: + if (!ui__dialog_yesno("Do you really want to exit?")) + continue; + /* Fall thru */ + default: + goto out_free_stack; } if (browser->selection->sym != NULL && @@ -925,10 +997,6 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help) const char *ev_name = __event_name(hists->type, hists->config); key = hists__browse(hists, help, ev_name); - - if (is_exit_key(key)) - break; - switch (key) { case NEWT_KEY_TAB: next = rb_next(nd); @@ -940,7 +1008,7 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help) continue; nd = rb_prev(nd); default: - break; + return key; } } diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c index 142b825b42bf..1bf09796cb31 100644 --- a/tools/perf/util/ui/browsers/map.c +++ b/tools/perf/util/ui/browsers/map.c @@ -1,6 +1,5 @@ #include "../libslang.h" #include <elf.h> -#include <newt.h> #include <sys/ttydefaults.h> #include <ctype.h> #include <string.h> @@ -56,9 +55,8 @@ static void map_browser__write(struct ui_browser *self, void *nd, int row) struct symbol *sym = rb_entry(nd, struct symbol, rb_node); struct map_browser *mb = container_of(self, struct map_browser, b); bool current_entry = ui_browser__is_current_entry(self, row); - int color = ui_browser__percent_color(0, current_entry); - SLsmg_set_color(color); + ui_browser__set_percent_color(self, 0, current_entry); slsmg_printf("%*llx %*llx %c ", mb->addrlen, sym->start, mb->addrlen, sym->end, sym->binding == STB_GLOBAL ? 'g' : @@ -98,31 +96,29 @@ static int map_browser__search(struct map_browser *self) return 0; } -static int map_browser__run(struct map_browser *self, struct newtExitStruct *es) +static int map_browser__run(struct map_browser *self) { + int key; + if (ui_browser__show(&self->b, self->map->dso->long_name, "Press <- or ESC to exit, %s / to search", verbose ? "" : "restart with -v to use") < 0) return -1; - newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT); - newtFormAddHotKey(self->b.form, NEWT_KEY_ENTER); if (verbose) - newtFormAddHotKey(self->b.form, '/'); + ui_browser__add_exit_key(&self->b, '/'); while (1) { - ui_browser__run(&self->b, es); + key = ui_browser__run(&self->b); - if (es->reason != NEWT_EXIT_HOTKEY) - break; - if (verbose && es->u.key == '/') + if (verbose && key == '/') map_browser__search(self); else break; } ui_browser__hide(&self->b); - return 0; + return key; } int map__browse(struct map *self) @@ -136,7 +132,6 @@ int map__browse(struct map *self) }, .map = self, }; - struct newtExitStruct es; struct rb_node *nd; char tmp[BITS_PER_LONG / 4]; u64 maxaddr = 0; @@ -157,5 +152,5 @@ int map__browse(struct map *self) mb.addrlen = snprintf(tmp, sizeof(tmp), "%llx", maxaddr); mb.b.width += mb.addrlen * 2 + 4 + mb.namelen; - return map_browser__run(&mb, &es); + return map_browser__run(&mb); } diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c index 04600e26ceea..9706d9d40279 100644 --- a/tools/perf/util/ui/util.c +++ b/tools/perf/util/ui/util.c @@ -11,8 +11,6 @@ #include "helpline.h" #include "util.h" -newtComponent newt_form__new(void); - static void newt_form__set_exit_keys(newtComponent self) { newtFormAddHotKey(self, NEWT_KEY_LEFT); @@ -22,7 +20,7 @@ static void newt_form__set_exit_keys(newtComponent self) newtFormAddHotKey(self, CTRL('c')); } -newtComponent newt_form__new(void) +static newtComponent newt_form__new(void) { newtComponent self = newtForm(NULL, NULL, 0); if (self) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index f380fed74359..7562707ddd1c 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -266,19 +266,6 @@ bool strglobmatch(const char *str, const char *pat); bool strlazymatch(const char *str, const char *pat); unsigned long convert_unit(unsigned long value, char *unit); -#ifndef ESC -#define ESC 27 -#endif - -static inline bool is_exit_key(int key) -{ - char up; - if (key == CTRL('c') || key == ESC) - return true; - up = toupper(key); - return up == 'Q'; -} - #define _STR(x) #x #define STR(x) _STR(x) |