From 112c554702cf1ea384ef71a116e3a2c10aeed116 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 5 Feb 2024 06:58:19 -0800 Subject: perf script: Print source line for each jump in brstackinsn With the srcline option, the perf script only prints a source line at the beginning of a sample with call/ret from functions, but not for each jump in brstackinsn. It's useful to print a source line for each jump in brstackinsn when the end user analyzes the full assembler sequences of branch sequences for the sample. The srccode option can also be used to locate the source code line. However, it's printed for almost every line and makes the output less readable. $perf script -F +brstackinsn,+srcline --xed Before the patch, tchain_edit_deb 1463275 15228549.107820: 282495 instructions:u: 401133 f3+0xd (/home/kan/os.li> tchain_edit.c:22 f3+40: tchain_edit.c:20 000000000040114e jle 0x401133 # PRED 6 cycles [6] 0000000000401133 movl -0x4(%rbp), %eax 0000000000401136 and $0x1, %eax 0000000000401139 test %eax, %eax 000000000040113b jz 0x401143 000000000040113d addl $0x1, -0x4(%rbp) 0000000000401141 jmp 0x401147 # PRED 3 cycles [9] 2.00 IPC 0000000000401147 cmpl $0x3e7, -0x4(%rbp) 000000000040114e jle 0x401133 # PRED 6 cycles [15] 0.33 IPC After the patch, tchain_edit_deb 1463275 15228549.107820: 282495 instructions:u: 401133 f3+0xd (/home/kan/os.li> tchain_edit.c:22 f3+40: tchain_edit.c:20 000000000040114e jle 0x401133 srcline: tchain_edit.c:20 # PRED 6 cycles [6] 0000000000401133 movl -0x4(%rbp), %eax 0000000000401136 and $0x1, %eax 0000000000401139 test %eax, %eax 000000000040113b jz 0x401143 000000000040113d addl $0x1, -0x4(%rbp) 0000000000401141 jmp 0x401147 srcline: tchain_edit.c:23 # PRED 3 cycles [9] 2.00 IPC 0000000000401147 cmpl $0x3e7, -0x4(%rbp) 000000000040114e jle 0x401133 srcline: tchain_edit.c:20 # PRED 6 cycles [15] 0.33 IPC Signed-off-by: Kan Liang Reviewed-by: Ian Rogers Cc: ahmad.yasin@intel.com Cc: amiri.khalil@intel.com Cc: ak@linux.intel.com Signed-off-by: 
Namhyung Kim Link: https://lore.kernel.org/r/20240205145819.1943114-1-kan.liang@linux.intel.com --- tools/perf/builtin-script.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index b1f57401ff23..af63b7c37c8a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1162,7 +1162,8 @@ out: static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, struct perf_insn *x, u8 *inbuf, int len, int insn, FILE *fp, int *total_cycles, - struct perf_event_attr *attr) + struct perf_event_attr *attr, + struct thread *thread) { int ilen = 0; int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip, @@ -1171,6 +1172,16 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, if (PRINT_FIELD(BRSTACKINSNLEN)) printed += fprintf(fp, "ilen: %d\t", ilen); + if (PRINT_FIELD(SRCLINE)) { + struct addr_location al; + + addr_location__init(&al); + thread__find_map(thread, x->cpumode, ip, &al); + printed += map__fprintf_srcline(al.map, al.addr, " srcline: ", fp); + printed += fprintf(fp, "\t"); + addr_location__exit(&al); + } + printed += fprintf(fp, "#%s%s%s%s", en->flags.predicted ? " PRED" : "", en->flags.mispred ? 
" MISPRED" : "", @@ -1182,6 +1193,7 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, if (insn) printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles); } + return printed + fprintf(fp, "\n"); } @@ -1260,7 +1272,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, x.cpumode, x.cpu, &lastsym, attr, fp); printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1], &x, buffer, len, 0, fp, &total_cycles, - attr); + attr, thread); if (PRINT_FIELD(SRCCODE)) printed += print_srccode(thread, x.cpumode, entries[nr - 1].from); } @@ -1291,7 +1303,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); if (ip == end) { printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp, - &total_cycles, attr); + &total_cycles, attr, thread); if (PRINT_FIELD(SRCCODE)) printed += print_srccode(thread, x.cpumode, ip); break; -- cgit v1.2.3-59-g8ed1b From 0bdfbd04c67e0578f304aef10a0b3b5cff392022 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 8 Feb 2024 10:53:25 +0200 Subject: perf tools: Make it possible to see perf's kernel and module memory mappings Dump kmaps if using 'perf --debug kmaps' or verbose > 2 (e.g. -vvv) for tools 'perf script' and 'perf report' if there is no browser. 
Example: $ perf --debug kmaps script 2>&1 >/dev/null | grep kvm.intel build id event received for /lib/modules/6.7.2-local/kernel/arch/x86/kvm/kvm-intel.ko: 0691d75e10e72ebbbd45a44c59f6d00a5604badf [20] Map: 0-3a3 4f5d8 [kvm_intel].modinfo Map: 0-5240 5f280 [kvm_intel]__versions Map: 0-30 64 [kvm_intel].note.Linux Map: 0-14 644c0 [kvm_intel].orc_header Map: 0-5297 43680 [kvm_intel].rodata Map: 0-5bee 3b837 [kvm_intel].text.unlikely Map: 0-7e0 41430 [kvm_intel].noinstr.text Map: 0-2080 713c0 [kvm_intel].bss Map: 0-26 705c8 [kvm_intel].data..read_mostly Map: 0-5888 6a4c0 [kvm_intel].data Map: 0-22 70220 [kvm_intel].data.once Map: 0-40 705f0 [kvm_intel].data..percpu Map: 0-1685 41d20 [kvm_intel].init.text Map: 0-4b8 6fd60 [kvm_intel].init.data Map: 0-380 70248 [kvm_intel]__dyndbg Map: 0-8 70218 [kvm_intel].exit.data Map: 0-438 4f980 [kvm_intel]__param Map: 0-5f5 4ca0f [kvm_intel].rodata.str1.1 Map: 0-3657 493b8 [kvm_intel].rodata.str1.8 Map: 0-e0 70640 [kvm_intel].data..ro_after_init Map: 0-500 70ec0 [kvm_intel].gnu.linkonce.this_module Map: ffffffffc13a7000-ffffffffc1421000 a0 /lib/modules/6.7.2-local/kernel/arch/x86/kvm/kvm-intel.ko The example above shows how the module section mappings are all wrong except for the main .text mapping at 0xffffffffc13a7000. 
Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Like Xu Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240208085326.13432-2-adrian.hunter@intel.com --- tools/perf/Documentation/perf.txt | 2 ++ tools/perf/builtin-report.c | 2 ++ tools/perf/builtin-script.c | 3 +++ tools/perf/util/debug.c | 3 +++ tools/perf/util/debug.h | 1 + tools/perf/util/python.c | 1 + tools/perf/util/session.c | 11 +++++++++++ tools/perf/util/session.h | 2 ++ 8 files changed, 25 insertions(+) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index a7cf7bc2f968..09f516f3fdfb 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -63,6 +63,8 @@ OPTIONS in browser mode perf-event-open - Print perf_event_open() arguments and return value + kmaps - Print kernel and module maps (perf script + and perf report without browser) --debug-file:: Write debug output to a specified file. diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ccb91fe6b876..8e16fa261e6f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1766,6 +1766,8 @@ repeat: } else ret = 0; + if (!use_browser && (verbose > 2 || debug_kmaps)) + perf_session__dump_kmaps(session); error: if (report.ptime_range) { itrace_synth_opts__clear_time_range(&itrace_synth_opts); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index af63b7c37c8a..24baa8284add 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -4378,6 +4378,9 @@ script_found: flush_scripting(); + if (verbose > 2 || debug_kmaps) + perf_session__dump_kmaps(session); + out_delete: if (script.ptime_range) { itrace_synth_opts__clear_time_range(&itrace_synth_opts); diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index e282b4ceb4d2..c39ee0fcb8cf 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -33,6 +33,7 @@ #endif int verbose; 
+int debug_kmaps; int debug_peo_args; bool dump_trace = false, quiet = false; int debug_ordered_events; @@ -229,6 +230,7 @@ static struct sublevel_option debug_opts[] = { { .name = "stderr", .value_ptr = &redirect_to_stderr}, { .name = "data-convert", .value_ptr = &debug_data_convert }, { .name = "perf-event-open", .value_ptr = &debug_peo_args }, + { .name = "kmaps", .value_ptr = &debug_kmaps }, { .name = NULL, } }; @@ -267,6 +269,7 @@ int perf_quiet_option(void) /* For debug variables that are used as bool types, set to 0. */ redirect_to_stderr = 0; debug_peo_args = 0; + debug_kmaps = 0; return 0; } diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index de8870980d44..35a7a5ae762e 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -9,6 +9,7 @@ #include extern int verbose; +extern int debug_kmaps; extern int debug_peo_args; extern bool quiet, dump_trace; extern int debug_ordered_events; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 8761f51b5c7c..075c0f79b1b9 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -181,6 +181,7 @@ int perf_bpf_filter__destroy(struct evsel *evsel __maybe_unused) * implementing 'verbose' and 'eprintf'. 
*/ int verbose; +int debug_kmaps; int debug_peo_args; int eprintf(int level, int var, const char *fmt, ...); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 199d3e8df315..06d0bd7fb459 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2720,6 +2720,17 @@ size_t perf_session__fprintf(struct perf_session *session, FILE *fp) return machine__fprintf(&session->machines.host, fp); } +void perf_session__dump_kmaps(struct perf_session *session) +{ + int save_verbose = verbose; + + fflush(stdout); + fprintf(stderr, "Kernel and module maps:\n"); + verbose = 0; /* Suppress verbose to print a summary only */ + maps__fprintf(machine__kernel_maps(&session->machines.host), stderr); + verbose = save_verbose; +} + struct evsel *perf_session__find_first_evtype(struct perf_session *session, unsigned int type) { diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index ee3715e8563b..5064c6ec11e7 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -133,6 +133,8 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp, bool skip_empty); +void perf_session__dump_kmaps(struct perf_session *session); + struct evsel *perf_session__find_first_evtype(struct perf_session *session, unsigned int type); -- cgit v1.2.3-59-g8ed1b From 8f0ec15ff66243896ff3e534696c6af7ff013901 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 17 Feb 2024 15:40:43 +0800 Subject: perf: util: use capstone disasm engine to show assembly instructions Currently, the instructions of samples are shown as raw hex strings which are hard to read. x86 has a special option '--xed' to disassemble the hex string via intel XED tool. Here we use capstone as our disassembler engine to give more friendly instructions. We select libcapstone because capstone can provide more insn details. 
Perf will fallback to raw instructions if libcapstone is not available. The advantages compared to XED tool: * Support arm, arm64, x86-32, x86_64 (more could be supported), xed only for x86_64. * Immediate address operands are shown as symbol+offs. Signed-off-by: Changbin Du Reviewed-by: Adrian Hunter Cc: changbin.du@gmail.com Cc: Thomas Richter Cc: Andi Kleen Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240217074046.4100789-3-changbin.du@huawei.com --- tools/perf/builtin-script.c | 8 +-- tools/perf/util/Build | 1 + tools/perf/util/print_insn.c | 135 +++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/print_insn.h | 16 +++++ tools/perf/util/thread.h | 1 - 5 files changed, 155 insertions(+), 6 deletions(-) create mode 100644 tools/perf/util/print_insn.c create mode 100644 tools/perf/util/print_insn.h (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 24baa8284add..0a57c518640c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -34,6 +34,7 @@ #include "util/event.h" #include "ui/ui.h" #include "print_binary.h" +#include "print_insn.h" #include "archinsn.h" #include #include @@ -1523,11 +1524,8 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, if (PRINT_FIELD(INSNLEN)) printed += fprintf(fp, " ilen: %d", sample->insn_len); if (PRINT_FIELD(INSN) && sample->insn_len) { - int i; - - printed += fprintf(fp, " insn:"); - for (i = 0; i < sample->insn_len; i++) - printed += fprintf(fp, " %02x", (unsigned char)sample->insn[i]); + printed += fprintf(fp, " insn: "); + printed += sample__fprintf_insn_raw(sample, fp); } if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8027f450fa3e..2cbeeb79b6ef 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -32,6 +32,7 @@ perf-y += 
perf_regs.o perf-y += perf-regs-arch/ perf-y += path.o perf-y += print_binary.o +perf-y += print_insn.o perf-y += rlimit.o perf-y += argv_split.o perf-y += rbtree.o diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c new file mode 100644 index 000000000000..459e0e93d7b1 --- /dev/null +++ b/tools/perf/util/print_insn.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Instruction binary disassembler based on capstone. + * + * Author(s): Changbin Du + */ +#include +#include +#include "debug.h" +#include "sample.h" +#include "symbol.h" +#include "machine.h" +#include "thread.h" +#include "print_insn.h" + +size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp) +{ + int printed = 0; + + for (int i = 0; i < sample->insn_len; i++) { + printed += fprintf(fp, "%02x", (unsigned char)sample->insn[i]); + if (sample->insn_len - i > 1) + printed += fprintf(fp, " "); + } + return printed; +} + +#ifdef HAVE_LIBCAPSTONE_SUPPORT +#include + +static int capstone_init(struct machine *machine, csh *cs_handle) +{ + cs_arch arch; + cs_mode mode; + + if (machine__is(machine, "x86_64")) { + arch = CS_ARCH_X86; + mode = CS_MODE_64; + } else if (machine__normalized_is(machine, "x86")) { + arch = CS_ARCH_X86; + mode = CS_MODE_32; + } else if (machine__normalized_is(machine, "arm64")) { + arch = CS_ARCH_ARM64; + mode = CS_MODE_ARM; + } else if (machine__normalized_is(machine, "arm")) { + arch = CS_ARCH_ARM; + mode = CS_MODE_ARM + CS_MODE_V8; + } else if (machine__normalized_is(machine, "s390")) { + arch = CS_ARCH_SYSZ; + mode = CS_MODE_BIG_ENDIAN; + } else { + return -1; + } + + if (cs_open(arch, mode, cs_handle) != CS_ERR_OK) { + pr_warning_once("cs_open failed\n"); + return -1; + } + + if (machine__normalized_is(machine, "x86")) { + cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); + /* + * Resolving address operands to symbols is implemented + * on x86 by investigating instruction details. 
+ */ + cs_option(*cs_handle, CS_OPT_DETAIL, CS_OPT_ON); + } + + return 0; +} + +static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread, + cs_insn *insn, FILE *fp) +{ + struct addr_location al; + size_t printed = 0; + + if (insn->detail && insn->detail->x86.op_count == 1) { + cs_x86_op *op = &insn->detail->x86.operands[0]; + + addr_location__init(&al); + if (op->type == X86_OP_IMM && + thread__find_symbol(thread, sample->cpumode, op->imm, &al)) { + printed += fprintf(fp, "%s ", insn[0].mnemonic); + printed += symbol__fprintf_symname_offs(al.sym, &al, fp); + addr_location__exit(&al); + return printed; + } + addr_location__exit(&al); + } + + printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str); + return printed; +} + +size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, + struct machine *machine, FILE *fp) +{ + csh cs_handle; + cs_insn *insn; + size_t count; + size_t printed = 0; + int ret; + + /* TODO: Try to initiate capstone only once but need a proper place. 
*/ + ret = capstone_init(machine, &cs_handle); + if (ret < 0) { + /* fallback */ + return sample__fprintf_insn_raw(sample, fp); + } + + count = cs_disasm(cs_handle, (uint8_t *)sample->insn, sample->insn_len, + sample->ip, 1, &insn); + if (count > 0) { + if (machine__normalized_is(machine, "x86")) + printed += print_insn_x86(sample, thread, &insn[0], fp); + else + printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str); + cs_free(insn, count); + } else { + printed += fprintf(fp, "illegal instruction"); + } + + cs_close(&cs_handle); + return printed; +} +#else +size_t sample__fprintf_insn_asm(struct perf_sample *sample __maybe_unused, + struct thread *thread __maybe_unused, + struct machine *machine __maybe_unused, + FILE *fp __maybe_unused) +{ + return 0; +} +#endif diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h new file mode 100644 index 000000000000..465bdcfcc2fd --- /dev/null +++ b/tools/perf/util/print_insn.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef PERF_PRINT_INSN_H +#define PERF_PRINT_INSN_H + +#include +#include + +struct perf_sample; +struct thread; +struct machine; + +size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, + struct machine *machine, FILE *fp); +size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp); + +#endif /* PERF_PRINT_INSN_H */ diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 0df775b5c110..df344262eaee 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -13,7 +13,6 @@ #include #include #include "rwsem.h" -#include "event.h" #include "callchain.h" #include -- cgit v1.2.3-59-g8ed1b From 9941723438eec4c2388f588e1d4fd98f4a49ab01 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 17 Feb 2024 15:40:44 +0800 Subject: perf: script: add field 'disasm' to display mnemonic instructions In addition to the 'insn' field, this adds a new field 'disasm' to display mnemonic instructions instead of the raw 
code. $ sudo perf script -F +disasm perf-exec 1443864 [006] 2275506.209848: psb: psb offs: 0 0 [unknown] ([unknown]) perf-exec 1443864 [006] 2275506.209848: cbr: cbr: 41 freq: 4100 MHz (114%) 0 [unknown] ([unknown]) ls 1443864 [006] 2275506.209905: 1 branches:uH: 7f216b426100 _start+0x0 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) movq %rsp, %rdi ls 1443864 [006] 2275506.209908: 1 branches:uH: 7f216b426103 _start+0x3 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) callq _dl_start+0x0 Signed-off-by: Changbin Du Reviewed-by: Adrian Hunter Cc: changbin.du@gmail.com Cc: Thomas Richter Cc: Andi Kleen Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240217074046.4100789-4-changbin.du@huawei.com --- tools/perf/Documentation/perf-script.txt | 13 +++++++------ tools/perf/builtin-script.c | 15 ++++++++++++++- 2 files changed, 21 insertions(+), 7 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index ff9a52e44688..578fa59f51a5 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -132,9 +132,10 @@ OPTIONS Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, dsoff, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, - brstackinsn, brstackinsnlen, brstackoff, callindent, insn, insnlen, synth, - phys_addr, metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat, - machine_pid, vcpu, cgroup, retire_lat. + brstackinsn, brstackinsnlen, brstackoff, callindent, insn, disasm, + insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size, + code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat. + Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. 
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -217,9 +218,9 @@ OPTIONS Instruction Trace decoding. For calls and returns, it will display the name of the symbol indented with spaces to reflect the stack depth. - When doing instruction trace decoding insn and insnlen give the - instruction bytes and the instruction length of the current - instruction. + When doing instruction trace decoding, insn, disasm and insnlen give the + instruction bytes, disassembled instructions (requires libcapstone support) + and the instruction length of the current instruction respectively. The synth field is used by synthesized events which may be created when Instruction Trace decoding. diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 0a57c518640c..ba4cfe040bd6 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -135,6 +135,7 @@ enum perf_output_field { PERF_OUTPUT_CGROUP = 1ULL << 39, PERF_OUTPUT_RETIRE_LAT = 1ULL << 40, PERF_OUTPUT_DSOFF = 1ULL << 41, + PERF_OUTPUT_DISASM = 1ULL << 42, }; struct perf_script { @@ -190,6 +191,7 @@ struct output_option { {.str = "bpf-output", .field = PERF_OUTPUT_BPF_OUTPUT}, {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT}, {.str = "insn", .field = PERF_OUTPUT_INSN}, + {.str = "disasm", .field = PERF_OUTPUT_DISASM}, {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF}, @@ -1527,6 +1529,10 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, printed += fprintf(fp, " insn: "); printed += sample__fprintf_insn_raw(sample, fp); } + if (PRINT_FIELD(DISASM) && sample->insn_len) { + printed += fprintf(fp, "\t\t"); + printed += sample__fprintf_insn_asm(sample, thread, machine, fp); + } if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp); @@ -3118,6 +3124,13 @@ parse: rc = 
-EINVAL; goto out; } +#ifndef HAVE_LIBCAPSTONE_SUPPORT + if (change != REMOVE && strcmp(tok, "disasm") == 0) { + fprintf(stderr, "Field \"disasm\" requires perf to be built with libcapstone support.\n"); + rc = -EINVAL; + goto out; + } +#endif if (type == -1) { /* add user option to all events types for @@ -3912,7 +3925,7 @@ int cmd_script(int argc, const char **argv) "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,dsoff," "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,data_src,weight,bpf-output,brstackinsn," - "brstackinsnlen,brstackoff,callindent,insn,insnlen,synth," + "brstackinsnlen,brstackoff,callindent,insn,disasm,insnlen,synth," "phys_addr,metric,misc,srccode,ipc,tod,data_page_size," "code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat", parse_output_fields), -- cgit v1.2.3-59-g8ed1b From 6750ba4b6442fa5ea4bf5c0e4b4ff8b0249ef71d Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 17 Feb 2024 15:40:45 +0800 Subject: perf: script: add raw|disasm arguments to --insn-trace option Now '--insn-trace' accepts an argument to specify the output format: - raw: display raw instructions. - disasm: display mnemonic instructions (if capstone is installed). 
$ sudo perf script --insn-trace=raw ls 1443864 [006] 2275506.209908875: 7f216b426100 _start+0x0 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) insn: 48 89 e7 ls 1443864 [006] 2275506.209908875: 7f216b426103 _start+0x3 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) insn: e8 e8 0c 00 00 ls 1443864 [006] 2275506.209908875: 7f216b426df0 _dl_start+0x0 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) insn: f3 0f 1e fa $ sudo perf script --insn-trace=disasm ls 1443864 [006] 2275506.209908875: 7f216b426100 _start+0x0 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) movq %rsp, %rdi ls 1443864 [006] 2275506.209908875: 7f216b426103 _start+0x3 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) callq _dl_start+0x0 ls 1443864 [006] 2275506.209908875: 7f216b426df0 _dl_start+0x0 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) illegal instruction ls 1443864 [006] 2275506.209908875: 7f216b426df4 _dl_start+0x4 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) pushq %rbp ls 1443864 [006] 2275506.209908875: 7f216b426df5 _dl_start+0x5 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) movq %rsp, %rbp ls 1443864 [006] 2275506.209908875: 7f216b426df8 _dl_start+0x8 (/usr/lib/x86_64-linux-gnu/ld-2.31.so) pushq %r15 Signed-off-by: Changbin Du Reviewed-by: Adrian Hunter Cc: changbin.du@gmail.com Cc: Thomas Richter Cc: Andi Kleen Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240217074046.4100789-5-changbin.du@huawei.com --- tools/perf/Documentation/perf-script.txt | 7 ++++--- tools/perf/builtin-script.c | 22 ++++++++++++++++++---- 2 files changed, 22 insertions(+), 7 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 578fa59f51a5..005e51df855e 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -442,9 +442,10 @@ include::itrace.txt[] will be printed. Each entry has function name and file/line. Enabled by default, disable with --no-inline. 
---insn-trace:: - Show instruction stream for intel_pt traces. Combine with --xed to - show disassembly. +--insn-trace[=<raw|disasm>]:: + Show instruction stream in bytes (raw) or disassembled (disasm) + for intel_pt traces. The default is 'raw'. To use xed, combine + 'raw' with --xed to show disassembly done by xed. --xed:: Run xed disassembler on output. Requires installing the xed disassembler. diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ba4cfe040bd6..37088cc0ff1b 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3788,10 +3788,24 @@ static int perf_script__process_auxtrace_info(struct perf_session *session, #endif static int parse_insn_trace(const struct option *opt __maybe_unused, - const char *str __maybe_unused, - int unset __maybe_unused) + const char *str, int unset __maybe_unused) { - parse_output_fields(NULL, "+insn,-event,-period", 0); + const char *fields = "+insn,-event,-period"; + int ret; + + if (str) { + if (strcmp(str, "disasm") == 0) + fields = "+disasm,-event,-period"; + else if (strlen(str) != 0 && strcmp(str, "raw") != 0) { + fprintf(stderr, "Only accept raw|disasm\n"); + return -EINVAL; + } + } + + ret = parse_output_fields(NULL, fields, 0); + if (ret < 0) + return ret; + itrace_parse_synth_opts(opt, "i0ns", 0); symbol_conf.nanosecs = true; return 0; @@ -3937,7 +3951,7 @@ int cmd_script(int argc, const char **argv) "only consider these symbols"), OPT_INTEGER(0, "addr-range", &symbol_conf.addr_range, "Use with -S to list traced records within address range"), - OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, NULL, + OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, "raw|disasm", "Decode instructions from itrace", parse_insn_trace), OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL, "Run xed disassembler on output", parse_xed), -- cgit v1.2.3-59-g8ed1b