aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
authorKan Liang <kan.liang@intel.com>2015-01-05 13:23:04 -0500
committerIngo Molnar <mingo@kernel.org>2015-02-18 17:16:17 +0100
commitaad2b21c151273fa7abc419dac51a980eff1dd17 (patch)
tree6408263bd2daf71567178e4fe07d094de934414c /tools/perf
parentperf/x86/intel: Expose LBR callstack to user space tooling (diff)
downloadlinux-dev-aad2b21c151273fa7abc419dac51a980eff1dd17.tar.xz
linux-dev-aad2b21c151273fa7abc419dac51a980eff1dd17.zip
perf tools: Enable LBR call stack support
Currently, there are two call chain recording options, fp and dwarf. Haswell has a new feature that utilizes the existing LBR facility to record call chains. Kernel side LBR support code provides this as a third option to record call chains. This patch enables the lbr call stack support on the tooling side. LBR call stack has some limitations: - It reuses current LBR facility, so LBR call stack and branch record can not be enabled at the same time. - It is only available for user-space callchains. However, it also offers some advantages: - LBR call stack can work on user apps which don't have frame-pointers or dwarf debug info compiled. It is a good alternative when nothing else works. Tested-by: Jiri Olsa <jolsa@kernel.org> Signed-off-by: Kan Liang <kan.liang@intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Cody P Schafer <cody@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Don Zickus <dzickus@redhat.com> Cc: Jacob Shin <jacob.w.shin@gmail.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Masanari Iida <standby24x7@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Rodrigo Campos <rodrigo@sdfg.com.ar> Cc: Stephane Eranian <eranian@google.com> Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com> Link: http://lkml.kernel.org/r/1420482185-29830-2-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Documentation/perf-record.txt8
-rw-r--r--tools/perf/builtin-record.c6
-rw-r--r--tools/perf/builtin-report.c2
-rw-r--r--tools/perf/util/callchain.c8
-rw-r--r--tools/perf/util/callchain.h1
-rw-r--r--tools/perf/util/evsel.c21
6 files changed, 40 insertions, 6 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 31e977459c51..1c7e50f62b1f 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -115,13 +115,19 @@ OPTIONS
implies -g.
Allows specifying "fp" (frame pointer) or "dwarf"
- (DWARF's CFI - Call Frame Information) as the method to collect
+ (DWARF's CFI - Call Frame Information) or "lbr"
+ (Hardware Last Branch Record facility) as the method to collect
the information used to show the call graphs.
In some systems, where binaries are build with gcc
--fomit-frame-pointer, using the "fp" method will produce bogus
call graphs, using "dwarf", if available (perf tools linked to
the libunwind library) should be used instead.
+ Using the "lbr" method doesn't require any compiler options. It
+ will produce call graphs from the hardware LBR registers. The
+ main limition is that it is only available on new Intel
+ platforms, such as Haswell. It can only get user call chain. It
+ doesn't work with branch stack sampling at the same time.
-q::
--quiet::
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 404ab3434052..d0d02a811ecd 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -658,7 +658,7 @@ error:
static void callchain_debug(void)
{
- static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };
+ static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
@@ -751,9 +751,9 @@ static struct record record = {
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "
#ifdef HAVE_DWARF_UNWIND_SUPPORT
-const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
+const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr";
#else
-const char record_callchain_help[] = CALLCHAIN_HELP "fp";
+const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr";
#endif
/*
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2f91094e228b..0ba5f07906fb 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -249,6 +249,8 @@ static int report__setup_sample_type(struct report *rep)
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
(sample_type & PERF_SAMPLE_STACK_USER))
callchain_param.record_mode = CALLCHAIN_DWARF;
+ else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ callchain_param.record_mode = CALLCHAIN_LBR;
else
callchain_param.record_mode = CALLCHAIN_FP;
}
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 14e7a123d43b..9f643ee77001 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -97,6 +97,14 @@ int parse_callchain_record_opt(const char *arg)
callchain_param.dump_size = size;
}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
+ } else if (!strncmp(name, "lbr", sizeof("lbr"))) {
+ if (!strtok_r(NULL, ",", &saveptr)) {
+ callchain_param.record_mode = CALLCHAIN_LBR;
+ ret = 0;
+ } else
+ pr_err("callchain: No more arguments "
+ "needed for --call-graph lbr\n");
+ break;
} else {
pr_err("callchain: Unknown --call-graph option "
"value: %s\n", arg);
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index c0ec1acc38e4..6033a0a212ca 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -11,6 +11,7 @@ enum perf_call_graph_mode {
CALLCHAIN_NONE,
CALLCHAIN_FP,
CALLCHAIN_DWARF,
+ CALLCHAIN_LBR,
CALLCHAIN_MAX
};
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ea51a90e20a0..f93e5208c762 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -537,13 +537,30 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
}
static void
-perf_evsel__config_callgraph(struct perf_evsel *evsel)
+perf_evsel__config_callgraph(struct perf_evsel *evsel,
+ struct record_opts *opts)
{
bool function = perf_evsel__is_function_event(evsel);
struct perf_event_attr *attr = &evsel->attr;
perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+ if (callchain_param.record_mode == CALLCHAIN_LBR) {
+ if (!opts->branch_stack) {
+ if (attr->exclude_user) {
+ pr_warning("LBR callstack option is only available "
+ "to get user callchain information. "
+ "Falling back to framepointers.\n");
+ } else {
+ perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+ attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_CALL_STACK;
+ }
+ } else
+ pr_warning("Cannot use LBR callstack with branch stack. "
+ "Falling back to framepointers.\n");
+ }
+
if (callchain_param.record_mode == CALLCHAIN_DWARF) {
if (!function) {
perf_evsel__set_sample_bit(evsel, REGS_USER);
@@ -667,7 +684,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
evsel->attr.exclude_callchain_user = 1;
if (callchain_param.enabled && !evsel->no_aux_samples)
- perf_evsel__config_callgraph(evsel);
+ perf_evsel__config_callgraph(evsel, opts);
if (opts->sample_intr_regs) {
attr->sample_regs_intr = PERF_REGS_MASK;