author     Ingo Molnar <mingo@kernel.org>  2014-12-08 07:45:45 +0100
committer  Ingo Molnar <mingo@kernel.org>  2014-12-08 07:45:45 +0100
commit     cfa0bd52d0ba9b852f76c7b3f1055edd5e5c7846
tree       1076abb283310578da29e08b0eda281388ce22ea /tools/perf/util
parent     Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
parent     perf report: In branch stack mode use address history sorting
Merge tag 'perf-core-for-mingo-2' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements from Arnaldo Carvalho de Melo:

User visible changes:

  - Support handling complete branch stacks as histograms (Andi Kleen)

Infrastructure changes:

  - Prep work for supporting per-pkg and snapshot counters in 'perf stat' (Jiri Olsa)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/util')
-rw-r--r--  tools/perf/util/callchain.c    4
-rw-r--r--  tools/perf/util/callchain.h    1
-rw-r--r--  tools/perf/util/evsel.c       34
-rw-r--r--  tools/perf/util/evsel.h       30
-rw-r--r--  tools/perf/util/machine.c    126
-rw-r--r--  tools/perf/util/symbol.h       3
6 files changed, 125 insertions(+), 73 deletions(-)
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 517ed84db97a..cf524a35cc84 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -149,6 +149,10 @@ static int parse_callchain_sort_key(const char *value)
 		callchain_param.key = CCKEY_ADDRESS;
 		return 0;
 	}
+	if (!strncmp(value, "branch", strlen(value))) {
+		callchain_param.branch_callstack = 1;
+		return 0;
+	}
 	return -1;
 }
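Note that strncmp(value, "branch", strlen(value)) matches any leading prefix of "branch", consistent with the existing "address" key visible in the context above. A standalone sketch (not part of the patch) of that prefix behavior:

#include <stdio.h>
#include <string.h>

static int matches_branch(const char *value)
{
	/* same test as in parse_callchain_sort_key() above */
	return !strncmp(value, "branch", strlen(value));
}

int main(void)
{
	const char *keys[] = { "b", "bra", "branch", "branches", "address" };
	size_t i;

	/* "b", "bra" and "branch" match; "branches" and "address" do not */
	for (i = 0; i < sizeof(keys) / sizeof(keys[0]); i++)
		printf("%-8s -> %s\n", keys[i],
		       matches_branch(keys[i]) ? "match" : "no match");
	return 0;
}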
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 3f158474c892..dbc08cf5f970 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -63,6 +63,7 @@ struct callchain_param {
 	sort_chain_func_t	sort;
 	enum chain_order	order;
 	enum chain_key		key;
+	bool			branch_callstack;
 };
 
 extern struct callchain_param callchain_param;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 2d26b7ad6fe0..1e90c8557ede 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -954,40 +954,6 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
 	return 0;
 }
 
-int __perf_evsel__read(struct perf_evsel *evsel,
-		       int ncpus, int nthreads, bool scale)
-{
-	size_t nv = scale ? 3 : 1;
-	int cpu, thread;
-	struct perf_counts_values *aggr = &evsel->counts->aggr, count;
-
-	if (evsel->system_wide)
-		nthreads = 1;
-
-	aggr->val = aggr->ena = aggr->run = 0;
-
-	for (cpu = 0; cpu < ncpus; cpu++) {
-		for (thread = 0; thread < nthreads; thread++) {
-			if (FD(evsel, cpu, thread) < 0)
-				continue;
-
-			if (readn(FD(evsel, cpu, thread),
-				  &count, nv * sizeof(u64)) < 0)
-				return -errno;
-
-			aggr->val += count.val;
-			if (scale) {
-				aggr->ena += count.ena;
-				aggr->run += count.run;
-			}
-		}
-	}
-
-	perf_evsel__compute_deltas(evsel, -1, aggr);
-	perf_counts_values__scale(aggr, scale, &evsel->counts->scaled);
-	return 0;
-}
-
 static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
 {
 	struct perf_evsel *leader = evsel->leader;
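The removed __perf_evsel__read() summed val/ena/run triples read from each counter fd; per the merge message, this is prep work for the per-pkg and snapshot counter rework. For reference, a minimal standalone sketch (not from this patch) of the 3-u64 record the scale case relied on, as defined by the perf_event_open(2) read format:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count[3];	/* { value, time_enabled, time_running } */
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	/* ask the kernel for the 3-u64 layout (the nv == 3 case above) */
	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
			   PERF_FORMAT_TOTAL_TIME_RUNNING;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	/* ... run the workload being measured ... */

	if (read(fd, count, sizeof(count)) != sizeof(count))
		return 1;

	/* scale the raw value by enabled/running time, as perf's
	 * perf_counts_values__scale() does for multiplexed counters */
	if (count[2])
		count[0] = (uint64_t)((double)count[0] * count[1] / count[2]);

	printf("instructions: %llu\n", (unsigned long long)count[0]);
	close(fd);
	return 0;
}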
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b18d58da580b..38622747d130 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -93,6 +93,7 @@ struct perf_evsel {
 	bool			system_wide;
 	bool			tracking;
 	bool			per_pkg;
+	unsigned long		*per_pkg_mask;
 	/* parse modifier helper */
 	int			exclude_GH;
 	int			nr_members;
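per_pkg_mask is part of the per-package counter prep work mentioned in the merge message. A hypothetical standalone sketch — the helper names here are invented for illustration, not taken from the patch — of how such a package bitmap lets a socket-wide event be counted once per package rather than once per CPU:

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)

/* set the bit for this package; report whether it was already set */
static bool test_and_set_pkg(unsigned long *mask, int pkg)
{
	unsigned long bit = 1UL << (pkg % BITS_PER_LONG);
	unsigned long *word = &mask[pkg / BITS_PER_LONG];
	bool old = (*word & bit) != 0;

	*word |= bit;
	return old;
}

int main(void)
{
	unsigned long per_pkg_mask[1] = { 0 };
	/* cpu -> package id: e.g. two sockets with two cpus each */
	int cpu_pkg[] = { 0, 0, 1, 1 };
	int cpu;

	for (cpu = 0; cpu < 4; cpu++) {
		if (test_and_set_pkg(per_pkg_mask, cpu_pkg[cpu]))
			printf("cpu%d: package %d already counted, skip\n",
			       cpu, cpu_pkg[cpu]);
		else
			printf("cpu%d: first cpu on package %d, count it\n",
			       cpu, cpu_pkg[cpu]);
	}
	return 0;
}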
@@ -271,35 +272,6 @@ static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
 	return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
 }
 
-int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads,
-		       bool scale);
-
-/**
- * perf_evsel__read - Read the aggregate results on all CPUs
- *
- * @evsel - event selector to read value
- * @ncpus - Number of cpus affected, from zero
- * @nthreads - Number of threads affected, from zero
- */
-static inline int perf_evsel__read(struct perf_evsel *evsel,
-				    int ncpus, int nthreads)
-{
-	return __perf_evsel__read(evsel, ncpus, nthreads, false);
-}
-
-/**
- * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled
- *
- * @evsel - event selector to read value
- * @ncpus - Number of cpus affected, from zero
- * @nthreads - Number of threads affected, from zero
- */
-static inline int perf_evsel__read_scaled(struct perf_evsel *evsel,
-					  int ncpus, int nthreads)
-{
-	return __perf_evsel__read(evsel, ncpus, nthreads, true);
-}
-
 int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 			     struct perf_sample *sample);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b75b487574c7..15dd0a9691ce 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -12,6 +12,7 @@
 #include <stdbool.h>
 #include <symbol/kallsyms.h>
 #include "unwind.h"
+#include "linux/hash.h"
 
 static void dsos__init(struct dsos *dsos)
 {
@@ -1391,7 +1392,11 @@ static int add_callchain_ip(struct thread *thread,
 	al.filtered = 0;
 	al.sym = NULL;
-	thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
+	if (cpumode == -1)
+		thread__find_cpumode_addr_location(thread, MAP__FUNCTION,
+						   ip, &al);
+	else
+		thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
 				   ip, &al);
 	if (al.sym != NULL) {
 		if (sort__has_parent && !*parent &&
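Branch stack entries, unlike callchain entries, carry no PERF_RECORD_MISC_* cpumode marker, hence the cpumode == -1 convention above. thread__find_cpumode_addr_location() itself is not shown in this diff; the following is a sketch of how such a fallback can work — an assumption about its behavior, not code from the patch — expressed with perf's existing thread__find_addr_location() and the cpumode constants from perf_event.h:

static void find_cpumode_addr_location(struct thread *thread,
				       enum map_type type, u64 addr,
				       struct addr_location *al)
{
	const u8 cpumodes[] = {
		PERF_RECORD_MISC_USER,
		PERF_RECORD_MISC_KERNEL,
		PERF_RECORD_MISC_GUEST_USER,
		PERF_RECORD_MISC_GUEST_KERNEL,
	};
	size_t i;

	/* try each address space until one resolves the address to a map */
	for (i = 0; i < sizeof(cpumodes) / sizeof(cpumodes[0]); i++) {
		thread__find_addr_location(thread, cpumodes[i], type,
					   addr, al);
		if (al->map)
			break;
	}
}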
@@ -1427,8 +1432,50 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 	return bi;
 }
 
+#define CHASHSZ 127
+#define CHASHBITS 7
+#define NO_ENTRY 0xff
+
+#define PERF_MAX_BRANCH_DEPTH 127
+
+/* Remove loops. */
+static int remove_loops(struct branch_entry *l, int nr)
+{
+	int i, j, off;
+	unsigned char chash[CHASHSZ];
+
+	memset(chash, NO_ENTRY, sizeof(chash));
+
+	BUG_ON(PERF_MAX_BRANCH_DEPTH > 255);
+
+	for (i = 0; i < nr; i++) {
+		int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ;
+
+		/* no collision handling for now */
+		if (chash[h] == NO_ENTRY) {
+			chash[h] = i;
+		} else if (l[chash[h]].from == l[i].from) {
+			bool is_loop = true;
+			/* check if it is a real loop */
+			off = 0;
+			for (j = chash[h]; j < i && i + off < nr; j++, off++)
+				if (l[j].from != l[i + off].from) {
+					is_loop = false;
+					break;
+				}
+			if (is_loop) {
+				memmove(l + i, l + i + off,
+					(nr - (i + off)) * sizeof(*l));
+				nr -= off;
+			}
+		}
+	}
+	return nr;
+}
+
 static int thread__resolve_callchain_sample(struct thread *thread,
 					    struct ip_callchain *chain,
+					    struct branch_stack *branch,
 					    struct symbol **parent,
 					    struct addr_location *root_al,
 					    int max_stack)
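remove_loops() keeps the limited LBR window useful by collapsing repeated runs of from-addresses, so a tight loop contributes one iteration instead of flooding all PERF_MAX_BRANCH_DEPTH entries. A standalone demonstration (an illustrative reimplementation: hash_64() from linux/hash.h is replaced by a local mixing function and kernel types are dropped; the control flow mirrors the function above):

#include <stdio.h>
#include <string.h>

typedef unsigned long long u64;

struct branch_entry { u64 from, to; };

#define CHASHSZ 127
#define NO_ENTRY 0xff

/* stand-in for the kernel's hash_64(); any mixing function works here */
static int chash_idx(u64 v)
{
	return (int)((v * 0x9E3779B97F4A7C15ULL) >> 57) % CHASHSZ;
}

static int remove_loops(struct branch_entry *l, int nr)
{
	int i, j, off;
	unsigned char chash[CHASHSZ];

	memset(chash, NO_ENTRY, sizeof(chash));

	for (i = 0; i < nr; i++) {
		int h = chash_idx(l[i].from);

		if (chash[h] == NO_ENTRY) {
			chash[h] = i;
		} else if (l[chash[h]].from == l[i].from) {
			int is_loop = 1;

			/* verify the candidate sequence really repeats */
			off = 0;
			for (j = chash[h]; j < i && i + off < nr; j++, off++)
				if (l[j].from != l[i + off].from) {
					is_loop = 0;
					break;
				}
			if (is_loop) {
				/* drop the duplicate run of off entries */
				memmove(l + i, l + i + off,
					(nr - (i + off)) * sizeof(*l));
				nr -= off;
			}
		}
	}
	return nr;
}

int main(void)
{
	/* two iterations of a loop body A(0x10)->B(0x20), B(0x28)->A(0x10) */
	struct branch_entry lbr[] = {
		{ 0x10, 0x20 }, { 0x28, 0x10 },
		{ 0x10, 0x20 }, { 0x28, 0x10 },
		{ 0x40, 0x10 },			/* the original call site */
	};
	int i, nr = remove_loops(lbr, 5);

	/* prints 3 entries: one loop iteration plus the call site */
	for (i = 0; i < nr; i++)
		printf("%#llx -> %#llx\n", lbr[i].from, lbr[i].to);
	return 0;
}

Collisions are deliberately unhandled ("no collision handling for now"): a hash collision can only cause a loop to be missed, since the inner scan verifies a genuine repeat before anything is removed.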
@@ -1438,22 +1485,82 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	int i;
 	int j;
 	int err;
-	int skip_idx __maybe_unused;
+	int skip_idx = -1;
+	int first_call = 0;
+
+	/*
+	 * Based on DWARF debug information, some architectures skip
+	 * a callchain entry saved by the kernel.
+	 */
+	if (chain->nr < PERF_MAX_STACK_DEPTH)
+		skip_idx = arch_skip_callchain_idx(thread, chain);
 
 	callchain_cursor_reset(&callchain_cursor);
 
+	/*
+	 * Add branches to call stack for easier browsing. This gives
+	 * more context for a sample than just the callers.
+	 *
+	 * This uses individual histograms of paths compared to the
+	 * aggregated histograms the normal LBR mode uses.
+	 *
+	 * Limitations for now:
+	 * - No extra filters
+	 * - No annotations (should annotate somehow)
+	 */
+
+	if (branch && callchain_param.branch_callstack) {
+		int nr = min(max_stack, (int)branch->nr);
+		struct branch_entry be[nr];
+
+		if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
+			pr_warning("corrupted branch chain. skipping...\n");
+			goto check_calls;
+		}
+
+		for (i = 0; i < nr; i++) {
+			if (callchain_param.order == ORDER_CALLEE) {
+				be[i] = branch->entries[i];
+				/*
+				 * Check for overlap into the callchain.
+				 * The return address is one off compared to
+				 * the branch entry. To adjust for this
+				 * assume the calling instruction is not longer
+				 * than 8 bytes.
+				 */
+				if (i == skip_idx ||
+				    chain->ips[first_call] >= PERF_CONTEXT_MAX)
+					first_call++;
+				else if (be[i].from < chain->ips[first_call] &&
+				    be[i].from >= chain->ips[first_call] - 8)
+					first_call++;
+			} else
+				be[i] = branch->entries[branch->nr - i - 1];
+		}
+
+		nr = remove_loops(be, nr);
+
+		for (i = 0; i < nr; i++) {
+			err = add_callchain_ip(thread, parent, root_al,
+					       -1, be[i].to);
+			if (!err)
+				err = add_callchain_ip(thread, parent, root_al,
+						       -1, be[i].from);
+			if (err == -EINVAL)
+				break;
+			if (err)
+				return err;
+		}
+		chain_nr -= nr;
+	}
+
+check_calls:
 	if (chain->nr > PERF_MAX_STACK_DEPTH) {
 		pr_warning("corrupted callchain. skipping...\n");
 		return 0;
 	}
 
-	/*
-	 * Based on DWARF debug information, some architectures skip
-	 * a callchain entry saved by the kernel.
-	 */
-	skip_idx = arch_skip_callchain_idx(thread, chain);
-
-	for (i = 0; i < chain_nr; i++) {
+	for (i = first_call; i < chain_nr; i++) {
 		u64 ip;
 
 		if (callchain_param.order == ORDER_CALLEE)
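The overlap adjustment above compensates for the callchain storing return addresses while LBR "from" fields store the address of the call instruction itself, assumed to be at most 8 bytes long. A toy standalone check (addresses invented) of that window test:

#include <stdio.h>

typedef unsigned long long u64;

/* same window test as the else-if branch above */
static int overlaps(u64 branch_from, u64 chain_ip)
{
	return branch_from < chain_ip && branch_from >= chain_ip - 8;
}

int main(void)
{
	u64 ret_addr = 0x400105;	/* callchain entry: return address */
	u64 call_insn = 0x400100;	/* LBR "from": a 5-byte call */

	printf("overlap: %d\n", overlaps(call_insn, ret_addr));	/* 1 */
	printf("overlap: %d\n", overlaps(0x4000f0, ret_addr));	/* 0 */
	return 0;
}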
@@ -1517,6 +1624,7 @@ int thread__resolve_callchain(struct thread *thread,
 			      int max_stack)
 {
 	int ret = thread__resolve_callchain_sample(thread, sample->callchain,
+						   sample->branch_stack,
 						   parent, root_al, max_stack);
 	if (ret)
 		return ret;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index e0b297c50f9d..9d602e9c6f59 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -102,7 +102,8 @@ struct symbol_conf {
 			demangle,
 			demangle_kernel,
 			filter_relative,
-			show_hist_headers;
+			show_hist_headers,
+			branch_callstack;
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,