aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/util/thread-stack.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/util/thread-stack.c')
-rw-r--r--tools/perf/util/thread-stack.c161
1 files changed, 155 insertions, 6 deletions
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index f52c0f90915d..a8b45168513c 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -20,6 +20,7 @@
#include "thread.h"
#include "event.h"
#include "machine.h"
+#include "env.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
@@ -29,6 +30,19 @@
#define STACK_GROWTH 2048
+/*
+ * State of retpoline detection.
+ *
+ * RETPOLINE_NONE: no retpoline detection
+ * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
+ * X86_RETPOLINE_DETECTED: x86 retpoline detected
+ */
+enum retpoline_state_t {
+ RETPOLINE_NONE,
+ X86_RETPOLINE_POSSIBLE,
+ X86_RETPOLINE_DETECTED,
+};
+
/**
* struct thread_stack_entry - thread stack entry.
* @ret_addr: return address
@@ -64,6 +78,7 @@ struct thread_stack_entry {
* @crp: call/return processor
* @comm: current comm
* @arr_sz: size of array if this is the first element of an array
+ * @rstate: used to detect retpolines
*/
struct thread_stack {
struct thread_stack_entry *stack;
@@ -76,6 +91,7 @@ struct thread_stack {
struct call_return_processor *crp;
struct comm *comm;
unsigned int arr_sz;
+ enum retpoline_state_t rstate;
};
/*
@@ -115,10 +131,16 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
if (err)
return err;
- if (thread->mg && thread->mg->machine)
- ts->kernel_start = machine__kernel_start(thread->mg->machine);
- else
+ if (thread->mg && thread->mg->machine) {
+ struct machine *machine = thread->mg->machine;
+ const char *arch = perf_env__arch(machine->env);
+
+ ts->kernel_start = machine__kernel_start(machine);
+ if (!strcmp(arch, "x86"))
+ ts->rstate = X86_RETPOLINE_POSSIBLE;
+ } else {
ts->kernel_start = 1ULL << 63;
+ }
ts->crp = crp;
return 0;
@@ -638,14 +660,57 @@ static int thread_stack__no_call_return(struct thread *thread,
else
parent = root;
- /* This 'return' had no 'call', so push and pop top of stack */
- cp = call_path__findnew(cpr, parent, fsym, ip, ks);
+ if (parent->sym == from_al->sym) {
+ /*
+ * At the bottom of the stack, assume the missing 'call' was
+ * before the trace started. So, pop the current symbol and push
+ * the 'to' symbol.
+ */
+ if (ts->cnt == 1) {
+ err = thread_stack__call_return(thread, ts, --ts->cnt,
+ tm, ref, false);
+ if (err)
+ return err;
+ }
+
+ if (!ts->cnt) {
+ cp = call_path__findnew(cpr, root, tsym, addr, ks);
+
+ return thread_stack__push_cp(ts, addr, tm, ref, cp,
+ true, false);
+ }
+
+ /*
+ * Otherwise assume the 'return' is being used as a jump (e.g.
+ * retpoline) and just push the 'to' symbol.
+ */
+ cp = call_path__findnew(cpr, parent, tsym, addr, ks);
+
+ err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
+ if (!err)
+ ts->stack[ts->cnt - 1].non_call = true;
+
+ return err;
+ }
+
+ /*
+ * Assume 'parent' has not yet returned, so push 'to', and then push and
+ * pop 'from'.
+ */
+
+ cp = call_path__findnew(cpr, parent, tsym, addr, ks);
err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
if (err)
return err;
- return thread_stack__pop_cp(thread, ts, addr, tm, ref, tsym);
+ cp = call_path__findnew(cpr, cp, fsym, ip, ks);
+
+ err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
+ if (err)
+ return err;
+
+ return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
}
static int thread_stack__trace_begin(struct thread *thread,
@@ -690,6 +755,70 @@ static int thread_stack__trace_end(struct thread_stack *ts,
false, true);
}
+static bool is_x86_retpoline(const char *name)
+{
+ const char *p = strstr(name, "__x86_indirect_thunk_");
+
+ return p == name || !strcmp(name, "__indirect_thunk_start");
+}
+
+/*
+ * x86 retpoline functions pollute the call graph. This function removes them.
+ * This does not handle function return thunks, nor is there any improvement
+ * for the handling of inline thunks or extern thunks.
+ */
+static int thread_stack__x86_retpoline(struct thread_stack *ts,
+ struct perf_sample *sample,
+ struct addr_location *to_al)
+{
+ struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
+ struct call_path_root *cpr = ts->crp->cpr;
+ struct symbol *sym = tse->cp->sym;
+ struct symbol *tsym = to_al->sym;
+ struct call_path *cp;
+
+ if (sym && is_x86_retpoline(sym->name)) {
+ /*
+ * This is a x86 retpoline fn. It pollutes the call graph by
+ * showing up everywhere there is an indirect branch, but does
+ * not itself mean anything. Here the top-of-stack is removed,
+ * by decrementing the stack count, and then further down, the
+ * resulting top-of-stack is replaced with the actual target.
+ * The result is that the retpoline functions will no longer
+ * appear in the call graph. Note this only affects the call
+ * graph, since all the original branches are left unchanged.
+ */
+ ts->cnt -= 1;
+ sym = ts->stack[ts->cnt - 2].cp->sym;
+ if (sym && sym == tsym && to_al->addr != tsym->start) {
+ /*
+ * Target is back to the middle of the symbol we came
+ * from so assume it is an indirect jmp and forget it
+ * altogether.
+ */
+ ts->cnt -= 1;
+ return 0;
+ }
+ } else if (sym && sym == tsym) {
+ /*
+ * Target is back to the symbol we came from so assume it is an
+ * indirect jmp and forget it altogether.
+ */
+ ts->cnt -= 1;
+ return 0;
+ }
+
+ cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
+ sample->addr, ts->kernel_start);
+ if (!cp)
+ return -ENOMEM;
+
+ /* Replace the top-of-stack with the actual target */
+ ts->stack[ts->cnt - 1].cp = cp;
+
+ return 0;
+}
+
int thread_stack__process(struct thread *thread, struct comm *comm,
struct perf_sample *sample,
struct addr_location *from_al,
@@ -697,6 +826,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
struct call_return_processor *crp)
{
struct thread_stack *ts = thread__stack(thread, sample->cpu);
+ enum retpoline_state_t rstate;
int err = 0;
if (ts && !ts->crp) {
@@ -712,6 +842,10 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
ts->comm = comm;
}
+ rstate = ts->rstate;
+ if (rstate == X86_RETPOLINE_DETECTED)
+ ts->rstate = X86_RETPOLINE_POSSIBLE;
+
/* Flush stack on exec */
if (ts->comm != comm && thread->pid_ == thread->tid) {
err = __thread_stack__flush(thread, ts);
@@ -748,10 +882,25 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
ts->kernel_start);
err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
cp, false, trace_end);
+
+ /*
+ * A call to the same symbol but not the start of the symbol,
+ * may be the start of a x86 retpoline.
+ */
+ if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
+ from_al->sym == to_al->sym &&
+ to_al->addr != to_al->sym->start)
+ ts->rstate = X86_RETPOLINE_DETECTED;
+
} else if (sample->flags & PERF_IP_FLAG_RETURN) {
if (!sample->ip || !sample->addr)
return 0;
+ /* x86 retpoline 'return' doesn't match the stack */
+ if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
+ ts->stack[ts->cnt - 1].ret_addr != sample->addr)
+ return thread_stack__x86_retpoline(ts, sample, to_al);
+
err = thread_stack__pop_cp(thread, ts, sample->addr,
sample->time, ref, from_al->sym);
if (err) {