aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/util/machine.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/util/machine.c')
-rw-r--r--tools/perf/util/machine.c362
1 files changed, 287 insertions, 75 deletions
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 16bba9fff2c8..15dd0a9691ce 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -12,13 +12,20 @@
#include <stdbool.h>
#include <symbol/kallsyms.h>
#include "unwind.h"
+#include "linux/hash.h"
+
+static void dsos__init(struct dsos *dsos)
+{
+ INIT_LIST_HEAD(&dsos->head);
+ dsos->root = RB_ROOT;
+}
int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
{
- map_groups__init(&machine->kmaps);
+ map_groups__init(&machine->kmaps, machine);
RB_CLEAR_NODE(&machine->rb_node);
- INIT_LIST_HEAD(&machine->user_dsos);
- INIT_LIST_HEAD(&machine->kernel_dsos);
+ dsos__init(&machine->user_dsos);
+ dsos__init(&machine->kernel_dsos);
machine->threads = RB_ROOT;
INIT_LIST_HEAD(&machine->dead_threads);
@@ -26,11 +33,12 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
machine->vdso_info = NULL;
- machine->kmaps.machine = machine;
machine->pid = pid;
machine->symbol_filter = NULL;
machine->id_hdr_size = 0;
+ machine->comm_exec = false;
+ machine->kernel_start = 0;
machine->root_dir = strdup(root_dir);
if (machine->root_dir == NULL)
@@ -70,11 +78,12 @@ out_delete:
return NULL;
}
-static void dsos__delete(struct list_head *dsos)
+static void dsos__delete(struct dsos *dsos)
{
struct dso *pos, *n;
- list_for_each_entry_safe(pos, n, dsos, node) {
+ list_for_each_entry_safe(pos, n, &dsos->head, node) {
+ RB_CLEAR_NODE(&pos->rb_node);
list_del(&pos->node);
dso__delete(pos);
}
@@ -179,6 +188,19 @@ void machines__set_symbol_filter(struct machines *machines,
}
}
+void machines__set_comm_exec(struct machines *machines, bool comm_exec)
+{
+ struct rb_node *nd;
+
+ machines->host.comm_exec = comm_exec;
+
+ for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ struct machine *machine = rb_entry(nd, struct machine, rb_node);
+
+ machine->comm_exec = comm_exec;
+ }
+}
+
struct machine *machines__find(struct machines *machines, pid_t pid)
{
struct rb_node **p = &machines->guests.rb_node;
@@ -297,7 +319,7 @@ static void machine__update_thread_pid(struct machine *machine,
goto out_err;
if (!leader->mg)
- leader->mg = map_groups__new();
+ leader->mg = map_groups__new(machine);
if (!leader->mg)
goto out_err;
@@ -398,17 +420,31 @@ struct thread *machine__find_thread(struct machine *machine, pid_t pid,
return __machine__findnew_thread(machine, pid, tid, false);
}
+struct comm *machine__thread_exec_comm(struct machine *machine,
+ struct thread *thread)
+{
+ if (machine->comm_exec)
+ return thread__exec_comm(thread);
+ else
+ return thread__comm(thread);
+}
+
int machine__process_comm_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample)
{
struct thread *thread = machine__findnew_thread(machine,
event->comm.pid,
event->comm.tid);
+ bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC;
+
+ if (exec)
+ machine->comm_exec = true;
if (dump_trace)
perf_event__fprintf_comm(event, stdout);
- if (thread == NULL || thread__set_comm(thread, event->comm.comm, sample->time)) {
+ if (thread == NULL ||
+ __thread__set_comm(thread, event->comm.comm, sample->time, exec)) {
dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
return -1;
}
@@ -429,6 +465,7 @@ struct map *machine__new_module(struct machine *machine, u64 start,
{
struct map *map;
struct dso *dso = __dsos__findnew(&machine->kernel_dsos, filename);
+ bool compressed;
if (dso == NULL)
return NULL;
@@ -441,6 +478,11 @@ struct map *machine__new_module(struct machine *machine, u64 start,
dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
else
dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
+
+ /* _KMODULE_COMP should be next to _KMODULE */
+ if (is_kernel_module(filename, &compressed) && compressed)
+ dso->symtab_type++;
+
map_groups__insert(&machine->kmaps, map);
return map;
}
@@ -448,23 +490,23 @@ struct map *machine__new_module(struct machine *machine, u64 start,
size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
{
struct rb_node *nd;
- size_t ret = __dsos__fprintf(&machines->host.kernel_dsos, fp) +
- __dsos__fprintf(&machines->host.user_dsos, fp);
+ size_t ret = __dsos__fprintf(&machines->host.kernel_dsos.head, fp) +
+ __dsos__fprintf(&machines->host.user_dsos.head, fp);
for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
- ret += __dsos__fprintf(&pos->kernel_dsos, fp);
- ret += __dsos__fprintf(&pos->user_dsos, fp);
+ ret += __dsos__fprintf(&pos->kernel_dsos.head, fp);
+ ret += __dsos__fprintf(&pos->user_dsos.head, fp);
}
return ret;
}
-size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
+size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp,
bool (skip)(struct dso *dso, int parm), int parm)
{
- return __dsos__fprintf_buildid(&machine->kernel_dsos, fp, skip, parm) +
- __dsos__fprintf_buildid(&machine->user_dsos, fp, skip, parm);
+ return __dsos__fprintf_buildid(&m->kernel_dsos.head, fp, skip, parm) +
+ __dsos__fprintf_buildid(&m->user_dsos.head, fp, skip, parm);
}
size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
@@ -565,8 +607,8 @@ const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL};
* Returns the name of the start symbol in *symbol_name. Pass in NULL as
* symbol_name if it's not that important.
*/
-static u64 machine__get_kernel_start_addr(struct machine *machine,
- const char **symbol_name)
+static u64 machine__get_running_kernel_start(struct machine *machine,
+ const char **symbol_name)
{
char filename[PATH_MAX];
int i;
@@ -593,7 +635,7 @@ static u64 machine__get_kernel_start_addr(struct machine *machine,
int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
{
enum map_type type;
- u64 start = machine__get_kernel_start_addr(machine, NULL);
+ u64 start = machine__get_running_kernel_start(machine, NULL);
for (type = 0; type < MAP__NR_TYPES; ++type) {
struct kmap *kmap;
@@ -826,8 +868,14 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg,
struct map *map;
char *long_name;
- if (dot == NULL || strcmp(dot, ".ko"))
+ if (dot == NULL)
continue;
+
+ /* On some system, modules are compressed like .ko.gz */
+ if (is_supported_compression(dot + 1) &&
+ is_kmodule_extension(dot - 2))
+ dot -= 3;
+
snprintf(dso_name, sizeof(dso_name), "[%.*s]",
(int)(dot - dent->d_name), dent->d_name);
@@ -912,7 +960,7 @@ int machine__create_kernel_maps(struct machine *machine)
{
struct dso *kernel = machine__get_kernel(machine);
const char *name;
- u64 addr = machine__get_kernel_start_addr(machine, &name);
+ u64 addr = machine__get_running_kernel_start(machine, &name);
if (!addr)
return -1;
@@ -965,7 +1013,7 @@ static bool machine__uses_kcore(struct machine *machine)
{
struct dso *dso;
- list_for_each_entry(dso, &machine->kernel_dsos, node) {
+ list_for_each_entry(dso, &machine->kernel_dsos.head, node) {
if (dso__is_kcore(dso))
return true;
}
@@ -1009,6 +1057,11 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
dot = strrchr(name, '.');
if (dot == NULL)
goto out_problem;
+ /* On some system, modules are compressed like .ko.gz */
+ if (is_supported_compression(dot + 1))
+ dot -= 3;
+ if (!is_kmodule_extension(dot + 1))
+ goto out_problem;
snprintf(short_module_name, sizeof(short_module_name),
"[%.*s]", (int)(dot - name), name);
strxfrchar(short_module_name, '-', '_');
@@ -1033,8 +1086,20 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
* Should be there already, from the build-id table in
* the header.
*/
- struct dso *kernel = __dsos__findnew(&machine->kernel_dsos,
- kmmap_prefix);
+ struct dso *kernel = NULL;
+ struct dso *dso;
+
+ list_for_each_entry(dso, &machine->kernel_dsos.head, node) {
+ if (is_kernel_module(dso->long_name, NULL))
+ continue;
+
+ kernel = dso;
+ break;
+ }
+
+ if (kernel == NULL)
+ kernel = __dsos__findnew(&machine->kernel_dsos,
+ kmmap_prefix);
if (kernel == NULL)
goto out_problem;
@@ -1042,6 +1107,9 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
if (__machine__create_kernel_maps(machine, kernel) < 0)
goto out_problem;
+ if (strstr(kernel->long_name, "vmlinux"))
+ dso__set_short_name(kernel, "[kernel.vmlinux]", false);
+
machine__set_kernel_mmap_len(machine, event);
/*
@@ -1254,7 +1322,7 @@ static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
return 0;
}
-static void ip__resolve_ams(struct machine *machine, struct thread *thread,
+static void ip__resolve_ams(struct thread *thread,
struct addr_map_symbol *ams,
u64 ip)
{
@@ -1268,7 +1336,7 @@ static void ip__resolve_ams(struct machine *machine, struct thread *thread,
* Thus, we have to try consecutively until we find a match
* or else, the symbol is unknown
*/
- thread__find_cpumode_addr_location(thread, machine, MAP__FUNCTION, ip, &al);
+ thread__find_cpumode_addr_location(thread, MAP__FUNCTION, ip, &al);
ams->addr = ip;
ams->al_addr = al.addr;
@@ -1276,15 +1344,23 @@ static void ip__resolve_ams(struct machine *machine, struct thread *thread,
ams->map = al.map;
}
-static void ip__resolve_data(struct machine *machine, struct thread *thread,
+static void ip__resolve_data(struct thread *thread,
u8 m, struct addr_map_symbol *ams, u64 addr)
{
struct addr_location al;
memset(&al, 0, sizeof(al));
- thread__find_addr_location(thread, machine, m, MAP__VARIABLE, addr,
- &al);
+ thread__find_addr_location(thread, m, MAP__VARIABLE, addr, &al);
+ if (al.map == NULL) {
+ /*
+ * some shared data regions have execute bit set which puts
+ * their mapping in the MAP__FUNCTION type array.
+ * Check there as a fallback option before dropping the sample.
+ */
+ thread__find_addr_location(thread, m, MAP__FUNCTION, addr, &al);
+ }
+
ams->addr = addr;
ams->al_addr = al.addr;
ams->sym = al.sym;
@@ -1299,14 +1375,45 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
if (!mi)
return NULL;
- ip__resolve_ams(al->machine, al->thread, &mi->iaddr, sample->ip);
- ip__resolve_data(al->machine, al->thread, al->cpumode,
- &mi->daddr, sample->addr);
+ ip__resolve_ams(al->thread, &mi->iaddr, sample->ip);
+ ip__resolve_data(al->thread, al->cpumode, &mi->daddr, sample->addr);
mi->data_src.val = sample->data_src;
return mi;
}
+static int add_callchain_ip(struct thread *thread,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int cpumode,
+ u64 ip)
+{
+ struct addr_location al;
+
+ al.filtered = 0;
+ al.sym = NULL;
+ if (cpumode == -1)
+ thread__find_cpumode_addr_location(thread, MAP__FUNCTION,
+ ip, &al);
+ else
+ thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
+ ip, &al);
+ if (al.sym != NULL) {
+ if (sort__has_parent && !*parent &&
+ symbol__match_regex(al.sym, &parent_regex))
+ *parent = al.sym;
+ else if (have_ignore_callees && root_al &&
+ symbol__match_regex(al.sym, &ignore_callees_regex)) {
+ /* Treat this symbol as the root,
+ forgetting its callees. */
+ *root_al = al;
+ callchain_cursor_reset(&callchain_cursor);
+ }
+ }
+
+ return callchain_cursor_append(&callchain_cursor, al.addr, al.map, al.sym);
+}
+
struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
struct addr_location *al)
{
@@ -1318,16 +1425,57 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
return NULL;
for (i = 0; i < bs->nr; i++) {
- ip__resolve_ams(al->machine, al->thread, &bi[i].to, bs->entries[i].to);
- ip__resolve_ams(al->machine, al->thread, &bi[i].from, bs->entries[i].from);
+ ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to);
+ ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from);
bi[i].flags = bs->entries[i].flags;
}
return bi;
}
-static int machine__resolve_callchain_sample(struct machine *machine,
- struct thread *thread,
+#define CHASHSZ 127
+#define CHASHBITS 7
+#define NO_ENTRY 0xff
+
+#define PERF_MAX_BRANCH_DEPTH 127
+
+/* Remove loops. */
+static int remove_loops(struct branch_entry *l, int nr)
+{
+ int i, j, off;
+ unsigned char chash[CHASHSZ];
+
+ memset(chash, NO_ENTRY, sizeof(chash));
+
+ BUG_ON(PERF_MAX_BRANCH_DEPTH > 255);
+
+ for (i = 0; i < nr; i++) {
+ int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ;
+
+ /* no collision handling for now */
+ if (chash[h] == NO_ENTRY) {
+ chash[h] = i;
+ } else if (l[chash[h]].from == l[i].from) {
+ bool is_loop = true;
+ /* check if it is a real loop */
+ off = 0;
+ for (j = chash[h]; j < i && i + off < nr; j++, off++)
+ if (l[j].from != l[i + off].from) {
+ is_loop = false;
+ break;
+ }
+ if (is_loop) {
+ memmove(l + i, l + i + off,
+ (nr - (i + off)) * sizeof(*l));
+ nr -= off;
+ }
+ }
+ }
+ return nr;
+}
+
+static int thread__resolve_callchain_sample(struct thread *thread,
struct ip_callchain *chain,
+ struct branch_stack *branch,
struct symbol **parent,
struct addr_location *root_al,
int max_stack)
@@ -1337,24 +1485,83 @@ static int machine__resolve_callchain_sample(struct machine *machine,
int i;
int j;
int err;
- int skip_idx __maybe_unused;
+ int skip_idx = -1;
+ int first_call = 0;
+
+ /*
+ * Based on DWARF debug information, some architectures skip
+ * a callchain entry saved by the kernel.
+ */
+ if (chain->nr < PERF_MAX_STACK_DEPTH)
+ skip_idx = arch_skip_callchain_idx(thread, chain);
callchain_cursor_reset(&callchain_cursor);
+ /*
+ * Add branches to call stack for easier browsing. This gives
+ * more context for a sample than just the callers.
+ *
+ * This uses individual histograms of paths compared to the
+ * aggregated histograms the normal LBR mode uses.
+ *
+ * Limitations for now:
+ * - No extra filters
+ * - No annotations (should annotate somehow)
+ */
+
+ if (branch && callchain_param.branch_callstack) {
+ int nr = min(max_stack, (int)branch->nr);
+ struct branch_entry be[nr];
+
+ if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
+ pr_warning("corrupted branch chain. skipping...\n");
+ goto check_calls;
+ }
+
+ for (i = 0; i < nr; i++) {
+ if (callchain_param.order == ORDER_CALLEE) {
+ be[i] = branch->entries[i];
+ /*
+ * Check for overlap into the callchain.
+ * The return address is one off compared to
+ * the branch entry. To adjust for this
+ * assume the calling instruction is not longer
+ * than 8 bytes.
+ */
+ if (i == skip_idx ||
+ chain->ips[first_call] >= PERF_CONTEXT_MAX)
+ first_call++;
+ else if (be[i].from < chain->ips[first_call] &&
+ be[i].from >= chain->ips[first_call] - 8)
+ first_call++;
+ } else
+ be[i] = branch->entries[branch->nr - i - 1];
+ }
+
+ nr = remove_loops(be, nr);
+
+ for (i = 0; i < nr; i++) {
+ err = add_callchain_ip(thread, parent, root_al,
+ -1, be[i].to);
+ if (!err)
+ err = add_callchain_ip(thread, parent, root_al,
+ -1, be[i].from);
+ if (err == -EINVAL)
+ break;
+ if (err)
+ return err;
+ }
+ chain_nr -= nr;
+ }
+
+check_calls:
if (chain->nr > PERF_MAX_STACK_DEPTH) {
pr_warning("corrupted callchain. skipping...\n");
return 0;
}
- /*
- * Based on DWARF debug information, some architectures skip
- * a callchain entry saved by the kernel.
- */
- skip_idx = arch_skip_callchain_idx(machine, thread, chain);
-
- for (i = 0; i < chain_nr; i++) {
+ for (i = first_call; i < chain_nr; i++) {
u64 ip;
- struct addr_location al;
if (callchain_param.order == ORDER_CALLEE)
j = i;
@@ -1391,24 +1598,10 @@ static int machine__resolve_callchain_sample(struct machine *machine,
continue;
}
- al.filtered = 0;
- thread__find_addr_location(thread, machine, cpumode,
- MAP__FUNCTION, ip, &al);
- if (al.sym != NULL) {
- if (sort__has_parent && !*parent &&
- symbol__match_regex(al.sym, &parent_regex))
- *parent = al.sym;
- else if (have_ignore_callees && root_al &&
- symbol__match_regex(al.sym, &ignore_callees_regex)) {
- /* Treat this symbol as the root,
- forgetting its callees. */
- *root_al = al;
- callchain_cursor_reset(&callchain_cursor);
- }
- }
-
- err = callchain_cursor_append(&callchain_cursor,
- ip, al.map, al.sym);
+ err = add_callchain_ip(thread, parent, root_al,
+ cpumode, ip);
+ if (err == -EINVAL)
+ break;
if (err)
return err;
}
@@ -1423,19 +1616,16 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
entry->map, entry->sym);
}
-int machine__resolve_callchain(struct machine *machine,
- struct perf_evsel *evsel,
- struct thread *thread,
- struct perf_sample *sample,
- struct symbol **parent,
- struct addr_location *root_al,
- int max_stack)
+int thread__resolve_callchain(struct thread *thread,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack)
{
- int ret;
-
- ret = machine__resolve_callchain_sample(machine, thread,
- sample->callchain, parent,
- root_al, max_stack);
+ int ret = thread__resolve_callchain_sample(thread, sample->callchain,
+ sample->branch_stack,
+ parent, root_al, max_stack);
if (ret)
return ret;
@@ -1449,7 +1639,7 @@ int machine__resolve_callchain(struct machine *machine,
(!sample->user_stack.size))
return 0;
- return unwind__get_entries(unwind_entry, &callchain_cursor, machine,
+ return unwind__get_entries(unwind_entry, &callchain_cursor,
thread, sample, max_stack);
}
@@ -1531,3 +1721,25 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
return 0;
}
+
+int machine__get_kernel_start(struct machine *machine)
+{
+ struct map *map = machine__kernel_map(machine, MAP__FUNCTION);
+ int err = 0;
+
+ /*
+ * The only addresses above 2^63 are kernel addresses of a 64-bit
+ * kernel. Note that addresses are unsigned so that on a 32-bit system
+ * all addresses including kernel addresses are less than 2^32. In
+ * that case (32-bit system), if the kernel mapping is unknown, all
+ * addresses will be assumed to be in user space - see
+ * machine__kernel_ip().
+ */
+ machine->kernel_start = 1ULL << 63;
+ if (map) {
+ err = map__load(map, machine->symbol_filter);
+ if (map->start)
+ machine->kernel_start = map->start;
+ }
+ return err;
+}