diff options
Diffstat (limited to 'tools/perf/util/machine.c')
-rw-r--r-- | tools/perf/util/machine.c | 257 |
1 files changed, 206 insertions, 51 deletions
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index fb8496df8432..76316e459c3d 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -16,6 +16,7 @@ #include "map_symbol.h" #include "branch.h" #include "mem-events.h" +#include "path.h" #include "srcline.h" #include "symbol.h" #include "sort.h" @@ -34,6 +35,7 @@ #include "bpf-event.h" #include <internal/lib.h> // page_size #include "cgroup.h" +#include "arm64-frame-pointer-unwind-support.h" #include <linux/ctype.h> #include <symbol/kallsyms.h> @@ -82,12 +84,23 @@ static int machine__set_mmap_name(struct machine *machine) return machine->mmap_name ? 0 : -ENOMEM; } +static void thread__set_guest_comm(struct thread *thread, pid_t pid) +{ + char comm[64]; + + snprintf(comm, sizeof(comm), "[guest/%d]", pid); + thread__set_comm(thread, comm, 0); +} + int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { int err = -ENOMEM; memset(machine, 0, sizeof(*machine)); - maps__init(&machine->kmaps, machine); + machine->kmaps = maps__new(machine); + if (machine->kmaps == NULL) + return -ENOMEM; + RB_CLEAR_NODE(&machine->rb_node); dsos__init(&machine->dsos); @@ -106,7 +119,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->root_dir = strdup(root_dir); if (machine->root_dir == NULL) - return -ENOMEM; + goto out; if (machine__set_mmap_name(machine)) goto out; @@ -114,13 +127,11 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) if (pid != HOST_KERNEL_ID) { struct thread *thread = machine__findnew_thread(machine, -1, pid); - char comm[64]; if (thread == NULL) goto out; - snprintf(comm, sizeof(comm), "[guest/%d]", pid); - thread__set_comm(thread, comm, 0); + thread__set_guest_comm(thread, pid); thread__put(thread); } @@ -129,6 +140,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) out: if (err) { + zfree(&machine->kmaps); zfree(&machine->root_dir); zfree(&machine->mmap_name); } 
@@ -218,12 +230,13 @@ void machine__exit(struct machine *machine) return; machine__destroy_kernel_maps(machine); - maps__exit(&machine->kmaps); + maps__delete(machine->kmaps); dsos__exit(&machine->dsos); machine__exit_vdso(machine); zfree(&machine->root_dir); zfree(&machine->mmap_name); zfree(&machine->current_tid); + zfree(&machine->kallsyms_filename); for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; @@ -293,6 +306,8 @@ struct machine *machines__add(struct machines *machines, pid_t pid, rb_link_node(&machine->rb_node, parent, p); rb_insert_color_cached(&machine->rb_node, &machines->guests, leftmost); + machine->machines = machines; + return machine; } @@ -378,6 +393,93 @@ struct machine *machines__find_guest(struct machines *machines, pid_t pid) return machine; } +/* + * A common case for KVM test programs is that the test program acts as the + * hypervisor, creating, running and destroying the virtual machine, and + * providing the guest object code from its own object code. In this case, + * the VM is not running an OS, but only the functions loaded into it by the + * hypervisor test program, and conveniently, loaded at the same virtual + * addresses. + * + * Normally to resolve addresses, MMAP events are needed to map addresses + * back to the object code and debug symbols for that object code. + * + * Currently, there is no way to get such mapping information from guests + * but, in the scenario described above, the guest has the same mappings + * as the hypervisor, so support for that scenario can be achieved. + * + * To support that, copy the host thread's maps to the guest thread's maps. + * Note, we do not discover the guest until we encounter a guest event, + * which works well because it is not until then that we know that the host + * thread's maps have been set up. + * + * This function returns the guest thread. 
Apart from keeping the data + * structures sane, using a thread belonging to the guest machine, instead + * of the host thread, allows it to have its own comm (refer + * thread__set_guest_comm()). + */ +static struct thread *findnew_guest_code(struct machine *machine, + struct machine *host_machine, + pid_t pid) +{ + struct thread *host_thread; + struct thread *thread; + int err; + + if (!machine) + return NULL; + + thread = machine__findnew_thread(machine, -1, pid); + if (!thread) + return NULL; + + /* Assume maps are set up if there are any */ + if (thread->maps->nr_maps) + return thread; + + host_thread = machine__find_thread(host_machine, -1, pid); + if (!host_thread) + goto out_err; + + thread__set_guest_comm(thread, pid); + + /* + * Guest code can be found in hypervisor process at the same address + * so copy host maps. + */ + err = maps__clone(thread, host_thread->maps); + thread__put(host_thread); + if (err) + goto out_err; + + return thread; + +out_err: + thread__zput(thread); + return NULL; +} + +struct thread *machines__findnew_guest_code(struct machines *machines, pid_t pid) +{ + struct machine *host_machine = machines__find(machines, HOST_KERNEL_ID); + struct machine *machine = machines__findnew(machines, pid); + + return findnew_guest_code(machine, host_machine, pid); +} + +struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid) +{ + struct machines *machines = machine->machines; + struct machine *host_machine; + + if (!machines) + return NULL; + + host_machine = machines__find(machines, HOST_KERNEL_ID); + + return findnew_guest_code(machine, host_machine, pid); +} + void machines__process_guests(struct machines *machines, machine__process_t process, void *data) { @@ -776,7 +878,7 @@ static int machine__process_ksymbol_register(struct machine *machine, struct perf_sample *sample __maybe_unused) { struct symbol *sym; - struct map *map = maps__find(&machine->kmaps, event->ksymbol.addr); + struct map *map = 
maps__find(machine__kernel_maps(machine), event->ksymbol.addr); if (!map) { struct dso *dso = dso__new(event->ksymbol.name); @@ -799,7 +901,7 @@ static int machine__process_ksymbol_register(struct machine *machine, map->start = event->ksymbol.addr; map->end = map->start + event->ksymbol.len; - maps__insert(&machine->kmaps, map); + maps__insert(machine__kernel_maps(machine), map); map__put(map); dso__set_loaded(dso); @@ -825,12 +927,12 @@ static int machine__process_ksymbol_unregister(struct machine *machine, struct symbol *sym; struct map *map; - map = maps__find(&machine->kmaps, event->ksymbol.addr); + map = maps__find(machine__kernel_maps(machine), event->ksymbol.addr); if (!map) return 0; if (map != machine->vmlinux_map) - maps__remove(&machine->kmaps, map); + maps__remove(machine__kernel_maps(machine), map); else { sym = dso__find_symbol(map->dso, map->map_ip(map, map->start)); if (sym) @@ -856,7 +958,7 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused, int machine__process_text_poke(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) { - struct map *map = maps__find(&machine->kmaps, event->text_poke.addr); + struct map *map = maps__find(machine__kernel_maps(machine), event->text_poke.addr); u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; if (dump_trace) @@ -912,7 +1014,7 @@ static struct map *machine__addnew_module_map(struct machine *machine, u64 start if (map == NULL) goto out; - maps__insert(&machine->kmaps, map); + maps__insert(machine__kernel_maps(machine), map); /* Put the map here because maps__insert already got it */ map__put(map); @@ -1026,10 +1128,6 @@ static struct dso *machine__get_kernel(struct machine *machine) return kernel; } -struct process_args { - u64 start; -}; - void machine__get_kallsyms_filename(struct machine *machine, char *buf, size_t bufsz) { @@ -1098,7 +1196,7 @@ int machine__create_extra_kernel_map(struct machine *machine, strlcpy(kmap->name, xm->name, 
KMAP_NAME_LEN); - maps__insert(&machine->kmaps, map); + maps__insert(machine__kernel_maps(machine), map); pr_debug2("Added extra kernel map %s %" PRIx64 "-%" PRIx64 "\n", kmap->name, map->start, map->end); @@ -1143,7 +1241,7 @@ static u64 find_entry_trampoline(struct dso *dso) int machine__map_x86_64_entry_trampolines(struct machine *machine, struct dso *kernel) { - struct maps *kmaps = &machine->kmaps; + struct maps *kmaps = machine__kernel_maps(machine); int nr_cpus_avail, cpu; bool found = false; struct map *map; @@ -1213,7 +1311,7 @@ __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) return -1; machine->vmlinux_map->map_ip = machine->vmlinux_map->unmap_ip = identity__map_ip; - maps__insert(&machine->kmaps, machine->vmlinux_map); + maps__insert(machine__kernel_maps(machine), machine->vmlinux_map); return 0; } @@ -1226,7 +1324,7 @@ void machine__destroy_kernel_maps(struct machine *machine) return; kmap = map__kmap(map); - maps__remove(&machine->kmaps, map); + maps__remove(machine__kernel_maps(machine), map); if (kmap && kmap->ref_reloc_sym) { zfree((char **)&kmap->ref_reloc_sym->name); zfree(&kmap->ref_reloc_sym); @@ -1321,7 +1419,7 @@ int machine__load_kallsyms(struct machine *machine, const char *filename) * kernel, with modules between them, fixup the end of all * sections. 
*/ - maps__fixup_end(&machine->kmaps); + maps__fixup_end(machine__kernel_maps(machine)); } return ret; @@ -1415,7 +1513,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i struct stat st; /*sshfs might return bad dent->d_type, so we have to stat*/ - snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name); + path__join(path, sizeof(path), dir_name, dent->d_name); if (stat(path, &st)) continue; @@ -1469,7 +1567,7 @@ static int machine__set_modules_path(struct machine *machine) machine->root_dir, version); free(version); - return maps__set_modules_path_dir(&machine->kmaps, modules_path, 0); + return maps__set_modules_path_dir(machine__kernel_maps(machine), modules_path, 0); } int __weak arch__fix_module_text_start(u64 *start __maybe_unused, u64 *size __maybe_unused, @@ -1542,11 +1640,11 @@ static void machine__update_kernel_mmap(struct machine *machine, struct map *map = machine__kernel_map(machine); map__get(map); - maps__remove(&machine->kmaps, map); + maps__remove(machine__kernel_maps(machine), map); machine__set_kernel_mmap(machine, start, end); - maps__insert(&machine->kmaps, map); + maps__insert(machine__kernel_maps(machine), map); map__put(map); } @@ -1641,6 +1739,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, struct map *map; enum dso_space_type dso_space; bool is_kernel_mmap; + const char *mmap_name = machine->mmap_name; /* If we have maps from kcore then we do not need or want any others */ if (machine__uses_kcore(machine)) @@ -1651,8 +1750,16 @@ static int machine__process_kernel_mmap_event(struct machine *machine, else dso_space = DSO_SPACE__KERNEL_GUEST; - is_kernel_mmap = memcmp(xm->name, machine->mmap_name, - strlen(machine->mmap_name) - 1) == 0; + is_kernel_mmap = memcmp(xm->name, mmap_name, strlen(mmap_name) - 1) == 0; + if (!is_kernel_mmap && !machine__is_host(machine)) { + /* + * If the event was recorded inside the guest and injected into + * the host perf.data file, then it will 
match a host mmap_name, + * so try that - see machine__set_mmap_name(). + */ + mmap_name = "[kernel.kallsyms]"; + is_kernel_mmap = memcmp(xm->name, mmap_name, strlen(mmap_name) - 1) == 0; + } if (xm->name[0] == '/' || (!is_kernel_mmap && xm->name[0] == '[')) { map = machine__addnew_module_map(machine, xm->start, @@ -1666,7 +1773,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, dso__set_build_id(map->dso, bid); } else if (is_kernel_mmap) { - const char *symbol_name = (xm->name + strlen(machine->mmap_name)); + const char *symbol_name = xm->name + strlen(mmap_name); /* * Should be there already, from the build-id table in * the header. @@ -2071,6 +2178,7 @@ static void ip__resolve_ams(struct thread *thread, ams->addr = ip; ams->al_addr = al.addr; + ams->al_level = al.level; ams->ms.maps = al.maps; ams->ms.sym = al.sym; ams->ms.map = al.map; @@ -2090,6 +2198,7 @@ static void ip__resolve_data(struct thread *thread, ams->addr = addr; ams->al_addr = al.addr; + ams->al_level = al.level; ams->ms.maps = al.maps; ams->ms.sym = al.sym; ams->ms.map = al.map; @@ -2710,6 +2819,15 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, return err; } +static u64 get_leaf_frame_caller(struct perf_sample *sample, + struct thread *thread, int usr_idx) +{ + if (machine__normalized_is(thread->maps->machine, "arm64")) + return get_leaf_frame_caller_aarch64(sample, thread, usr_idx); + else + return 0; +} + static int thread__resolve_callchain_sample(struct thread *thread, struct callchain_cursor *cursor, struct evsel *evsel, @@ -2723,9 +2841,10 @@ static int thread__resolve_callchain_sample(struct thread *thread, struct ip_callchain *chain = sample->callchain; int chain_nr = 0; u8 cpumode = PERF_RECORD_MISC_USER; - int i, j, err, nr_entries; + int i, j, err, nr_entries, usr_idx; int skip_idx = -1; int first_call = 0; + u64 leaf_frame_caller; if (chain) chain_nr = chain->nr; @@ -2850,6 +2969,34 @@ check_calls: continue; } + /* + * 
PERF_CONTEXT_USER allows us to locate where the user stack ends. + * Depending on callchain_param.order and the position of PERF_CONTEXT_USER, + * the index will be different in order to add the missing frame + * at the right place. + */ + + usr_idx = callchain_param.order == ORDER_CALLEE ? j-2 : j-1; + + if (usr_idx >= 0 && chain->ips[usr_idx] == PERF_CONTEXT_USER) { + + leaf_frame_caller = get_leaf_frame_caller(sample, thread, usr_idx); + + /* + * Check if leaf_frame_caller != ip to not add the same + * value twice. + */ + + if (leaf_frame_caller && leaf_frame_caller != ip) { + + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, leaf_frame_caller, + false, NULL, NULL, 0); + if (err) + return (err < 0) ? err : 0; + } + } + err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, false, NULL, NULL, 0); @@ -2941,7 +3088,7 @@ static int thread__resolve_callchain_unwind(struct thread *thread, return 0; return unwind__get_entries(unwind_entry, cursor, - thread, sample, max_stack); + thread, sample, max_stack, false); } int thread__resolve_callchain(struct thread *thread, @@ -3033,9 +3180,7 @@ int machines__for_each_thread(struct machines *machines, pid_t machine__get_current_tid(struct machine *machine, int cpu) { - int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS); - - if (cpu < 0 || cpu >= nr_cpus || !machine->current_tid) + if (cpu < 0 || (size_t)cpu >= machine->current_tid_sz) return -1; return machine->current_tid[cpu]; @@ -3045,26 +3190,16 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, pid_t tid) { struct thread *thread; - int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS); + const pid_t init_val = -1; if (cpu < 0) return -EINVAL; - if (!machine->current_tid) { - int i; - - machine->current_tid = calloc(nr_cpus, sizeof(pid_t)); - if (!machine->current_tid) - return -ENOMEM; - for (i = 0; i < nr_cpus; i++) - machine->current_tid[i] = -1; - } - - if (cpu >= nr_cpus) { - 
pr_err("Requested CPU %d too large. ", cpu); - pr_err("Consider raising MAX_NR_CPUS\n"); - return -EINVAL; - } + if (realloc_array_as_needed(machine->current_tid, + machine->current_tid_sz, + (unsigned int)cpu, + &init_val)) + return -ENOMEM; machine->current_tid[cpu] = tid; @@ -3079,14 +3214,19 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, } /* - * Compares the raw arch string. N.B. see instead perf_env__arch() if a - * normalized arch is needed. + * Compares the raw arch string. N.B. see instead perf_env__arch() or + * machine__normalized_is() if a normalized arch is needed. */ bool machine__is(struct machine *machine, const char *arch) { return machine && !strcmp(perf_env__raw_arch(machine->env), arch); } +bool machine__normalized_is(struct machine *machine, const char *arch) +{ + return machine && !strcmp(perf_env__arch(machine->env), arch); +} + int machine__nr_cpus_avail(struct machine *machine) { return machine ? perf_env__nr_cpus_avail(machine->env) : 0; @@ -3181,3 +3321,18 @@ int machine__for_each_dso(struct machine *machine, machine__dso_t fn, void *priv } return err; } + +int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn, void *priv) +{ + struct maps *maps = machine__kernel_maps(machine); + struct map *map; + int err = 0; + + for (map = maps__first(maps); map != NULL; map = map__next(map)) { + err = fn(map, priv); + if (err != 0) { + break; + } + } + return err; +} |