diff options
Diffstat (limited to 'tools/perf/util')
54 files changed, 2564 insertions, 633 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index ea0a452550b0..8052373bcd6a 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -106,6 +106,7 @@ libperf-y += units.o libperf-y += time-utils.o libperf-y += expr-bison.o libperf-y += branch.o +libperf-y += mem2node.o libperf-$(CONFIG_LIBBPF) += bpf-loader.o libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 28b233c3dcbe..3a428d7c59b9 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -14,6 +14,7 @@ #include "sort.h" #include "build-id.h" #include "color.h" +#include "config.h" #include "cache.h" #include "symbol.h" #include "debug.h" @@ -27,8 +28,25 @@ #include <linux/bitops.h> #include <linux/kernel.h> +/* FIXME: For the HE_COLORSET */ +#include "ui/browser.h" + +/* + * FIXME: Using the same values as slang.h, + * but that header may not be available everywhere + */ +#define LARROW_CHAR ((unsigned char)',') +#define RARROW_CHAR ((unsigned char)'+') +#define DARROW_CHAR ((unsigned char)'.') +#define UARROW_CHAR ((unsigned char)'-') + #include "sane_ctype.h" +struct annotation_options annotation__default_options = { + .use_offset = true, + .jump_arrows = true, +}; + const char *disassembler_style; const char *objdump_path; static regex_t file_lineno; @@ -184,9 +202,13 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2) return arch->ins_is_fused(arch, ins1, ins2); } -static int call__parse(struct arch *arch, struct ins_operands *ops, struct map *map) +static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms) { char *endptr, *tok, *name; + struct map *map = ms->map; + struct addr_map_symbol target = { + .map = map, + }; ops->target.addr = strtoull(ops->raw, &endptr, 16); @@ -208,32 +230,36 @@ static int call__parse(struct arch *arch, struct ins_operands *ops, struct map * ops->target.name = strdup(name); *tok = '>'; - return 
ops->target.name == NULL ? -1 : 0; + if (ops->target.name == NULL) + return -1; +find_target: + target.addr = map__objdump_2mem(map, ops->target.addr); -indirect_call: - tok = strchr(endptr, '*'); - if (tok == NULL) { - struct symbol *sym = map__find_symbol(map, map->map_ip(map, ops->target.addr)); - if (sym != NULL) - ops->target.name = strdup(sym->name); - else - ops->target.addr = 0; - return 0; - } + if (map_groups__find_ams(&target) == 0 && + map__rip_2objdump(target.map, map->map_ip(target.map, target.addr)) == ops->target.addr) + ops->target.sym = target.sym; - ops->target.addr = strtoull(tok + 1, NULL, 16); return 0; + +indirect_call: + tok = strchr(endptr, '*'); + if (tok != NULL) + ops->target.addr = strtoull(tok + 1, NULL, 16); + goto find_target; } static int call__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - if (ops->target.name) - return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name); + if (ops->target.sym) + return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.sym->name); if (ops->target.addr == 0) return ins__raw_scnprintf(ins, bf, size, ops); + if (ops->target.name) + return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name); + return scnprintf(bf, size, "%-6s *%" PRIx64, ins->name, ops->target.addr); } @@ -244,14 +270,29 @@ static struct ins_ops call_ops = { bool ins__is_call(const struct ins *ins) { - return ins->ops == &call_ops; + return ins->ops == &call_ops || ins->ops == &s390_call_ops; } -static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused) +static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms) { - const char *s = strchr(ops->raw, '+'); + struct map *map = ms->map; + struct symbol *sym = ms->sym; + struct addr_map_symbol target = { + .map = map, + }; const char *c = strchr(ops->raw, ','); - + u64 start, end; + /* + * Examples of lines to parse for the 
_cpp_lex_token@@Base + * function: + * + * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92> + * 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72> + * + * The first is a jump to an offset inside the same function, + * the second is to another function, i.e. that 0xa72 is an + * offset in the cpp_named_operator2name@@base function. + */ /* * skip over possible up to 2 operands to get to address, e.g.: * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0> @@ -267,8 +308,36 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op ops->target.addr = strtoull(ops->raw, NULL, 16); } - if (s++ != NULL) { - ops->target.offset = strtoull(s, NULL, 16); + target.addr = map__objdump_2mem(map, ops->target.addr); + start = map->unmap_ip(map, sym->start), + end = map->unmap_ip(map, sym->end); + + ops->target.outside = target.addr < start || target.addr > end; + + /* + * FIXME: things like this in _cpp_lex_token (gcc's cc1 program): + + cpp_named_operator2name@@Base+0xa72 + + * Point to a place that is after the cpp_named_operator2name + * boundaries, i.e. in the ELF symbol table for cc1 + * cpp_named_operator2name is marked as being 32-bytes long, but it in + * fact is much larger than that, so we seem to need a symbols__find() + * routine that looks for >= current->start and < next_symbol->start, + * possibly just for C++ objects? + * + * For now lets just make some progress by marking jumps to outside the + * current function as call like. + * + * Actual navigation will come next, with further understanding of how + * the symbol searching and disassembly should be done. 
+ */ + if (map_groups__find_ams(&target) == 0 && + map__rip_2objdump(target.map, map->map_ip(target.map, target.addr)) == ops->target.addr) + ops->target.sym = target.sym; + + if (!ops->target.outside) { + ops->target.offset = target.addr - start; ops->target.offset_avail = true; } else { ops->target.offset_avail = false; @@ -280,11 +349,15 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op static int jump__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - const char *c = strchr(ops->raw, ','); + const char *c; if (!ops->target.addr || ops->target.offset < 0) return ins__raw_scnprintf(ins, bf, size, ops); + if (ops->target.outside && ops->target.sym != NULL) + return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.sym->name); + + c = strchr(ops->raw, ','); if (c != NULL) { const char *c2 = strchr(c + 1, ','); @@ -340,7 +413,7 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) return 0; } -static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map *map) +static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms) { ops->locked.ops = zalloc(sizeof(*ops->locked.ops)); if (ops->locked.ops == NULL) @@ -355,7 +428,7 @@ static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map * goto out_free_ops; if (ops->locked.ins.ops->parse && - ops->locked.ins.ops->parse(arch, ops->locked.ops, map) < 0) + ops->locked.ins.ops->parse(arch, ops->locked.ops, ms) < 0) goto out_free_ops; return 0; @@ -398,7 +471,7 @@ static struct ins_ops lock_ops = { .scnprintf = lock__scnprintf, }; -static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *map __maybe_unused) +static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused) { char *s = strchr(ops->raw, ','), *target, *comment, prev; @@ -459,7 +532,7 @@ static struct ins_ops mov_ops = { .scnprintf = 
mov__scnprintf, }; -static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused) +static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused) { char *target, *comment, *s, prev; @@ -826,6 +899,66 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, return err; } +static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64 end) +{ + unsigned n_insn = 0; + u64 offset; + + for (offset = start; offset <= end; offset++) { + if (notes->offsets[offset]) + n_insn++; + } + return n_insn; +} + +static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch) +{ + unsigned n_insn; + u64 offset; + + n_insn = annotation__count_insn(notes, start, end); + if (n_insn && ch->num && ch->cycles) { + float ipc = n_insn / ((double)ch->cycles / (double)ch->num); + + /* Hide data when there are too many overlaps. */ + if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2) + return; + + for (offset = start; offset <= end; offset++) { + struct annotation_line *al = notes->offsets[offset]; + + if (al) + al->ipc = ipc; + } + } +} + +void annotation__compute_ipc(struct annotation *notes, size_t size) +{ + u64 offset; + + if (!notes->src || !notes->src->cycles_hist) + return; + + pthread_mutex_lock(&notes->lock); + for (offset = 0; offset < size; ++offset) { + struct cyc_hist *ch; + + ch = &notes->src->cycles_hist[offset]; + if (ch && ch->cycles) { + struct annotation_line *al; + + if (ch->have_start) + annotation__count_and_fill(notes, ch->start, offset, ch); + al = notes->offsets[offset]; + if (al && ch->num_aggr) + al->cycles = ch->cycles_aggr / ch->num_aggr; + notes->have_cycles = true; + } + } + pthread_mutex_unlock(&notes->lock); +} + int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, int evidx) { @@ -838,14 +971,14 @@ int hist_entry__inc_addr_samples(struct hist_entry *he,
struct perf_sample *samp return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip, sample); } -static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map *map) +static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms) { dl->ins.ops = ins__find(arch, dl->ins.name); if (!dl->ins.ops) return; - if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, map) < 0) + if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms) < 0) dl->ins.ops = NULL; } @@ -882,7 +1015,7 @@ out_free_name: struct annotate_args { size_t privsize; struct arch *arch; - struct map *map; + struct map_symbol ms; struct perf_evsel *evsel; s64 offset; char *line; @@ -964,7 +1097,7 @@ static struct disasm_line *disasm_line__new(struct annotate_args *args) if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; - disasm_line__init_ins(dl, args->arch, args->map); + disasm_line__init_ins(dl, args->arch, &args->ms); } } @@ -1222,7 +1355,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, struct annotate_args *args, int *line_nr) { - struct map *map = args->map; + struct map *map = args->ms.map; struct annotation *notes = symbol__annotation(sym); struct disasm_line *dl; char *line = NULL, *parsed_line, *tmp, *tmp2; @@ -1269,6 +1402,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, args->offset = offset; args->line = parsed_line; args->line_nr = *line_nr; + args->ms.sym = sym; dl = disasm_line__new(args); free(line); @@ -1277,14 +1411,14 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, if (dl == NULL) return -1; - if (!disasm_line__has_offset(dl)) { + if (!disasm_line__has_local_offset(dl)) { dl->ops.target.offset = dl->ops.target.addr - map__rip_2objdump(map, sym->start); dl->ops.target.offset_avail = true; } - /* kcore has no symbols, so add the call target name */ - if (dl->ins.ops && ins__is_call(&dl->ins) && 
!dl->ops.target.name) { + /* kcore has no symbols, so add the call target symbol */ + if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) { struct addr_map_symbol target = { .map = map, .addr = dl->ops.target.addr, @@ -1292,7 +1426,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, if (!map_groups__find_ams(&target) && target.sym->start == target.al_addr) - dl->ops.target.name = strdup(target.sym->name); + dl->ops.target.sym = target.sym; } annotation_line__add(&dl->al, &notes->src->source); @@ -1421,9 +1555,9 @@ fallback: static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { - struct map *map = args->map; + struct map *map = args->ms.map; struct dso *dso = map->dso; - char command[PATH_MAX * 2]; + char *command; FILE *file; char symfs_filename[PATH_MAX]; struct kcore_extract kce; @@ -1464,7 +1598,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) strcpy(symfs_filename, tmp); } - snprintf(command, sizeof(command), + err = asprintf(&command, "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand", @@ -1477,12 +1611,17 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) symbol_conf.annotate_src ? "-S" : "", symfs_filename, symfs_filename); + if (err < 0) { + pr_err("Failure allocating memory for the command to run\n"); + goto out_remove_tmp; + } + pr_debug("Executing: %s\n", command); err = -1; if (pipe(stdout_fd) < 0) { pr_err("Failure creating the pipe to run %s\n", command); - goto out_remove_tmp; + goto out_free_command; } pid = fork(); @@ -1509,7 +1648,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) * If we were using debug info should retry with * original binary.
*/ - goto out_remove_tmp; + goto out_free_command; } nline = 0; @@ -1537,6 +1676,8 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) fclose(file); err = 0; +out_free_command: + free(command); out_remove_tmp: close(stdout_fd[0]); @@ -1550,7 +1691,7 @@ out: out_close_stdout: close(stdout_fd[1]); - goto out_remove_tmp; + goto out_free_command; } static void calc_percent(struct sym_hist *hist, @@ -1613,7 +1754,6 @@ int symbol__annotate(struct symbol *sym, struct map *map, { struct annotate_args args = { .privsize = privsize, - .map = map, .evsel = evsel, }; struct perf_env *env = perf_evsel__env(evsel); @@ -1639,6 +1779,9 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } + args.ms.map = map; + args.ms.sym = sym; + return symbol__disassemble(sym, &args); } @@ -1879,6 +2022,103 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, return more; } +static void FILE__set_percent_color(void *fp __maybe_unused, + double percent __maybe_unused, + bool current __maybe_unused) +{ +} + +static int FILE__set_jumps_percent_color(void *fp __maybe_unused, + int nr __maybe_unused, bool current __maybe_unused) +{ + return 0; +} + +static int FILE__set_color(void *fp __maybe_unused, int color __maybe_unused) +{ + return 0; +} + +static void FILE__printf(void *fp, const char *fmt, ...) 
+{ + va_list args; + + va_start(args, fmt); + vfprintf(fp, fmt, args); + va_end(args); +} + +static void FILE__write_graph(void *fp, int graph) +{ + const char *s; + switch (graph) { + + case DARROW_CHAR: s = "↓"; break; + case UARROW_CHAR: s = "↑"; break; + case LARROW_CHAR: s = "←"; break; + case RARROW_CHAR: s = "→"; break; + default: s = "?"; break; + } + + fputs(s, fp); +} + +int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp) +{ + struct annotation *notes = symbol__annotation(sym); + struct annotation_write_ops ops = { + .first_line = true, + .obj = fp, + .set_color = FILE__set_color, + .set_percent_color = FILE__set_percent_color, + .set_jumps_percent_color = FILE__set_jumps_percent_color, + .printf = FILE__printf, + .write_graph = FILE__write_graph, + }; + struct annotation_line *al; + + list_for_each_entry(al, &notes->src->source, node) { + if (annotation_line__filter(al, notes)) + continue; + annotation_line__write(al, notes, &ops); + fputc('\n', fp); + ops.first_line = false; + } + + return 0; +} + +int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel) +{ + const char *ev_name = perf_evsel__name(evsel); + char buf[1024]; + char *filename; + int err = -1; + FILE *fp; + + if (asprintf(&filename, "%s.annotation", ms->sym->name) < 0) + return -1; + + fp = fopen(filename, "w"); + if (fp == NULL) + goto out_free_filename; + + if (perf_evsel__is_group_event(evsel)) { + perf_evsel__group_desc(evsel, buf, sizeof(buf)); + ev_name = buf; + } + + fprintf(fp, "%s() %s\nEvent: %s\n\n", + ms->sym->name, ms->map->dso->long_name, ev_name); + symbol__annotate_fprintf2(ms->sym, fp); + + fclose(fp); + err = 0; +out_free_filename: + free(filename); + return err; +} + void symbol__annotate_zero_histogram(struct symbol *sym, int evidx) { struct annotation *notes = symbol__annotation(sym); @@ -1938,8 +2178,109 @@ size_t disasm__fprintf(struct list_head *head, FILE *fp) return printed; } +bool disasm_line__is_valid_local_jump(struct disasm_line
*dl, struct symbol *sym) +{ + if (!dl || !dl->ins.ops || !ins__is_jump(&dl->ins) || + !disasm_line__has_local_offset(dl) || dl->ops.target.offset < 0 || + dl->ops.target.offset >= (s64)symbol__size(sym)) + return false; + + return true; +} + +void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) +{ + u64 offset, size = symbol__size(sym); + + /* PLT symbols contain external offsets */ + if (strstr(sym->name, "@plt")) + return; + + for (offset = 0; offset < size; ++offset) { + struct annotation_line *al = notes->offsets[offset]; + struct disasm_line *dl; + + dl = disasm_line(al); + + if (!disasm_line__is_valid_local_jump(dl, sym)) + continue; + + al = notes->offsets[dl->ops.target.offset]; + + /* + * FIXME: Oops, no jump target? Buggy disassembler? Or do we + * have to adjust to the previous offset? + */ + if (al == NULL) + continue; + + if (++al->jump_sources > notes->max_jump_sources) + notes->max_jump_sources = al->jump_sources; + + ++notes->nr_jumps; + } +} + +void annotation__set_offsets(struct annotation *notes, s64 size) +{ + struct annotation_line *al; + + notes->max_line_len = 0; + + list_for_each_entry(al, &notes->src->source, node) { + size_t line_len = strlen(al->line); + + if (notes->max_line_len < line_len) + notes->max_line_len = line_len; + al->idx = notes->nr_entries++; + if (al->offset != -1) { + al->idx_asm = notes->nr_asm_entries++; + /* + * FIXME: short term bandaid to cope with assembly + * routines that comes with labels in the same column + * as the address in objdump, sigh. + * + * E.g.
copy_user_generic_unrolled + */ + if (al->offset < size) + notes->offsets[al->offset] = al; + } else + al->idx_asm = -1; + } +} + +static inline int width_jumps(int n) +{ + if (n >= 100) + return 5; + if (n / 10) + return 2; + return 1; +} + +void annotation__init_column_widths(struct annotation *notes, struct symbol *sym) +{ + notes->widths.addr = notes->widths.target = + notes->widths.min_addr = hex_width(symbol__size(sym)); + notes->widths.max_addr = hex_width(sym->end); + notes->widths.jumps = width_jumps(notes->max_jump_sources); +} + +void annotation__update_column_widths(struct annotation *notes) +{ + if (notes->options->use_offset) + notes->widths.target = notes->widths.min_addr; + else + notes->widths.target = notes->widths.max_addr; + + notes->widths.addr = notes->widths.target; + + if (notes->options->show_nr_jumps) + notes->widths.addr += notes->widths.jumps + 1; +} + static void annotation__calc_lines(struct annotation *notes, struct map *map, - struct rb_root *root, u64 start) + struct rb_root *root) { struct annotation_line *al; struct rb_root tmp_root = RB_ROOT; @@ -1960,8 +2301,8 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, if (percent_max <= 0.5) continue; - al->path = get_srcline(map->dso, start + al->offset, NULL, - false, true, start + al->offset); + al->path = get_srcline(map->dso, notes->start + al->offset, NULL, + false, true, notes->start + al->offset); insert_source_line(&tmp_root, al); } @@ -1972,9 +2313,40 @@ static void symbol__calc_lines(struct symbol *sym, struct map *map, struct rb_root *root) { struct annotation *notes = symbol__annotation(sym); - u64 start = map__rip_2objdump(map, sym->start); - annotation__calc_lines(notes, map, root, start); + annotation__calc_lines(notes, map, root); +} + +int symbol__tty_annotate2(struct symbol *sym, struct map *map, + struct perf_evsel *evsel, bool print_lines, + bool full_paths) +{ + struct dso *dso = map->dso; + struct rb_root source_line = RB_ROOT; + 
struct annotation_options opts = annotation__default_options; + const char *ev_name = perf_evsel__name(evsel); + char buf[1024]; + + if (symbol__annotate2(sym, map, evsel, &opts, NULL) < 0) + return -1; + + if (print_lines) { + srcline_full_filename = full_paths; + symbol__calc_lines(sym, map, &source_line); + print_summary(&source_line, dso->long_name); + } + + if (perf_evsel__is_group_event(evsel)) { + perf_evsel__group_desc(evsel, buf, sizeof(buf)); + ev_name = buf; + } + + fprintf(stdout, "%s() %s\nEvent: %s\n\n", sym->name, dso->long_name, ev_name); + symbol__annotate_fprintf2(sym, stdout); + + annotated_source__purge(symbol__annotation(sym)->src); + + return 0; } int symbol__tty_annotate(struct symbol *sym, struct map *map, @@ -2007,3 +2379,276 @@ bool ui__has_annotation(void) { return use_browser == 1 && perf_hpp_list.sym; } + + +double annotation_line__max_percent(struct annotation_line *al, struct annotation *notes) +{ + double percent_max = 0.0; + int i; + + for (i = 0; i < notes->nr_events; i++) { + if (al->samples[i].percent > percent_max) + percent_max = al->samples[i].percent; + } + + return percent_max; +} + +static void disasm_line__write(struct disasm_line *dl, struct annotation *notes, + void *obj, char *bf, size_t size, + void (*obj__printf)(void *obj, const char *fmt, ...), + void (*obj__write_graph)(void *obj, int graph)) +{ + if (dl->ins.ops && dl->ins.ops->scnprintf) { + if (ins__is_jump(&dl->ins)) { + bool fwd; + + if (dl->ops.target.outside) + goto call_like; + fwd = dl->ops.target.offset > dl->al.offset; + obj__write_graph(obj, fwd ? 
DARROW_CHAR : UARROW_CHAR); + obj__printf(obj, " "); + } else if (ins__is_call(&dl->ins)) { +call_like: + obj__write_graph(obj, RARROW_CHAR); + obj__printf(obj, " "); + } else if (ins__is_ret(&dl->ins)) { + obj__write_graph(obj, LARROW_CHAR); + obj__printf(obj, " "); + } else { + obj__printf(obj, " "); + } + } else { + obj__printf(obj, " "); + } + + disasm_line__scnprintf(dl, bf, size, !notes->options->use_offset); +} + +static void __annotation_line__write(struct annotation_line *al, struct annotation *notes, + bool first_line, bool current_entry, bool change_color, int width, + void *obj, + int (*obj__set_color)(void *obj, int color), + void (*obj__set_percent_color)(void *obj, double percent, bool current), + int (*obj__set_jumps_percent_color)(void *obj, int nr, bool current), + void (*obj__printf)(void *obj, const char *fmt, ...), + void (*obj__write_graph)(void *obj, int graph)) + +{ + double percent_max = annotation_line__max_percent(al, notes); + int pcnt_width = annotation__pcnt_width(notes), + cycles_width = annotation__cycles_width(notes); + bool show_title = false; + char bf[256]; + int printed; + + if (first_line && (al->offset == -1 || percent_max == 0.0)) { + if (notes->have_cycles) { + if (al->ipc == 0.0 && al->cycles == 0) + show_title = true; + } else + show_title = true; + } + + if (al->offset != -1 && percent_max != 0.0) { + int i; + + for (i = 0; i < notes->nr_events; i++) { + obj__set_percent_color(obj, al->samples[i].percent, current_entry); + if (notes->options->show_total_period) { + obj__printf(obj, "%11" PRIu64 " ", al->samples[i].he.period); + } else if (notes->options->show_nr_samples) { + obj__printf(obj, "%6" PRIu64 " ", + al->samples[i].he.nr_samples); + } else { + obj__printf(obj, "%6.2f ", + al->samples[i].percent); + } + } + } else { + obj__set_percent_color(obj, 0, current_entry); + + if (!show_title) + obj__printf(obj, "%-*s", pcnt_width, " "); + else { + obj__printf(obj, "%-*s", pcnt_width, + notes->options->show_total_period ? 
"Period" : + notes->options->show_nr_samples ? "Samples" : "Percent"); + } + } + + if (notes->have_cycles) { + if (al->ipc) + obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->ipc); + else if (!show_title) + obj__printf(obj, "%*s", ANNOTATION__IPC_WIDTH, " "); + else + obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC"); + + if (al->cycles) + obj__printf(obj, "%*" PRIu64 " ", + ANNOTATION__CYCLES_WIDTH - 1, al->cycles); + else if (!show_title) + obj__printf(obj, "%*s", ANNOTATION__CYCLES_WIDTH, " "); + else + obj__printf(obj, "%*s ", ANNOTATION__CYCLES_WIDTH - 1, "Cycle"); + } + + obj__printf(obj, " "); + + if (!*al->line) + obj__printf(obj, "%-*s", width - pcnt_width - cycles_width, " "); + else if (al->offset == -1) { + if (al->line_nr && notes->options->show_linenr) + printed = scnprintf(bf, sizeof(bf), "%-*d ", notes->widths.addr + 1, al->line_nr); + else + printed = scnprintf(bf, sizeof(bf), "%-*s ", notes->widths.addr, " "); + obj__printf(obj, bf); + obj__printf(obj, "%-*s", width - printed - pcnt_width - cycles_width + 1, al->line); + } else { + u64 addr = al->offset; + int color = -1; + + if (!notes->options->use_offset) + addr += notes->start; + + if (!notes->options->use_offset) { + printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr); + } else { + if (al->jump_sources) { + if (notes->options->show_nr_jumps) { + int prev; + printed = scnprintf(bf, sizeof(bf), "%*d ", + notes->widths.jumps, + al->jump_sources); + prev = obj__set_jumps_percent_color(obj, al->jump_sources, + current_entry); + obj__printf(obj, bf); + obj__set_color(obj, prev); + } + + printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ", + notes->widths.target, addr); + } else { + printed = scnprintf(bf, sizeof(bf), "%-*s ", + notes->widths.addr, " "); + } + } + + if (change_color) + color = obj__set_color(obj, HE_COLORSET_ADDR); + obj__printf(obj, bf); + if (change_color) + obj__set_color(obj, color); + + disasm_line__write(disasm_line(al), notes, obj, bf, sizeof(bf), 
obj__printf, obj__write_graph); + + obj__printf(obj, "%-*s", width - pcnt_width - cycles_width - 3 - printed, bf); + } + +} + +void annotation_line__write(struct annotation_line *al, struct annotation *notes, + struct annotation_write_ops *ops) +{ + __annotation_line__write(al, notes, ops->first_line, ops->current_entry, + ops->change_color, ops->width, ops->obj, + ops->set_color, ops->set_percent_color, + ops->set_jumps_percent_color, ops->printf, + ops->write_graph); +} + +int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *evsel, + struct annotation_options *options, struct arch **parch) +{ + struct annotation *notes = symbol__annotation(sym); + size_t size = symbol__size(sym); + int nr_pcnt = 1, err; + + notes->offsets = zalloc(size * sizeof(struct annotation_line *)); + if (notes->offsets == NULL) + return -1; + + if (perf_evsel__is_group_event(evsel)) + nr_pcnt = evsel->nr_members; + + err = symbol__annotate(sym, map, evsel, 0, parch); + if (err) + goto out_free_offsets; + + notes->options = options; + + symbol__calc_percent(sym, evsel); + + notes->start = map__rip_2objdump(map, sym->start); + + annotation__set_offsets(notes, size); + annotation__mark_jump_targets(notes, sym); + annotation__compute_ipc(notes, size); + annotation__init_column_widths(notes, sym); + notes->nr_events = nr_pcnt; + + annotation__update_column_widths(notes); + + return 0; + +out_free_offsets: + zfree(&notes->offsets); + return -1; +} + +#define ANNOTATION__CFG(n) \ + { .name = #n, .value = &annotation__default_options.n, } + +/* + * Keep the entries sorted, they are bsearch'ed + */ +static struct annotation_config { + const char *name; + bool *value; +} annotation__configs[] = { + ANNOTATION__CFG(hide_src_code), + ANNOTATION__CFG(jump_arrows), + ANNOTATION__CFG(show_linenr), + ANNOTATION__CFG(show_nr_jumps), + ANNOTATION__CFG(show_nr_samples), + ANNOTATION__CFG(show_total_period), + ANNOTATION__CFG(use_offset), +}; + +#undef ANNOTATION__CFG + +static int
annotation_config__cmp(const void *name, const void *cfgp) +{ + const struct annotation_config *cfg = cfgp; + + return strcmp(name, cfg->name); +} + +static int annotation__config(const char *var, const char *value, + void *data __maybe_unused) +{ + struct annotation_config *cfg; + const char *name; + + if (!strstarts(var, "annotate.")) + return 0; + + name = var + 9; + cfg = bsearch(name, annotation__configs, ARRAY_SIZE(annotation__configs), + sizeof(struct annotation_config), annotation_config__cmp); + + if (cfg == NULL) + pr_debug("%s variable unknown, ignoring...", var); + else + *cfg->value = perf_config_bool(name, value); + return 0; +} + +void annotation_config__init(void) +{ + perf_config(annotation__config, NULL); + + annotation__default_options.show_total_period = symbol_conf.show_total_period; + annotation__default_options.show_nr_samples = symbol_conf.show_nr_samples; +} diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index ce427445671f..ff7e3df31efa 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -24,9 +24,11 @@ struct ins_operands { struct { char *raw; char *name; + struct symbol *sym; u64 addr; s64 offset; bool offset_avail; + bool outside; } target; union { struct { @@ -45,7 +47,7 @@ struct arch; struct ins_ops { void (*free)(struct ins_operands *ops); - int (*parse)(struct arch *arch, struct ins_operands *ops, struct map *map); + int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms); int (*scnprintf)(struct ins *ins, char *bf, size_t size, struct ins_operands *ops); }; @@ -57,6 +59,21 @@ bool ins__is_lock(const struct ins *ins); int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops); bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); +#define ANNOTATION__IPC_WIDTH 6 +#define ANNOTATION__CYCLES_WIDTH 6 + +struct annotation_options { + bool hide_src_code, + use_offset, + jump_arrows, + show_linenr, + show_nr_jumps, + 
show_nr_samples, + show_total_period; +}; + +extern struct annotation_options annotation__default_options; + struct annotation; struct sym_hist_entry { @@ -76,10 +93,13 @@ struct annotation_line { s64 offset; char *line; int line_nr; + int jump_sources; float ipc; u64 cycles; size_t privsize; char *path; + u32 idx; + int idx_asm; int samples_nr; struct annotation_data samples[0]; }; @@ -97,14 +117,40 @@ static inline struct disasm_line *disasm_line(struct annotation_line *al) return al ? container_of(al, struct disasm_line, al) : NULL; } -static inline bool disasm_line__has_offset(const struct disasm_line *dl) +/* + * Is this offset in the same function as the line it is used? + * asm functions jump to other functions, for instance. + */ +static inline bool disasm_line__has_local_offset(const struct disasm_line *dl) { - return dl->ops.target.offset_avail; + return dl->ops.target.offset_avail && !dl->ops.target.outside; } +/* + * Can we draw an arrow from the jump to its target, for instance? I.e. + * is the jump and its target in the same function? 
+ */ +bool disasm_line__is_valid_local_jump(struct disasm_line *dl, struct symbol *sym); + void disasm_line__free(struct disasm_line *dl); struct annotation_line * annotation_line__next(struct annotation_line *pos, struct list_head *head); + +struct annotation_write_ops { + bool first_line, current_entry, change_color; + int width; + void *obj; + int (*set_color)(void *obj, int color); + void (*set_percent_color)(void *obj, double percent, bool current); + int (*set_jumps_percent_color)(void *obj, int nr, bool current); + void (*printf)(void *obj, const char *fmt, ...); + void (*write_graph)(void *obj, int graph); +}; + +double annotation_line__max_percent(struct annotation_line *al, struct annotation *notes); +void annotation_line__write(struct annotation_line *al, struct annotation *notes, + struct annotation_write_ops *ops); + int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); @@ -150,9 +196,47 @@ struct annotated_source { struct annotation { pthread_mutex_t lock; u64 max_coverage; + u64 start; + struct annotation_options *options; + struct annotation_line **offsets; + int nr_events; + int nr_jumps; + int max_jump_sources; + int nr_entries; + int nr_asm_entries; + u16 max_line_len; + struct { + u8 addr; + u8 jumps; + u8 target; + u8 min_addr; + u8 max_addr; + } widths; + bool have_cycles; struct annotated_source *src; }; +static inline int annotation__cycles_width(struct annotation *notes) +{ + return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; +} + +static inline int annotation__pcnt_width(struct annotation *notes) +{ + return (notes->options->show_total_period ? 
12 : 7) * notes->nr_events; +} + +static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes) +{ + return notes->options->hide_src_code && al->offset == -1; +} + +void annotation__set_offsets(struct annotation *notes, s64 size); +void annotation__compute_ipc(struct annotation *notes, size_t size); +void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym); +void annotation__update_column_widths(struct annotation *notes); +void annotation__init_column_widths(struct annotation *notes, struct symbol *sym); + static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx) { return (((void *)&notes->src->histograms) + @@ -180,6 +264,10 @@ void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, struct arch **parch); +int symbol__annotate2(struct symbol *sym, struct map *map, + struct perf_evsel *evsel, + struct annotation_options *options, + struct arch **parch); enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, @@ -204,16 +292,23 @@ int symbol__strerror_disassemble(struct symbol *sym, struct map *map, int symbol__annotate_printf(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool full_paths, int min_pcnt, int max_lines, int context); +int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp); void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); void annotated_source__purge(struct annotated_source *as); +int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel); + bool ui__has_annotation(void); int symbol__tty_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool print_lines, bool full_paths, int min_pcnt, int max_lines); +int symbol__tty_annotate2(struct symbol *sym, struct map *map, + struct perf_evsel *evsel, bool 
print_lines, + bool full_paths); + #ifdef HAVE_SLANG_SUPPORT int symbol__tui_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, @@ -231,4 +326,6 @@ static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused, extern const char *disassembler_style; +void annotation_config__init(void); + #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 9faf3b5367db..fb357a00dd86 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -60,6 +60,12 @@ #include "sane_ctype.h" #include "symbol/kallsyms.h" +static bool auxtrace__dont_decode(struct perf_session *session) +{ + return !session->itrace_synth_opts || + session->itrace_synth_opts->dont_decode; +} + int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, struct auxtrace_mmap_params *mp, void *userpg, int fd) @@ -227,9 +233,9 @@ static void *auxtrace_copy_data(u64 size, struct perf_session *session) return p; } -static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues, - unsigned int idx, - struct auxtrace_buffer *buffer) +static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues, + unsigned int idx, + struct auxtrace_buffer *buffer) { struct auxtrace_queue *queue; int err; @@ -280,7 +286,7 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues, return -ENOMEM; b->size = BUFFER_LIMIT_FOR_32_BIT; b->consecutive = consecutive; - err = auxtrace_queues__add_buffer(queues, idx, b); + err = auxtrace_queues__queue_buffer(queues, idx, b); if (err) { auxtrace_buffer__free(b); return err; @@ -296,11 +302,14 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues, return 0; } -static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues, - struct perf_session *session, - unsigned int idx, - struct auxtrace_buffer *buffer) +static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues, + struct perf_session *session, + unsigned int idx, + struct 
auxtrace_buffer *buffer, + struct auxtrace_buffer **buffer_ptr) { + int err; + if (session->one_mmap) { buffer->data = buffer->data_offset - session->one_mmap_offset + session->one_mmap_addr; @@ -311,14 +320,20 @@ static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues, buffer->data_needs_freeing = true; } else if (BITS_PER_LONG == 32 && buffer->size > BUFFER_LIMIT_FOR_32_BIT) { - int err; - err = auxtrace_queues__split_buffer(queues, idx, buffer); if (err) return err; } - return auxtrace_queues__add_buffer(queues, idx, buffer); + err = auxtrace_queues__queue_buffer(queues, idx, buffer); + if (err) + return err; + + /* FIXME: Doesn't work for split buffer */ + if (buffer_ptr) + *buffer_ptr = buffer; + + return 0; } static bool filter_cpu(struct perf_session *session, int cpu) @@ -353,13 +368,11 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues, buffer->size = event->auxtrace.size; idx = event->auxtrace.idx; - err = auxtrace_queues__add_event_buffer(queues, session, idx, buffer); + err = auxtrace_queues__add_buffer(queues, session, idx, buffer, + buffer_ptr); if (err) goto out_err; - if (buffer_ptr) - *buffer_ptr = buffer; - return 0; out_err: @@ -762,6 +775,9 @@ int auxtrace_queues__process_index(struct auxtrace_queues *queues, size_t i; int err; + if (auxtrace__dont_decode(session)) + return 0; + list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) { for (i = 0; i < auxtrace_index->nr; i++) { ent = &auxtrace_index->entries[i]; @@ -892,12 +908,6 @@ out_free: return err; } -static bool auxtrace__dont_decode(struct perf_session *session) -{ - return !session->itrace_synth_opts || - session->itrace_synth_opts->dont_decode; -} - int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_session *session) diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 453c148d2158..e731f55da072 100644 --- a/tools/perf/util/auxtrace.h +++ 
b/tools/perf/util/auxtrace.h @@ -130,6 +130,7 @@ struct auxtrace_index { /** * struct auxtrace - session callbacks to allow AUX area data decoding. * @process_event: lets the decoder see all session events + * @process_auxtrace_event: process a PERF_RECORD_AUXTRACE event * @flush_events: process any remaining data * @free_events: free resources associated with event processing * @free: free resources associated with the session @@ -301,6 +302,7 @@ struct auxtrace_mmap_params { * @parse_snapshot_options: parse snapshot options * @reference: provide a 64-bit reference number for auxtrace_event * @read_finish: called after reading from an auxtrace mmap + * @alignment: alignment (if any) for AUX area data */ struct auxtrace_record { int (*recording_options)(struct auxtrace_record *itr, diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 7f8553630c4d..537eadd81914 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -316,7 +316,6 @@ static int machine__write_buildid_table(struct machine *machine, struct feat_fd *fd) { int err = 0; - char nm[PATH_MAX]; struct dso *pos; u16 kmisc = PERF_RECORD_MISC_KERNEL, umisc = PERF_RECORD_MISC_USER; @@ -338,9 +337,8 @@ static int machine__write_buildid_table(struct machine *machine, name = pos->short_name; name_len = pos->short_name_len; } else if (dso__is_kcore(pos)) { - machine__mmap_name(machine, nm, sizeof(nm)); - name = nm; - name_len = strlen(nm); + name = machine->mmap_name; + name_len = strlen(name); } else { name = pos->long_name; name_len = pos->long_name_len; @@ -813,12 +811,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine) bool is_kallsyms = dso__is_kallsyms(dso); bool is_vdso = dso__is_vdso(dso); const char *name = dso->long_name; - char nm[PATH_MAX]; if (dso__is_kcore(dso)) { is_kallsyms = true; - machine__mmap_name(machine, nm, sizeof(nm)); - name = nm; + name = machine->mmap_name; } return build_id_cache__add_b(dso->build_id, 
sizeof(dso->build_id), name, dso->nsinfo, is_kallsyms, is_vdso); diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 984f69144f87..decb91f9da82 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -71,7 +71,7 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen) return -1; } -static int open_cgroup(char *name) +static int open_cgroup(const char *name) { char path[PATH_MAX + 1]; char mnt[PATH_MAX + 1]; @@ -81,7 +81,7 @@ static int open_cgroup(char *name) if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1)) return -1; - snprintf(path, PATH_MAX, "%s/%s", mnt, name); + scnprintf(path, PATH_MAX, "%s/%s", mnt, name); fd = open(path, O_RDONLY); if (fd == -1) @@ -90,41 +90,64 @@ static int open_cgroup(char *name) return fd; } -static int add_cgroup(struct perf_evlist *evlist, char *str) +static struct cgroup *evlist__find_cgroup(struct perf_evlist *evlist, const char *str) { struct perf_evsel *counter; - struct cgroup_sel *cgrp = NULL; - int n; + struct cgroup *cgrp = NULL; /* * check if cgrp is already defined, if so we reuse it */ evlist__for_each_entry(evlist, counter) { - cgrp = counter->cgrp; - if (!cgrp) + if (!counter->cgrp) continue; - if (!strcmp(cgrp->name, str)) { - refcount_inc(&cgrp->refcnt); + if (!strcmp(counter->cgrp->name, str)) { + cgrp = cgroup__get(counter->cgrp); break; } - - cgrp = NULL; } - if (!cgrp) { - cgrp = zalloc(sizeof(*cgrp)); - if (!cgrp) - return -1; + return cgrp; +} - cgrp->name = str; - refcount_set(&cgrp->refcnt, 1); +static struct cgroup *cgroup__new(const char *name) +{ + struct cgroup *cgroup = zalloc(sizeof(*cgroup)); - cgrp->fd = open_cgroup(str); - if (cgrp->fd == -1) { - free(cgrp); - return -1; - } + if (cgroup != NULL) { + refcount_set(&cgroup->refcnt, 1); + + cgroup->name = strdup(name); + if (!cgroup->name) + goto out_err; + cgroup->fd = open_cgroup(name); + if (cgroup->fd == -1) + goto out_free_name; } + return cgroup; + +out_free_name: + free(cgroup->name); +out_err: + free(cgroup); + 
return NULL; +} + +struct cgroup *evlist__findnew_cgroup(struct perf_evlist *evlist, const char *name) +{ + struct cgroup *cgroup = evlist__find_cgroup(evlist, name); + + return cgroup ?: cgroup__new(name); +} + +static int add_cgroup(struct perf_evlist *evlist, const char *str) +{ + struct perf_evsel *counter; + struct cgroup *cgrp = evlist__findnew_cgroup(evlist, str); + int n; + + if (!cgrp) + return -1; /* * find corresponding event * if add cgroup N, then need to find event N @@ -135,31 +158,58 @@ static int add_cgroup(struct perf_evlist *evlist, char *str) goto found; n++; } - if (refcount_dec_and_test(&cgrp->refcnt)) - free(cgrp); + cgroup__put(cgrp); return -1; found: counter->cgrp = cgrp; return 0; } -void close_cgroup(struct cgroup_sel *cgrp) +static void cgroup__delete(struct cgroup *cgroup) +{ + close(cgroup->fd); + zfree(&cgroup->name); + free(cgroup); +} + +void cgroup__put(struct cgroup *cgrp) { if (cgrp && refcount_dec_and_test(&cgrp->refcnt)) { - close(cgrp->fd); - zfree(&cgrp->name); - free(cgrp); + cgroup__delete(cgrp); } } -int parse_cgroups(const struct option *opt __maybe_unused, const char *str, +struct cgroup *cgroup__get(struct cgroup *cgroup) +{ + if (cgroup) + refcount_inc(&cgroup->refcnt); + return cgroup; +} + +static void evsel__set_default_cgroup(struct perf_evsel *evsel, struct cgroup *cgroup) +{ + if (evsel->cgrp == NULL) + evsel->cgrp = cgroup__get(cgroup); +} + +void evlist__set_default_cgroup(struct perf_evlist *evlist, struct cgroup *cgroup) +{ + struct perf_evsel *evsel; + + evlist__for_each_entry(evlist, evsel) + evsel__set_default_cgroup(evsel, cgroup); +} + +int parse_cgroups(const struct option *opt, const char *str, int unset __maybe_unused) { struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; + struct perf_evsel *counter; + struct cgroup *cgrp = NULL; const char *p, *e, *eos = str + strlen(str); char *s; - int ret; + int ret, i; if (list_empty(&evlist->entries)) { fprintf(stderr, "must define events before 
cgroups\n"); @@ -177,10 +227,9 @@ int parse_cgroups(const struct option *opt __maybe_unused, const char *str, if (!s) return -1; ret = add_cgroup(evlist, s); - if (ret) { - free(s); + free(s); + if (ret) return -1; - } } /* nr_cgroups is increased een for empty cgroups */ nr_cgroups++; @@ -188,5 +237,18 @@ int parse_cgroups(const struct option *opt __maybe_unused, const char *str, break; str = p+1; } + /* for the case one cgroup combine to multiple events */ + i = 0; + if (nr_cgroups == 1) { + evlist__for_each_entry(evlist, counter) { + if (i == 0) + cgrp = counter->cgrp; + else { + counter->cgrp = cgrp; + refcount_inc(&cgrp->refcnt); + } + i++; + } + } return 0; } diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index afafc87e9201..f033a80c1b14 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -6,7 +6,7 @@ struct option; -struct cgroup_sel { +struct cgroup { char *name; int fd; refcount_t refcnt; @@ -14,7 +14,16 @@ struct cgroup_sel { extern int nr_cgroups; /* number of explicit cgroups defined */ -void close_cgroup(struct cgroup_sel *cgrp); + +struct cgroup *cgroup__get(struct cgroup *cgroup); +void cgroup__put(struct cgroup *cgroup); + +struct perf_evlist; + +struct cgroup *evlist__findnew_cgroup(struct perf_evlist *evlist, const char *name); + +void evlist__set_default_cgroup(struct perf_evlist *evlist, struct cgroup *cgroup); + int parse_cgroups(const struct option *opt, const char *str, int unset); #endif /* __CGROUP_H__ */ diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 1fb01849f1c7..640af88331b4 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -78,6 +78,8 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) { ocsd_datapath_resp_t dp_ret; + decoder->prev_return = OCSD_RESP_CONT; + dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, 0, 0, NULL, NULL); if 
(OCSD_DATA_RESP_IS_FATAL(dp_ret)) @@ -253,16 +255,16 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) decoder->packet_count = 0; for (i = 0; i < MAX_BUFFER; i++) { decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL; - decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; - decoder->packet_buffer[i].exc = false; - decoder->packet_buffer[i].exc_ret = false; - decoder->packet_buffer[i].cpu = INT_MIN; + decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[i].last_instr_taken_branch = false; + decoder->packet_buffer[i].exc = false; + decoder->packet_buffer[i].exc_ret = false; + decoder->packet_buffer[i].cpu = INT_MIN; } } static ocsd_datapath_resp_t cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, - const ocsd_generic_trace_elem *elem, const u8 trace_chan_id, enum cs_etm_sample_type sample_type) { @@ -278,18 +280,16 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, return OCSD_RESP_FATAL_SYS_ERR; et = decoder->tail; + et = (et + 1) & (MAX_BUFFER - 1); + decoder->tail = et; + decoder->packet_count++; + decoder->packet_buffer[et].sample_type = sample_type; - decoder->packet_buffer[et].start_addr = elem->st_addr; - decoder->packet_buffer[et].end_addr = elem->en_addr; decoder->packet_buffer[et].exc = false; decoder->packet_buffer[et].exc_ret = false; decoder->packet_buffer[et].cpu = *((int *)inode->priv); - - /* Wrap around if need be */ - et = (et + 1) & (MAX_BUFFER - 1); - - decoder->tail = et; - decoder->packet_count++; + decoder->packet_buffer[et].start_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[et].end_addr = 0xdeadbeefdeadbeefUL; if (decoder->packet_count == MAX_BUFFER - 1) return OCSD_RESP_WAIT; @@ -297,6 +297,47 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, return OCSD_RESP_CONT; } +static ocsd_datapath_resp_t +cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, + const ocsd_generic_trace_elem *elem, + const uint8_t 
trace_chan_id) +{ + int ret = 0; + struct cs_etm_packet *packet; + + ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + CS_ETM_RANGE); + if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) + return ret; + + packet = &decoder->packet_buffer[decoder->tail]; + + packet->start_addr = elem->st_addr; + packet->end_addr = elem->en_addr; + switch (elem->last_i_type) { + case OCSD_INSTR_BR: + case OCSD_INSTR_BR_INDIRECT: + packet->last_instr_taken_branch = elem->last_instr_exec; + break; + case OCSD_INSTR_ISB: + case OCSD_INSTR_DSB_DMB: + case OCSD_INSTR_OTHER: + default: + packet->last_instr_taken_branch = false; + break; + } + + return ret; +} + +static ocsd_datapath_resp_t +cs_etm_decoder__buffer_trace_on(struct cs_etm_decoder *decoder, + const uint8_t trace_chan_id) +{ + return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + CS_ETM_TRACE_ON); +} + static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( const void *context, const ocsd_trc_index_t indx __maybe_unused, @@ -313,12 +354,13 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( decoder->trace_on = false; break; case OCSD_GEN_TRC_ELEM_TRACE_ON: + resp = cs_etm_decoder__buffer_trace_on(decoder, + trace_chan_id); decoder->trace_on = true; break; case OCSD_GEN_TRC_ELEM_INSTR_RANGE: - resp = cs_etm_decoder__buffer_packet(decoder, elem, - trace_chan_id, - CS_ETM_RANGE); + resp = cs_etm_decoder__buffer_range(decoder, elem, + trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION: decoder->packet_buffer[decoder->tail].exc = true; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 3d2e6205d186..743f5f444304 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -24,12 +24,14 @@ struct cs_etm_buffer { enum cs_etm_sample_type { CS_ETM_RANGE = 1 << 0, + CS_ETM_TRACE_ON = 1 << 1, }; struct cs_etm_packet { enum cs_etm_sample_type sample_type; u64 
start_addr; u64 end_addr; + u8 last_instr_taken_branch; u8 exc; u8 exc_ret; int cpu; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index b9f0a53dfa65..1b0d422373be 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -32,6 +32,14 @@ #define MAX_TIMESTAMP (~0ULL) +/* + * A64 instructions are always 4 bytes + * + * Only A64 is supported, so can use this constant for converting between + * addresses and instruction counts, calculating offsets etc + */ +#define A64_INSTR_SIZE 4 + struct cs_etm_auxtrace { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -45,11 +53,15 @@ struct cs_etm_auxtrace { u8 snapshot_mode; u8 data_queued; u8 sample_branches; + u8 sample_instructions; int num_cpu; u32 auxtrace_type; u64 branches_sample_type; u64 branches_id; + u64 instructions_sample_type; + u64 instructions_sample_period; + u64 instructions_id; u64 **metadata; u64 kernel_start; unsigned int pmu_type; @@ -68,6 +80,12 @@ struct cs_etm_queue { u64 time; u64 timestamp; u64 offset; + u64 period_instructions; + struct branch_stack *last_branch; + struct branch_stack *last_branch_rb; + size_t last_branch_pos; + struct cs_etm_packet *prev_packet; + struct cs_etm_packet *packet; }; static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); @@ -174,6 +192,16 @@ static void cs_etm__free_queue(void *priv) { struct cs_etm_queue *etmq = priv; + if (!etmq) + return; + + thread__zput(etmq->thread); + cs_etm_decoder__free(etmq->decoder); + zfree(&etmq->event_buf); + zfree(&etmq->last_branch); + zfree(&etmq->last_branch_rb); + zfree(&etmq->prev_packet); + zfree(&etmq->packet); free(etmq); } @@ -270,11 +298,35 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params; struct cs_etm_queue *etmq; + size_t szp = sizeof(struct cs_etm_packet); etmq = zalloc(sizeof(*etmq)); if (!etmq) return NULL; + etmq->packet = zalloc(szp); + if (!etmq->packet) + goto 
out_free; + + if (etm->synth_opts.last_branch || etm->sample_branches) { + etmq->prev_packet = zalloc(szp); + if (!etmq->prev_packet) + goto out_free; + } + + if (etm->synth_opts.last_branch) { + size_t sz = sizeof(struct branch_stack); + + sz += etm->synth_opts.last_branch_sz * + sizeof(struct branch_entry); + etmq->last_branch = zalloc(sz); + if (!etmq->last_branch) + goto out_free; + etmq->last_branch_rb = zalloc(sz); + if (!etmq->last_branch_rb) + goto out_free; + } + etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); if (!etmq->event_buf) goto out_free; @@ -329,6 +381,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, goto out_free_decoder; etmq->offset = 0; + etmq->period_instructions = 0; return etmq; @@ -336,6 +389,10 @@ out_free_decoder: cs_etm_decoder__free(etmq->decoder); out_free: zfree(&etmq->event_buf); + zfree(&etmq->last_branch); + zfree(&etmq->last_branch_rb); + zfree(&etmq->prev_packet); + zfree(&etmq->packet); free(etmq); return NULL; @@ -389,6 +446,129 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) return 0; } +static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq) +{ + struct branch_stack *bs_src = etmq->last_branch_rb; + struct branch_stack *bs_dst = etmq->last_branch; + size_t nr = 0; + + /* + * Set the number of records before early exit: ->nr is used to + * determine how many branches to copy from ->entries. + */ + bs_dst->nr = bs_src->nr; + + /* + * Early exit when there is nothing to copy. + */ + if (!bs_src->nr) + return; + + /* + * As bs_src->entries is a circular buffer, we need to copy from it in + * two steps. First, copy the branches from the most recently inserted + * branch ->last_branch_pos until the end of bs_src->entries buffer. 
+ */ + nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos; + memcpy(&bs_dst->entries[0], + &bs_src->entries[etmq->last_branch_pos], + sizeof(struct branch_entry) * nr); + + /* + * If we wrapped around at least once, the branches from the beginning + * of the bs_src->entries buffer and until the ->last_branch_pos element + * are older valid branches: copy them over. The total number of + * branches copied over will be equal to the number of branches asked by + * the user in last_branch_sz. + */ + if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { + memcpy(&bs_dst->entries[nr], + &bs_src->entries[0], + sizeof(struct branch_entry) * etmq->last_branch_pos); + } +} + +static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq) +{ + etmq->last_branch_pos = 0; + etmq->last_branch_rb->nr = 0; +} + +static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet) +{ + /* + * The packet records the execution range with an exclusive end address + * + * A64 instructions are constant size, so the last executed + * instruction is A64_INSTR_SIZE before the end address + * Will need to do instruction level decode for T32 instructions as + * they can be variable size (not yet supported). + */ + return packet->end_addr - A64_INSTR_SIZE; +} + +static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet) +{ + /* + * Only A64 instructions are currently supported, so can get + * instruction count by dividing. + * Will need to do instruction level decode for T32 instructions as + * they can be variable size (not yet supported). + */ + return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE; +} + +static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet, + u64 offset) +{ + /* + * Only A64 instructions are currently supported, so can get + * instruction address by multiplying. + * Will need to do instruction level decode for T32 instructions as + * they can be variable size (not yet supported). 
+ */ + return packet->start_addr + offset * A64_INSTR_SIZE; +} + +static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq) +{ + struct branch_stack *bs = etmq->last_branch_rb; + struct branch_entry *be; + + /* + * The branches are recorded in a circular buffer in reverse + * chronological order: we start recording from the last element of the + * buffer down. After writing the first element of the stack, move the + * insert position back to the end of the buffer. + */ + if (!etmq->last_branch_pos) + etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; + + etmq->last_branch_pos -= 1; + + be = &bs->entries[etmq->last_branch_pos]; + be->from = cs_etm__last_executed_instr(etmq->prev_packet); + be->to = etmq->packet->start_addr; + /* No support for mispredict */ + be->flags.mispred = 0; + be->flags.predicted = 1; + + /* + * Increment bs->nr until reaching the number of last branches asked by + * the user on the command line. + */ + if (bs->nr < etmq->etm->synth_opts.last_branch_sz) + bs->nr += 1; +} + +static int cs_etm__inject_event(union perf_event *event, + struct perf_sample *sample, u64 type) +{ + event->header.size = perf_event__sample_event_size(sample, type, 0); + return perf_event__synthesize_sample(event, type, 0, sample); +} + + static int cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) { @@ -453,35 +633,105 @@ static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, } } +static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, + u64 addr, u64 period) +{ + int ret = 0; + struct cs_etm_auxtrace *etm = etmq->etm; + union perf_event *event = etmq->event_buf; + struct perf_sample sample = {.ip = 0,}; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + sample.ip = addr; + sample.pid = etmq->pid; + sample.tid = etmq->tid; + sample.id = etmq->etm->instructions_id; + 
sample.stream_id = etmq->etm->instructions_id; + sample.period = period; + sample.cpu = etmq->packet->cpu; + sample.flags = 0; + sample.insn_len = 1; + sample.cpumode = event->header.misc; + + if (etm->synth_opts.last_branch) { + cs_etm__copy_last_branch_rb(etmq); + sample.branch_stack = etmq->last_branch; + } + + if (etm->synth_opts.inject) { + ret = cs_etm__inject_event(event, &sample, + etm->instructions_sample_type); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(etm->session, event, &sample); + + if (ret) + pr_err( + "CS ETM Trace: failed to deliver instruction event, error %d\n", + ret); + + if (etm->synth_opts.last_branch) + cs_etm__reset_last_branch_rb(etmq); + + return ret; +} + /* * The cs etm packet encodes an instruction range between a branch target * and the next taken branch. Generate sample accordingly. */ -static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, - struct cs_etm_packet *packet) +static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) { int ret = 0; struct cs_etm_auxtrace *etm = etmq->etm; struct perf_sample sample = {.ip = 0,}; union perf_event *event = etmq->event_buf; - u64 start_addr = packet->start_addr; - u64 end_addr = packet->end_addr; + struct dummy_branch_stack { + u64 nr; + struct branch_entry entries; + } dummy_bs; event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; event->sample.header.size = sizeof(struct perf_event_header); - sample.ip = start_addr; + sample.ip = cs_etm__last_executed_instr(etmq->prev_packet); sample.pid = etmq->pid; sample.tid = etmq->tid; - sample.addr = end_addr; + sample.addr = etmq->packet->start_addr; sample.id = etmq->etm->branches_id; sample.stream_id = etmq->etm->branches_id; sample.period = 1; - sample.cpu = packet->cpu; + sample.cpu = etmq->packet->cpu; sample.flags = 0; sample.cpumode = PERF_RECORD_MISC_USER; + /* + * perf report cannot handle events without a branch stack + */ + if 
(etm->synth_opts.last_branch) { + dummy_bs = (struct dummy_branch_stack){ + .nr = 1, + .entries = { + .from = sample.ip, + .to = sample.addr, + }, + }; + sample.branch_stack = (struct branch_stack *)&dummy_bs; + } + + if (etm->synth_opts.inject) { + ret = cs_etm__inject_event(event, &sample, + etm->branches_sample_type); + if (ret) + return ret; + } + ret = perf_session__deliver_synth_event(etm->session, event, &sample); if (ret) @@ -578,6 +828,24 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, etm->sample_branches = true; etm->branches_sample_type = attr.sample_type; etm->branches_id = id; + id += 1; + attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; + } + + if (etm->synth_opts.last_branch) + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + + if (etm->synth_opts.instructions) { + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + attr.sample_period = etm->synth_opts.period; + etm->instructions_sample_period = attr.sample_period; + err = cs_etm__synth_event(session, &attr, id); + if (err) + return err; + etm->sample_instructions = true; + etm->instructions_sample_type = attr.sample_type; + etm->instructions_id = id; + id += 1; } return 0; @@ -585,25 +853,108 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, static int cs_etm__sample(struct cs_etm_queue *etmq) { + struct cs_etm_auxtrace *etm = etmq->etm; + struct cs_etm_packet *tmp; int ret; - struct cs_etm_packet packet; + u64 instrs_executed; - while (1) { - ret = cs_etm_decoder__get_packet(etmq->decoder, &packet); - if (ret <= 0) + instrs_executed = cs_etm__instr_count(etmq->packet); + etmq->period_instructions += instrs_executed; + + /* + * Record a branch when the last instruction in + * PREV_PACKET is a branch. 
+ */ + if (etm->synth_opts.last_branch && + etmq->prev_packet && + etmq->prev_packet->sample_type == CS_ETM_RANGE && + etmq->prev_packet->last_instr_taken_branch) + cs_etm__update_last_branch_rb(etmq); + + if (etm->sample_instructions && + etmq->period_instructions >= etm->instructions_sample_period) { + /* + * Emit instruction sample periodically + * TODO: allow period to be defined in cycles and clock time + */ + + /* Get number of instructions executed after the sample point */ + u64 instrs_over = etmq->period_instructions - + etm->instructions_sample_period; + + /* + * Calculate the address of the sampled instruction (-1 as + * sample is reported as though instruction has just been + * executed, but PC has not advanced to next instruction) + */ + u64 offset = (instrs_executed - instrs_over - 1); + u64 addr = cs_etm__instr_addr(etmq->packet, offset); + + ret = cs_etm__synth_instruction_sample( + etmq, addr, etm->instructions_sample_period); + if (ret) + return ret; + + /* Carry remaining instructions into next sample period */ + etmq->period_instructions = instrs_over; + } + + if (etm->sample_branches && + etmq->prev_packet && + etmq->prev_packet->sample_type == CS_ETM_RANGE && + etmq->prev_packet->last_instr_taken_branch) { + ret = cs_etm__synth_branch_sample(etmq); + if (ret) return ret; + } + if (etm->sample_branches || etm->synth_opts.last_branch) { /* - * If the packet contains an instruction range, generate an - * instruction sequence event. + * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for + * the next incoming packet. 
*/ - if (packet.sample_type & CS_ETM_RANGE) - cs_etm__synth_branch_sample(etmq, &packet); + tmp = etmq->packet; + etmq->packet = etmq->prev_packet; + etmq->prev_packet = tmp; } return 0; } +static int cs_etm__flush(struct cs_etm_queue *etmq) +{ + int err = 0; + struct cs_etm_packet *tmp; + + if (etmq->etm->synth_opts.last_branch && + etmq->prev_packet && + etmq->prev_packet->sample_type == CS_ETM_RANGE) { + /* + * Generate a last branch event for the branches left in the + * circular buffer at the end of the trace. + * + * Use the address of the end of the last reported execution + * range + */ + u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); + + err = cs_etm__synth_instruction_sample( + etmq, addr, + etmq->period_instructions); + etmq->period_instructions = 0; + + /* + * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for + * the next incoming packet. + */ + tmp = etmq->packet; + etmq->packet = etmq->prev_packet; + etmq->prev_packet = tmp; + } + + return err; +} + static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { struct cs_etm_auxtrace *etm = etmq->etm; @@ -615,45 +966,72 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) etm->kernel_start = machine__kernel_start(etm->machine); /* Go through each buffer in the queue and decode them one by one */ -more: - buffer_used = 0; - memset(&buffer, 0, sizeof(buffer)); - err = cs_etm__get_trace(&buffer, etmq); - if (err <= 0) - return err; - /* - * We cannot assume consecutive blocks in the data file are contiguous, - * reset the decoder to force re-sync. 
- */ - err = cs_etm_decoder__reset(etmq->decoder); - if (err != 0) - return err; - - /* Run trace decoder until buffer consumed or end of trace */ - do { - processed = 0; - - err = cs_etm_decoder__process_data_block( - etmq->decoder, - etmq->offset, - &buffer.buf[buffer_used], - buffer.len - buffer_used, - &processed); - - if (err) + while (1) { + buffer_used = 0; + memset(&buffer, 0, sizeof(buffer)); + err = cs_etm__get_trace(&buffer, etmq); + if (err <= 0) return err; - - etmq->offset += processed; - buffer_used += processed; - /* - * Nothing to do with an error condition, let's hope the next - * chunk will be better. + * We cannot assume consecutive blocks in the data file are + * contiguous, reset the decoder to force re-sync. */ - err = cs_etm__sample(etmq); - } while (buffer.len > buffer_used); + err = cs_etm_decoder__reset(etmq->decoder); + if (err != 0) + return err; + + /* Run trace decoder until buffer consumed or end of trace */ + do { + processed = 0; + err = cs_etm_decoder__process_data_block( + etmq->decoder, + etmq->offset, + &buffer.buf[buffer_used], + buffer.len - buffer_used, + &processed); + if (err) + return err; + + etmq->offset += processed; + buffer_used += processed; + + /* Process each packet in this chunk */ + while (1) { + err = cs_etm_decoder__get_packet(etmq->decoder, + etmq->packet); + if (err <= 0) + /* + * Stop processing this chunk on + * end of data or error + */ + break; + + switch (etmq->packet->sample_type) { + case CS_ETM_RANGE: + /* + * If the packet contains an instruction + * range, generate instruction sequence + * events. 
+ */ + cs_etm__sample(etmq); + break; + case CS_ETM_TRACE_ON: + /* + * Discontinuity in trace, flush + * previous branch stack + */ + cs_etm__flush(etmq); + break; + default: + break; + } + } + } while (buffer.len > buffer_used); -goto more; + if (err == 0) + /* Flush any remaining branch stack entries */ + err = cs_etm__flush(etmq); + } return err; } diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index f3a71db83947..3d6459626c2a 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -232,7 +232,6 @@ int perf_quiet_option(void) var++; } - quiet = true; return 0; } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6d311868d850..4c842762e3f2 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -32,6 +32,10 @@ void perf_env__exit(struct perf_env *env) for (i = 0; i < env->caches_cnt; i++) cpu_cache_level__free(&env->caches[i]); zfree(&env->caches); + + for (i = 0; i < env->nr_memory_nodes; i++) + free(env->memory_nodes[i].set); + zfree(&env->memory_nodes); } int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index bf970f57dce0..c4ef2e523367 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -27,6 +27,12 @@ struct numa_node { struct cpu_map *map; }; +struct memory_node { + u64 node; + u64 size; + unsigned long *set; +}; + struct perf_env { char *hostname; char *os_release; @@ -43,6 +49,7 @@ struct perf_env { int nr_sibling_cores; int nr_sibling_threads; int nr_numa_nodes; + int nr_memory_nodes; int nr_pmu_mappings; int nr_groups; char *cmdline; @@ -54,6 +61,8 @@ struct perf_env { struct cpu_cache_level *caches; int caches_cnt; struct numa_node *numa_nodes; + struct memory_node *memory_nodes; + unsigned long long memory_bsize; }; extern struct perf_env perf_env; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 44e603c27944..f0a6cbd033cc 100644 --- a/tools/perf/util/event.c +++ 
b/tools/perf/util/event.c @@ -894,8 +894,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, struct machine *machine) { size_t size; - const char *mmap_name; - char name_buff[PATH_MAX]; struct map *map = machine__kernel_map(machine); struct kmap *kmap; int err; @@ -918,7 +916,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, return -1; } - mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff)); if (machine__is_host(machine)) { /* * kernel uses PERF_RECORD_MISC_USER for user space maps, @@ -931,7 +928,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, kmap = map__kmap(map); size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), - "%s%s", mmap_name, kmap->ref_reloc_sym->name) + 1; + "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; size = PERF_ALIGN(size, sizeof(u64)); event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - @@ -1591,17 +1588,6 @@ int machine__resolve(struct machine *machine, struct addr_location *al, return -1; dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); - /* - * Have we already created the kernel maps for this machine? - * - * This should have happened earlier, when we processed the kernel MMAP - * events, but for older perf.data files there was no such thing, so do - * it now. - */ - if (sample->cpumode == PERF_RECORD_MISC_KERNEL && - machine__kernel_map(machine) == NULL) - machine__create_kernel_maps(machine); - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al); dump_printf(" ...... dso: %s\n", al->map ? 
al->map->dso->long_name : diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e5fc14e53c05..a59281d64368 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -702,29 +702,6 @@ static int perf_evlist__resume(struct perf_evlist *evlist) return perf_evlist__set_paused(evlist, false); } -union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx) -{ - struct perf_mmap *md = &evlist->mmap[idx]; - - /* - * Check messup is required for forward overwritable ring buffer: - * memory pointed by md->prev can be overwritten in this case. - * No need for read-write ring buffer: kernel stop outputting when - * it hit md->prev (perf_mmap__consume()). - */ - return perf_mmap__read_forward(md); -} - -union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) -{ - return perf_evlist__mmap_read_forward(evlist, idx); -} - -void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) -{ - perf_mmap__consume(&evlist->mmap[idx], false); -} - static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) { int i; @@ -745,7 +722,8 @@ void perf_evlist__munmap(struct perf_evlist *evlist) zfree(&evlist->overwrite_mmap); } -static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) +static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist, + bool overwrite) { int i; struct perf_mmap *map; @@ -759,9 +737,10 @@ static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) for (i = 0; i < evlist->nr_mmaps; i++) { map[i].fd = -1; + map[i].overwrite = overwrite; /* * When the perf_mmap() call is made we grab one refcount, plus - * one extra to let perf_evlist__mmap_consume() get the last + * one extra to let perf_mmap__consume() get the last * events after all real references (perf_mmap__get()) are * dropped. 
* @@ -802,7 +781,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, maps = evlist->overwrite_mmap; if (!maps) { - maps = perf_evlist__alloc_mmap(evlist); + maps = perf_evlist__alloc_mmap(evlist, true); if (!maps) return -1; evlist->overwrite_mmap = maps; @@ -1052,7 +1031,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, struct mmap_params mp; if (!evlist->mmap) - evlist->mmap = perf_evlist__alloc_mmap(evlist); + evlist->mmap = perf_evlist__alloc_mmap(evlist, false); if (!evlist->mmap) return -ENOMEM; @@ -1086,11 +1065,30 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) { + bool all_threads = (target->per_thread && target->system_wide); struct cpu_map *cpus; struct thread_map *threads; + /* + * If specify '-a' and '--per-thread' to perf record, perf record + * will override '--per-thread'. target->per_thread = false and + * target->system_wide = true. + * + * If specify '--per-thread' only to perf record, + * target->per_thread = true and target->system_wide = false. + * + * So target->per_thread && target->system_wide is false. + * For perf record, thread_map__new_str doesn't call + * thread_map__new_all_cpus. That will keep perf record's + * current behavior. + * + * For perf stat, it allows the case that target->per_thread and + * target->system_wide are all true. It means to collect system-wide + * per-thread data. thread_map__new_str will call + * thread_map__new_all_cpus to enumerate all threads. 
+ */ threads = thread_map__new_str(target->pid, target->tid, target->uid, - target->per_thread); + all_threads); if (!threads) return -1; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 336b838e6957..6c41b2f78713 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -129,10 +129,6 @@ struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, enum bkw_mmap_state state); -union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx); - -union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, - int idx); void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx); int perf_evlist__open(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ef351688b797..1ac8d9236efd 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -244,6 +244,7 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->metric_name = NULL; evsel->metric_events = NULL; evsel->collect_stat = false; + evsel->pmu_name = NULL; } struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) @@ -621,22 +622,34 @@ const char *perf_evsel__group_name(struct perf_evsel *evsel) return evsel->group_name ?: "anon group"; } +/* + * Returns the group details for the specified leader, + * with following rules. 
+ * + * For record -e '{cycles,instructions}' + * 'anon group { cycles:u, instructions:u }' + * + * For record -e 'cycles,instructions' and report --group + * 'cycles:u, instructions:u' + */ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) { - int ret; + int ret = 0; struct perf_evsel *pos; const char *group_name = perf_evsel__group_name(evsel); - ret = scnprintf(buf, size, "%s", group_name); + if (!evsel->forced_leader) + ret = scnprintf(buf, size, "%s { ", group_name); - ret += scnprintf(buf + ret, size - ret, " { %s", + ret += scnprintf(buf + ret, size - ret, "%s", perf_evsel__name(evsel)); for_each_group_member(pos, evsel) ret += scnprintf(buf + ret, size - ret, ", %s", perf_evsel__name(pos)); - ret += scnprintf(buf + ret, size - ret, " }"); + if (!evsel->forced_leader) + ret += scnprintf(buf + ret, size - ret, " }"); return ret; } @@ -1233,7 +1246,7 @@ void perf_evsel__exit(struct perf_evsel *evsel) perf_evsel__free_fd(evsel); perf_evsel__free_id(evsel); perf_evsel__free_config_terms(evsel); - close_cgroup(evsel->cgrp); + cgroup__put(evsel->cgrp); cpu_map__put(evsel->cpus); cpu_map__put(evsel->own_cpus); thread_map__put(evsel->threads); @@ -1915,6 +1928,9 @@ try_fallback: goto fallback_missing_features; } out_close: + if (err) + threads->err_thread = thread; + do { while (--thread >= 0) { close(FD(evsel, cpu, thread)); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index a7487c6d1866..d3ee3af618ef 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -30,7 +30,7 @@ struct perf_sample_id { u64 period; }; -struct cgroup_sel; +struct cgroup; /* * The 'struct perf_evsel_config_term' is used to pass event @@ -107,7 +107,7 @@ struct perf_evsel { struct perf_stat_evsel *stats; void *priv; u64 db_id; - struct cgroup_sel *cgrp; + struct cgroup *cgrp; void *handler; struct cpu_map *cpus; struct cpu_map *own_cpus; @@ -125,6 +125,7 @@ struct perf_evsel { bool per_pkg; bool precise_max; bool ignore_missing_thread; 
+ bool forced_leader; /* parse modifier helper */ int exclude_GH; int nr_members; @@ -142,6 +143,7 @@ struct perf_evsel { struct perf_evsel **metric_events; bool collect_stat; bool weak_group; + const char *pmu_name; }; union u64_swap { diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a326e0d8b5b6..121df1683c36 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -17,6 +17,7 @@ #include <sys/stat.h> #include <sys/utsname.h> #include <linux/time64.h> +#include <dirent.h> #include "evlist.h" #include "evsel.h" @@ -37,6 +38,7 @@ #include "asm/bug.h" #include "tool.h" #include "time-utils.h" +#include "units.h" #include "sane_ctype.h" @@ -132,6 +134,25 @@ int do_write(struct feat_fd *ff, const void *buf, size_t size) } /* Return: 0 if succeded, -ERR if failed. */ +static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size) +{ + u64 *p = (u64 *) set; + int i, ret; + + ret = do_write(ff, &size, sizeof(size)); + if (ret < 0) + return ret; + + for (i = 0; (u64) i < BITS_TO_U64(size); i++) { + ret = do_write(ff, p + i, sizeof(*p)); + if (ret < 0) + return ret; + } + + return 0; +} + +/* Return: 0 if succeded, -ERR if failed. */ int write_padded(struct feat_fd *ff, const void *bf, size_t count, size_t count_aligned) { @@ -243,6 +264,38 @@ static char *do_read_string(struct feat_fd *ff) return NULL; } +/* Return: 0 if succeded, -ERR if failed. 
*/ +static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize) +{ + unsigned long *set; + u64 size, *p; + int i, ret; + + ret = do_read_u64(ff, &size); + if (ret) + return ret; + + set = bitmap_alloc(size); + if (!set) + return -ENOMEM; + + bitmap_zero(set, size); + + p = (u64 *) set; + + for (i = 0; (u64) i < BITS_TO_U64(size); i++) { + ret = do_read_u64(ff, p + i); + if (ret < 0) { + free(set); + return ret; + } + } + + *pset = set; + *psize = size; + return 0; +} + static int write_tracing_data(struct feat_fd *ff, struct perf_evlist *evlist) { @@ -1196,6 +1249,176 @@ static int write_sample_time(struct feat_fd *ff, sizeof(evlist->last_sample_time)); } + +static int memory_node__read(struct memory_node *n, unsigned long idx) +{ + unsigned int phys, size = 0; + char path[PATH_MAX]; + struct dirent *ent; + DIR *dir; + +#define for_each_memory(mem, dir) \ + while ((ent = readdir(dir))) \ + if (strcmp(ent->d_name, ".") && \ + strcmp(ent->d_name, "..") && \ + sscanf(ent->d_name, "memory%u", &mem) == 1) + + scnprintf(path, PATH_MAX, + "%s/devices/system/node/node%lu", + sysfs__mountpoint(), idx); + + dir = opendir(path); + if (!dir) { + pr_warning("failed: cant' open memory sysfs data\n"); + return -1; + } + + for_each_memory(phys, dir) { + size = max(phys, size); + } + + size++; + + n->set = bitmap_alloc(size); + if (!n->set) { + closedir(dir); + return -ENOMEM; + } + + bitmap_zero(n->set, size); + n->node = idx; + n->size = size; + + rewinddir(dir); + + for_each_memory(phys, dir) { + set_bit(phys, n->set); + } + + closedir(dir); + return 0; +} + +static int memory_node__sort(const void *a, const void *b) +{ + const struct memory_node *na = a; + const struct memory_node *nb = b; + + return na->node - nb->node; +} + +static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp) +{ + char path[PATH_MAX]; + struct dirent *ent; + DIR *dir; + u64 cnt = 0; + int ret = 0; + + scnprintf(path, PATH_MAX, "%s/devices/system/node/", + 
sysfs__mountpoint()); + + dir = opendir(path); + if (!dir) { + pr_warning("failed: can't open node sysfs data\n"); + return -1; + } + + while (!ret && (ent = readdir(dir))) { + unsigned int idx; + int r; + + if (!strcmp(ent->d_name, ".") || + !strcmp(ent->d_name, "..")) + continue; + + r = sscanf(ent->d_name, "node%u", &idx); + if (r != 1) + continue; + + if (WARN_ONCE(cnt >= size, + "failed to write MEM_TOPOLOGY, way too many nodes\n")) + return -1; + + ret = memory_node__read(&nodes[cnt++], idx); + } + + *cntp = cnt; + closedir(dir); + + if (!ret) + qsort(nodes, cnt, sizeof(nodes[0]), memory_node__sort); + + return ret; +} + +#define MAX_MEMORY_NODES 2000 + +/* + * The MEM_TOPOLOGY holds physical memory map for every + * node in system. The format of data is as follows: + * + * 0 - version | for future changes + * 8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes + * 16 - count | number of nodes + * + * For each node we store map of physical indexes for + * each node: + * + * 32 - node id | node index + * 40 - size | size of bitmap + * 48 - bitmap | bitmap of memory indexes that belongs to node + */ +static int write_mem_topology(struct feat_fd *ff __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + static struct memory_node nodes[MAX_MEMORY_NODES]; + u64 bsize, version = 1, i, nr; + int ret; + + ret = sysfs__read_xll("devices/system/memory/block_size_bytes", + (unsigned long long *) &bsize); + if (ret) + return ret; + + ret = build_mem_topology(&nodes[0], MAX_MEMORY_NODES, &nr); + if (ret) + return ret; + + ret = do_write(ff, &version, sizeof(version)); + if (ret < 0) + goto out; + + ret = do_write(ff, &bsize, sizeof(bsize)); + if (ret < 0) + goto out; + + ret = do_write(ff, &nr, sizeof(nr)); + if (ret < 0) + goto out; + + for (i = 0; i < nr; i++) { + struct memory_node *n = &nodes[i]; + + #define _W(v) \ + ret = do_write(ff, &n->v, sizeof(n->v)); \ + if (ret < 0) \ + goto out; + + _W(node) + _W(size) + + #undef _W + + ret = 
do_write_bitmap(ff, n->set, n->size); + if (ret < 0) + goto out; + } + +out: + return ret; +} + static void print_hostname(struct feat_fd *ff, FILE *fp) { fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); @@ -1543,6 +1766,35 @@ static void print_sample_time(struct feat_fd *ff, FILE *fp) fprintf(fp, "# sample duration : %10.3f ms\n", d); } +static void memory_node__fprintf(struct memory_node *n, + unsigned long long bsize, FILE *fp) +{ + char buf_map[100], buf_size[50]; + unsigned long long size; + + size = bsize * bitmap_weight(n->set, n->size); + unit_number__scnprintf(buf_size, 50, size); + + bitmap_scnprintf(n->set, n->size, buf_map, 100); + fprintf(fp, "# %3" PRIu64 " [%s]: %s\n", n->node, buf_size, buf_map); +} + +static void print_mem_topology(struct feat_fd *ff, FILE *fp) +{ + struct memory_node *nodes; + int i, nr; + + nodes = ff->ph->env.memory_nodes; + nr = ff->ph->env.nr_memory_nodes; + + fprintf(fp, "# memory nodes (nr %d, block size 0x%llx):\n", + nr, ff->ph->env.memory_bsize); + + for (i = 0; i < nr; i++) { + memory_node__fprintf(&nodes[i], ff->ph->env.memory_bsize, fp); + } +} + static int __event_process_build_id(struct build_id_event *bev, char *filename, struct perf_session *session) @@ -2205,6 +2457,58 @@ static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused) return 0; } +static int process_mem_topology(struct feat_fd *ff, + void *data __maybe_unused) +{ + struct memory_node *nodes; + u64 version, i, nr, bsize; + int ret = -1; + + if (do_read_u64(ff, &version)) + return -1; + + if (version != 1) + return -1; + + if (do_read_u64(ff, &bsize)) + return -1; + + if (do_read_u64(ff, &nr)) + return -1; + + nodes = zalloc(sizeof(*nodes) * nr); + if (!nodes) + return -1; + + for (i = 0; i < nr; i++) { + struct memory_node n; + + #define _R(v) \ + if (do_read_u64(ff, &n.v)) \ + goto out; \ + + _R(node) + _R(size) + + #undef _R + + if (do_read_bitmap(ff, &n.set, &n.size)) + goto out; + + nodes[i] = n; + } + + 
ff->ph->env.memory_bsize = bsize; + ff->ph->env.memory_nodes = nodes; + ff->ph->env.nr_memory_nodes = nr; + ret = 0; + +out: + if (ret) + free(nodes); + return ret; +} + struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2263,6 +2567,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPN(STAT, stat, false), FEAT_OPN(CACHE, cache, true), FEAT_OPR(SAMPLE_TIME, sample_time, false), + FEAT_OPR(MEM_TOPOLOGY, mem_topology, true), }; struct header_print_data { @@ -2318,7 +2623,12 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) if (ret == -1) return -1; - fprintf(fp, "# captured on: %s", ctime(&st.st_ctime)); + fprintf(fp, "# captured on : %s", ctime(&st.st_ctime)); + + fprintf(fp, "# header version : %u\n", header->version); + fprintf(fp, "# data offset : %" PRIu64 "\n", header->data_offset); + fprintf(fp, "# data size : %" PRIu64 "\n", header->data_size); + fprintf(fp, "# feat offset : %" PRIu64 "\n", header->feat_offset); perf_header__process_sections(header, fd, &hd, perf_file_section__fprintf_info); @@ -3105,8 +3415,17 @@ int perf_event__synthesize_features(struct perf_tool *tool, return ret; } } + + /* Send HEADER_LAST_FEATURE mark. 
*/ + fe = ff.buf; + fe->feat_id = HEADER_LAST_FEATURE; + fe->header.type = PERF_RECORD_HEADER_FEATURE; + fe->header.size = sizeof(*fe); + + ret = process(tool, ff.buf, NULL, NULL); + free(ff.buf); - return 0; + return ret; } int perf_event__process_feature(struct perf_tool *tool, diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index f28aaaa3a440..90d4577a92dc 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -36,6 +36,7 @@ enum { HEADER_STAT, HEADER_CACHE, HEADER_SAMPLE_TIME, + HEADER_MEM_TOPOLOGY, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; @@ -174,4 +175,5 @@ int write_padded(struct feat_fd *fd, const void *bf, int get_cpuid(char *buffer, size_t sz); char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused); +int strcmp_cpuid_str(const char *s1, const char *s2); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b6140950301e..7d968892ee39 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -536,7 +536,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, * This mem info was allocated from sample__resolve_mem * and will not be used anymore. */ - zfree(&entry->mem_info); + mem_info__zput(entry->mem_info); /* If the map of an existing hist_entry has * become out-of-date due to an exec() or @@ -879,7 +879,7 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter, * cumulated only one time to prevent entries more than 100% * overhead. 
*/ - he_cache = malloc(sizeof(*he_cache) * (iter->max_stack + 1)); + he_cache = malloc(sizeof(*he_cache) * (callchain_cursor.nr + 1)); if (he_cache == NULL) return -ENOMEM; @@ -1045,8 +1045,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, if (err) return err; - iter->max_stack = max_stack_depth; - err = iter->ops->prepare_entry(iter, al); if (err) goto out; @@ -1141,7 +1139,7 @@ void hist_entry__delete(struct hist_entry *he) if (he->mem_info) { map__zput(he->mem_info->iaddr.map); map__zput(he->mem_info->daddr.map); - zfree(&he->mem_info); + mem_info__zput(he->mem_info); } zfree(&he->stat_acc); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 02721b579746..e869cad4d89f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -107,7 +107,6 @@ struct hist_entry_iter { int curr; bool hide_unresolved; - int max_stack; struct perf_evsel *evsel; struct perf_sample *sample; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index aa1593ce551d..f9157aed1289 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1378,6 +1378,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) intel_pt_clear_tx_flags(decoder); decoder->have_tma = false; decoder->cbr = 0; + decoder->timestamp_insn_cnt = 0; decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; decoder->overflow = true; return -EOVERFLOW; @@ -1616,6 +1617,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) case INTEL_PT_PWRX: intel_pt_log("ERROR: Missing TIP after FUP\n"); decoder->pkt_state = INTEL_PT_STATE_ERR3; + decoder->pkt_step = 0; return -ENOENT; case INTEL_PT_OVF: @@ -2390,14 +2392,6 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) return &decoder->state; } -static bool intel_pt_at_psb(unsigned char *buf, size_t len) -{ - if (len < INTEL_PT_PSB_LEN) - 
return false; - return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR, - INTEL_PT_PSB_LEN); -} - /** * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet. * @buf: pointer to buffer pointer @@ -2486,6 +2480,7 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len) * @buf: buffer * @len: size of buffer * @tsc: TSC value returned + * @rem: returns remaining size when TSC is found * * Find a TSC packet in @buf and return the TSC value. This function assumes * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a @@ -2493,7 +2488,8 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len) * * Return: %true if TSC is found, false otherwise. */ -static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc) +static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc, + size_t *rem) { struct intel_pt_pkt packet; int ret; @@ -2504,6 +2500,7 @@ static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc) return false; if (packet.type == INTEL_PT_TSC) { *tsc = packet.payload; + *rem = len; return true; } if (packet.type == INTEL_PT_PSBEND) @@ -2554,6 +2551,8 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) * @len_a: size of first buffer * @buf_b: second buffer * @len_b: size of second buffer + * @consecutive: returns true if there is data in buf_b that is consecutive + * to buf_a * * If the trace contains TSC we can look at the last TSC of @buf_a and the * first TSC of @buf_b in order to determine if the buffers overlap, and then @@ -2566,33 +2565,41 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, size_t len_a, unsigned char *buf_b, - size_t len_b) + size_t len_b, bool *consecutive) { uint64_t tsc_a, tsc_b; unsigned char *p; - size_t len; + size_t len, rem_a, rem_b; p = intel_pt_last_psb(buf_a, len_a); if (!p) return buf_b; /* No PSB in buf_a => no overlap 
*/ len = len_a - (p - buf_a); - if (!intel_pt_next_tsc(p, len, &tsc_a)) { + if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) { /* The last PSB+ in buf_a is incomplete, so go back one more */ len_a -= len; p = intel_pt_last_psb(buf_a, len_a); if (!p) return buf_b; /* No full PSB+ => assume no overlap */ len = len_a - (p - buf_a); - if (!intel_pt_next_tsc(p, len, &tsc_a)) + if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) return buf_b; /* No TSC in buf_a => assume no overlap */ } while (1) { /* Ignore PSB+ with no TSC */ - if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) && - intel_pt_tsc_cmp(tsc_a, tsc_b) < 0) - return buf_b; /* tsc_a < tsc_b => no overlap */ + if (intel_pt_next_tsc(buf_b, len_b, &tsc_b, &rem_b)) { + int cmp = intel_pt_tsc_cmp(tsc_a, tsc_b); + + /* Same TSC, so buffers are consecutive */ + if (!cmp && rem_b >= rem_a) { + *consecutive = true; + return buf_b + len_b - (rem_b - rem_a); + } + if (cmp < 0) + return buf_b; /* tsc_a < tsc_b => no overlap */ + } if (!intel_pt_step_psb(&buf_b, &len_b)) return buf_b + len_b; /* No PSB in buf_b => no data */ @@ -2606,6 +2613,8 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, * @buf_b: second buffer * @len_b: size of second buffer * @have_tsc: can use TSC packets to detect overlap + * @consecutive: returns true if there is data in buf_b that is consecutive + * to buf_a * * When trace samples or snapshots are recorded there is the possibility that * the data overlaps. 
Note that, for the purposes of decoding, data is only @@ -2616,7 +2625,7 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, */ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, unsigned char *buf_b, size_t len_b, - bool have_tsc) + bool have_tsc, bool *consecutive) { unsigned char *found; @@ -2628,7 +2637,8 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, return buf_b; /* No overlap */ if (have_tsc) { - found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b); + found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b, + consecutive); if (found) return found; } @@ -2643,28 +2653,16 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, } /* Now len_b >= len_a */ - if (len_b > len_a) { - /* The leftover buffer 'b' must start at a PSB */ - while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { - if (!intel_pt_step_psb(&buf_a, &len_a)) - return buf_b; /* No overlap */ - } - } - while (1) { /* Potential overlap so check the bytes */ found = memmem(buf_a, len_a, buf_b, len_a); - if (found) + if (found) { + *consecutive = true; return buf_b + len_a; + } /* Try again at next PSB in buffer 'a' */ if (!intel_pt_step_psb(&buf_a, &len_a)) return buf_b; /* No overlap */ - - /* The leftover buffer 'b' must start at a PSB */ - while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { - if (!intel_pt_step_psb(&buf_a, &len_a)) - return buf_b; /* No overlap */ - } } } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 921b22e8ca0e..fc1752d50019 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -117,7 +117,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder); unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, unsigned char *buf_b, size_t len_b, - bool have_tsc); + bool have_tsc, 
bool *consecutive); int intel_pt__strerror(int code, char *buf, size_t buflen); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 3773d9c54f45..0effaff57020 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -132,6 +132,7 @@ struct intel_pt_queue { struct intel_pt *pt; unsigned int queue_nr; struct auxtrace_buffer *buffer; + struct auxtrace_buffer *old_buffer; void *decoder; const struct intel_pt_state *state; struct ip_callchain *chain; @@ -143,6 +144,7 @@ struct intel_pt_queue { bool stop; bool step_through_buffers; bool use_buffer_pid_tid; + bool sync_switch; pid_t pid, tid; int cpu; int switch_state; @@ -207,49 +209,28 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { + bool consecutive = false; void *start; start = intel_pt_find_overlap(a->data, a->size, b->data, b->size, - pt->have_tsc); + pt->have_tsc, &consecutive); if (!start) return -EINVAL; b->use_size = b->data + b->size - start; b->use_data = start; + if (b->use_size && consecutive) + b->consecutive = true; return 0; } -static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq, - struct auxtrace_queue *queue, - struct auxtrace_buffer *buffer) -{ - if (queue->cpu == -1 && buffer->cpu != -1) - ptq->cpu = buffer->cpu; - - ptq->pid = buffer->pid; - ptq->tid = buffer->tid; - - intel_pt_log("queue %u cpu %d pid %d tid %d\n", - ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); - - thread__zput(ptq->thread); - - if (ptq->tid != -1) { - if (ptq->pid != -1) - ptq->thread = machine__findnew_thread(ptq->pt->machine, - ptq->pid, - ptq->tid); - else - ptq->thread = machine__find_thread(ptq->pt->machine, -1, - ptq->tid); - } -} - /* This function assumes data is processed sequentially only */ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) { struct intel_pt_queue *ptq = data; - struct auxtrace_buffer 
*buffer = ptq->buffer, *old_buffer = buffer; + struct auxtrace_buffer *buffer = ptq->buffer; + struct auxtrace_buffer *old_buffer = ptq->old_buffer; struct auxtrace_queue *queue; + bool might_overlap; if (ptq->stop) { b->len = 0; @@ -257,7 +238,7 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) } queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; -next: + buffer = auxtrace_buffer__next(queue, buffer); if (!buffer) { if (old_buffer) @@ -276,7 +257,8 @@ next: return -ENOMEM; } - if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer && + might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode; + if (might_overlap && !buffer->consecutive && old_buffer && intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer)) return -ENOMEM; @@ -289,33 +271,24 @@ next: } b->ref_timestamp = buffer->reference; - /* - * If in snapshot mode and the buffer has no usable data, get next - * buffer and again check overlap against old_buffer. - */ - if (ptq->pt->snapshot_mode && !b->len) - goto next; - - if (old_buffer) - auxtrace_buffer__drop_data(old_buffer); - - if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode && - !buffer->consecutive)) { + if (!old_buffer || (might_overlap && !buffer->consecutive)) { b->consecutive = false; b->trace_nr = buffer->buffer_nr + 1; } else { b->consecutive = true; } - if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid || - ptq->tid != buffer->tid)) - intel_pt_use_buffer_pid_tid(ptq, queue, buffer); - if (ptq->step_through_buffers) ptq->stop = true; - if (!b->len) + if (b->len) { + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + ptq->old_buffer = buffer; + } else { + auxtrace_buffer__drop_data(buffer); return intel_pt_get_trace(b, data); + } return 0; } @@ -954,16 +927,15 @@ static int intel_pt_setup_queue(struct intel_pt *pt, ptq->cpu = queue->cpu; ptq->tid = queue->tid; - if (pt->sampling_mode) { - if (pt->timeless_decoding) - ptq->step_through_buffers = true; - if 
(pt->timeless_decoding || !pt->have_sched_switch) - ptq->use_buffer_pid_tid = true; - } + if (pt->sampling_mode && !pt->snapshot_mode && + pt->timeless_decoding) + ptq->step_through_buffers = true; + + ptq->sync_switch = pt->sync_switch; } if (!ptq->on_heap && - (!pt->sync_switch || + (!ptq->sync_switch || ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) { const struct intel_pt_state *state; int ret; @@ -1546,7 +1518,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) if (pt->synth_opts.last_branch) intel_pt_update_last_branch_rb(ptq); - if (!pt->sync_switch) + if (!ptq->sync_switch) return 0; if (intel_pt_is_switch_ip(ptq, state->to_ip)) { @@ -1627,6 +1599,21 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) return switch_ip; } +static void intel_pt_enable_sync_switch(struct intel_pt *pt) +{ + unsigned int i; + + pt->sync_switch = true; + + for (i = 0; i < pt->queues.nr_queues; i++) { + struct auxtrace_queue *queue = &pt->queues.queue_array[i]; + struct intel_pt_queue *ptq = queue->priv; + + if (ptq) + ptq->sync_switch = true; + } +} + static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) { const struct intel_pt_state *state = ptq->state; @@ -1643,7 +1630,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) if (pt->switch_ip) { intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", pt->switch_ip, pt->ptss_ip); - pt->sync_switch = true; + intel_pt_enable_sync_switch(pt); } } } @@ -1659,9 +1646,9 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) if (state->err) { if (state->err == INTEL_PT_ERR_NODATA) return 1; - if (pt->sync_switch && + if (ptq->sync_switch && state->from_ip >= pt->kernel_start) { - pt->sync_switch = false; + ptq->sync_switch = false; intel_pt_next_tid(pt, ptq); } if (pt->synth_opts.errors) { @@ -1687,7 +1674,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) state->timestamp, state->est_timestamp); 
ptq->timestamp = state->est_timestamp; /* Use estimated TSC in unknown switch state */ - } else if (pt->sync_switch && + } else if (ptq->sync_switch && ptq->switch_state == INTEL_PT_SS_UNKNOWN && intel_pt_is_switch_ip(ptq, state->to_ip) && ptq->next_tid == -1) { @@ -1834,7 +1821,7 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, return 1; ptq = intel_pt_cpu_to_ptq(pt, cpu); - if (!ptq) + if (!ptq || !ptq->sync_switch) return 1; switch (ptq->switch_state) { @@ -2075,9 +2062,6 @@ static int intel_pt_process_auxtrace_event(struct perf_session *session, struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, auxtrace); - if (pt->sampling_mode) - return 0; - if (!pt->data_queued) { struct auxtrace_buffer *buffer; off_t data_offset; diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 4952b429caa7..1cca0a2fa641 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -433,6 +433,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, char serr[STRERR_BUFSIZE]; char *kbuild_dir = NULL, *kbuild_include_opts = NULL; const char *template = llvm_param.clang_bpf_cmd_template; + char *command_echo, *command_out; if (path[0] != '-' && realpath(path, abspath) == NULL) { err = errno; @@ -487,6 +488,16 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, (path[0] == '-') ? 
path : abspath); pr_debug("llvm compiling command template: %s\n", template); + + if (asprintf(&command_echo, "echo -n \"%s\"", template) < 0) + goto errout; + + err = read_from_pipe(command_echo, (void **) &command_out, NULL); + if (err) + goto errout; + + pr_debug("llvm compiling command : %s\n", command_out); + err = read_from_pipe(template, &obj_buf, &obj_buf_sz); if (err) { pr_err("ERROR:\tunable to compile %s\n", path); @@ -497,6 +508,8 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, goto errout; } + free(command_echo); + free(command_out); free(kbuild_dir); free(kbuild_include_opts); @@ -509,6 +522,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, *p_obj_buf_sz = obj_buf_sz; return 0; errout: + free(command_echo); free(kbuild_dir); free(kbuild_include_opts); free(obj_buf); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b05a67464c03..2eca8478e24f 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -48,8 +48,23 @@ static void machine__threads_init(struct machine *machine) } } +static int machine__set_mmap_name(struct machine *machine) +{ + if (machine__is_host(machine)) + machine->mmap_name = strdup("[kernel.kallsyms]"); + else if (machine__is_default_guest(machine)) + machine->mmap_name = strdup("[guest.kernel.kallsyms]"); + else if (asprintf(&machine->mmap_name, "[guest.kernel.kallsyms.%d]", + machine->pid) < 0) + machine->mmap_name = NULL; + + return machine->mmap_name ? 
0 : -ENOMEM; +} + int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { + int err = -ENOMEM; + memset(machine, 0, sizeof(*machine)); map_groups__init(&machine->kmaps, machine); RB_CLEAR_NODE(&machine->rb_node); @@ -73,13 +88,16 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) if (machine->root_dir == NULL) return -ENOMEM; + if (machine__set_mmap_name(machine)) + goto out; + if (pid != HOST_KERNEL_ID) { struct thread *thread = machine__findnew_thread(machine, -1, pid); char comm[64]; if (thread == NULL) - return -ENOMEM; + goto out; snprintf(comm, sizeof(comm), "[guest/%d]", pid); thread__set_comm(thread, comm, 0); @@ -87,7 +105,13 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) } machine->current_tid = NULL; + err = 0; +out: + if (err) { + zfree(&machine->root_dir); + zfree(&machine->mmap_name); + } return 0; } @@ -119,7 +143,7 @@ struct machine *machine__new_kallsyms(void) * ask for not using the kcore parsing code, once this one is fixed * to create a map per module. 
*/ - if (machine && __machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION, true) <= 0) { + if (machine && machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION) <= 0) { machine__delete(machine); machine = NULL; } @@ -180,6 +204,7 @@ void machine__exit(struct machine *machine) dsos__exit(&machine->dsos); machine__exit_vdso(machine); zfree(&machine->root_dir); + zfree(&machine->mmap_name); zfree(&machine->current_tid); for (i = 0; i < THREADS__TABLE_SIZE; i++) { @@ -322,20 +347,6 @@ void machines__process_guests(struct machines *machines, } } -char *machine__mmap_name(struct machine *machine, char *bf, size_t size) -{ - if (machine__is_host(machine)) - snprintf(bf, size, "[%s]", "kernel.kallsyms"); - else if (machine__is_default_guest(machine)) - snprintf(bf, size, "[%s]", "guest.kernel.kallsyms"); - else { - snprintf(bf, size, "[%s.%d]", "guest.kernel.kallsyms", - machine->pid); - } - - return bf; -} - void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size) { struct rb_node *node; @@ -771,24 +782,18 @@ size_t machine__fprintf(struct machine *machine, FILE *fp) static struct dso *machine__get_kernel(struct machine *machine) { - const char *vmlinux_name = NULL; + const char *vmlinux_name = machine->mmap_name; struct dso *kernel; if (machine__is_host(machine)) { - vmlinux_name = symbol_conf.vmlinux_name; - if (!vmlinux_name) - vmlinux_name = DSO__NAME_KALLSYMS; + if (symbol_conf.vmlinux_name) + vmlinux_name = symbol_conf.vmlinux_name; kernel = machine__findnew_kernel(machine, vmlinux_name, "[kernel]", DSO_TYPE_KERNEL); } else { - char bf[PATH_MAX]; - - if (machine__is_default_guest(machine)) + if (symbol_conf.default_guest_vmlinux_name) vmlinux_name = symbol_conf.default_guest_vmlinux_name; - if (!vmlinux_name) - vmlinux_name = machine__mmap_name(machine, bf, - sizeof(bf)); kernel = machine__findnew_kernel(machine, vmlinux_name, "[guest.kernel]", @@ -849,13 +854,10 @@ static int machine__get_running_kernel_start(struct machine 
*machine, return 0; } -int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) +static int +__machine__create_kernel_maps(struct machine *machine, struct dso *kernel) { int type; - u64 start = 0; - - if (machine__get_running_kernel_start(machine, NULL, &start)) - return -1; /* In case of renewal the kernel map, destroy previous one */ machine__destroy_kernel_maps(machine); @@ -864,7 +866,7 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) struct kmap *kmap; struct map *map; - machine->vmlinux_maps[type] = map__new2(start, kernel, type); + machine->vmlinux_maps[type] = map__new2(0, kernel, type); if (machine->vmlinux_maps[type] == NULL) return -1; @@ -987,11 +989,11 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid) return machine__create_kernel_maps(machine); } -int __machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type, bool no_kcore) +int machine__load_kallsyms(struct machine *machine, const char *filename, + enum map_type type) { struct map *map = machine__kernel_map(machine); - int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore); + int ret = __dso__load_kallsyms(map->dso, filename, map, true); if (ret > 0) { dso__set_loaded(map->dso, type); @@ -1006,12 +1008,6 @@ int __machine__load_kallsyms(struct machine *machine, const char *filename, return ret; } -int machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type) -{ - return __machine__load_kallsyms(machine, filename, type, false); -} - int machine__load_vmlinux_path(struct machine *machine, enum map_type type) { struct map *map = machine__kernel_map(machine); @@ -1215,6 +1211,24 @@ static int machine__create_modules(struct machine *machine) return 0; } +static void machine__set_kernel_mmap(struct machine *machine, + u64 start, u64 end) +{ + int i; + + for (i = 0; i < MAP__NR_TYPES; i++) { + machine->vmlinux_maps[i]->start = start; + 
machine->vmlinux_maps[i]->end = end; + + /* + * Be a bit paranoid here, some perf.data file came with + * a zero sized synthesized MMAP event for the kernel. + */ + if (start == 0 && end == 0) + machine->vmlinux_maps[i]->end = ~0ULL; + } +} + int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); @@ -1239,40 +1253,22 @@ int machine__create_kernel_maps(struct machine *machine) "continuing anyway...\n", machine->pid); } - /* - * Now that we have all the maps created, just set the ->end of them: - */ - map_groups__fixup_end(&machine->kmaps); - if (!machine__get_running_kernel_start(machine, &name, &addr)) { if (name && maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) { machine__destroy_kernel_maps(machine); return -1; } + machine__set_kernel_mmap(machine, addr, 0); } + /* + * Now that we have all the maps created, just set the ->end of them: + */ + map_groups__fixup_end(&machine->kmaps); return 0; } -static void machine__set_kernel_mmap_len(struct machine *machine, - union perf_event *event) -{ - int i; - - for (i = 0; i < MAP__NR_TYPES; i++) { - machine->vmlinux_maps[i]->start = event->mmap.start; - machine->vmlinux_maps[i]->end = (event->mmap.start + - event->mmap.len); - /* - * Be a bit paranoid here, some perf.data file came with - * a zero sized synthesized MMAP event for the kernel. 
- */ - if (machine->vmlinux_maps[i]->end == 0) - machine->vmlinux_maps[i]->end = ~0ULL; - } -} - static bool machine__uses_kcore(struct machine *machine) { struct dso *dso; @@ -1289,7 +1285,6 @@ static int machine__process_kernel_mmap_event(struct machine *machine, union perf_event *event) { struct map *map; - char kmmap_prefix[PATH_MAX]; enum dso_kernel_type kernel_type; bool is_kernel_mmap; @@ -1297,15 +1292,14 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (machine__uses_kcore(machine)) return 0; - machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix)); if (machine__is_host(machine)) kernel_type = DSO_TYPE_KERNEL; else kernel_type = DSO_TYPE_GUEST_KERNEL; is_kernel_mmap = memcmp(event->mmap.filename, - kmmap_prefix, - strlen(kmmap_prefix) - 1) == 0; + machine->mmap_name, + strlen(machine->mmap_name) - 1) == 0; if (event->mmap.filename[0] == '/' || (!is_kernel_mmap && event->mmap.filename[0] == '[')) { map = machine__findnew_module_map(machine, event->mmap.start, @@ -1316,7 +1310,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, map->end = map->start + event->mmap.len; } else if (is_kernel_mmap) { const char *symbol_name = (event->mmap.filename + - strlen(kmmap_prefix)); + strlen(machine->mmap_name)); /* * Should be there already, from the build-id table in * the header. 
@@ -1357,7 +1351,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, up_read(&machine->dsos.lock); if (kernel == NULL) - kernel = machine__findnew_dso(machine, kmmap_prefix); + kernel = machine__findnew_dso(machine, machine->mmap_name); if (kernel == NULL) goto out_problem; @@ -1370,7 +1364,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (strstr(kernel->long_name, "vmlinux")) dso__set_short_name(kernel, "[kernel.vmlinux]", false); - machine__set_kernel_mmap_len(machine, event); + machine__set_kernel_mmap(machine, event->mmap.start, + event->mmap.start + event->mmap.len); /* * Avoid using a zero address (kptr_restrict) for the ref reloc @@ -1700,7 +1695,7 @@ static void ip__resolve_data(struct thread *thread, struct mem_info *sample__resolve_mem(struct perf_sample *sample, struct addr_location *al) { - struct mem_info *mi = zalloc(sizeof(*mi)); + struct mem_info *mi = mem_info__new(); if (!mi) return NULL; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 5ce860b64c74..66cc200ef86f 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -43,6 +43,7 @@ struct machine { bool comm_exec; bool kptr_restrict_warned; char *root_dir; + char *mmap_name; struct threads threads[THREADS__TABLE_SIZE]; struct vdso_info *vdso_info; struct perf_env *env; @@ -142,8 +143,6 @@ struct machine *machines__find(struct machines *machines, pid_t pid); struct machine *machines__findnew(struct machines *machines, pid_t pid); void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size); -char *machine__mmap_name(struct machine *machine, char *bf, size_t size); - void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); @@ -226,8 +225,6 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start, const char *filename); int arch__fix_module_text_start(u64 *start, const char *name); -int 
__machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type, bool no_kcore); int machine__load_kallsyms(struct machine *machine, const char *filename, enum map_type type); int machine__load_vmlinux_path(struct machine *machine, enum map_type type); @@ -239,7 +236,6 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm); void machine__destroy_kernel_maps(struct machine *machine); -int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel); int machine__create_kernel_maps(struct machine *machine); int machines__create_kernel_maps(struct machines *machines, pid_t pid); diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c new file mode 100644 index 000000000000..c6fd81c02586 --- /dev/null +++ b/tools/perf/util/mem2node.c @@ -0,0 +1,134 @@ +#include <errno.h> +#include <inttypes.h> +#include <linux/bitmap.h> +#include "mem2node.h" +#include "util.h" + +struct phys_entry { + struct rb_node rb_node; + u64 start; + u64 end; + u64 node; +}; + +static void phys_entry__insert(struct phys_entry *entry, struct rb_root *root) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct phys_entry *e; + + while (*p != NULL) { + parent = *p; + e = rb_entry(parent, struct phys_entry, rb_node); + + if (entry->start < e->start) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&entry->rb_node, parent, p); + rb_insert_color(&entry->rb_node, root); +} + +static void +phys_entry__init(struct phys_entry *entry, u64 start, u64 bsize, u64 node) +{ + entry->start = start; + entry->end = start + bsize; + entry->node = node; + RB_CLEAR_NODE(&entry->rb_node); +} + +int mem2node__init(struct mem2node *map, struct perf_env *env) +{ + struct memory_node *n, *nodes = &env->memory_nodes[0]; + struct phys_entry *entries, *tmp_entries; + u64 bsize = env->memory_bsize; + int i, j = 0, max = 0; + + memset(map, 0x0, 
sizeof(*map)); + map->root = RB_ROOT; + + for (i = 0; i < env->nr_memory_nodes; i++) { + n = &nodes[i]; + max += bitmap_weight(n->set, n->size); + } + + entries = zalloc(sizeof(*entries) * max); + if (!entries) + return -ENOMEM; + + for (i = 0; i < env->nr_memory_nodes; i++) { + u64 bit; + + n = &nodes[i]; + + for (bit = 0; bit < n->size; bit++) { + u64 start; + + if (!test_bit(bit, n->set)) + continue; + + start = bit * bsize; + + /* + * Merge nearby areas, we walk in order + * through the bitmap, so no need to sort. + */ + if (j > 0) { + struct phys_entry *prev = &entries[j - 1]; + + if ((prev->end == start) && + (prev->node == n->node)) { + prev->end += bsize; + continue; + } + } + + phys_entry__init(&entries[j++], start, bsize, n->node); + } + } + + /* Cut unused entries, due to merging. */ + tmp_entries = realloc(entries, sizeof(*entries) * j); + if (tmp_entries) + entries = tmp_entries; + + for (i = 0; i < j; i++) { + pr_debug("mem2node %03" PRIu64 " [0x%016" PRIx64 "-0x%016" PRIx64 "]\n", + entries[i].node, entries[i].start, entries[i].end); + + phys_entry__insert(&entries[i], &map->root); + } + + map->entries = entries; + return 0; +} + +void mem2node__exit(struct mem2node *map) +{ + zfree(&map->entries); +} + +int mem2node__node(struct mem2node *map, u64 addr) +{ + struct rb_node **p, *parent = NULL; + struct phys_entry *entry; + + p = &map->root.rb_node; + while (*p != NULL) { + parent = *p; + entry = rb_entry(parent, struct phys_entry, rb_node); + if (addr < entry->start) + p = &(*p)->rb_left; + else if (addr >= entry->end) + p = &(*p)->rb_right; + else + goto out; + } + + entry = NULL; +out: + return entry ? 
(int) entry->node : -1; +} diff --git a/tools/perf/util/mem2node.h b/tools/perf/util/mem2node.h new file mode 100644 index 000000000000..59c4752a2181 --- /dev/null +++ b/tools/perf/util/mem2node.h @@ -0,0 +1,19 @@ +#ifndef __MEM2NODE_H +#define __MEM2NODE_H + +#include <linux/rbtree.h> +#include "env.h" + +struct phys_entry; + +struct mem2node { + struct rb_root root; + struct phys_entry *entries; + int cnt; +}; + +int mem2node__init(struct mem2node *map, struct perf_env *env); +void mem2node__exit(struct mem2node *map); +int mem2node__node(struct mem2node *map, u64 addr); + +#endif /* __MEM2NODE_H */ diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 91531a7c8fbf..fc832676a798 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -64,25 +64,6 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map, } /* - * legacy interface for mmap read. - * Don't use it. Use perf_mmap__read_event(). - */ -union perf_event *perf_mmap__read_forward(struct perf_mmap *map) -{ - u64 head; - - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&map->refcnt)) - return NULL; - - head = perf_mmap__read_head(map); - - return perf_mmap__read(map, &map->prev, head); -} - -/* * Read event from ring buffer one by one. * Return one event for each call. 
* @@ -94,9 +75,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map) * } * perf_mmap__read_done() */ -union perf_event *perf_mmap__read_event(struct perf_mmap *map, - bool overwrite, - u64 *startp, u64 end) +union perf_event *perf_mmap__read_event(struct perf_mmap *map) { union perf_event *event; @@ -106,17 +85,14 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map, if (!refcount_read(&map->refcnt)) return NULL; - if (startp == NULL) - return NULL; - /* non-overwirte doesn't pause the ringbuffer */ - if (!overwrite) - end = perf_mmap__read_head(map); + if (!map->overwrite) + map->end = perf_mmap__read_head(map); - event = perf_mmap__read(map, startp, end); + event = perf_mmap__read(map, &map->start, map->end); - if (!overwrite) - map->prev = *startp; + if (!map->overwrite) + map->prev = map->start; return event; } @@ -139,9 +115,9 @@ void perf_mmap__put(struct perf_mmap *map) perf_mmap__munmap(map); } -void perf_mmap__consume(struct perf_mmap *map, bool overwrite) +void perf_mmap__consume(struct perf_mmap *map) { - if (!overwrite) { + if (!map->overwrite) { u64 old = map->prev; perf_mmap__write_tail(map, old); @@ -191,7 +167,7 @@ void perf_mmap__munmap(struct perf_mmap *map) int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd) { /* - * The last one will be done at perf_evlist__mmap_consume(), so that we + * The last one will be done at perf_mmap__consume(), so that we * make sure we don't prevent tools from consuming every last event in * the ring buffer. 
* @@ -223,19 +199,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd) return 0; } -static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) +static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) { struct perf_event_header *pheader; - u64 evt_head = head; + u64 evt_head = *start; int size = mask + 1; - pr_debug2("overwrite_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); - pheader = (struct perf_event_header *)(buf + (head & mask)); - *start = head; + pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start); + pheader = (struct perf_event_header *)(buf + (*start & mask)); while (true) { - if (evt_head - head >= (unsigned int)size) { + if (evt_head - *start >= (unsigned int)size) { pr_debug("Finished reading overwrite ring buffer: rewind\n"); - if (evt_head - head > (unsigned int)size) + if (evt_head - *start > (unsigned int)size) evt_head -= pheader->size; *end = evt_head; return 0; @@ -259,27 +234,26 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u6 /* * Report the start and end of the available data in ringbuffer */ -int perf_mmap__read_init(struct perf_mmap *md, bool overwrite, - u64 *startp, u64 *endp) +static int __perf_mmap__read_init(struct perf_mmap *md) { u64 head = perf_mmap__read_head(md); u64 old = md->prev; unsigned char *data = md->base + page_size; unsigned long size; - *startp = overwrite ? head : old; - *endp = overwrite ? old : head; + md->start = md->overwrite ? head : old; + md->end = md->overwrite ? old : head; - if (*startp == *endp) + if (md->start == md->end) return -EAGAIN; - size = *endp - *startp; + size = md->end - md->start; if (size > (unsigned long)(md->mask) + 1) { - if (!overwrite) { + if (!md->overwrite) { WARN_ONCE(1, "failed to keep up with mmap data. 
(warn only once)\n"); md->prev = head; - perf_mmap__consume(md, overwrite); + perf_mmap__consume(md); return -EAGAIN; } @@ -287,33 +261,43 @@ int perf_mmap__read_init(struct perf_mmap *md, bool overwrite, * Backward ring buffer is full. We still have a chance to read * most of data from it. */ - if (overwrite_rb_find_range(data, md->mask, head, startp, endp)) + if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end)) return -EINVAL; } return 0; } -int perf_mmap__push(struct perf_mmap *md, bool overwrite, - void *to, int push(void *to, void *buf, size_t size)) +int perf_mmap__read_init(struct perf_mmap *map) +{ + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!refcount_read(&map->refcnt)) + return -ENOENT; + + return __perf_mmap__read_init(map); +} + +int perf_mmap__push(struct perf_mmap *md, void *to, + int push(void *to, void *buf, size_t size)) { u64 head = perf_mmap__read_head(md); - u64 end, start; unsigned char *data = md->base + page_size; unsigned long size; void *buf; int rc = 0; - rc = perf_mmap__read_init(md, overwrite, &start, &end); + rc = perf_mmap__read_init(md); if (rc < 0) return (rc == -EAGAIN) ? 
0 : -1; - size = end - start; + size = md->end - md->start; - if ((start & md->mask) + size != (end & md->mask)) { - buf = &data[start & md->mask]; - size = md->mask + 1 - (start & md->mask); - start += size; + if ((md->start & md->mask) + size != (md->end & md->mask)) { + buf = &data[md->start & md->mask]; + size = md->mask + 1 - (md->start & md->mask); + md->start += size; if (push(to, buf, size) < 0) { rc = -1; @@ -321,9 +305,9 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, } } - buf = &data[start & md->mask]; - size = end - start; - start += size; + buf = &data[md->start & md->mask]; + size = md->end - md->start; + md->start += size; if (push(to, buf, size) < 0) { rc = -1; @@ -331,7 +315,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, } md->prev = head; - perf_mmap__consume(md, overwrite); + perf_mmap__consume(md); out: return rc; } @@ -344,5 +328,11 @@ out: */ void perf_mmap__read_done(struct perf_mmap *map) { + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. 
+ */ + if (!refcount_read(&map->refcnt)) + return; + map->prev = perf_mmap__read_head(map); } diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index ec7d3a24e276..d82294db1295 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -20,6 +20,9 @@ struct perf_mmap { int fd; refcount_t refcnt; u64 prev; + u64 start; + u64 end; + bool overwrite; struct auxtrace_mmap auxtrace_mmap; char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); }; @@ -63,7 +66,7 @@ void perf_mmap__munmap(struct perf_mmap *map); void perf_mmap__get(struct perf_mmap *map); void perf_mmap__put(struct perf_mmap *map); -void perf_mmap__consume(struct perf_mmap *map, bool overwrite); +void perf_mmap__consume(struct perf_mmap *map); static inline u64 perf_mmap__read_head(struct perf_mmap *mm) { @@ -86,16 +89,13 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) union perf_event *perf_mmap__read_forward(struct perf_mmap *map); -union perf_event *perf_mmap__read_event(struct perf_mmap *map, - bool overwrite, - u64 *startp, u64 end); +union perf_event *perf_mmap__read_event(struct perf_mmap *map); -int perf_mmap__push(struct perf_mmap *md, bool backward, - void *to, int push(void *to, void *buf, size_t size)); +int perf_mmap__push(struct perf_mmap *md, void *to, + int push(void *to, void *buf, size_t size)); size_t perf_mmap__mmap_len(struct perf_mmap *map); -int perf_mmap__read_init(struct perf_mmap *md, bool overwrite, - u64 *startp, u64 *endp); +int perf_mmap__read_init(struct perf_mmap *md); void perf_mmap__read_done(struct perf_mmap *map); #endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 34589c427e52..2fb0272146d8 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -206,8 +206,8 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) for_each_event(sys_dirent, evt_dir, evt_dirent) { - snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, - evt_dirent->d_name); 
+ scnprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, + evt_dirent->d_name); fd = open(evt_path, O_RDONLY); if (fd < 0) continue; @@ -1217,7 +1217,7 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, get_config_name(head_config), &config_terms); } -static int __parse_events_add_pmu(struct parse_events_state *parse_state, +int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, struct list_head *head_config, bool auto_merge_stats) { @@ -1247,7 +1247,12 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state, if (!head_config) { attr.type = pmu->type; evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats); - return evsel ? 0 : -ENOMEM; + if (evsel) { + evsel->pmu_name = name; + return 0; + } else { + return -ENOMEM; + } } if (perf_pmu__check_alias(pmu, head_config, &info)) @@ -1276,18 +1281,12 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state, evsel->snapshot = info.snapshot; evsel->metric_expr = info.metric_expr; evsel->metric_name = info.metric_name; + evsel->pmu_name = name; } return evsel ? 
0 : -ENOMEM; } -int parse_events_add_pmu(struct parse_events_state *parse_state, - struct list_head *list, char *name, - struct list_head *head_config) -{ - return __parse_events_add_pmu(parse_state, list, name, head_config, false); -} - int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, struct list_head **listp) { @@ -1317,8 +1316,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, return -1; list_add_tail(&term->list, head); - if (!__parse_events_add_pmu(parse_state, list, - pmu->name, head, true)) { + if (!parse_events_add_pmu(parse_state, list, + pmu->name, head, true)) { pr_debug("%s -> %s/%s/\n", str, pmu->name, alias->str); ok++; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 88108cd11b4c..5015cfd58277 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -167,7 +167,7 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, void *ptr, char *type, u64 len); int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, - struct list_head *head_config); + struct list_head *head_config, bool auto_merge_stats); int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 655ecff636a8..a1a01b1ac8b8 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -175,7 +175,7 @@ bpf_source [^,{}]+\.c[a-zA-Z0-9._]* num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ -name [a-zA-Z_*?][a-zA-Z0-9_*?.]* +name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]]* name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? 
/* If you add a modifier you need to update check_modifier() */ diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index e81a20ea8d7d..7afeb80cc39e 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -8,6 +8,7 @@ #define YYDEBUG 1 +#include <fnmatch.h> #include <linux/compiler.h> #include <linux/list.h> #include <linux/types.h> @@ -231,9 +232,13 @@ PE_NAME opt_event_config YYABORT; ALLOC_LIST(list); - if (parse_events_add_pmu(_parse_state, list, $1, $2)) { + if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) { struct perf_pmu *pmu = NULL; int ok = 0; + char *pattern; + + if (asprintf(&pattern, "%s*", $1) < 0) + YYABORT; while ((pmu = perf_pmu__scan(pmu)) != NULL) { char *name = pmu->name; @@ -241,14 +246,19 @@ PE_NAME opt_event_config if (!strncmp(name, "uncore_", 7) && strncmp($1, "uncore_", 7)) name += 7; - if (!strncmp($1, name, strlen($1))) { - if (parse_events_copy_term_list(orig_terms, &terms)) + if (!fnmatch(pattern, name, 0)) { + if (parse_events_copy_term_list(orig_terms, &terms)) { + free(pattern); YYABORT; - if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms)) + } + if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true)) ok++; parse_events_terms__delete(terms); } } + + free(pattern); + if (!ok) YYABORT; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 57e38fdf0b34..064bdcb7bd78 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -351,7 +351,7 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) if (pmu_alias_info_file(name)) continue; - snprintf(path, PATH_MAX, "%s/%s", dir, name); + scnprintf(path, PATH_MAX, "%s/%s", dir, name); file = fopen(path, "r"); if (!file) { @@ -576,6 +576,34 @@ char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) return NULL; } +/* Return zero when the cpuid from the mapfile.csv matches the + * cpuid string generated on this platform. + * Otherwise return non-zero. 
+ */ +int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) +{ + regex_t re; + regmatch_t pmatch[1]; + int match; + + if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) { + /* Warn unable to generate match particular string. */ + pr_info("Invalid regular expression %s\n", mapcpuid); + return 1; + } + + match = !regexec(&re, cpuid, 1, pmatch, 0); + regfree(&re); + if (match) { + size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); + + /* Verify the entire string matched. */ + if (match_len == strlen(cpuid)) + return 0; + } + return 1; +} + static char *perf_pmu__getcpuid(struct perf_pmu *pmu) { char *cpuid; @@ -610,31 +638,14 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) i = 0; for (;;) { - regex_t re; - regmatch_t pmatch[1]; - int match; - map = &pmu_events_map[i++]; if (!map->table) { map = NULL; break; } - if (regcomp(&re, map->cpuid, REG_EXTENDED) != 0) { - /* Warn unable to generate match particular string. */ - pr_info("Invalid regular expression %s\n", map->cpuid); + if (!strcmp_cpuid_str(map->cpuid, cpuid)) break; - } - - match = !regexec(&re, cpuid, 1, pmatch, 0); - regfree(&re); - if (match) { - size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); - - /* Verify the entire string matched. 
*/ - if (match_len == strlen(cpuid)) - break; - } } free(cpuid); return map; diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index a5731de0e5eb..c37fbef1711d 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -423,20 +423,20 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, pr_warning("Failed to get the type of %s.\n", varname); return -ENOENT; } - pr_debug2("Var real type: (%x)\n", (unsigned)dwarf_dieoffset(&type)); + pr_debug2("Var real type: %s (%x)\n", dwarf_diename(&type), + (unsigned)dwarf_dieoffset(&type)); tag = dwarf_tag(&type); if (field->name[0] == '[' && (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)) { - if (field->next) - /* Save original type for next field */ - memcpy(die_mem, &type, sizeof(*die_mem)); + /* Save original type for next field or type */ + memcpy(die_mem, &type, sizeof(*die_mem)); /* Get the type of this array */ if (die_get_real_type(&type, &type) == NULL) { pr_warning("Failed to get the type of %s.\n", varname); return -ENOENT; } - pr_debug2("Array real type: (%x)\n", + pr_debug2("Array real type: %s (%x)\n", dwarf_diename(&type), (unsigned)dwarf_dieoffset(&type)); if (tag == DW_TAG_pointer_type) { ref = zalloc(sizeof(struct probe_trace_arg_ref)); @@ -448,9 +448,6 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, *ref_ptr = ref; } ref->offset += dwarf_bytesize(&type) * field->index; - if (!field->next) - /* Save vr_die for converting types */ - memcpy(die_mem, vr_die, sizeof(*die_mem)); goto next; } else if (tag == DW_TAG_pointer_type) { /* Check the pointer and dereference */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index b1e999bd21ef..863b61478edd 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -12,6 +12,30 @@ #include "print_binary.h" #include "thread_map.h" +#if PY_MAJOR_VERSION < 3 +#define _PyUnicode_FromString(arg) \ + PyString_FromString(arg) +#define 
_PyUnicode_AsString(arg) \ + PyString_AsString(arg) +#define _PyUnicode_FromFormat(...) \ + PyString_FromFormat(__VA_ARGS__) +#define _PyLong_FromLong(arg) \ + PyInt_FromLong(arg) + +#else + +#define _PyUnicode_FromString(arg) \ + PyUnicode_FromString(arg) +#define _PyUnicode_FromFormat(...) \ + PyUnicode_FromFormat(__VA_ARGS__) +#define _PyLong_FromLong(arg) \ + PyLong_FromLong(arg) +#endif + +#ifndef Py_TYPE +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif + /* * Provide these two so that we don't have to link against callchain.c and * start dragging hist.c, etc. @@ -49,7 +73,11 @@ int eprintf(int level, int var, const char *fmt, ...) # define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, #endif +#if PY_MAJOR_VERSION < 3 PyMODINIT_FUNC initperf(void); +#else +PyMODINIT_FUNC PyInit_perf(void); +#endif #define member_def(type, member, ptype, help) \ { #member, ptype, \ @@ -107,7 +135,7 @@ static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent) pevent->event.mmap.pgoff, pevent->event.mmap.filename) < 0) { ret = PyErr_NoMemory(); } else { - ret = PyString_FromString(s); + ret = _PyUnicode_FromString(s); free(s); } return ret; @@ -138,7 +166,7 @@ static PyMemberDef pyrf_task_event__members[] = { static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent) { - return PyString_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, " + return _PyUnicode_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, " "ptid: %u, time: %" PRIu64 "}", pevent->event.header.type == PERF_RECORD_FORK ? 
"fork" : "exit", pevent->event.fork.pid, @@ -171,7 +199,7 @@ static PyMemberDef pyrf_comm_event__members[] = { static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent) { - return PyString_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }", + return _PyUnicode_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }", pevent->event.comm.pid, pevent->event.comm.tid, pevent->event.comm.comm); @@ -202,7 +230,7 @@ static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent) { struct throttle_event *te = (struct throttle_event *)(&pevent->event.header + 1); - return PyString_FromFormat("{ type: %sthrottle, time: %" PRIu64 ", id: %" PRIu64 + return _PyUnicode_FromFormat("{ type: %sthrottle, time: %" PRIu64 ", id: %" PRIu64 ", stream_id: %" PRIu64 " }", pevent->event.header.type == PERF_RECORD_THROTTLE ? "" : "un", te->time, te->id, te->stream_id); @@ -237,7 +265,7 @@ static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent) pevent->event.lost.id, pevent->event.lost.lost) < 0) { ret = PyErr_NoMemory(); } else { - ret = PyString_FromString(s); + ret = _PyUnicode_FromString(s); free(s); } return ret; @@ -264,7 +292,7 @@ static PyMemberDef pyrf_read_event__members[] = { static PyObject *pyrf_read_event__repr(struct pyrf_event *pevent) { - return PyString_FromFormat("{ type: read, pid: %u, tid: %u }", + return _PyUnicode_FromFormat("{ type: read, pid: %u, tid: %u }", pevent->event.read.pid, pevent->event.read.tid); /* @@ -299,7 +327,7 @@ static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent) if (asprintf(&s, "{ type: sample }") < 0) { ret = PyErr_NoMemory(); } else { - ret = PyString_FromString(s); + ret = _PyUnicode_FromString(s); free(s); } return ret; @@ -330,7 +358,7 @@ tracepoint_field(struct pyrf_event *pe, struct format_field *field) } if (field->flags & FIELD_IS_STRING && is_printable_array(data + offset, len)) { - ret = PyString_FromString((char *)data + offset); + ret = _PyUnicode_FromString((char *)data + offset); } else { 
ret = PyByteArray_FromStringAndSize((const char *) data + offset, len); field->flags &= ~FIELD_IS_STRING; @@ -352,7 +380,7 @@ tracepoint_field(struct pyrf_event *pe, struct format_field *field) static PyObject* get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) { - const char *str = PyString_AsString(PyObject_Str(attr_name)); + const char *str = _PyUnicode_AsString(PyObject_Str(attr_name)); struct perf_evsel *evsel = pevent->evsel; struct format_field *field; @@ -416,7 +444,7 @@ static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent) !!(pevent->event.header.misc & PERF_RECORD_MISC_SWITCH_OUT)) < 0) { ret = PyErr_NoMemory(); } else { - ret = PyString_FromString(s); + ret = _PyUnicode_FromString(s); free(s); } return ret; @@ -528,7 +556,7 @@ static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus, static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus) { cpu_map__put(pcpus->cpus); - pcpus->ob_type->tp_free((PyObject*)pcpus); + Py_TYPE(pcpus)->tp_free((PyObject*)pcpus); } static Py_ssize_t pyrf_cpu_map__length(PyObject *obj) @@ -597,7 +625,7 @@ static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads) { thread_map__put(pthreads->threads); - pthreads->ob_type->tp_free((PyObject*)pthreads); + Py_TYPE(pthreads)->tp_free((PyObject*)pthreads); } static Py_ssize_t pyrf_thread_map__length(PyObject *obj) @@ -759,7 +787,7 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel, static void pyrf_evsel__delete(struct pyrf_evsel *pevsel) { perf_evsel__exit(&pevsel->evsel); - pevsel->ob_type->tp_free((PyObject*)pevsel); + Py_TYPE(pevsel)->tp_free((PyObject*)pevsel); } static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel, @@ -850,7 +878,7 @@ static int pyrf_evlist__init(struct pyrf_evlist *pevlist, static void pyrf_evlist__delete(struct pyrf_evlist *pevlist) { perf_evlist__exit(&pevlist->evlist); - pevlist->ob_type->tp_free((PyObject*)pevlist); + 
Py_TYPE(pevlist)->tp_free((PyObject*)pevlist); } static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist, @@ -902,12 +930,16 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, for (i = 0; i < evlist->pollfd.nr; ++i) { PyObject *file; +#if PY_MAJOR_VERSION < 3 FILE *fp = fdopen(evlist->pollfd.entries[i].fd, "r"); if (fp == NULL) goto free_list; file = PyFile_FromFile(fp, "perf", "r", NULL); +#else + file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1, NULL, NULL, NULL, 1); +#endif if (file == NULL) goto free_list; @@ -951,13 +983,18 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, union perf_event *event; int sample_id_all = 1, cpu; static char *kwlist[] = { "cpu", "sample_id_all", NULL }; + struct perf_mmap *md; int err; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist, &cpu, &sample_id_all)) return NULL; - event = perf_evlist__mmap_read(evlist, cpu); + md = &evlist->mmap[cpu]; + if (perf_mmap__read_init(md) < 0) + goto end; + + event = perf_mmap__read_event(md); if (event != NULL) { PyObject *pyevent = pyrf_event__new(event); struct pyrf_event *pevent = (struct pyrf_event *)pyevent; @@ -967,22 +1004,24 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, return PyErr_NoMemory(); evsel = perf_evlist__event2evsel(evlist, event); - if (!evsel) + if (!evsel) { + Py_INCREF(Py_None); return Py_None; + } pevent->evsel = evsel; err = perf_evsel__parse_sample(evsel, event, &pevent->sample); /* Consume the even only after we parsed it out. 
*/ - perf_evlist__mmap_consume(evlist, cpu); + perf_mmap__consume(md); if (err) return PyErr_Format(PyExc_OSError, "perf: can't parse sample, err=%d", err); return pyevent; } - +end: Py_INCREF(Py_None); return Py_None; } @@ -1194,9 +1233,9 @@ static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel, tp_format = trace_event__tp_format(sys, name); if (IS_ERR(tp_format)) - return PyInt_FromLong(-1); + return _PyLong_FromLong(-1); - return PyInt_FromLong(tp_format->id); + return _PyLong_FromLong(tp_format->id); } static PyMethodDef perf__methods[] = { @@ -1209,11 +1248,31 @@ static PyMethodDef perf__methods[] = { { .ml_name = NULL, } }; +#if PY_MAJOR_VERSION < 3 PyMODINIT_FUNC initperf(void) +#else +PyMODINIT_FUNC PyInit_perf(void) +#endif { PyObject *obj; int i; - PyObject *dict, *module = Py_InitModule("perf", perf__methods); + PyObject *dict; +#if PY_MAJOR_VERSION < 3 + PyObject *module = Py_InitModule("perf", perf__methods); +#else + static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "perf", /* m_name */ + "", /* m_doc */ + -1, /* m_size */ + perf__methods, /* m_methods */ + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL, /* m_free */ + }; + PyObject *module = PyModule_Create(&moduledef); +#endif if (module == NULL || pyrf_event__setup_types() < 0 || @@ -1221,7 +1280,11 @@ PyMODINIT_FUNC initperf(void) pyrf_evsel__setup_types() < 0 || pyrf_thread_map__setup_types() < 0 || pyrf_cpu_map__setup_types() < 0) +#if PY_MAJOR_VERSION < 3 return; +#else + return module; +#endif /* The page_size is placed in util object. 
*/ page_size = sysconf(_SC_PAGE_SIZE); @@ -1270,7 +1333,7 @@ PyMODINIT_FUNC initperf(void) goto error; for (i = 0; perf__constants[i].name != NULL; i++) { - obj = PyInt_FromLong(perf__constants[i].value); + obj = _PyLong_FromLong(perf__constants[i].value); if (obj == NULL) goto error; PyDict_SetItemString(dict, perf__constants[i].name, obj); @@ -1280,6 +1343,9 @@ PyMODINIT_FUNC initperf(void) error: if (PyErr_Occurred()) PyErr_SetString(PyExc_ImportError, "perf: Init failed!"); +#if PY_MAJOR_VERSION >= 3 + return module; +#endif } /* diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 1e97937b03a9..9cfc7bf16531 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -5,6 +5,7 @@ #include "parse-events.h" #include <errno.h> #include <api/fs/fs.h> +#include <subcmd/parse-options.h> #include "util.h" #include "cloexec.h" @@ -137,6 +138,7 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts, struct perf_evsel *evsel; bool use_sample_identifier = false; bool use_comm_exec; + bool sample_id = opts->sample_id; /* * Set the evsel leader links before we configure attributes, @@ -163,8 +165,7 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts, * match the id. */ use_sample_identifier = perf_can_sample_identifier(); - evlist__for_each_entry(evlist, evsel) - perf_evsel__set_sample_id(evsel, use_sample_identifier); + sample_id = true; } else if (evlist->nr_entries > 1) { struct perf_evsel *first = perf_evlist__first(evlist); @@ -174,6 +175,10 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts, use_sample_identifier = perf_can_sample_identifier(); break; } + sample_id = true; + } + + if (sample_id) { evlist__for_each_entry(evlist, evsel) perf_evsel__set_sample_id(evsel, use_sample_identifier); } @@ -215,11 +220,21 @@ static int record_opts__config_freq(struct record_opts *opts) * User specified frequency is over current maximum. 
*/ if (user_freq && (max_rate < opts->freq)) { - pr_err("Maximum frequency rate (%u) reached.\n" - "Please use -F freq option with lower value or consider\n" - "tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n", - max_rate); - return -1; + if (opts->strict_freq) { + pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n" + " Please use -F freq option with a lower value or consider\n" + " tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n", + max_rate); + return -1; + } else { + pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n" + " The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n" + " The kernel will lower it when perf's interrupts take too long.\n" + " Use --strict-freq to disable this throttling, refusing to record.\n", + max_rate, opts->freq, max_rate); + + opts->freq = max_rate; + } } /* @@ -287,3 +302,25 @@ out_delete: perf_evlist__delete(temp_evlist); return ret; } + +int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused) +{ + unsigned int freq; + struct record_opts *opts = opt->value; + + if (!str) + return -EINVAL; + + if (strcasecmp(str, "max") == 0) { + if (get_max_rate(&freq)) { + pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n"); + return -1; + } + pr_info("info: Using a maximum frequency rate of %'d Hz\n", freq); + } else { + freq = atoi(str); + } + + opts->user_freq = freq; + return 0; +} diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index ea070883c593..10dd5fce082b 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -49,7 +49,37 @@ #include "print_binary.h" #include "stat.h" +#if PY_MAJOR_VERSION < 3 +#define _PyUnicode_FromString(arg) \ + PyString_FromString(arg) +#define _PyUnicode_FromStringAndSize(arg1, arg2) \ + 
PyString_FromStringAndSize((arg1), (arg2)) +#define _PyBytes_FromStringAndSize(arg1, arg2) \ + PyString_FromStringAndSize((arg1), (arg2)) +#define _PyLong_FromLong(arg) \ + PyInt_FromLong(arg) +#define _PyLong_AsLong(arg) \ + PyInt_AsLong(arg) +#define _PyCapsule_New(arg1, arg2, arg3) \ + PyCObject_FromVoidPtr((arg1), (arg2)) + PyMODINIT_FUNC initperf_trace_context(void); +#else +#define _PyUnicode_FromString(arg) \ + PyUnicode_FromString(arg) +#define _PyUnicode_FromStringAndSize(arg1, arg2) \ + PyUnicode_FromStringAndSize((arg1), (arg2)) +#define _PyBytes_FromStringAndSize(arg1, arg2) \ + PyBytes_FromStringAndSize((arg1), (arg2)) +#define _PyLong_FromLong(arg) \ + PyLong_FromLong(arg) +#define _PyLong_AsLong(arg) \ + PyLong_AsLong(arg) +#define _PyCapsule_New(arg1, arg2, arg3) \ + PyCapsule_New((arg1), (arg2), (arg3)) + +PyMODINIT_FUNC PyInit_perf_trace_context(void); +#endif #define TRACE_EVENT_TYPE_MAX \ ((1 << (sizeof(unsigned short) * 8)) - 1) @@ -135,7 +165,7 @@ static int get_argument_count(PyObject *handler) PyObject *arg_count_obj = PyObject_GetAttrString(code_obj, "co_argcount"); if (arg_count_obj) { - arg_count = (int) PyInt_AsLong(arg_count_obj); + arg_count = (int) _PyLong_AsLong(arg_count_obj); Py_DECREF(arg_count_obj); } Py_DECREF(code_obj); @@ -182,10 +212,10 @@ static void define_value(enum print_arg_type field_type, value = eval_flag(field_value); - PyTuple_SetItem(t, n++, PyString_FromString(ev_name)); - PyTuple_SetItem(t, n++, PyString_FromString(field_name)); - PyTuple_SetItem(t, n++, PyInt_FromLong(value)); - PyTuple_SetItem(t, n++, PyString_FromString(field_str)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(value)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_str)); try_call_object(handler_name, t); @@ -223,10 +253,10 @@ static void define_field(enum print_arg_type field_type, if (!t) Py_FatalError("couldn't 
create Python tuple"); - PyTuple_SetItem(t, n++, PyString_FromString(ev_name)); - PyTuple_SetItem(t, n++, PyString_FromString(field_name)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name)); if (field_type == PRINT_FLAGS) - PyTuple_SetItem(t, n++, PyString_FromString(delim)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(delim)); try_call_object(handler_name, t); @@ -325,12 +355,12 @@ static PyObject *get_field_numeric_entry(struct event_format *event, if (field->flags & FIELD_IS_SIGNED) { if ((long long)val >= LONG_MIN && (long long)val <= LONG_MAX) - obj = PyInt_FromLong(val); + obj = _PyLong_FromLong(val); else obj = PyLong_FromLongLong(val); } else { if (val <= LONG_MAX) - obj = PyInt_FromLong(val); + obj = _PyLong_FromLong(val); else obj = PyLong_FromUnsignedLongLong(val); } @@ -389,9 +419,9 @@ static PyObject *python_process_callchain(struct perf_sample *sample, pydict_set_item_string_decref(pysym, "end", PyLong_FromUnsignedLongLong(node->sym->end)); pydict_set_item_string_decref(pysym, "binding", - PyInt_FromLong(node->sym->binding)); + _PyLong_FromLong(node->sym->binding)); pydict_set_item_string_decref(pysym, "name", - PyString_FromStringAndSize(node->sym->name, + _PyUnicode_FromStringAndSize(node->sym->name, node->sym->namelen)); pydict_set_item_string_decref(pyelem, "sym", pysym); } @@ -406,7 +436,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample, dsoname = map->dso->name; } pydict_set_item_string_decref(pyelem, "dso", - PyString_FromString(dsoname)); + _PyUnicode_FromString(dsoname)); } callchain_cursor_advance(&callchain_cursor); @@ -483,16 +513,16 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, if (!dict_sample) Py_FatalError("couldn't create Python dictionary"); - pydict_set_item_string_decref(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel))); - pydict_set_item_string_decref(dict, "attr", PyString_FromStringAndSize( + 
pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel))); + pydict_set_item_string_decref(dict, "attr", _PyUnicode_FromStringAndSize( (const char *)&evsel->attr, sizeof(evsel->attr))); pydict_set_item_string_decref(dict_sample, "pid", - PyInt_FromLong(sample->pid)); + _PyLong_FromLong(sample->pid)); pydict_set_item_string_decref(dict_sample, "tid", - PyInt_FromLong(sample->tid)); + _PyLong_FromLong(sample->tid)); pydict_set_item_string_decref(dict_sample, "cpu", - PyInt_FromLong(sample->cpu)); + _PyLong_FromLong(sample->cpu)); pydict_set_item_string_decref(dict_sample, "ip", PyLong_FromUnsignedLongLong(sample->ip)); pydict_set_item_string_decref(dict_sample, "time", @@ -504,17 +534,17 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, set_sample_read_in_dict(dict_sample, sample, evsel); pydict_set_item_string_decref(dict, "sample", dict_sample); - pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize( + pydict_set_item_string_decref(dict, "raw_buf", _PyBytes_FromStringAndSize( (const char *)sample->raw_data, sample->raw_size)); pydict_set_item_string_decref(dict, "comm", - PyString_FromString(thread__comm_str(al->thread))); + _PyUnicode_FromString(thread__comm_str(al->thread))); if (al->map) { pydict_set_item_string_decref(dict, "dso", - PyString_FromString(al->map->dso->name)); + _PyUnicode_FromString(al->map->dso->name)); } if (al->sym) { pydict_set_item_string_decref(dict, "symbol", - PyString_FromString(al->sym->name)); + _PyUnicode_FromString(al->sym->name)); } pydict_set_item_string_decref(dict, "callchain", callchain); @@ -574,9 +604,9 @@ static void python_process_tracepoint(struct perf_sample *sample, scripting_context->event_data = data; scripting_context->pevent = evsel->tp_format->pevent; - context = PyCObject_FromVoidPtr(scripting_context, NULL); + context = _PyCapsule_New(scripting_context, NULL, NULL); - PyTuple_SetItem(t, n++, PyString_FromString(handler_name)); + 
PyTuple_SetItem(t, n++, _PyUnicode_FromString(handler_name)); PyTuple_SetItem(t, n++, context); /* ip unwinding */ @@ -585,18 +615,18 @@ static void python_process_tracepoint(struct perf_sample *sample, Py_INCREF(callchain); if (!dict) { - PyTuple_SetItem(t, n++, PyInt_FromLong(cpu)); - PyTuple_SetItem(t, n++, PyInt_FromLong(s)); - PyTuple_SetItem(t, n++, PyInt_FromLong(ns)); - PyTuple_SetItem(t, n++, PyInt_FromLong(pid)); - PyTuple_SetItem(t, n++, PyString_FromString(comm)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(s)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(ns)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(pid)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(comm)); PyTuple_SetItem(t, n++, callchain); } else { - pydict_set_item_string_decref(dict, "common_cpu", PyInt_FromLong(cpu)); - pydict_set_item_string_decref(dict, "common_s", PyInt_FromLong(s)); - pydict_set_item_string_decref(dict, "common_ns", PyInt_FromLong(ns)); - pydict_set_item_string_decref(dict, "common_pid", PyInt_FromLong(pid)); - pydict_set_item_string_decref(dict, "common_comm", PyString_FromString(comm)); + pydict_set_item_string_decref(dict, "common_cpu", _PyLong_FromLong(cpu)); + pydict_set_item_string_decref(dict, "common_s", _PyLong_FromLong(s)); + pydict_set_item_string_decref(dict, "common_ns", _PyLong_FromLong(ns)); + pydict_set_item_string_decref(dict, "common_pid", _PyLong_FromLong(pid)); + pydict_set_item_string_decref(dict, "common_comm", _PyUnicode_FromString(comm)); pydict_set_item_string_decref(dict, "common_callchain", callchain); } for (field = event->format.fields; field; field = field->next) { @@ -615,7 +645,7 @@ static void python_process_tracepoint(struct perf_sample *sample, } if (field->flags & FIELD_IS_STRING && is_printable_array(data + offset, len)) { - obj = PyString_FromString((char *) data + offset); + obj = _PyUnicode_FromString((char *) data + offset); } else { obj = PyByteArray_FromStringAndSize((const char 
*) data + offset, len); field->flags &= ~FIELD_IS_STRING; @@ -668,7 +698,7 @@ static PyObject *tuple_new(unsigned int sz) static int tuple_set_u64(PyObject *t, unsigned int pos, u64 val) { #if BITS_PER_LONG == 64 - return PyTuple_SetItem(t, pos, PyInt_FromLong(val)); + return PyTuple_SetItem(t, pos, _PyLong_FromLong(val)); #endif #if BITS_PER_LONG == 32 return PyTuple_SetItem(t, pos, PyLong_FromLongLong(val)); @@ -677,12 +707,12 @@ static int tuple_set_u64(PyObject *t, unsigned int pos, u64 val) static int tuple_set_s32(PyObject *t, unsigned int pos, s32 val) { - return PyTuple_SetItem(t, pos, PyInt_FromLong(val)); + return PyTuple_SetItem(t, pos, _PyLong_FromLong(val)); } static int tuple_set_string(PyObject *t, unsigned int pos, const char *s) { - return PyTuple_SetItem(t, pos, PyString_FromString(s)); + return PyTuple_SetItem(t, pos, _PyUnicode_FromString(s)); } static int python_export_evsel(struct db_export *dbe, struct perf_evsel *evsel) @@ -1029,8 +1059,8 @@ process_stat(struct perf_evsel *counter, int cpu, int thread, u64 tstamp, return; } - PyTuple_SetItem(t, n++, PyInt_FromLong(cpu)); - PyTuple_SetItem(t, n++, PyInt_FromLong(thread)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(thread)); tuple_set_u64(t, n++, tstamp); tuple_set_u64(t, n++, count->val); @@ -1212,27 +1242,58 @@ static void set_table_handlers(struct tables *tables) SET_TABLE_HANDLER(call_return); } +#if PY_MAJOR_VERSION < 3 +static void _free_command_line(const char **command_line, int num) +{ + free(command_line); +} +#else +static void _free_command_line(wchar_t **command_line, int num) +{ + int i; + for (i = 0; i < num; i++) + PyMem_RawFree(command_line[i]); + free(command_line); +} +#endif + + /* * Start trace script */ static int python_start_script(const char *script, int argc, const char **argv) { struct tables *tables = &tables_global; +#if PY_MAJOR_VERSION < 3 const char **command_line; +#else + wchar_t **command_line; +#endif char 
buf[PATH_MAX]; int i, err = 0; FILE *fp; +#if PY_MAJOR_VERSION < 3 command_line = malloc((argc + 1) * sizeof(const char *)); command_line[0] = script; for (i = 1; i < argc + 1; i++) command_line[i] = argv[i - 1]; +#else + command_line = malloc((argc + 1) * sizeof(wchar_t *)); + command_line[0] = Py_DecodeLocale(script, NULL); + for (i = 1; i < argc + 1; i++) + command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); +#endif Py_Initialize(); +#if PY_MAJOR_VERSION < 3 initperf_trace_context(); - PySys_SetArgv(argc + 1, (char **)command_line); +#else + PyInit_perf_trace_context(); + PySys_SetArgv(argc + 1, command_line); +#endif fp = fopen(script, "r"); if (!fp) { @@ -1262,12 +1323,12 @@ static int python_start_script(const char *script, int argc, const char **argv) goto error; } - free(command_line); + _free_command_line(command_line, argc + 1); return err; error: Py_Finalize(); - free(command_line); + _free_command_line(command_line, argc + 1); return err; } diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index af415febbc46..001be4f9d3b9 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python from os import getenv @@ -28,6 +28,8 @@ class install_lib(_install_lib): cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ] +if cc != "clang": + cflags += ['-Wno-cast-function-type' ] src_perf = getenv('srctree') + '/tools/perf' build_lib = getenv('PYTHON_EXTBUILD_LIB') @@ -35,11 +37,11 @@ build_tmp = getenv('PYTHON_EXTBUILD_TMP') libtraceevent = getenv('LIBTRACEEVENT') libapikfs = getenv('LIBAPI') -ext_sources = [f.strip() for f in file('util/python-ext-sources') +ext_sources = [f.strip() for f in open('util/python-ext-sources') if len(f.strip()) > 0 and f[0] != '#'] # use full paths with source files -ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , 
ext_sources) +ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)) perf = Extension('perf', sources = ext_sources, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2da4d0456a03..e8514f651865 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -111,17 +111,20 @@ struct sort_entry sort_thread = { /* --sort comm */ +/* + * We can't use pointer comparison in functions below, + * because it gives different results based on pointer + * values, which could break some sorting assumptions. + */ static int64_t sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) { - /* Compare the addr that should be unique among comm */ return strcmp(comm__str(right->comm), comm__str(left->comm)); } static int64_t sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) { - /* Compare the addr that should be unique among comm */ return strcmp(comm__str(right->comm), comm__str(left->comm)); } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 32235657c1ac..a0061e0b0fad 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -92,7 +92,7 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { }; #undef ID -void perf_stat_evsel_id_init(struct perf_evsel *evsel) +static void perf_stat_evsel_id_init(struct perf_evsel *evsel) { struct perf_stat_evsel *ps = evsel->stats; int i; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index dbc6f7134f61..8f56ba4fd258 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -90,6 +90,8 @@ struct perf_stat_config { bool scale; FILE *output; unsigned int interval; + unsigned int timeout; + int times; struct runtime_stat *stats; int stats_num; }; @@ -126,8 +128,6 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel, #define perf_stat_evsel__is(evsel, id) \ __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id) -void perf_stat_evsel_id_init(struct perf_evsel *evsel); - extern struct runtime_stat rt_stat; extern struct stats 
walltime_nsecs_stats; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index cc065d4bfafc..62b2dd2253eb 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1582,7 +1582,7 @@ int dso__load(struct dso *dso, struct map *map) bool next_slot = false; bool is_reg; bool nsexit; - int sirc; + int sirc = -1; enum dso_binary_type symtab_type = binary_type_symtab[i]; @@ -1600,16 +1600,14 @@ int dso__load(struct dso *dso, struct map *map) nsinfo__mountns_exit(&nsc); is_reg = is_regular_file(name); - sirc = symsrc__init(ss, dso, name, symtab_type); + if (is_reg) + sirc = symsrc__init(ss, dso, name, symtab_type); if (nsexit) nsinfo__mountns_enter(dso->nsinfo, &nsc); - if (!is_reg || sirc < 0) { - if (sirc >= 0) - symsrc__destroy(ss); + if (!is_reg || sirc < 0) continue; - } if (!syms_ss && symsrc__has_symtab(ss)) { syms_ss = ss; @@ -1960,8 +1958,7 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { dso->binary_type = DSO_BINARY_TYPE__GUEST_KALLSYMS; - machine__mmap_name(machine, path, sizeof(path)); - dso__set_long_name(dso, strdup(path), true); + dso__set_long_name(dso, machine->mmap_name, false); map__fixup_start(map); map__fixup_end(map); } @@ -2224,3 +2221,25 @@ int symbol__config_symfs(const struct option *opt __maybe_unused, free(bf); return 0; } + +struct mem_info *mem_info__get(struct mem_info *mi) +{ + if (mi) + refcount_inc(&mi->refcnt); + return mi; +} + +void mem_info__put(struct mem_info *mi) +{ + if (mi && refcount_dec_and_test(&mi->refcnt)) + free(mi); +} + +struct mem_info *mem_info__new(void) +{ + struct mem_info *mi = zalloc(sizeof(*mi)); + + if (mi) + refcount_set(&mi->refcnt, 1); + return mi; +} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 0563f33c1eb3..70c16741f50a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -200,9 +200,10 @@ struct branch_info { }; 
struct mem_info { - struct addr_map_symbol iaddr; - struct addr_map_symbol daddr; - union perf_mem_data_src data_src; + struct addr_map_symbol iaddr; + struct addr_map_symbol daddr; + union perf_mem_data_src data_src; + refcount_t refcnt; }; struct addr_location { @@ -389,4 +390,16 @@ int sdt_notes__get_count(struct list_head *start); #define SDT_NOTE_NAME "stapsdt" #define NR_ADDR 3 +struct mem_info *mem_info__new(void); +struct mem_info *mem_info__get(struct mem_info *mi); +void mem_info__put(struct mem_info *mi); + +static inline void __mem_info__zput(struct mem_info **mi) +{ + mem_info__put(*mi); + *mi = NULL; +} + +#define mem_info__zput(mi) __mem_info__zput(&mi) + #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 303bdb84ab5a..895122d638dd 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -30,6 +30,14 @@ static const char **syscalltbl_native = syscalltbl_x86_64; #include <asm/syscalls_64.c> const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID; static const char **syscalltbl_native = syscalltbl_s390_64; +#elif defined(__powerpc64__) +#include <asm/syscalls_64.c> +const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_powerpc_64; +#elif defined(__powerpc__) +#include <asm/syscalls_32.c> +const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID; +static const char **syscalltbl_native = syscalltbl_powerpc_32; #endif struct syscall { diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 40cfa36c022a..14d44c3235b8 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -26,7 +26,6 @@ struct thread { pid_t ppid; int cpu; refcount_t refcnt; - char shortname[3]; bool comm_set; int comm_len; bool dead; /* if set thread has exited */ diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 3e1038f6491c..5d467d8ae9ab 100644 --- 
a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -32,6 +32,7 @@ static void thread_map__reset(struct thread_map *map, int start, int nr) size_t size = (nr - start) * sizeof(map->map[0]); memset(&map->map[start], 0, size); + map->err_thread = -1; } static struct thread_map *thread_map__realloc(struct thread_map *map, int nr) @@ -323,7 +324,7 @@ out_free_threads: } struct thread_map *thread_map__new_str(const char *pid, const char *tid, - uid_t uid, bool per_thread) + uid_t uid, bool all_threads) { if (pid) return thread_map__new_by_pid_str(pid); @@ -331,7 +332,7 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid, if (!tid && uid != UINT_MAX) return thread_map__new_by_uid(uid); - if (per_thread) + if (all_threads) return thread_map__new_all_cpus(); return thread_map__new_by_tid_str(tid); diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 0a806b99e73c..2f689c90a8c6 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -14,6 +14,7 @@ struct thread_map_data { struct thread_map { refcount_t refcnt; int nr; + int err_thread; struct thread_map_data map[]; }; @@ -31,7 +32,7 @@ struct thread_map *thread_map__get(struct thread_map *map); void thread_map__put(struct thread_map *map); struct thread_map *thread_map__new_str(const char *pid, - const char *tid, uid_t uid, bool per_thread); + const char *tid, uid_t uid, bool all_threads); struct thread_map *thread_map__new_by_tid_str(const char *tid_str); diff --git a/tools/perf/util/trigger.h b/tools/perf/util/trigger.h index 370138e7e35c..88223bc7c82b 100644 --- a/tools/perf/util/trigger.h +++ b/tools/perf/util/trigger.h @@ -12,7 +12,7 @@ * States and transits: * * - * OFF--(on)--> READY --(hit)--> HIT + * OFF--> ON --> READY --(hit)--> HIT * ^ | * | (ready) * | | @@ -27,8 +27,9 @@ struct trigger { volatile enum { TRIGGER_ERROR = -2, TRIGGER_OFF = -1, - TRIGGER_READY = 0, - TRIGGER_HIT = 1, + TRIGGER_ON = 0, + TRIGGER_READY = 1, + 
TRIGGER_HIT = 2, } state; const char *name; }; @@ -50,7 +51,7 @@ static inline bool trigger_is_error(struct trigger *t) static inline void trigger_on(struct trigger *t) { TRIGGER_WARN_ONCE(t, TRIGGER_OFF); - t->state = TRIGGER_READY; + t->state = TRIGGER_ON; } static inline void trigger_ready(struct trigger *t) diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 1e9c974faf67..7bdd239c795c 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -50,7 +50,7 @@ static int __report_module(struct addr_location *al, u64 ip, if (!mod) mod = dwfl_report_elf(ui->dwfl, dso->short_name, - dso->long_name, -1, al->map->start, + (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start, false); return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1; @@ -236,7 +236,8 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, if (err) goto out; - if (!dwfl_attach_state(ui->dwfl, EM_NONE, thread->tid, &callbacks, ui)) + err = !dwfl_attach_state(ui->dwfl, EM_NONE, thread->tid, &callbacks, ui); + if (err) goto out; err = dwfl_getthread_frames(ui->dwfl, thread->tid, frame_callback, ui); |