diff options
Diffstat (limited to '')
-rw-r--r-- | tools/lib/perf/Documentation/libperf.txt | 18 | ||||
-rw-r--r-- | tools/lib/perf/Makefile | 2 | ||||
-rw-r--r-- | tools/lib/perf/cpumap.c | 146 | ||||
-rw-r--r-- | tools/lib/perf/evlist.c | 133 | ||||
-rw-r--r-- | tools/lib/perf/evsel.c | 246 | ||||
-rw-r--r-- | tools/lib/perf/include/internal/cpumap.h | 15 | ||||
-rw-r--r-- | tools/lib/perf/include/internal/evlist.h | 16 | ||||
-rw-r--r-- | tools/lib/perf/include/internal/evsel.h | 19 | ||||
-rw-r--r-- | tools/lib/perf/include/internal/lib.h | 2 | ||||
-rw-r--r-- | tools/lib/perf/include/internal/mmap.h | 5 | ||||
-rw-r--r-- | tools/lib/perf/include/perf/cpumap.h | 14 | ||||
-rw-r--r-- | tools/lib/perf/include/perf/event.h | 86 | ||||
-rw-r--r-- | tools/lib/perf/include/perf/evsel.h | 19 | ||||
-rw-r--r-- | tools/lib/perf/include/perf/threadmap.h | 7 | ||||
-rw-r--r-- | tools/lib/perf/lib.c | 20 | ||||
-rw-r--r-- | tools/lib/perf/libperf.map | 4 | ||||
-rw-r--r-- | tools/lib/perf/mmap.c | 102 | ||||
-rw-r--r-- | tools/lib/perf/tests/test-cpumap.c | 11 | ||||
-rw-r--r-- | tools/lib/perf/tests/test-evlist.c | 169 | ||||
-rw-r--r-- | tools/lib/perf/tests/test-evsel.c | 166 | ||||
-rw-r--r-- | tools/lib/perf/tests/test-threadmap.c | 41 | ||||
-rw-r--r-- | tools/lib/perf/threadmap.c | 36 |
22 files changed, 1058 insertions, 219 deletions
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index 63ae5e0195ce..a8f1a237931b 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -48,6 +48,7 @@ SYNOPSIS int perf_cpu_map__nr(const struct perf_cpu_map *cpus); bool perf_cpu_map__empty(const struct perf_cpu_map *map); int perf_cpu_map__max(struct perf_cpu_map *map); + bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) -- @@ -61,11 +62,12 @@ SYNOPSIS struct perf_thread_map; struct perf_thread_map *perf_thread_map__new_dummy(void); + struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array); - void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid); - char *perf_thread_map__comm(struct perf_thread_map *map, int thread); + void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid); + char *perf_thread_map__comm(struct perf_thread_map *map, int idx); int perf_thread_map__nr(struct perf_thread_map *threads); - pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread); + pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx); struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map); void perf_thread_map__put(struct perf_thread_map *map); @@ -135,16 +137,16 @@ SYNOPSIS int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); void perf_evsel__close(struct perf_evsel *evsel); - void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); + void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx); int perf_evsel__mmap(struct perf_evsel *evsel, int pages); void perf_evsel__munmap(struct perf_evsel *evsel); - void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread); - int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, + void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread); + int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count); int perf_evsel__enable(struct perf_evsel *evsel); - int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu); + int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx); int perf_evsel__disable(struct perf_evsel *evsel); - int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu); + int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx); struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel); struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel); diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 08fe6e3c4089..21df023a2103 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -153,7 +153,7 @@ $(TESTS_STATIC): $(TESTS_IN) $(LIBPERF_A) $(LIBAPI) $(QUIET_LINK)$(CC) -o $@ $^ $(TESTS_SHARED): $(TESTS_IN) $(LIBAPI) - $(QUIET_LINK)$(CC) -o $@ -L$(if $(OUTPUT),$(OUTPUT),.) $^ -lperf + $(QUIET_LINK)$(CC) -o $@ -L$(or $(OUTPUT),.) $^ -lperf make-tests: libs $(TESTS_SHARED) $(TESTS_STATIC) diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index adaad3dddf6e..6cd0be7c1bb4 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -10,15 +10,24 @@ #include <ctype.h> #include <limits.h> -struct perf_cpu_map *perf_cpu_map__dummy_new(void) +static struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus) { - struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); + struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus); if (cpus != NULL) { - cpus->nr = 1; - cpus->map[0] = -1; + cpus->nr = nr_cpus; refcount_set(&cpus->refcnt, 1); + } + return cpus; +} + +struct perf_cpu_map *perf_cpu_map__dummy_new(void) +{ + struct perf_cpu_map *cpus = perf_cpu_map__alloc(1); + + if (cpus) + cpus->map[0].cpu = -1; return cpus; } @@ -54,15 +63,12 @@ static struct perf_cpu_map *cpu_map__default_new(void) if (nr_cpus < 0) return NULL; - cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int)); + cpus = perf_cpu_map__alloc(nr_cpus); if (cpus != NULL) { int i; for (i = 0; i < nr_cpus; ++i) - cpus->map[i] = i; - - cpus->nr = nr_cpus; - refcount_set(&cpus->refcnt, 1); + cpus->map[i].cpu = i; } return cpus; @@ -73,31 +79,32 @@ struct perf_cpu_map *perf_cpu_map__default_new(void) return cpu_map__default_new(); } -static int cmp_int(const void *a, const void *b) + +static int cmp_cpu(const void *a, const void *b) { - return *(const int *)a - *(const int*)b; + const struct perf_cpu *cpu_a = a, *cpu_b = b; + + return cpu_a->cpu - cpu_b->cpu; } -static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus) +static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus) { - size_t payload_size = nr_cpus * sizeof(int); - struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size); + size_t payload_size = nr_cpus * sizeof(struct perf_cpu); + struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr_cpus); int i, j; if (cpus != NULL) { memcpy(cpus->map, tmp_cpus, payload_size); - qsort(cpus->map, nr_cpus, sizeof(int), cmp_int); + qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu); /* Remove dups */ j = 0; for (i = 0; i < nr_cpus; i++) { - if (i == 0 || cpus->map[i] != cpus->map[i - 1]) - cpus->map[j++] = cpus->map[i]; + if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu) + cpus->map[j++].cpu = cpus->map[i].cpu; } cpus->nr = j; assert(j <= nr_cpus); - refcount_set(&cpus->refcnt, 1); } - return cpus; } @@ -105,7 +112,7 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file) { struct perf_cpu_map *cpus = NULL; int nr_cpus = 0; - int *tmp_cpus = NULL, *tmp; + struct perf_cpu *tmp_cpus = NULL, *tmp; int max_entries = 0; int n, cpu, prev; char sep; @@ -124,24 +131,24 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file) if (new_max >= max_entries) { max_entries = new_max + MAX_NR_CPUS / 2; - tmp = realloc(tmp_cpus, max_entries * sizeof(int)); + tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); if (tmp == NULL) goto out_free_tmp; tmp_cpus = tmp; } while (++prev < cpu) - tmp_cpus[nr_cpus++] = prev; + tmp_cpus[nr_cpus++].cpu = prev; } if (nr_cpus == max_entries) { max_entries += MAX_NR_CPUS; - tmp = realloc(tmp_cpus, max_entries * sizeof(int)); + tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); if (tmp == NULL) goto out_free_tmp; tmp_cpus = tmp; } - tmp_cpus[nr_cpus++] = cpu; + tmp_cpus[nr_cpus++].cpu = cpu; if (n == 2 && sep == '-') prev = cpu; else @@ -179,7 +186,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) unsigned long start_cpu, end_cpu = 0; char *p = NULL; int i, nr_cpus = 0; - int *tmp_cpus = NULL, *tmp; + struct perf_cpu *tmp_cpus = NULL, *tmp; int max_entries = 0; if (!cpu_list) @@ -220,17 +227,17 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) for (; start_cpu <= end_cpu; start_cpu++) { /* check for duplicates */ for (i = 0; i < nr_cpus; i++) - if (tmp_cpus[i] == (int)start_cpu) + if (tmp_cpus[i].cpu == (int)start_cpu) goto invalid; if (nr_cpus == max_entries) { max_entries += MAX_NR_CPUS; - tmp = realloc(tmp_cpus, max_entries * sizeof(int)); + tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); if (tmp == NULL) goto invalid; tmp_cpus = tmp; } - tmp_cpus[nr_cpus++] = (int)start_cpu; + tmp_cpus[nr_cpus++].cpu = (int)start_cpu; } if (*p) ++p; @@ -250,12 +257,16 @@ out: return cpus; } -int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) +struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) { + struct perf_cpu result = { + .cpu = -1 + }; + if (cpus && idx < cpus->nr) return cpus->map[idx]; - return -1; + return result; } int perf_cpu_map__nr(const struct perf_cpu_map *cpus) @@ -265,21 +276,26 @@ int perf_cpu_map__nr(const struct perf_cpu_map *cpus) bool perf_cpu_map__empty(const struct perf_cpu_map *map) { - return map ? map->map[0] == -1 : true; + return map ? map->map[0].cpu == -1 : true; } -int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) +int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) { - int low = 0, high = cpus->nr; + int low, high; + if (!cpus) + return -1; + + low = 0; + high = cpus->nr; while (low < high) { - int idx = (low + high) / 2, - cpu_at_idx = cpus->map[idx]; + int idx = (low + high) / 2; + struct perf_cpu cpu_at_idx = cpus->map[idx]; - if (cpu_at_idx == cpu) + if (cpu_at_idx.cpu == cpu.cpu) return idx; - if (cpu_at_idx > cpu) + if (cpu_at_idx.cpu > cpu.cpu) high = idx; else low = idx + 1; @@ -288,10 +304,39 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) return -1; } -int perf_cpu_map__max(struct perf_cpu_map *map) +bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu) +{ + return perf_cpu_map__idx(cpus, cpu) != -1; +} + +struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map) { + struct perf_cpu result = { + .cpu = -1 + }; + // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well. - return map->nr > 0 ? map->map[map->nr - 1] : -1; + return map->nr > 0 ? map->map[map->nr - 1] : result; +} + +/** Is 'b' a subset of 'a'. */ +bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b) +{ + if (a == b || !b) + return true; + if (!a || b->nr > a->nr) + return false; + + for (int i = 0, j = 0; i < a->nr; i++) { + if (a->map[i].cpu > b->map[j].cpu) + return false; + if (a->map[i].cpu == b->map[j].cpu) { + j++; + if (j == b->nr) + return true; + } + } + return false; } /* @@ -305,33 +350,28 @@ int perf_cpu_map__max(struct perf_cpu_map *map) struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other) { - int *tmp_cpus; + struct perf_cpu *tmp_cpus; int tmp_len; int i, j, k; struct perf_cpu_map *merged; - if (!orig && !other) - return NULL; - if (!orig) { - perf_cpu_map__get(other); - return other; - } - if (!other) - return orig; - if (orig->nr == other->nr && - !memcmp(orig->map, other->map, orig->nr * sizeof(int))) + if (perf_cpu_map__is_subset(orig, other)) return orig; + if (perf_cpu_map__is_subset(other, orig)) { + perf_cpu_map__put(orig); + return perf_cpu_map__get(other); + } tmp_len = orig->nr + other->nr; - tmp_cpus = malloc(tmp_len * sizeof(int)); + tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); if (!tmp_cpus) return NULL; /* Standard merge algorithm from wikipedia */ i = j = k = 0; while (i < orig->nr && j < other->nr) { - if (orig->map[i] <= other->map[j]) { - if (orig->map[i] == other->map[j]) + if (orig->map[i].cpu <= other->map[j].cpu) { + if (orig->map[i].cpu == other->map[j].cpu) j++; tmp_cpus[k++] = orig->map[i++]; } else diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index e37dfad31383..61b637f29b82 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -23,6 +23,7 @@ #include <perf/cpumap.h> #include <perf/threadmap.h> #include <api/fd/array.h> +#include "internal.h" void perf_evlist__init(struct perf_evlist *evlist) { @@ -39,19 +40,26 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, * We already have cpus for evsel (via PMU sysfs) so * keep it, if there's no target cpu list defined. */ - if (!evsel->own_cpus || evlist->has_user_cpus) { + if (evsel->system_wide) { perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evlist->cpus); - } else if (!evsel->system_wide && perf_cpu_map__empty(evlist->cpus)) { + evsel->cpus = perf_cpu_map__new(NULL); + } else if (!evsel->own_cpus || evlist->has_user_cpus || + (!evsel->requires_cpu && perf_cpu_map__empty(evlist->user_requested_cpus))) { perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evlist->cpus); + evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); } else if (evsel->cpus != evsel->own_cpus) { perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evsel->own_cpus); } - perf_thread_map__put(evsel->threads); - evsel->threads = perf_thread_map__get(evlist->threads); + if (evsel->system_wide) { + perf_thread_map__put(evsel->threads); + evsel->threads = perf_thread_map__new_dummy(); + } else { + perf_thread_map__put(evsel->threads); + evsel->threads = perf_thread_map__get(evlist->threads); + } + evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus); } @@ -59,6 +67,8 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist) { struct perf_evsel *evsel; + evlist->needs_map_propagation = true; + perf_evlist__for_each_evsel(evlist, evsel) __perf_evlist__propagate_maps(evlist, evsel); } @@ -69,7 +79,9 @@ void perf_evlist__add(struct perf_evlist *evlist, evsel->idx = evlist->nr_entries; list_add_tail(&evsel->node, &evlist->entries); evlist->nr_entries += 1; - __perf_evlist__propagate_maps(evlist, evsel); + + if (evlist->needs_map_propagation) + __perf_evlist__propagate_maps(evlist, evsel); } void perf_evlist__remove(struct perf_evlist *evlist, @@ -123,10 +135,10 @@ static void perf_evlist__purge(struct perf_evlist *evlist) void perf_evlist__exit(struct perf_evlist *evlist) { - perf_cpu_map__put(evlist->cpus); + perf_cpu_map__put(evlist->user_requested_cpus); perf_cpu_map__put(evlist->all_cpus); perf_thread_map__put(evlist->threads); - evlist->cpus = NULL; + evlist->user_requested_cpus = NULL; evlist->all_cpus = NULL; evlist->threads = NULL; fdarray__exit(&evlist->pollfd); @@ -155,9 +167,9 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, * original reference count of 1. If that is not the case it is up to * the caller to increase the reference count. */ - if (cpus != evlist->cpus) { - perf_cpu_map__put(evlist->cpus); - evlist->cpus = perf_cpu_map__get(cpus); + if (cpus != evlist->user_requested_cpus) { + perf_cpu_map__put(evlist->user_requested_cpus); + evlist->user_requested_cpus = perf_cpu_map__get(cpus); } if (threads != evlist->threads) { @@ -165,9 +177,6 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, evlist->threads = perf_thread_map__get(threads); } - if (!evlist->all_cpus && cpus) - evlist->all_cpus = perf_cpu_map__get(cpus); - perf_evlist__propagate_maps(evlist); } @@ -294,7 +303,7 @@ add: int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { - int nr_cpus = perf_cpu_map__nr(evlist->cpus); + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); int nr_threads = perf_thread_map__nr(evlist->threads); int nfds = 0; struct perf_evsel *evsel; @@ -407,7 +416,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx) static int perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int output, int cpu) + int output, struct perf_cpu cpu) { return perf_mmap__mmap(map, mp, output, cpu); } @@ -424,14 +433,15 @@ static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_ static int mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int idx, struct perf_mmap_param *mp, int cpu_idx, - int thread, int *_output, int *_output_overwrite) + int thread, int *_output, int *_output_overwrite, int *nr_mmaps) { - int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); + struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx); struct perf_evsel *evsel; int revent; perf_evlist__for_each_entry(evlist, evsel) { bool overwrite = evsel->attr.write_backward; + enum fdarray_flags flgs; struct perf_mmap *map; int *output, fd, cpu; @@ -474,12 +484,21 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, */ refcount_set(&map->refcnt, 2); + if (ops->idx) + ops->idx(evlist, evsel, mp, idx); + + /* Debug message used by test scripts */ + pr_debug("idx %d: mmapping fd %d\n", idx, *output); if (ops->mmap(map, mp, *output, evlist_cpu) < 0) return -1; + *nr_mmaps += 1; + if (!idx) perf_evlist__set_mmap_first(evlist, map, overwrite); } else { + /* Debug message used by test scripts */ + pr_debug("idx %d: set output fd %d -> %d\n", idx, fd, *output); if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) return -1; @@ -488,8 +507,8 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, revent = !overwrite ? POLLIN : 0; - if (!evsel->system_wide && - perf_evlist__add_pollfd(evlist, fd, map, revent, fdarray_flag__default) < 0) { + flgs = evsel->system_wide ? fdarray_flag__nonfilterable : fdarray_flag__default; + if (perf_evlist__add_pollfd(evlist, fd, map, revent, flgs) < 0) { perf_mmap__put(map); return -1; } @@ -509,21 +528,37 @@ static int mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, struct perf_mmap_param *mp) { - int thread; int nr_threads = perf_thread_map__nr(evlist->threads); + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); + int cpu, thread, idx = 0; + int nr_mmaps = 0; - for (thread = 0; thread < nr_threads; thread++) { + pr_debug("%s: nr cpu values (may include -1) %d nr threads %d\n", + __func__, nr_cpus, nr_threads); + + /* per-thread mmaps */ + for (thread = 0; thread < nr_threads; thread++, idx++) { int output = -1; int output_overwrite = -1; - if (ops->idx) - ops->idx(evlist, mp, thread, false); + if (mmap_per_evsel(evlist, ops, idx, mp, 0, thread, &output, + &output_overwrite, &nr_mmaps)) + goto out_unmap; + } - if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread, - &output, &output_overwrite)) + /* system-wide mmaps i.e. per-cpu */ + for (cpu = 1; cpu < nr_cpus; cpu++, idx++) { + int output = -1; + int output_overwrite = -1; + + if (mmap_per_evsel(evlist, ops, idx, mp, cpu, 0, &output, + &output_overwrite, &nr_mmaps)) goto out_unmap; } + if (nr_mmaps != evlist->nr_mmaps) + pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps); + return 0; out_unmap: @@ -536,23 +571,26 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, struct perf_mmap_param *mp) { int nr_threads = perf_thread_map__nr(evlist->threads); - int nr_cpus = perf_cpu_map__nr(evlist->cpus); + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); + int nr_mmaps = 0; int cpu, thread; + pr_debug("%s: nr cpu values %d nr threads %d\n", __func__, nr_cpus, nr_threads); + for (cpu = 0; cpu < nr_cpus; cpu++) { int output = -1; int output_overwrite = -1; - if (ops->idx) - ops->idx(evlist, mp, cpu, true); - for (thread = 0; thread < nr_threads; thread++) { if (mmap_per_evsel(evlist, ops, cpu, mp, cpu, - thread, &output, &output_overwrite)) + thread, &output, &output_overwrite, &nr_mmaps)) goto out_unmap; } } + if (nr_mmaps != evlist->nr_mmaps) + pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps); + return 0; out_unmap: @@ -564,9 +602,14 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist) { int nr_mmaps; - nr_mmaps = perf_cpu_map__nr(evlist->cpus); - if (perf_cpu_map__empty(evlist->cpus)) - nr_mmaps = perf_thread_map__nr(evlist->threads); + /* One for each CPU */ + nr_mmaps = perf_cpu_map__nr(evlist->all_cpus); + if (perf_cpu_map__empty(evlist->all_cpus)) { + /* Plus one for each thread */ + nr_mmaps += perf_thread_map__nr(evlist->threads); + /* Minus the per-thread CPU (-1) */ + nr_mmaps -= 1; + } return nr_mmaps; } @@ -575,9 +618,8 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, struct perf_mmap_param *mp) { + const struct perf_cpu_map *cpus = evlist->all_cpus; struct perf_evsel *evsel; - const struct perf_cpu_map *cpus = evlist->cpus; - const struct perf_thread_map *threads = evlist->threads; if (!ops || !ops->get || !ops->mmap) return -EINVAL; @@ -589,7 +631,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, perf_evlist__for_each_entry(evlist, evsel) { if ((evsel->attr.read_format & PERF_FORMAT_ID) && evsel->sample_id == NULL && - perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) + perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0) return -ENOMEM; } @@ -643,14 +685,14 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map, return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first; } -void __perf_evlist__set_leader(struct list_head *list) +void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader) { - struct perf_evsel *evsel, *leader; + struct perf_evsel *first, *last, *evsel; - leader = list_entry(list->next, struct perf_evsel, node); - evsel = list_entry(list->prev, struct perf_evsel, node); + first = list_first_entry(list, struct perf_evsel, node); + last = list_last_entry(list, struct perf_evsel, node); - leader->nr_members = evsel->idx - leader->idx + 1; + leader->nr_members = last->idx - first->idx + 1; __perf_evlist__for_each_entry(list, evsel) evsel->leader = leader; @@ -659,7 +701,10 @@ void __perf_evlist__set_leader(struct list_head *list) void perf_evlist__set_leader(struct perf_evlist *evlist) { if (evlist->nr_entries) { + struct perf_evsel *first = list_entry(evlist->entries.next, + struct perf_evsel, node); + evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0; - __perf_evlist__set_leader(&evlist->entries); + __perf_evlist__set_leader(&evlist->entries, first); } } diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 8441e3e1aaac..8b51b008a81f 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -43,18 +43,22 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } -#define FD(e, x, y) ((int *) xyarray__entry(e->fd, x, y)) -#define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL) +#define FD(_evsel, _cpu_map_idx, _thread) \ + ((int *)xyarray__entry(_evsel->fd, _cpu_map_idx, _thread)) +#define MMAP(_evsel, _cpu_map_idx, _thread) \ + (_evsel->mmap ? ((struct perf_mmap *) xyarray__entry(_evsel->mmap, _cpu_map_idx, _thread)) \ + : NULL) int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); if (evsel->fd) { - int cpu, thread; - for (cpu = 0; cpu < ncpus; cpu++) { + int idx, thread; + + for (idx = 0; idx < ncpus; idx++) { for (thread = 0; thread < nthreads; thread++) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, idx, thread); if (fd) *fd = -1; @@ -74,13 +78,13 @@ static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthre static int sys_perf_event_open(struct perf_event_attr *attr, - pid_t pid, int cpu, int group_fd, + pid_t pid, struct perf_cpu cpu, int group_fd, unsigned long flags) { - return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); + return syscall(__NR_perf_event_open, attr, pid, cpu.cpu, group_fd, flags); } -static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd) +static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, int *group_fd) { struct perf_evsel *leader = evsel->leader; int *fd; @@ -97,7 +101,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou if (!leader->fd) return -ENOTCONN; - fd = FD(leader, cpu, thread); + fd = FD(leader, cpu_map_idx, thread); if (fd == NULL || *fd == -1) return -EBADF; @@ -109,7 +113,8 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { - int cpu, thread, err = 0; + struct perf_cpu cpu; + int idx, thread, err = 0; if (cpus == NULL) { static struct perf_cpu_map *empty_cpu_map; @@ -136,41 +141,48 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, } if (evsel->fd == NULL && - perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) + perf_evsel__alloc_fd(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) return -ENOMEM; - for (cpu = 0; cpu < cpus->nr; cpu++) { + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { for (thread = 0; thread < threads->nr; thread++) { int fd, group_fd, *evsel_fd; - evsel_fd = FD(evsel, cpu, thread); - if (evsel_fd == NULL) - return -EINVAL; + evsel_fd = FD(evsel, idx, thread); + if (evsel_fd == NULL) { + err = -EINVAL; + goto out; + } - err = get_group_fd(evsel, cpu, thread, &group_fd); + err = get_group_fd(evsel, idx, thread, &group_fd); if (err < 0) - return err; + goto out; fd = sys_perf_event_open(&evsel->attr, threads->map[thread].pid, - cpus->map[cpu], group_fd, 0); + cpu, group_fd, 0); - if (fd < 0) - return -errno; + if (fd < 0) { + err = -errno; + goto out; + } *evsel_fd = fd; } } +out: + if (err) + perf_evsel__close(evsel); return err; } -static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) +static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu_map_idx) { int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, cpu_map_idx, thread); if (fd && *fd >= 0) { close(*fd); @@ -181,10 +193,8 @@ static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) void perf_evsel__close_fd(struct perf_evsel *evsel) { - int cpu; - - for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) - perf_evsel__close_fd_cpu(evsel, cpu); + for (int idx = 0; idx < xyarray__max_x(evsel->fd); idx++) + perf_evsel__close_fd_cpu(evsel, idx); } void perf_evsel__free_fd(struct perf_evsel *evsel) @@ -202,29 +212,29 @@ void perf_evsel__close(struct perf_evsel *evsel) perf_evsel__free_fd(evsel); } -void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu) +void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx) { if (evsel->fd == NULL) return; - perf_evsel__close_fd_cpu(evsel, cpu); + perf_evsel__close_fd_cpu(evsel, cpu_map_idx); } void perf_evsel__munmap(struct perf_evsel *evsel) { - int cpu, thread; + int idx, thread; if (evsel->fd == NULL || evsel->mmap == NULL) return; - for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { + for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, idx, thread); if (fd == NULL || *fd < 0) continue; - perf_mmap__munmap(MMAP(evsel, cpu, thread)); + perf_mmap__munmap(MMAP(evsel, idx, thread)); } } @@ -234,7 +244,7 @@ void perf_evsel__munmap(struct perf_evsel *evsel) int perf_evsel__mmap(struct perf_evsel *evsel, int pages) { - int ret, cpu, thread; + int ret, idx, thread; struct perf_mmap_param mp = { .prot = PROT_READ | PROT_WRITE, .mask = (pages * page_size) - 1, @@ -246,15 +256,16 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0) return -ENOMEM; - for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { + for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, idx, thread); struct perf_mmap *map; + struct perf_cpu cpu = perf_cpu_map__cpu(evsel->cpus, idx); if (fd == NULL || *fd < 0) continue; - map = MMAP(evsel, cpu, thread); + map = MMAP(evsel, idx, thread); perf_mmap__init(map, NULL, false, NULL); ret = perf_mmap__mmap(map, &mp, *fd, cpu); @@ -268,14 +279,14 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) return 0; } -void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread) +void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, cpu_map_idx, thread); - if (fd == NULL || *fd < 0 || MMAP(evsel, cpu, thread) == NULL) + if (fd == NULL || *fd < 0 || MMAP(evsel, cpu_map_idx, thread) == NULL) return NULL; - return MMAP(evsel, cpu, thread)->base; + return MMAP(evsel, cpu_map_idx, thread)->base; } int perf_evsel__read_size(struct perf_evsel *evsel) @@ -294,6 +305,9 @@ int perf_evsel__read_size(struct perf_evsel *evsel) if (read_format & PERF_FORMAT_ID) entry += sizeof(u64); + if (read_format & PERF_FORMAT_LOST) + entry += sizeof(u64); + if (read_format & PERF_FORMAT_GROUP) { nr = evsel->nr_members; size += sizeof(u64); @@ -303,41 +317,120 @@ int perf_evsel__read_size(struct perf_evsel *evsel) return size; } -int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, +/* This only reads values for the leader */ +static int perf_evsel__read_group(struct perf_evsel *evsel, int cpu_map_idx, + int thread, struct perf_counts_values *count) +{ + size_t size = perf_evsel__read_size(evsel); + int *fd = FD(evsel, cpu_map_idx, thread); + u64 read_format = evsel->attr.read_format; + u64 *data; + int idx = 1; + + if (fd == NULL || *fd < 0) + return -EINVAL; + + data = calloc(1, size); + if (data == NULL) + return -ENOMEM; + + if (readn(*fd, data, size) <= 0) { + free(data); + return -errno; + } + + /* + * This reads only the leader event intentionally since we don't have + * perf counts values for sibling events. + */ + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + count->ena = data[idx++]; + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + count->run = data[idx++]; + + /* value is always available */ + count->val = data[idx++]; + if (read_format & PERF_FORMAT_ID) + count->id = data[idx++]; + if (read_format & PERF_FORMAT_LOST) + count->lost = data[idx++]; + + free(data); + return 0; +} + +/* + * The perf read format is very flexible. It needs to set the proper + * values according to the read format. + */ +static void perf_evsel__adjust_values(struct perf_evsel *evsel, u64 *buf, + struct perf_counts_values *count) +{ + u64 read_format = evsel->attr.read_format; + int n = 0; + + count->val = buf[n++]; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + count->ena = buf[n++]; + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + count->run = buf[n++]; + + if (read_format & PERF_FORMAT_ID) + count->id = buf[n++]; + + if (read_format & PERF_FORMAT_LOST) + count->lost = buf[n++]; +} + +int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count) { size_t size = perf_evsel__read_size(evsel); - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, cpu_map_idx, thread); + u64 read_format = evsel->attr.read_format; + struct perf_counts_values buf; memset(count, 0, sizeof(*count)); if (fd == NULL || *fd < 0) return -EINVAL; - if (MMAP(evsel, cpu, thread) && - !perf_mmap__read_self(MMAP(evsel, cpu, thread), count)) + if (read_format & PERF_FORMAT_GROUP) + return perf_evsel__read_group(evsel, cpu_map_idx, thread, count); + + if (MMAP(evsel, cpu_map_idx, thread) && + !(read_format & (PERF_FORMAT_ID | PERF_FORMAT_LOST)) && + !perf_mmap__read_self(MMAP(evsel, cpu_map_idx, thread), count)) return 0; - if (readn(*fd, count->values, size) <= 0) + if (readn(*fd, buf.values, size) <= 0) return -errno; + perf_evsel__adjust_values(evsel, buf.values, count); return 0; } +static int perf_evsel__ioctl(struct perf_evsel *evsel, int ioc, void *arg, + int cpu_map_idx, int thread) +{ + int *fd = FD(evsel, cpu_map_idx, thread); + + if (fd == NULL || *fd < 0) + return -1; + + return ioctl(*fd, ioc, arg); +} + static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ioc, void *arg, - int cpu) + int cpu_map_idx) { int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int err; - int *fd = FD(evsel, cpu, thread); - - if (fd == NULL || *fd < 0) - return -1; - - err = ioctl(*fd, ioc, arg); + int err = perf_evsel__ioctl(evsel, ioc, arg, cpu_map_idx, thread); if (err) return err; @@ -346,9 +439,24 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, return 0; } -int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu) +int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx) { - return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu); + return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx); +} + +int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread) +{ + struct perf_cpu cpu __maybe_unused; + int idx; + int err; + + perf_cpu_map__for_each_cpu(cpu, idx, evsel->cpus) { + err = perf_evsel__ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, idx, thread); + if (err) + return err; + } + + return 0; } int perf_evsel__enable(struct perf_evsel *evsel) @@ -361,9 +469,9 @@ int perf_evsel__enable(struct perf_evsel *evsel) return err; } -int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu) +int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx) { - return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu); + return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu_map_idx); } int perf_evsel__disable(struct perf_evsel *evsel) @@ -380,7 +488,7 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter) { int err = 0, i; - for (i = 0; i < evsel->cpus->nr && !err; i++) + for (i = 0; i < perf_cpu_map__nr(evsel->cpus) && !err; i++) err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_SET_FILTER, (void *)filter, i); @@ -407,9 +515,6 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) if (ncpus == 0 || nthreads == 0) return 0; - if (evsel->system_wide) - nthreads = 1; - evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); if (evsel->sample_id == NULL) return -ENOMEM; @@ -431,3 +536,22 @@ void perf_evsel__free_id(struct perf_evsel *evsel) zfree(&evsel->id); evsel->ids = 0; } + +void perf_counts_values__scale(struct perf_counts_values *count, + bool scale, __s8 *pscaled) +{ + s8 scaled = 0; + + if (scale) { + if (count->run == 0) { + scaled = -1; + count->val = 0; + } else if (count->run < count->ena) { + scaled = 1; + count->val = (u64)((double)count->val * count->ena / count->run); + } + } + + if (pscaled) + *pscaled = scaled; +} diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h index 840d4032587b..35dd29642296 100644 --- a/tools/lib/perf/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h @@ -3,17 +3,28 @@ #define __LIBPERF_INTERNAL_CPUMAP_H #include <linux/refcount.h> +#include <perf/cpumap.h> +/** + * A sized, reference counted, sorted array of integers representing CPU + * numbers. This is commonly used to capture which CPUs a PMU is associated + * with. The indices into the cpumap are frequently used as they avoid having + * gaps if CPU numbers were used. For events associated with a pid, rather than + * a CPU, a single dummy map with an entry of -1 is used. + */ struct perf_cpu_map { refcount_t refcnt; + /** Length of the map array. */ int nr; - int map[]; + /** The CPU values. */ + struct perf_cpu map[]; }; #ifndef MAX_NR_CPUS #define MAX_NR_CPUS 2048 #endif -int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu); +int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu); +bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b); #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index f366dbad6a88..850f07070036 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -4,6 +4,7 @@ #include <linux/list.h> #include <api/fd/array.h> +#include <internal/cpumap.h> #include <internal/evsel.h> #define PERF_EVLIST__HLIST_BITS 8 @@ -18,7 +19,13 @@ struct perf_evlist { int nr_entries; int nr_groups; bool has_user_cpus; - struct perf_cpu_map *cpus; + bool needs_map_propagation; + /** + * The cpus passed from the command line or all online CPUs by + * default. + */ + struct perf_cpu_map *user_requested_cpus; + /** The union of all evsel cpu maps. */ struct perf_cpu_map *all_cpus; struct perf_thread_map *threads; int nr_mmaps; @@ -32,11 +39,12 @@ struct perf_evlist { }; typedef void -(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool); +(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_evsel*, + struct perf_mmap_param*, int); typedef struct perf_mmap* (*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int); typedef int -(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int); +(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, struct perf_cpu); struct perf_evlist_mmap_ops { perf_evlist_mmap__cb_idx_t idx; @@ -127,5 +135,5 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, void perf_evlist__reset_id_hash(struct perf_evlist *evlist); -void __perf_evlist__set_leader(struct list_head *list); +void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader); #endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index 1f3eacbad2e8..a99a75d9e78f 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -6,8 +6,8 @@ #include <linux/perf_event.h> #include <stdbool.h> #include <sys/types.h> +#include <internal/cpumap.h> -struct perf_cpu_map; struct perf_thread_map; struct xyarray; @@ -27,9 +27,13 @@ struct perf_sample_id { * queue number. */ int idx; - int cpu; + struct perf_cpu cpu; pid_t tid; + /* Guest machine pid and VCPU, valid only if machine_pid is non-zero */ + pid_t machine_pid; + struct perf_cpu vcpu; + /* Holds total ID period value for PERF_SAMPLE_READ processing. */ u64 period; }; @@ -49,7 +53,18 @@ struct perf_evsel { /* parse modifier helper */ int nr_members; + /* + * system_wide is for events that need to be on every CPU, irrespective + * of user requested CPUs or threads. Map propagation will set cpus to + * this event's own_cpus, whereby they will contribute to evlist + * all_cpus. + */ bool system_wide; + /* + * Some events, for example uncore events, require a CPU. + * i.e. it cannot be the 'any CPU' value of -1. + */ + bool requires_cpu; int idx; }; diff --git a/tools/lib/perf/include/internal/lib.h b/tools/lib/perf/include/internal/lib.h index 5175d491b2d4..85471a4b900f 100644 --- a/tools/lib/perf/include/internal/lib.h +++ b/tools/lib/perf/include/internal/lib.h @@ -9,4 +9,6 @@ extern unsigned int page_size; ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, const void *buf, size_t n); +ssize_t preadn(int fd, void *buf, size_t n, off_t offs); + #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h index 5e3422f40ed5..5a062af8e9d8 100644 --- a/tools/lib/perf/include/internal/mmap.h +++ b/tools/lib/perf/include/internal/mmap.h @@ -6,6 +6,7 @@ #include <linux/refcount.h> #include <linux/types.h> #include <stdbool.h> +#include <internal/cpumap.h> /* perf sample has 16 bits size limit */ #define PERF_SAMPLE_MAX_SIZE (1 << 16) @@ -24,7 +25,7 @@ struct perf_mmap { void *base; int mask; int fd; - int cpu; + struct perf_cpu cpu; refcount_t refcnt; u64 prev; u64 start; @@ -46,7 +47,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map); void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, bool overwrite, libperf_unmap_cb_t unmap_cb); int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int fd, int cpu); + int fd, struct perf_cpu cpu); void perf_mmap__munmap(struct perf_mmap *map); void perf_mmap__get(struct perf_mmap *map); void perf_mmap__put(struct perf_mmap *map); diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 7c27766ea0bf..03aceb72a783 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -3,10 +3,14 @@ #define __LIBPERF_CPUMAP_H #include <perf/core.h> +#include <perf/cpumap.h> #include <stdio.h> #include <stdbool.h> -struct perf_cpu_map; +/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */ +struct perf_cpu { + int cpu; +}; LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void); @@ -16,14 +20,18 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other); LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); -LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); +LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); -LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map); +LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map); +LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \ for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \ (idx) < perf_cpu_map__nr(cpus); \ (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx)) +#define perf_cpu_map__for_each_idx(idx, cpus) \ + for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++) + #endif /* __LIBPERF_CPUMAP_H */ diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 75ee385fb078..ad47d7b31046 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -76,7 +76,7 @@ struct perf_record_lost_samples { }; /* - * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID + * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_LOST */ struct perf_record_read { struct perf_event_header header; @@ -85,6 +85,7 @@ struct perf_record_read { __u64 time_enabled; __u64 time_running; __u64 id; + __u64 lost; }; struct perf_record_throttle { @@ -95,7 +96,7 @@ struct perf_record_throttle { }; #ifndef KSYM_NAME_LEN -#define KSYM_NAME_LEN 256 +#define KSYM_NAME_LEN 512 #endif struct perf_record_ksymbol { @@ -151,23 +152,75 @@ struct perf_record_header_attr { enum { PERF_CPU_MAP__CPUS = 0, PERF_CPU_MAP__MASK = 1, + PERF_CPU_MAP__RANGE_CPUS = 2, }; +/* + * Array encoding of a perf_cpu_map where nr is the number of entries in cpu[] + * and each entry is a value for a CPU in the map. + */ struct cpu_map_entries { __u16 nr; __u16 cpu[]; }; -struct perf_record_record_cpu_map { +/* Bitmap encoding of a perf_cpu_map where bitmap entries are 32-bit. */ +struct perf_record_mask_cpu_map32 { + /* Number of mask values. */ + __u16 nr; + /* Constant 4. */ + __u16 long_size; + /* Bitmap data. */ + __u32 mask[]; +}; + +/* Bitmap encoding of a perf_cpu_map where bitmap entries are 64-bit. */ +struct perf_record_mask_cpu_map64 { + /* Number of mask values. */ __u16 nr; + /* Constant 8. */ __u16 long_size; - unsigned long mask[]; + /* Legacy padding. */ + char __pad[4]; + /* Bitmap data. */ + __u64 mask[]; +}; + +/* + * 'struct perf_record_cpu_map_data' is packed as unfortunately an earlier + * version had unaligned data and we wish to retain file format compatibility. + * -irogers + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" + +/* + * An encoding of a CPU map for a range starting at start_cpu through to + * end_cpu. If any_cpu is 1, an any CPU (-1) value (aka dummy value) is present. + */ +struct perf_record_range_cpu_map { + __u8 any_cpu; + __u8 __pad; + __u16 start_cpu; + __u16 end_cpu; }; struct perf_record_cpu_map_data { __u16 type; - char data[]; -}; + union { + /* Used when type == PERF_CPU_MAP__CPUS. */ + struct cpu_map_entries cpus_data; + /* Used when type == PERF_CPU_MAP__MASK and long_size == 4. */ + struct perf_record_mask_cpu_map32 mask32_data; + /* Used when type == PERF_CPU_MAP__MASK and long_size == 8. */ + struct perf_record_mask_cpu_map64 mask64_data; + /* Used when type == PERF_CPU_MAP__RANGE_CPUS. */ + struct perf_record_range_cpu_map range_cpu_data; + }; +} __attribute__((packed)); + +#pragma GCC diagnostic pop struct perf_record_cpu_map { struct perf_event_header header; @@ -193,7 +246,16 @@ struct perf_record_event_update { struct perf_event_header header; __u64 type; __u64 id; - char data[]; + union { + /* Used when type == PERF_EVENT_UPDATE__SCALE. */ + struct perf_record_event_update_scale scale; + /* Used when type == PERF_EVENT_UPDATE__UNIT. */ + char unit[0]; + /* Used when type == PERF_EVENT_UPDATE__NAME. */ + char name[0]; + /* Used when type == PERF_EVENT_UPDATE__CPUS. */ + struct perf_record_event_update_cpus cpus; + }; }; #define MAX_EVENT_NAME 64 @@ -237,10 +299,15 @@ struct id_index_entry { __u64 tid; }; +struct id_index_entry_2 { + __u64 machine_pid; + __u64 vcpu; +}; + struct perf_record_id_index { struct perf_event_header header; __u64 nr; - struct id_index_entry entries[0]; + struct id_index_entry entries[]; }; struct perf_record_auxtrace_info { @@ -274,6 +341,8 @@ struct perf_record_auxtrace_error { __u64 ip; __u64 time; char msg[MAX_AUXTRACE_ERROR_MSG]; + __u32 machine_pid; + __u32 vcpu; }; struct perf_record_aux { @@ -389,6 +458,7 @@ enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_TIME_CONV = 79, PERF_RECORD_HEADER_FEATURE = 80, PERF_RECORD_COMPRESSED = 81, + PERF_RECORD_FINISHED_INIT = 82, PERF_RECORD_HEADER_MAX }; diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h index 60eae25076d3..6f92204075c2 100644 --- a/tools/lib/perf/include/perf/evsel.h +++ b/tools/lib/perf/include/perf/evsel.h @@ -4,6 +4,8 @@ #include <stdint.h> #include <perf/core.h> +#include <stdbool.h> +#include <linux/types.h> struct perf_evsel; struct perf_event_attr; @@ -16,8 +18,10 @@ struct perf_counts_values { uint64_t val; uint64_t ena; uint64_t run; + uint64_t id; + uint64_t lost; }; - uint64_t values[3]; + uint64_t values[5]; }; }; @@ -26,18 +30,21 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel); LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel); -LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx); LIBPERF_API int perf_evsel__mmap(struct perf_evsel *evsel, int pages); LIBPERF_API void perf_evsel__munmap(struct perf_evsel *evsel); -LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread); -LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, +LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread); +LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count); LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel); -LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx); +LIBPERF_API int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread); LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel); -LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx); LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel); LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel); +LIBPERF_API void perf_counts_values__scale(struct perf_counts_values *count, + bool scale, __s8 *pscaled); #endif /* __LIBPERF_EVSEL_H */ diff --git a/tools/lib/perf/include/perf/threadmap.h b/tools/lib/perf/include/perf/threadmap.h index a7c50de8d010..8b40e7777cea 100644 --- a/tools/lib/perf/include/perf/threadmap.h +++ b/tools/lib/perf/include/perf/threadmap.h @@ -8,11 +8,12 @@ struct perf_thread_map; LIBPERF_API struct perf_thread_map *perf_thread_map__new_dummy(void); +LIBPERF_API struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array); -LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid); -LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int thread); +LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid); +LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int idx); LIBPERF_API int perf_thread_map__nr(struct perf_thread_map *threads); -LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread); +LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx); LIBPERF_API struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map); LIBPERF_API void perf_thread_map__put(struct perf_thread_map *map); diff --git a/tools/lib/perf/lib.c b/tools/lib/perf/lib.c index 18658931fc71..696fb0ea67c6 100644 --- a/tools/lib/perf/lib.c +++ b/tools/lib/perf/lib.c @@ -38,6 +38,26 @@ ssize_t readn(int fd, void *buf, size_t n) return ion(true, fd, buf, n); } +ssize_t preadn(int fd, void *buf, size_t n, off_t offs) +{ + size_t left = n; + + while (left) { + ssize_t ret = pread(fd, buf, left, offs); + + if (ret < 0 && errno == EINTR) + continue; + if (ret <= 0) + return ret; + + left -= ret; + buf += ret; + offs += ret; + } + + return n; +} + /* * Write exactly 'n' bytes or return an error. */ diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 71468606e8a7..190b56ae923a 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -2,6 +2,7 @@ LIBPERF_0.0.1 { global: libperf_init; perf_cpu_map__dummy_new; + perf_cpu_map__default_new; perf_cpu_map__get; perf_cpu_map__put; perf_cpu_map__new; @@ -10,6 +11,8 @@ LIBPERF_0.0.1 { perf_cpu_map__cpu; perf_cpu_map__empty; perf_cpu_map__max; + perf_cpu_map__has; + perf_thread_map__new_array; perf_thread_map__new_dummy; perf_thread_map__set_pid; perf_thread_map__comm; @@ -50,6 +53,7 @@ LIBPERF_0.0.1 { perf_mmap__read_init; perf_mmap__read_done; perf_mmap__read_event; + perf_counts_values__scale; local: *; }; diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index c89dfa5f67b3..0d1634cedf44 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -13,6 +13,7 @@ #include <internal/lib.h> #include <linux/kernel.h> #include <linux/math64.h> +#include <linux/stringify.h> #include "internal.h" void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, @@ -32,7 +33,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map) } int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int fd, int cpu) + int fd, struct perf_cpu cpu) { map->prev = 0; map->mask = mp->mask; @@ -294,6 +295,103 @@ static u64 read_timestamp(void) return low | ((u64)high) << 32; } +#elif defined(__aarch64__) +#define read_sysreg(r) ({ \ + u64 __val; \ + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +static u64 read_pmccntr(void) +{ + return read_sysreg(pmccntr_el0); +} + +#define PMEVCNTR_READ(idx) \ + static u64 read_pmevcntr_##idx(void) { \ + return read_sysreg(pmevcntr##idx##_el0); \ + } + +PMEVCNTR_READ(0); +PMEVCNTR_READ(1); +PMEVCNTR_READ(2); +PMEVCNTR_READ(3); +PMEVCNTR_READ(4); +PMEVCNTR_READ(5); +PMEVCNTR_READ(6); +PMEVCNTR_READ(7); +PMEVCNTR_READ(8); +PMEVCNTR_READ(9); +PMEVCNTR_READ(10); +PMEVCNTR_READ(11); +PMEVCNTR_READ(12); +PMEVCNTR_READ(13); +PMEVCNTR_READ(14); +PMEVCNTR_READ(15); +PMEVCNTR_READ(16); +PMEVCNTR_READ(17); +PMEVCNTR_READ(18); +PMEVCNTR_READ(19); +PMEVCNTR_READ(20); +PMEVCNTR_READ(21); +PMEVCNTR_READ(22); +PMEVCNTR_READ(23); +PMEVCNTR_READ(24); +PMEVCNTR_READ(25); +PMEVCNTR_READ(26); +PMEVCNTR_READ(27); +PMEVCNTR_READ(28); +PMEVCNTR_READ(29); +PMEVCNTR_READ(30); + +/* + * Read a value direct from PMEVCNTR<idx> + */ +static u64 read_perf_counter(unsigned int counter) +{ + static u64 (* const read_f[])(void) = { + read_pmevcntr_0, + read_pmevcntr_1, + read_pmevcntr_2, + read_pmevcntr_3, + read_pmevcntr_4, + read_pmevcntr_5, + read_pmevcntr_6, + read_pmevcntr_7, + read_pmevcntr_8, + read_pmevcntr_9, + read_pmevcntr_10, + read_pmevcntr_11, + read_pmevcntr_13, + read_pmevcntr_12, + read_pmevcntr_14, + read_pmevcntr_15, + read_pmevcntr_16, + read_pmevcntr_17, + read_pmevcntr_18, + read_pmevcntr_19, + read_pmevcntr_20, + read_pmevcntr_21, + read_pmevcntr_22, + read_pmevcntr_23, + read_pmevcntr_24, + read_pmevcntr_25, + read_pmevcntr_26, + read_pmevcntr_27, + read_pmevcntr_28, + read_pmevcntr_29, + read_pmevcntr_30, + read_pmccntr + }; + + if (counter < ARRAY_SIZE(read_f)) + return (read_f[counter])(); + + return 0; +} + +static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } + #else static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } static u64 read_timestamp(void) { return 0; } @@ -353,8 +451,6 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count count->ena += delta; if (idx) count->run += delta; - - cnt = mul_u64_u64_div64(cnt, count->ena, count->run); } count->val = cnt; diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index d39378eaf897..87b0510a556f 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -14,6 +14,8 @@ static int libperf_print(enum libperf_print_level level, int test_cpumap(int argc, char **argv) { struct perf_cpu_map *cpus; + struct perf_cpu cpu; + int idx; __T_START; @@ -27,6 +29,15 @@ int test_cpumap(int argc, char **argv) perf_cpu_map__put(cpus); perf_cpu_map__put(cpus); + cpus = perf_cpu_map__default_new(); + if (!cpus) + return -1; + + perf_cpu_map__for_each_cpu(cpu, idx, cpus) + __T("wrong cpu number", cpu.cpu != -1); + + perf_cpu_map__put(cpus); + __T_END; return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index ce91a582f0e4..ed616fc19b4f 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE // needed for sched.h to get sched_[gs]etaffinity and CPU_(ZERO,SET) +#include <inttypes.h> #include <sched.h> #include <stdio.h> #include <stdarg.h> @@ -21,6 +22,9 @@ #include "tests.h" #include <internal/evsel.h> +#define EVENT_NUM 15 +#define WAIT_COUNT 100000000UL + static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) { @@ -65,7 +69,7 @@ static int test_stat_cpu(void) perf_evlist__set_maps(evlist, cpus, NULL); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__for_each_evsel(evlist, evsel) { cpus = perf_evsel__cpus(evsel); @@ -126,7 +130,7 @@ static int test_stat_thread(void) perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__for_each_evsel(evlist, evsel) { perf_evsel__read(evsel, 0, 0, &counts); @@ -183,7 +187,7 @@ static int test_stat_thread_enable(void) perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__for_each_evsel(evlist, evsel) { perf_evsel__read(evsel, 0, 0, &counts); @@ -331,7 +335,8 @@ static int test_mmap_cpus(void) }; cpu_set_t saved_mask; char path[PATH_MAX]; - int id, err, cpu, tmp; + int id, err, tmp; + struct perf_cpu cpu; union perf_event *event; int count = 0; @@ -374,7 +379,7 @@ static int test_mmap_cpus(void) cpu_set_t mask; CPU_ZERO(&mask); - CPU_SET(cpu, &mask); + CPU_SET(cpu.cpu, &mask); err = sched_setaffinity(0, sizeof(mask), &mask); __T("sched_setaffinity failed", err == 0); @@ -413,6 +418,159 @@ static int test_mmap_cpus(void) return 0; } +static double display_error(long long average, + long long high, + long long low, + long long expected) +{ + double error; + + error = (((double)average - expected) / expected) * 100.0; + + __T_VERBOSE(" Expected: %lld\n", expected); + __T_VERBOSE(" High: %lld Low: %lld Average: %lld\n", + high, low, average); + + __T_VERBOSE(" Average Error = %.2f%%\n", error); + + return error; +} + +static int test_stat_multiplexing(void) +{ + struct perf_counts_values expected_counts = { .val = 0 }; + struct perf_counts_values counts[EVENT_NUM] = {{ .val = 0 },}; + struct perf_thread_map *threads; + struct perf_evlist *evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_INSTRUCTIONS, + .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING, + .disabled = 1, + }; + int err, i, nonzero = 0; + unsigned long count; + long long max = 0, min = 0, avg = 0; + double error = 0.0; + s8 scaled = 0; + + /* read for non-multiplexing event count */ + threads = perf_thread_map__new_dummy(); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + + evsel = perf_evsel__new(&attr); + __T("failed to create evsel", evsel); + + err = perf_evsel__open(evsel, NULL, threads); + __T("failed to open evsel", err == 0); + + err = perf_evsel__enable(evsel); + __T("failed to enable evsel", err == 0); + + /* wait loop */ + count = WAIT_COUNT; + while (count--) + ; + + perf_evsel__read(evsel, 0, 0, &expected_counts); + __T("failed to read value for evsel", expected_counts.val != 0); + __T("failed to read non-multiplexing event count", + expected_counts.ena == expected_counts.run); + + err = perf_evsel__disable(evsel); + __T("failed to enable evsel", err == 0); + + perf_evsel__close(evsel); + perf_evsel__delete(evsel); + + perf_thread_map__put(threads); + + /* read for multiplexing event count */ + threads = perf_thread_map__new_dummy(); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + + evlist = perf_evlist__new(); + __T("failed to create evlist", evlist); + + for (i = 0; i < EVENT_NUM; i++) { + evsel = perf_evsel__new(&attr); + __T("failed to create evsel", evsel); + + perf_evlist__add(evlist, evsel); + } + perf_evlist__set_maps(evlist, NULL, threads); + + err = perf_evlist__open(evlist); + __T("failed to open evlist", err == 0); + + perf_evlist__enable(evlist); + + /* wait loop */ + count = WAIT_COUNT; + while (count--) + ; + + i = 0; + perf_evlist__for_each_evsel(evlist, evsel) { + perf_evsel__read(evsel, 0, 0, &counts[i]); + __T("failed to read value for evsel", counts[i].val != 0); + i++; + } + + perf_evlist__disable(evlist); + + min = counts[0].val; + for (i = 0; i < EVENT_NUM; i++) { + __T_VERBOSE("Event %2d -- Raw count = %" PRIu64 ", run = %" PRIu64 ", enable = %" PRIu64 "\n", + i, counts[i].val, counts[i].run, counts[i].ena); + + perf_counts_values__scale(&counts[i], true, &scaled); + if (scaled == 1) { + __T_VERBOSE("\t Scaled count = %" PRIu64 " (%.2lf%%, %" PRIu64 "/%" PRIu64 ")\n", + counts[i].val, + (double)counts[i].run / (double)counts[i].ena * 100.0, + counts[i].run, counts[i].ena); + } else if (scaled == -1) { + __T_VERBOSE("\t Not Running\n"); + } else { + __T_VERBOSE("\t Not Scaling\n"); + } + + if (counts[i].val > max) + max = counts[i].val; + + if (counts[i].val < min) + min = counts[i].val; + + avg += counts[i].val; + + if (counts[i].val != 0) + nonzero++; + } + + if (nonzero != 0) + avg = avg / nonzero; + else + avg = 0; + + error = display_error(avg, max, min, expected_counts.val); + + __T("Error out of range!", ((error <= 1.0) && (error >= -1.0))); + + perf_evlist__close(evlist); + perf_evlist__delete(evlist); + + perf_thread_map__put(threads); + + return 0; +} + int test_evlist(int argc, char **argv) { __T_START; @@ -424,6 +582,7 @@ int test_evlist(int argc, char **argv) test_stat_thread_enable(); test_mmap_thread(); test_mmap_cpus(); + test_stat_multiplexing(); __T_END; return tests_failed == 0 ? 0 : -1; diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 33ae9334861a..a11fc51bfb68 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -1,10 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdarg.h> #include <stdio.h> +#include <string.h> #include <linux/perf_event.h> +#include <linux/kernel.h> #include <perf/cpumap.h> #include <perf/threadmap.h> #include <perf/evsel.h> +#include <internal/evsel.h> #include <internal/tests.h> #include "tests.h" @@ -130,6 +133,9 @@ static int test_stat_user_read(int event) struct perf_event_attr attr = { .type = PERF_TYPE_HARDWARE, .config = event, +#ifdef __aarch64__ + .config1 = 0x2, /* Request user access */ +#endif }; int err, i; @@ -150,7 +156,7 @@ static int test_stat_user_read(int event) pc = perf_evsel__mmap_base(evsel, 0, 0); __T("failed to get mmapped address", pc); -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) __T("userspace counter access not supported", pc->cap_user_rdpmc); __T("userspace counter access not enabled", pc->index); __T("userspace counter width not set", pc->pmc_width >= 32); @@ -186,6 +192,163 @@ static int test_stat_user_read(int event) return 0; } +static int test_stat_read_format_single(struct perf_event_attr *attr, struct perf_thread_map *threads) +{ + struct perf_evsel *evsel; + struct perf_counts_values counts; + volatile int count = 0x100000; + int err; + + evsel = perf_evsel__new(attr); + __T("failed to create evsel", evsel); + + /* skip old kernels that don't support the format */ + err = perf_evsel__open(evsel, NULL, threads); + if (err < 0) + return 0; + + while (count--) ; + + memset(&counts, -1, sizeof(counts)); + perf_evsel__read(evsel, 0, 0, &counts); + + __T("failed to read value", counts.val); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + __T("failed to read TOTAL_TIME_ENABLED", counts.ena); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + __T("failed to read TOTAL_TIME_RUNNING", counts.run); + if (attr->read_format & PERF_FORMAT_ID) + __T("failed to read ID", counts.id); + if (attr->read_format & PERF_FORMAT_LOST) + __T("failed to read LOST", counts.lost == 0); + + perf_evsel__close(evsel); + perf_evsel__delete(evsel); + return 0; +} + +static int test_stat_read_format_group(struct perf_event_attr *attr, struct perf_thread_map *threads) +{ + struct perf_evsel *leader, *member; + struct perf_counts_values counts; + volatile int count = 0x100000; + int err; + + attr->read_format |= PERF_FORMAT_GROUP; + leader = perf_evsel__new(attr); + __T("failed to create leader", leader); + + attr->read_format &= ~PERF_FORMAT_GROUP; + member = perf_evsel__new(attr); + __T("failed to create member", member); + + member->leader = leader; + leader->nr_members = 2; + + /* skip old kernels that don't support the format */ + err = perf_evsel__open(leader, NULL, threads); + if (err < 0) + return 0; + err = perf_evsel__open(member, NULL, threads); + if (err < 0) + return 0; + + while (count--) ; + + memset(&counts, -1, sizeof(counts)); + perf_evsel__read(leader, 0, 0, &counts); + + __T("failed to read leader value", counts.val); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + __T("failed to read leader TOTAL_TIME_ENABLED", counts.ena); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + __T("failed to read leader TOTAL_TIME_RUNNING", counts.run); + if (attr->read_format & PERF_FORMAT_ID) + __T("failed to read leader ID", counts.id); + if (attr->read_format & PERF_FORMAT_LOST) + __T("failed to read leader LOST", counts.lost == 0); + + memset(&counts, -1, sizeof(counts)); + perf_evsel__read(member, 0, 0, &counts); + + __T("failed to read member value", counts.val); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + __T("failed to read member TOTAL_TIME_ENABLED", counts.ena); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + __T("failed to read member TOTAL_TIME_RUNNING", counts.run); + if (attr->read_format & PERF_FORMAT_ID) + __T("failed to read member ID", counts.id); + if (attr->read_format & PERF_FORMAT_LOST) + __T("failed to read member LOST", counts.lost == 0); + + perf_evsel__close(member); + perf_evsel__close(leader); + perf_evsel__delete(member); + perf_evsel__delete(leader); + return 0; +} + +static int test_stat_read_format(void) +{ + struct perf_thread_map *threads; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_TASK_CLOCK, + }; + int err, i; + +#define FMT(_fmt) PERF_FORMAT_ ## _fmt +#define FMT_TIME (FMT(TOTAL_TIME_ENABLED) | FMT(TOTAL_TIME_RUNNING)) + + uint64_t test_formats [] = { + 0, + FMT_TIME, + FMT(ID), + FMT(LOST), + FMT_TIME | FMT(ID), + FMT_TIME | FMT(LOST), + FMT_TIME | FMT(ID) | FMT(LOST), + FMT(ID) | FMT(LOST), + }; + +#undef FMT +#undef FMT_TIME + + threads = perf_thread_map__new_dummy(); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + + for (i = 0; i < (int)ARRAY_SIZE(test_formats); i++) { + attr.read_format = test_formats[i]; + __T_VERBOSE("testing single read with read_format: %lx\n", + (unsigned long)test_formats[i]); + + err = test_stat_read_format_single(&attr, threads); + __T("failed to read single format", err == 0); + } + + perf_thread_map__put(threads); + + threads = perf_thread_map__new_array(2, NULL); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + perf_thread_map__set_pid(threads, 1, 0); + + for (i = 0; i < (int)ARRAY_SIZE(test_formats); i++) { + attr.read_format = test_formats[i]; + __T_VERBOSE("testing group read with read_format: %lx\n", + (unsigned long)test_formats[i]); + + err = test_stat_read_format_group(&attr, threads); + __T("failed to read group format", err == 0); + } + + perf_thread_map__put(threads); + return 0; +} + int test_evsel(int argc, char **argv) { __T_START; @@ -197,6 +360,7 @@ int test_evsel(int argc, char **argv) test_stat_thread_enable(); test_stat_user_read(PERF_COUNT_HW_INSTRUCTIONS); test_stat_user_read(PERF_COUNT_HW_CPU_CYCLES); + test_stat_read_format(); __T_END; return tests_failed == 0 ? 0 : -1; diff --git a/tools/lib/perf/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c index 5e2a0291e94c..f728ad7002bb 100644 --- a/tools/lib/perf/tests/test-threadmap.c +++ b/tools/lib/perf/tests/test-threadmap.c @@ -11,9 +11,43 @@ static int libperf_print(enum libperf_print_level level, return vfprintf(stderr, fmt, ap); } +static int test_threadmap_array(int nr, pid_t *array) +{ + struct perf_thread_map *threads; + int i; + + threads = perf_thread_map__new_array(nr, array); + __T("Failed to allocate new thread map", threads); + + __T("Unexpected number of threads", perf_thread_map__nr(threads) == nr); + + for (i = 0; i < nr; i++) { + __T("Unexpected initial value of thread", + perf_thread_map__pid(threads, i) == (array ? array[i] : -1)); + } + + for (i = 1; i < nr; i++) + perf_thread_map__set_pid(threads, i, i * 100); + + __T("Unexpected value of thread 0", + perf_thread_map__pid(threads, 0) == (array ? array[0] : -1)); + + for (i = 1; i < nr; i++) { + __T("Unexpected thread value", + perf_thread_map__pid(threads, i) == i * 100); + } + + perf_thread_map__put(threads); + + return 0; +} + +#define THREADS_NR 10 int test_threadmap(int argc, char **argv) { struct perf_thread_map *threads; + pid_t thr_array[THREADS_NR]; + int i; __T_START; @@ -27,6 +61,13 @@ int test_threadmap(int argc, char **argv) perf_thread_map__put(threads); perf_thread_map__put(threads); + test_threadmap_array(THREADS_NR, NULL); + + for (i = 0; i < THREADS_NR; i++) + thr_array[i] = i + 100; + + test_threadmap_array(THREADS_NR, thr_array); + __T_END; return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/threadmap.c b/tools/lib/perf/threadmap.c index e92c368b0a6c..07968f3ea093 100644 --- a/tools/lib/perf/threadmap.c +++ b/tools/lib/perf/threadmap.c @@ -32,28 +32,38 @@ struct perf_thread_map *perf_thread_map__realloc(struct perf_thread_map *map, in #define thread_map__alloc(__nr) perf_thread_map__realloc(NULL, __nr) -void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid) +void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid) { - map->map[thread].pid = pid; + map->map[idx].pid = pid; } -char *perf_thread_map__comm(struct perf_thread_map *map, int thread) +char *perf_thread_map__comm(struct perf_thread_map *map, int idx) { - return map->map[thread].comm; + return map->map[idx].comm; } -struct perf_thread_map *perf_thread_map__new_dummy(void) +struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array) { - struct perf_thread_map *threads = thread_map__alloc(1); + struct perf_thread_map *threads = thread_map__alloc(nr_threads); + int i; + + if (!threads) + return NULL; + + for (i = 0; i < nr_threads; i++) + perf_thread_map__set_pid(threads, i, array ? array[i] : -1); + + threads->nr = nr_threads; + refcount_set(&threads->refcnt, 1); - if (threads != NULL) { - perf_thread_map__set_pid(threads, 0, -1); - threads->nr = 1; - refcount_set(&threads->refcnt, 1); - } return threads; } +struct perf_thread_map *perf_thread_map__new_dummy(void) +{ + return perf_thread_map__new_array(1, NULL); +} + static void perf_thread_map__delete(struct perf_thread_map *threads) { if (threads) { @@ -85,7 +95,7 @@ int perf_thread_map__nr(struct perf_thread_map *threads) return threads ? threads->nr : 1; } -pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread) +pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx) { - return map->map[thread].pid; + return map->map[idx].pid; } |