aboutsummaryrefslogtreecommitdiffstats
path: root/tools/lib/perf
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--tools/lib/perf/Documentation/libperf.txt18
-rw-r--r--tools/lib/perf/Makefile2
-rw-r--r--tools/lib/perf/cpumap.c146
-rw-r--r--tools/lib/perf/evlist.c133
-rw-r--r--tools/lib/perf/evsel.c246
-rw-r--r--tools/lib/perf/include/internal/cpumap.h15
-rw-r--r--tools/lib/perf/include/internal/evlist.h16
-rw-r--r--tools/lib/perf/include/internal/evsel.h19
-rw-r--r--tools/lib/perf/include/internal/lib.h2
-rw-r--r--tools/lib/perf/include/internal/mmap.h5
-rw-r--r--tools/lib/perf/include/perf/cpumap.h14
-rw-r--r--tools/lib/perf/include/perf/event.h86
-rw-r--r--tools/lib/perf/include/perf/evsel.h19
-rw-r--r--tools/lib/perf/include/perf/threadmap.h7
-rw-r--r--tools/lib/perf/lib.c20
-rw-r--r--tools/lib/perf/libperf.map4
-rw-r--r--tools/lib/perf/mmap.c102
-rw-r--r--tools/lib/perf/tests/test-cpumap.c11
-rw-r--r--tools/lib/perf/tests/test-evlist.c169
-rw-r--r--tools/lib/perf/tests/test-evsel.c166
-rw-r--r--tools/lib/perf/tests/test-threadmap.c41
-rw-r--r--tools/lib/perf/threadmap.c36
22 files changed, 1058 insertions, 219 deletions
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index 63ae5e0195ce..a8f1a237931b 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -48,6 +48,7 @@ SYNOPSIS
int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
bool perf_cpu_map__empty(const struct perf_cpu_map *map);
int perf_cpu_map__max(struct perf_cpu_map *map);
+ bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus)
--
@@ -61,11 +62,12 @@ SYNOPSIS
struct perf_thread_map;
struct perf_thread_map *perf_thread_map__new_dummy(void);
+ struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array);
- void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid);
- char *perf_thread_map__comm(struct perf_thread_map *map, int thread);
+ void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid);
+ char *perf_thread_map__comm(struct perf_thread_map *map, int idx);
int perf_thread_map__nr(struct perf_thread_map *threads);
- pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread);
+ pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx);
struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
void perf_thread_map__put(struct perf_thread_map *map);
@@ -135,16 +137,16 @@ SYNOPSIS
int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
void perf_evsel__close(struct perf_evsel *evsel);
- void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
+ void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx);
int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
void perf_evsel__munmap(struct perf_evsel *evsel);
- void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread);
- int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+ void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread);
+ int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count);
int perf_evsel__enable(struct perf_evsel *evsel);
- int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
+ int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
int perf_evsel__disable(struct perf_evsel *evsel);
- int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
+ int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index 08fe6e3c4089..21df023a2103 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -153,7 +153,7 @@ $(TESTS_STATIC): $(TESTS_IN) $(LIBPERF_A) $(LIBAPI)
$(QUIET_LINK)$(CC) -o $@ $^
$(TESTS_SHARED): $(TESTS_IN) $(LIBAPI)
- $(QUIET_LINK)$(CC) -o $@ -L$(if $(OUTPUT),$(OUTPUT),.) $^ -lperf
+ $(QUIET_LINK)$(CC) -o $@ -L$(or $(OUTPUT),.) $^ -lperf
make-tests: libs $(TESTS_SHARED) $(TESTS_STATIC)
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index adaad3dddf6e..6cd0be7c1bb4 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -10,15 +10,24 @@
#include <ctype.h>
#include <limits.h>
-struct perf_cpu_map *perf_cpu_map__dummy_new(void)
+static struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
{
- struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
+ struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
if (cpus != NULL) {
- cpus->nr = 1;
- cpus->map[0] = -1;
+ cpus->nr = nr_cpus;
refcount_set(&cpus->refcnt, 1);
+
}
+ return cpus;
+}
+
+struct perf_cpu_map *perf_cpu_map__dummy_new(void)
+{
+ struct perf_cpu_map *cpus = perf_cpu_map__alloc(1);
+
+ if (cpus)
+ cpus->map[0].cpu = -1;
return cpus;
}
@@ -54,15 +63,12 @@ static struct perf_cpu_map *cpu_map__default_new(void)
if (nr_cpus < 0)
return NULL;
- cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int));
+ cpus = perf_cpu_map__alloc(nr_cpus);
if (cpus != NULL) {
int i;
for (i = 0; i < nr_cpus; ++i)
- cpus->map[i] = i;
-
- cpus->nr = nr_cpus;
- refcount_set(&cpus->refcnt, 1);
+ cpus->map[i].cpu = i;
}
return cpus;
@@ -73,31 +79,32 @@ struct perf_cpu_map *perf_cpu_map__default_new(void)
return cpu_map__default_new();
}
-static int cmp_int(const void *a, const void *b)
+
+static int cmp_cpu(const void *a, const void *b)
{
- return *(const int *)a - *(const int*)b;
+ const struct perf_cpu *cpu_a = a, *cpu_b = b;
+
+ return cpu_a->cpu - cpu_b->cpu;
}
-static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
+static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus)
{
- size_t payload_size = nr_cpus * sizeof(int);
- struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size);
+ size_t payload_size = nr_cpus * sizeof(struct perf_cpu);
+ struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr_cpus);
int i, j;
if (cpus != NULL) {
memcpy(cpus->map, tmp_cpus, payload_size);
- qsort(cpus->map, nr_cpus, sizeof(int), cmp_int);
+ qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu);
/* Remove dups */
j = 0;
for (i = 0; i < nr_cpus; i++) {
- if (i == 0 || cpus->map[i] != cpus->map[i - 1])
- cpus->map[j++] = cpus->map[i];
+ if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu)
+ cpus->map[j++].cpu = cpus->map[i].cpu;
}
cpus->nr = j;
assert(j <= nr_cpus);
- refcount_set(&cpus->refcnt, 1);
}
-
return cpus;
}
@@ -105,7 +112,7 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file)
{
struct perf_cpu_map *cpus = NULL;
int nr_cpus = 0;
- int *tmp_cpus = NULL, *tmp;
+ struct perf_cpu *tmp_cpus = NULL, *tmp;
int max_entries = 0;
int n, cpu, prev;
char sep;
@@ -124,24 +131,24 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file)
if (new_max >= max_entries) {
max_entries = new_max + MAX_NR_CPUS / 2;
- tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+ tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto out_free_tmp;
tmp_cpus = tmp;
}
while (++prev < cpu)
- tmp_cpus[nr_cpus++] = prev;
+ tmp_cpus[nr_cpus++].cpu = prev;
}
if (nr_cpus == max_entries) {
max_entries += MAX_NR_CPUS;
- tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+ tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto out_free_tmp;
tmp_cpus = tmp;
}
- tmp_cpus[nr_cpus++] = cpu;
+ tmp_cpus[nr_cpus++].cpu = cpu;
if (n == 2 && sep == '-')
prev = cpu;
else
@@ -179,7 +186,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
unsigned long start_cpu, end_cpu = 0;
char *p = NULL;
int i, nr_cpus = 0;
- int *tmp_cpus = NULL, *tmp;
+ struct perf_cpu *tmp_cpus = NULL, *tmp;
int max_entries = 0;
if (!cpu_list)
@@ -220,17 +227,17 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
for (; start_cpu <= end_cpu; start_cpu++) {
/* check for duplicates */
for (i = 0; i < nr_cpus; i++)
- if (tmp_cpus[i] == (int)start_cpu)
+ if (tmp_cpus[i].cpu == (int)start_cpu)
goto invalid;
if (nr_cpus == max_entries) {
max_entries += MAX_NR_CPUS;
- tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+ tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto invalid;
tmp_cpus = tmp;
}
- tmp_cpus[nr_cpus++] = (int)start_cpu;
+ tmp_cpus[nr_cpus++].cpu = (int)start_cpu;
}
if (*p)
++p;
@@ -250,12 +257,16 @@ out:
return cpus;
}
-int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
+struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
{
+ struct perf_cpu result = {
+ .cpu = -1
+ };
+
if (cpus && idx < cpus->nr)
return cpus->map[idx];
- return -1;
+ return result;
}
int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
@@ -265,21 +276,26 @@ int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
bool perf_cpu_map__empty(const struct perf_cpu_map *map)
{
- return map ? map->map[0] == -1 : true;
+ return map ? map->map[0].cpu == -1 : true;
}
-int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
+int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
{
- int low = 0, high = cpus->nr;
+ int low, high;
+ if (!cpus)
+ return -1;
+
+ low = 0;
+ high = cpus->nr;
while (low < high) {
- int idx = (low + high) / 2,
- cpu_at_idx = cpus->map[idx];
+ int idx = (low + high) / 2;
+ struct perf_cpu cpu_at_idx = cpus->map[idx];
- if (cpu_at_idx == cpu)
+ if (cpu_at_idx.cpu == cpu.cpu)
return idx;
- if (cpu_at_idx > cpu)
+ if (cpu_at_idx.cpu > cpu.cpu)
high = idx;
else
low = idx + 1;
@@ -288,10 +304,39 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
return -1;
}
-int perf_cpu_map__max(struct perf_cpu_map *map)
+bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
+{
+ return perf_cpu_map__idx(cpus, cpu) != -1;
+}
+
+struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
{
+ struct perf_cpu result = {
+ .cpu = -1
+ };
+
// cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well.
- return map->nr > 0 ? map->map[map->nr - 1] : -1;
+ return map->nr > 0 ? map->map[map->nr - 1] : result;
+}
+
+/** Is 'b' a subset of 'a'. */
+bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b)
+{
+ if (a == b || !b)
+ return true;
+ if (!a || b->nr > a->nr)
+ return false;
+
+ for (int i = 0, j = 0; i < a->nr; i++) {
+ if (a->map[i].cpu > b->map[j].cpu)
+ return false;
+ if (a->map[i].cpu == b->map[j].cpu) {
+ j++;
+ if (j == b->nr)
+ return true;
+ }
+ }
+ return false;
}
/*
@@ -305,33 +350,28 @@ int perf_cpu_map__max(struct perf_cpu_map *map)
struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other)
{
- int *tmp_cpus;
+ struct perf_cpu *tmp_cpus;
int tmp_len;
int i, j, k;
struct perf_cpu_map *merged;
- if (!orig && !other)
- return NULL;
- if (!orig) {
- perf_cpu_map__get(other);
- return other;
- }
- if (!other)
- return orig;
- if (orig->nr == other->nr &&
- !memcmp(orig->map, other->map, orig->nr * sizeof(int)))
+ if (perf_cpu_map__is_subset(orig, other))
return orig;
+ if (perf_cpu_map__is_subset(other, orig)) {
+ perf_cpu_map__put(orig);
+ return perf_cpu_map__get(other);
+ }
tmp_len = orig->nr + other->nr;
- tmp_cpus = malloc(tmp_len * sizeof(int));
+ tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
if (!tmp_cpus)
return NULL;
/* Standard merge algorithm from wikipedia */
i = j = k = 0;
while (i < orig->nr && j < other->nr) {
- if (orig->map[i] <= other->map[j]) {
- if (orig->map[i] == other->map[j])
+ if (orig->map[i].cpu <= other->map[j].cpu) {
+ if (orig->map[i].cpu == other->map[j].cpu)
j++;
tmp_cpus[k++] = orig->map[i++];
} else
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index e37dfad31383..61b637f29b82 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -23,6 +23,7 @@
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <api/fd/array.h>
+#include "internal.h"
void perf_evlist__init(struct perf_evlist *evlist)
{
@@ -39,19 +40,26 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
* We already have cpus for evsel (via PMU sysfs) so
* keep it, if there's no target cpu list defined.
*/
- if (!evsel->own_cpus || evlist->has_user_cpus) {
+ if (evsel->system_wide) {
perf_cpu_map__put(evsel->cpus);
- evsel->cpus = perf_cpu_map__get(evlist->cpus);
- } else if (!evsel->system_wide && perf_cpu_map__empty(evlist->cpus)) {
+ evsel->cpus = perf_cpu_map__new(NULL);
+ } else if (!evsel->own_cpus || evlist->has_user_cpus ||
+ (!evsel->requires_cpu && perf_cpu_map__empty(evlist->user_requested_cpus))) {
perf_cpu_map__put(evsel->cpus);
- evsel->cpus = perf_cpu_map__get(evlist->cpus);
+ evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
} else if (evsel->cpus != evsel->own_cpus) {
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
}
- perf_thread_map__put(evsel->threads);
- evsel->threads = perf_thread_map__get(evlist->threads);
+ if (evsel->system_wide) {
+ perf_thread_map__put(evsel->threads);
+ evsel->threads = perf_thread_map__new_dummy();
+ } else {
+ perf_thread_map__put(evsel->threads);
+ evsel->threads = perf_thread_map__get(evlist->threads);
+ }
+
evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
}
@@ -59,6 +67,8 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
struct perf_evsel *evsel;
+ evlist->needs_map_propagation = true;
+
perf_evlist__for_each_evsel(evlist, evsel)
__perf_evlist__propagate_maps(evlist, evsel);
}
@@ -69,7 +79,9 @@ void perf_evlist__add(struct perf_evlist *evlist,
evsel->idx = evlist->nr_entries;
list_add_tail(&evsel->node, &evlist->entries);
evlist->nr_entries += 1;
- __perf_evlist__propagate_maps(evlist, evsel);
+
+ if (evlist->needs_map_propagation)
+ __perf_evlist__propagate_maps(evlist, evsel);
}
void perf_evlist__remove(struct perf_evlist *evlist,
@@ -123,10 +135,10 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
void perf_evlist__exit(struct perf_evlist *evlist)
{
- perf_cpu_map__put(evlist->cpus);
+ perf_cpu_map__put(evlist->user_requested_cpus);
perf_cpu_map__put(evlist->all_cpus);
perf_thread_map__put(evlist->threads);
- evlist->cpus = NULL;
+ evlist->user_requested_cpus = NULL;
evlist->all_cpus = NULL;
evlist->threads = NULL;
fdarray__exit(&evlist->pollfd);
@@ -155,9 +167,9 @@ void perf_evlist__set_maps(struct perf_evlist *evlist,
* original reference count of 1. If that is not the case it is up to
* the caller to increase the reference count.
*/
- if (cpus != evlist->cpus) {
- perf_cpu_map__put(evlist->cpus);
- evlist->cpus = perf_cpu_map__get(cpus);
+ if (cpus != evlist->user_requested_cpus) {
+ perf_cpu_map__put(evlist->user_requested_cpus);
+ evlist->user_requested_cpus = perf_cpu_map__get(cpus);
}
if (threads != evlist->threads) {
@@ -165,9 +177,6 @@ void perf_evlist__set_maps(struct perf_evlist *evlist,
evlist->threads = perf_thread_map__get(threads);
}
- if (!evlist->all_cpus && cpus)
- evlist->all_cpus = perf_cpu_map__get(cpus);
-
perf_evlist__propagate_maps(evlist);
}
@@ -294,7 +303,7 @@ add:
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
- int nr_cpus = perf_cpu_map__nr(evlist->cpus);
+ int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
int nr_threads = perf_thread_map__nr(evlist->threads);
int nfds = 0;
struct perf_evsel *evsel;
@@ -407,7 +416,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx)
static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
- int output, int cpu)
+ int output, struct perf_cpu cpu)
{
return perf_mmap__mmap(map, mp, output, cpu);
}
@@ -424,14 +433,15 @@ static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_
static int
mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
int idx, struct perf_mmap_param *mp, int cpu_idx,
- int thread, int *_output, int *_output_overwrite)
+ int thread, int *_output, int *_output_overwrite, int *nr_mmaps)
{
- int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
+ struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx);
struct perf_evsel *evsel;
int revent;
perf_evlist__for_each_entry(evlist, evsel) {
bool overwrite = evsel->attr.write_backward;
+ enum fdarray_flags flgs;
struct perf_mmap *map;
int *output, fd, cpu;
@@ -474,12 +484,21 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
*/
refcount_set(&map->refcnt, 2);
+ if (ops->idx)
+ ops->idx(evlist, evsel, mp, idx);
+
+ /* Debug message used by test scripts */
+ pr_debug("idx %d: mmapping fd %d\n", idx, *output);
if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
return -1;
+ *nr_mmaps += 1;
+
if (!idx)
perf_evlist__set_mmap_first(evlist, map, overwrite);
} else {
+ /* Debug message used by test scripts */
+ pr_debug("idx %d: set output fd %d -> %d\n", idx, fd, *output);
if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
return -1;
@@ -488,8 +507,8 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
revent = !overwrite ? POLLIN : 0;
- if (!evsel->system_wide &&
- perf_evlist__add_pollfd(evlist, fd, map, revent, fdarray_flag__default) < 0) {
+ flgs = evsel->system_wide ? fdarray_flag__nonfilterable : fdarray_flag__default;
+ if (perf_evlist__add_pollfd(evlist, fd, map, revent, flgs) < 0) {
perf_mmap__put(map);
return -1;
}
@@ -509,21 +528,37 @@ static int
mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
- int thread;
int nr_threads = perf_thread_map__nr(evlist->threads);
+ int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
+ int cpu, thread, idx = 0;
+ int nr_mmaps = 0;
- for (thread = 0; thread < nr_threads; thread++) {
+ pr_debug("%s: nr cpu values (may include -1) %d nr threads %d\n",
+ __func__, nr_cpus, nr_threads);
+
+ /* per-thread mmaps */
+ for (thread = 0; thread < nr_threads; thread++, idx++) {
int output = -1;
int output_overwrite = -1;
- if (ops->idx)
- ops->idx(evlist, mp, thread, false);
+ if (mmap_per_evsel(evlist, ops, idx, mp, 0, thread, &output,
+ &output_overwrite, &nr_mmaps))
+ goto out_unmap;
+ }
- if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread,
- &output, &output_overwrite))
+ /* system-wide mmaps i.e. per-cpu */
+ for (cpu = 1; cpu < nr_cpus; cpu++, idx++) {
+ int output = -1;
+ int output_overwrite = -1;
+
+ if (mmap_per_evsel(evlist, ops, idx, mp, cpu, 0, &output,
+ &output_overwrite, &nr_mmaps))
goto out_unmap;
}
+ if (nr_mmaps != evlist->nr_mmaps)
+ pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps);
+
return 0;
out_unmap:
@@ -536,23 +571,26 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
int nr_threads = perf_thread_map__nr(evlist->threads);
- int nr_cpus = perf_cpu_map__nr(evlist->cpus);
+ int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
+ int nr_mmaps = 0;
int cpu, thread;
+ pr_debug("%s: nr cpu values %d nr threads %d\n", __func__, nr_cpus, nr_threads);
+
for (cpu = 0; cpu < nr_cpus; cpu++) {
int output = -1;
int output_overwrite = -1;
- if (ops->idx)
- ops->idx(evlist, mp, cpu, true);
-
for (thread = 0; thread < nr_threads; thread++) {
if (mmap_per_evsel(evlist, ops, cpu, mp, cpu,
- thread, &output, &output_overwrite))
+ thread, &output, &output_overwrite, &nr_mmaps))
goto out_unmap;
}
}
+ if (nr_mmaps != evlist->nr_mmaps)
+ pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps);
+
return 0;
out_unmap:
@@ -564,9 +602,14 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
{
int nr_mmaps;
- nr_mmaps = perf_cpu_map__nr(evlist->cpus);
- if (perf_cpu_map__empty(evlist->cpus))
- nr_mmaps = perf_thread_map__nr(evlist->threads);
+ /* One for each CPU */
+ nr_mmaps = perf_cpu_map__nr(evlist->all_cpus);
+ if (perf_cpu_map__empty(evlist->all_cpus)) {
+ /* Plus one for each thread */
+ nr_mmaps += perf_thread_map__nr(evlist->threads);
+ /* Minus the per-thread CPU (-1) */
+ nr_mmaps -= 1;
+ }
return nr_mmaps;
}
@@ -575,9 +618,8 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
+ const struct perf_cpu_map *cpus = evlist->all_cpus;
struct perf_evsel *evsel;
- const struct perf_cpu_map *cpus = evlist->cpus;
- const struct perf_thread_map *threads = evlist->threads;
if (!ops || !ops->get || !ops->mmap)
return -EINVAL;
@@ -589,7 +631,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
perf_evlist__for_each_entry(evlist, evsel) {
if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
evsel->sample_id == NULL &&
- perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
+ perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0)
return -ENOMEM;
}
@@ -643,14 +685,14 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,
return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first;
}
-void __perf_evlist__set_leader(struct list_head *list)
+void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader)
{
- struct perf_evsel *evsel, *leader;
+ struct perf_evsel *first, *last, *evsel;
- leader = list_entry(list->next, struct perf_evsel, node);
- evsel = list_entry(list->prev, struct perf_evsel, node);
+ first = list_first_entry(list, struct perf_evsel, node);
+ last = list_last_entry(list, struct perf_evsel, node);
- leader->nr_members = evsel->idx - leader->idx + 1;
+ leader->nr_members = last->idx - first->idx + 1;
__perf_evlist__for_each_entry(list, evsel)
evsel->leader = leader;
@@ -659,7 +701,10 @@ void __perf_evlist__set_leader(struct list_head *list)
void perf_evlist__set_leader(struct perf_evlist *evlist)
{
if (evlist->nr_entries) {
+ struct perf_evsel *first = list_entry(evlist->entries.next,
+ struct perf_evsel, node);
+
evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
- __perf_evlist__set_leader(&evlist->entries);
+ __perf_evlist__set_leader(&evlist->entries, first);
}
}
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index 8441e3e1aaac..8b51b008a81f 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -43,18 +43,22 @@ void perf_evsel__delete(struct perf_evsel *evsel)
free(evsel);
}
-#define FD(e, x, y) ((int *) xyarray__entry(e->fd, x, y))
-#define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL)
+#define FD(_evsel, _cpu_map_idx, _thread) \
+ ((int *)xyarray__entry(_evsel->fd, _cpu_map_idx, _thread))
+#define MMAP(_evsel, _cpu_map_idx, _thread) \
+ (_evsel->mmap ? ((struct perf_mmap *) xyarray__entry(_evsel->mmap, _cpu_map_idx, _thread)) \
+ : NULL)
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
if (evsel->fd) {
- int cpu, thread;
- for (cpu = 0; cpu < ncpus; cpu++) {
+ int idx, thread;
+
+ for (idx = 0; idx < ncpus; idx++) {
for (thread = 0; thread < nthreads; thread++) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, idx, thread);
if (fd)
*fd = -1;
@@ -74,13 +78,13 @@ static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthre
static int
sys_perf_event_open(struct perf_event_attr *attr,
- pid_t pid, int cpu, int group_fd,
+ pid_t pid, struct perf_cpu cpu, int group_fd,
unsigned long flags)
{
- return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+ return syscall(__NR_perf_event_open, attr, pid, cpu.cpu, group_fd, flags);
}
-static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd)
+static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, int *group_fd)
{
struct perf_evsel *leader = evsel->leader;
int *fd;
@@ -97,7 +101,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou
if (!leader->fd)
return -ENOTCONN;
- fd = FD(leader, cpu, thread);
+ fd = FD(leader, cpu_map_idx, thread);
if (fd == NULL || *fd == -1)
return -EBADF;
@@ -109,7 +113,8 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou
int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
{
- int cpu, thread, err = 0;
+ struct perf_cpu cpu;
+ int idx, thread, err = 0;
if (cpus == NULL) {
static struct perf_cpu_map *empty_cpu_map;
@@ -136,41 +141,48 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
}
if (evsel->fd == NULL &&
- perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
+ perf_evsel__alloc_fd(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
return -ENOMEM;
- for (cpu = 0; cpu < cpus->nr; cpu++) {
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
for (thread = 0; thread < threads->nr; thread++) {
int fd, group_fd, *evsel_fd;
- evsel_fd = FD(evsel, cpu, thread);
- if (evsel_fd == NULL)
- return -EINVAL;
+ evsel_fd = FD(evsel, idx, thread);
+ if (evsel_fd == NULL) {
+ err = -EINVAL;
+ goto out;
+ }
- err = get_group_fd(evsel, cpu, thread, &group_fd);
+ err = get_group_fd(evsel, idx, thread, &group_fd);
if (err < 0)
- return err;
+ goto out;
fd = sys_perf_event_open(&evsel->attr,
threads->map[thread].pid,
- cpus->map[cpu], group_fd, 0);
+ cpu, group_fd, 0);
- if (fd < 0)
- return -errno;
+ if (fd < 0) {
+ err = -errno;
+ goto out;
+ }
*evsel_fd = fd;
}
}
+out:
+ if (err)
+ perf_evsel__close(evsel);
return err;
}
-static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
+static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
int thread;
for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, cpu_map_idx, thread);
if (fd && *fd >= 0) {
close(*fd);
@@ -181,10 +193,8 @@ static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
void perf_evsel__close_fd(struct perf_evsel *evsel)
{
- int cpu;
-
- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
- perf_evsel__close_fd_cpu(evsel, cpu);
+ for (int idx = 0; idx < xyarray__max_x(evsel->fd); idx++)
+ perf_evsel__close_fd_cpu(evsel, idx);
}
void perf_evsel__free_fd(struct perf_evsel *evsel)
@@ -202,29 +212,29 @@ void perf_evsel__close(struct perf_evsel *evsel)
perf_evsel__free_fd(evsel);
}
-void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
+void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
if (evsel->fd == NULL)
return;
- perf_evsel__close_fd_cpu(evsel, cpu);
+ perf_evsel__close_fd_cpu(evsel, cpu_map_idx);
}
void perf_evsel__munmap(struct perf_evsel *evsel)
{
- int cpu, thread;
+ int idx, thread;
if (evsel->fd == NULL || evsel->mmap == NULL)
return;
- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+ for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, idx, thread);
if (fd == NULL || *fd < 0)
continue;
- perf_mmap__munmap(MMAP(evsel, cpu, thread));
+ perf_mmap__munmap(MMAP(evsel, idx, thread));
}
}
@@ -234,7 +244,7 @@ void perf_evsel__munmap(struct perf_evsel *evsel)
int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
{
- int ret, cpu, thread;
+ int ret, idx, thread;
struct perf_mmap_param mp = {
.prot = PROT_READ | PROT_WRITE,
.mask = (pages * page_size) - 1,
@@ -246,15 +256,16 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0)
return -ENOMEM;
- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+ for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, idx, thread);
struct perf_mmap *map;
+ struct perf_cpu cpu = perf_cpu_map__cpu(evsel->cpus, idx);
if (fd == NULL || *fd < 0)
continue;
- map = MMAP(evsel, cpu, thread);
+ map = MMAP(evsel, idx, thread);
perf_mmap__init(map, NULL, false, NULL);
ret = perf_mmap__mmap(map, &mp, *fd, cpu);
@@ -268,14 +279,14 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
return 0;
}
-void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread)
+void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread)
{
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, cpu_map_idx, thread);
- if (fd == NULL || *fd < 0 || MMAP(evsel, cpu, thread) == NULL)
+ if (fd == NULL || *fd < 0 || MMAP(evsel, cpu_map_idx, thread) == NULL)
return NULL;
- return MMAP(evsel, cpu, thread)->base;
+ return MMAP(evsel, cpu_map_idx, thread)->base;
}
int perf_evsel__read_size(struct perf_evsel *evsel)
@@ -294,6 +305,9 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
if (read_format & PERF_FORMAT_ID)
entry += sizeof(u64);
+ if (read_format & PERF_FORMAT_LOST)
+ entry += sizeof(u64);
+
if (read_format & PERF_FORMAT_GROUP) {
nr = evsel->nr_members;
size += sizeof(u64);
@@ -303,41 +317,120 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
return size;
}
-int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+/* This only reads values for the leader */
+static int perf_evsel__read_group(struct perf_evsel *evsel, int cpu_map_idx,
+ int thread, struct perf_counts_values *count)
+{
+ size_t size = perf_evsel__read_size(evsel);
+ int *fd = FD(evsel, cpu_map_idx, thread);
+ u64 read_format = evsel->attr.read_format;
+ u64 *data;
+ int idx = 1;
+
+ if (fd == NULL || *fd < 0)
+ return -EINVAL;
+
+ data = calloc(1, size);
+ if (data == NULL)
+ return -ENOMEM;
+
+ if (readn(*fd, data, size) <= 0) {
+ free(data);
+ return -errno;
+ }
+
+ /*
+ * This reads only the leader event intentionally since we don't have
+ * perf counts values for sibling events.
+ */
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ count->ena = data[idx++];
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ count->run = data[idx++];
+
+ /* value is always available */
+ count->val = data[idx++];
+ if (read_format & PERF_FORMAT_ID)
+ count->id = data[idx++];
+ if (read_format & PERF_FORMAT_LOST)
+ count->lost = data[idx++];
+
+ free(data);
+ return 0;
+}
+
+/*
+ * The perf read format is very flexible. It needs to set the proper
+ * values according to the read format.
+ */
+static void perf_evsel__adjust_values(struct perf_evsel *evsel, u64 *buf,
+ struct perf_counts_values *count)
+{
+ u64 read_format = evsel->attr.read_format;
+ int n = 0;
+
+ count->val = buf[n++];
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ count->ena = buf[n++];
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ count->run = buf[n++];
+
+ if (read_format & PERF_FORMAT_ID)
+ count->id = buf[n++];
+
+ if (read_format & PERF_FORMAT_LOST)
+ count->lost = buf[n++];
+}
+
+int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count)
{
size_t size = perf_evsel__read_size(evsel);
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, cpu_map_idx, thread);
+ u64 read_format = evsel->attr.read_format;
+ struct perf_counts_values buf;
memset(count, 0, sizeof(*count));
if (fd == NULL || *fd < 0)
return -EINVAL;
- if (MMAP(evsel, cpu, thread) &&
- !perf_mmap__read_self(MMAP(evsel, cpu, thread), count))
+ if (read_format & PERF_FORMAT_GROUP)
+ return perf_evsel__read_group(evsel, cpu_map_idx, thread, count);
+
+ if (MMAP(evsel, cpu_map_idx, thread) &&
+ !(read_format & (PERF_FORMAT_ID | PERF_FORMAT_LOST)) &&
+ !perf_mmap__read_self(MMAP(evsel, cpu_map_idx, thread), count))
return 0;
- if (readn(*fd, count->values, size) <= 0)
+ if (readn(*fd, buf.values, size) <= 0)
return -errno;
+ perf_evsel__adjust_values(evsel, buf.values, count);
return 0;
}
+static int perf_evsel__ioctl(struct perf_evsel *evsel, int ioc, void *arg,
+ int cpu_map_idx, int thread)
+{
+ int *fd = FD(evsel, cpu_map_idx, thread);
+
+ if (fd == NULL || *fd < 0)
+ return -1;
+
+ return ioctl(*fd, ioc, arg);
+}
+
static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
int ioc, void *arg,
- int cpu)
+ int cpu_map_idx)
{
int thread;
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
- int err;
- int *fd = FD(evsel, cpu, thread);
-
- if (fd == NULL || *fd < 0)
- return -1;
-
- err = ioctl(*fd, ioc, arg);
+ int err = perf_evsel__ioctl(evsel, ioc, arg, cpu_map_idx, thread);
if (err)
return err;
@@ -346,9 +439,24 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
return 0;
}
-int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu)
+int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu);
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx);
+}
+
+int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread)
+{
+ struct perf_cpu cpu __maybe_unused;
+ int idx;
+ int err;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, evsel->cpus) {
+ err = perf_evsel__ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, idx, thread);
+ if (err)
+ return err;
+ }
+
+ return 0;
}
int perf_evsel__enable(struct perf_evsel *evsel)
@@ -361,9 +469,9 @@ int perf_evsel__enable(struct perf_evsel *evsel)
return err;
}
-int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu)
+int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu);
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu_map_idx);
}
int perf_evsel__disable(struct perf_evsel *evsel)
@@ -380,7 +488,7 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
{
int err = 0, i;
- for (i = 0; i < evsel->cpus->nr && !err; i++)
+ for (i = 0; i < perf_cpu_map__nr(evsel->cpus) && !err; i++)
err = perf_evsel__run_ioctl(evsel,
PERF_EVENT_IOC_SET_FILTER,
(void *)filter, i);
@@ -407,9 +515,6 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
if (ncpus == 0 || nthreads == 0)
return 0;
- if (evsel->system_wide)
- nthreads = 1;
-
evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
if (evsel->sample_id == NULL)
return -ENOMEM;
@@ -431,3 +536,22 @@ void perf_evsel__free_id(struct perf_evsel *evsel)
zfree(&evsel->id);
evsel->ids = 0;
}
+
+void perf_counts_values__scale(struct perf_counts_values *count,
+ bool scale, __s8 *pscaled)
+{
+ s8 scaled = 0;
+
+ if (scale) {
+ if (count->run == 0) {
+ scaled = -1;
+ count->val = 0;
+ } else if (count->run < count->ena) {
+ scaled = 1;
+ count->val = (u64)((double)count->val * count->ena / count->run);
+ }
+ }
+
+ if (pscaled)
+ *pscaled = scaled;
+}
diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h
index 840d4032587b..35dd29642296 100644
--- a/tools/lib/perf/include/internal/cpumap.h
+++ b/tools/lib/perf/include/internal/cpumap.h
@@ -3,17 +3,28 @@
#define __LIBPERF_INTERNAL_CPUMAP_H
#include <linux/refcount.h>
+#include <perf/cpumap.h>
+/**
+ * A sized, reference counted, sorted array of integers representing CPU
+ * numbers. This is commonly used to capture which CPUs a PMU is associated
+ * with. The indices into the cpumap are frequently used as they avoid having
+ * gaps if CPU numbers were used. For events associated with a pid, rather than
+ * a CPU, a single dummy map with an entry of -1 is used.
+ */
struct perf_cpu_map {
refcount_t refcnt;
+ /** Length of the map array. */
int nr;
- int map[];
+ /** The CPU values. */
+ struct perf_cpu map[];
};
#ifndef MAX_NR_CPUS
#define MAX_NR_CPUS 2048
#endif
-int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu);
+int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
+bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b);
#endif /* __LIBPERF_INTERNAL_CPUMAP_H */
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
index f366dbad6a88..850f07070036 100644
--- a/tools/lib/perf/include/internal/evlist.h
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <api/fd/array.h>
+#include <internal/cpumap.h>
#include <internal/evsel.h>
#define PERF_EVLIST__HLIST_BITS 8
@@ -18,7 +19,13 @@ struct perf_evlist {
int nr_entries;
int nr_groups;
bool has_user_cpus;
- struct perf_cpu_map *cpus;
+ bool needs_map_propagation;
+ /**
+ * The cpus passed from the command line or all online CPUs by
+ * default.
+ */
+ struct perf_cpu_map *user_requested_cpus;
+ /** The union of all evsel cpu maps. */
struct perf_cpu_map *all_cpus;
struct perf_thread_map *threads;
int nr_mmaps;
@@ -32,11 +39,12 @@ struct perf_evlist {
};
typedef void
-(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool);
+(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_evsel*,
+ struct perf_mmap_param*, int);
typedef struct perf_mmap*
(*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int);
typedef int
-(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int);
+(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, struct perf_cpu);
struct perf_evlist_mmap_ops {
perf_evlist_mmap__cb_idx_t idx;
@@ -127,5 +135,5 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,
void perf_evlist__reset_id_hash(struct perf_evlist *evlist);
-void __perf_evlist__set_leader(struct list_head *list);
+void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader);
#endif /* __LIBPERF_INTERNAL_EVLIST_H */
diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h
index 1f3eacbad2e8..a99a75d9e78f 100644
--- a/tools/lib/perf/include/internal/evsel.h
+++ b/tools/lib/perf/include/internal/evsel.h
@@ -6,8 +6,8 @@
#include <linux/perf_event.h>
#include <stdbool.h>
#include <sys/types.h>
+#include <internal/cpumap.h>
-struct perf_cpu_map;
struct perf_thread_map;
struct xyarray;
@@ -27,9 +27,13 @@ struct perf_sample_id {
* queue number.
*/
int idx;
- int cpu;
+ struct perf_cpu cpu;
pid_t tid;
+ /* Guest machine pid and VCPU, valid only if machine_pid is non-zero */
+ pid_t machine_pid;
+ struct perf_cpu vcpu;
+
/* Holds total ID period value for PERF_SAMPLE_READ processing. */
u64 period;
};
@@ -49,7 +53,18 @@ struct perf_evsel {
/* parse modifier helper */
int nr_members;
+ /*
+ * system_wide is for events that need to be on every CPU, irrespective
+ * of user requested CPUs or threads. Map propagation will set cpus to
+ * this event's own_cpus, whereby they will contribute to evlist
+ * all_cpus.
+ */
bool system_wide;
+ /*
+ * Some events, for example uncore events, require a CPU.
+ * i.e. it cannot be the 'any CPU' value of -1.
+ */
+ bool requires_cpu;
int idx;
};
diff --git a/tools/lib/perf/include/internal/lib.h b/tools/lib/perf/include/internal/lib.h
index 5175d491b2d4..85471a4b900f 100644
--- a/tools/lib/perf/include/internal/lib.h
+++ b/tools/lib/perf/include/internal/lib.h
@@ -9,4 +9,6 @@ extern unsigned int page_size;
ssize_t readn(int fd, void *buf, size_t n);
ssize_t writen(int fd, const void *buf, size_t n);
+ssize_t preadn(int fd, void *buf, size_t n, off_t offs);
+
#endif /* __LIBPERF_INTERNAL_CPUMAP_H */
diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h
index 5e3422f40ed5..5a062af8e9d8 100644
--- a/tools/lib/perf/include/internal/mmap.h
+++ b/tools/lib/perf/include/internal/mmap.h
@@ -6,6 +6,7 @@
#include <linux/refcount.h>
#include <linux/types.h>
#include <stdbool.h>
+#include <internal/cpumap.h>
/* perf sample has 16 bits size limit */
#define PERF_SAMPLE_MAX_SIZE (1 << 16)
@@ -24,7 +25,7 @@ struct perf_mmap {
void *base;
int mask;
int fd;
- int cpu;
+ struct perf_cpu cpu;
refcount_t refcnt;
u64 prev;
u64 start;
@@ -46,7 +47,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map);
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
bool overwrite, libperf_unmap_cb_t unmap_cb);
int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
- int fd, int cpu);
+ int fd, struct perf_cpu cpu);
void perf_mmap__munmap(struct perf_mmap *map);
void perf_mmap__get(struct perf_mmap *map);
void perf_mmap__put(struct perf_mmap *map);
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 7c27766ea0bf..03aceb72a783 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -3,10 +3,14 @@
#define __LIBPERF_CPUMAP_H
#include <perf/core.h>
+#include <perf/cpumap.h>
#include <stdio.h>
#include <stdbool.h>
-struct perf_cpu_map;
+/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
+struct perf_cpu {
+ int cpu;
+};
LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
@@ -16,14 +20,18 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
-LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
+LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
-LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map);
+LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
+LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \
for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \
(idx) < perf_cpu_map__nr(cpus); \
(idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx))
+#define perf_cpu_map__for_each_idx(idx, cpus) \
+ for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++)
+
#endif /* __LIBPERF_CPUMAP_H */
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 75ee385fb078..ad47d7b31046 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -76,7 +76,7 @@ struct perf_record_lost_samples {
};
/*
- * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID
+ * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_LOST
*/
struct perf_record_read {
struct perf_event_header header;
@@ -85,6 +85,7 @@ struct perf_record_read {
__u64 time_enabled;
__u64 time_running;
__u64 id;
+ __u64 lost;
};
struct perf_record_throttle {
@@ -95,7 +96,7 @@ struct perf_record_throttle {
};
#ifndef KSYM_NAME_LEN
-#define KSYM_NAME_LEN 256
+#define KSYM_NAME_LEN 512
#endif
struct perf_record_ksymbol {
@@ -151,23 +152,75 @@ struct perf_record_header_attr {
enum {
PERF_CPU_MAP__CPUS = 0,
PERF_CPU_MAP__MASK = 1,
+ PERF_CPU_MAP__RANGE_CPUS = 2,
};
+/*
+ * Array encoding of a perf_cpu_map where nr is the number of entries in cpu[]
+ * and each entry is a value for a CPU in the map.
+ */
struct cpu_map_entries {
__u16 nr;
__u16 cpu[];
};
-struct perf_record_record_cpu_map {
+/* Bitmap encoding of a perf_cpu_map where bitmap entries are 32-bit. */
+struct perf_record_mask_cpu_map32 {
+ /* Number of mask values. */
+ __u16 nr;
+ /* Constant 4. */
+ __u16 long_size;
+ /* Bitmap data. */
+ __u32 mask[];
+};
+
+/* Bitmap encoding of a perf_cpu_map where bitmap entries are 64-bit. */
+struct perf_record_mask_cpu_map64 {
+ /* Number of mask values. */
__u16 nr;
+ /* Constant 8. */
__u16 long_size;
- unsigned long mask[];
+ /* Legacy padding. */
+ char __pad[4];
+ /* Bitmap data. */
+ __u64 mask[];
+};
+
+/*
+ * 'struct perf_record_cpu_map_data' is packed as unfortunately an earlier
+ * version had unaligned data and we wish to retain file format compatibility.
+ * -irogers
+ */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpacked"
+#pragma GCC diagnostic ignored "-Wattributes"
+
+/*
+ * An encoding of a CPU map for a range starting at start_cpu through to
+ * end_cpu. If any_cpu is 1, an any CPU (-1) value (aka dummy value) is present.
+ */
+struct perf_record_range_cpu_map {
+ __u8 any_cpu;
+ __u8 __pad;
+ __u16 start_cpu;
+ __u16 end_cpu;
};
struct perf_record_cpu_map_data {
__u16 type;
- char data[];
-};
+ union {
+ /* Used when type == PERF_CPU_MAP__CPUS. */
+ struct cpu_map_entries cpus_data;
+ /* Used when type == PERF_CPU_MAP__MASK and long_size == 4. */
+ struct perf_record_mask_cpu_map32 mask32_data;
+ /* Used when type == PERF_CPU_MAP__MASK and long_size == 8. */
+ struct perf_record_mask_cpu_map64 mask64_data;
+ /* Used when type == PERF_CPU_MAP__RANGE_CPUS. */
+ struct perf_record_range_cpu_map range_cpu_data;
+ };
+} __attribute__((packed));
+
+#pragma GCC diagnostic pop
struct perf_record_cpu_map {
struct perf_event_header header;
@@ -193,7 +246,16 @@ struct perf_record_event_update {
struct perf_event_header header;
__u64 type;
__u64 id;
- char data[];
+ union {
+ /* Used when type == PERF_EVENT_UPDATE__SCALE. */
+ struct perf_record_event_update_scale scale;
+ /* Used when type == PERF_EVENT_UPDATE__UNIT. */
+ char unit[0];
+ /* Used when type == PERF_EVENT_UPDATE__NAME. */
+ char name[0];
+ /* Used when type == PERF_EVENT_UPDATE__CPUS. */
+ struct perf_record_event_update_cpus cpus;
+ };
};
#define MAX_EVENT_NAME 64
@@ -237,10 +299,15 @@ struct id_index_entry {
__u64 tid;
};
+struct id_index_entry_2 {
+ __u64 machine_pid;
+ __u64 vcpu;
+};
+
struct perf_record_id_index {
struct perf_event_header header;
__u64 nr;
- struct id_index_entry entries[0];
+ struct id_index_entry entries[];
};
struct perf_record_auxtrace_info {
@@ -274,6 +341,8 @@ struct perf_record_auxtrace_error {
__u64 ip;
__u64 time;
char msg[MAX_AUXTRACE_ERROR_MSG];
+ __u32 machine_pid;
+ __u32 vcpu;
};
struct perf_record_aux {
@@ -389,6 +458,7 @@ enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_TIME_CONV = 79,
PERF_RECORD_HEADER_FEATURE = 80,
PERF_RECORD_COMPRESSED = 81,
+ PERF_RECORD_FINISHED_INIT = 82,
PERF_RECORD_HEADER_MAX
};
diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h
index 60eae25076d3..6f92204075c2 100644
--- a/tools/lib/perf/include/perf/evsel.h
+++ b/tools/lib/perf/include/perf/evsel.h
@@ -4,6 +4,8 @@
#include <stdint.h>
#include <perf/core.h>
+#include <stdbool.h>
+#include <linux/types.h>
struct perf_evsel;
struct perf_event_attr;
@@ -16,8 +18,10 @@ struct perf_counts_values {
uint64_t val;
uint64_t ena;
uint64_t run;
+ uint64_t id;
+ uint64_t lost;
};
- uint64_t values[3];
+ uint64_t values[5];
};
};
@@ -26,18 +30,21 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel);
LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
-LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx);
LIBPERF_API int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
LIBPERF_API void perf_evsel__munmap(struct perf_evsel *evsel);
-LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread);
-LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread);
+LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count);
LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
-LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
+LIBPERF_API int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread);
LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel);
-LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
+LIBPERF_API void perf_counts_values__scale(struct perf_counts_values *count,
+ bool scale, __s8 *pscaled);
#endif /* __LIBPERF_EVSEL_H */
diff --git a/tools/lib/perf/include/perf/threadmap.h b/tools/lib/perf/include/perf/threadmap.h
index a7c50de8d010..8b40e7777cea 100644
--- a/tools/lib/perf/include/perf/threadmap.h
+++ b/tools/lib/perf/include/perf/threadmap.h
@@ -8,11 +8,12 @@
struct perf_thread_map;
LIBPERF_API struct perf_thread_map *perf_thread_map__new_dummy(void);
+LIBPERF_API struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array);
-LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid);
-LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int thread);
+LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid);
+LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int idx);
LIBPERF_API int perf_thread_map__nr(struct perf_thread_map *threads);
-LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread);
+LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx);
LIBPERF_API struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
LIBPERF_API void perf_thread_map__put(struct perf_thread_map *map);
diff --git a/tools/lib/perf/lib.c b/tools/lib/perf/lib.c
index 18658931fc71..696fb0ea67c6 100644
--- a/tools/lib/perf/lib.c
+++ b/tools/lib/perf/lib.c
@@ -38,6 +38,26 @@ ssize_t readn(int fd, void *buf, size_t n)
return ion(true, fd, buf, n);
}
+ssize_t preadn(int fd, void *buf, size_t n, off_t offs)
+{
+ size_t left = n;
+
+ while (left) {
+ ssize_t ret = pread(fd, buf, left, offs);
+
+ if (ret < 0 && errno == EINTR)
+ continue;
+ if (ret <= 0)
+ return ret;
+
+ left -= ret;
+ buf += ret;
+ offs += ret;
+ }
+
+ return n;
+}
+
/*
* Write exactly 'n' bytes or return an error.
*/
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 71468606e8a7..190b56ae923a 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -2,6 +2,7 @@ LIBPERF_0.0.1 {
global:
libperf_init;
perf_cpu_map__dummy_new;
+ perf_cpu_map__default_new;
perf_cpu_map__get;
perf_cpu_map__put;
perf_cpu_map__new;
@@ -10,6 +11,8 @@ LIBPERF_0.0.1 {
perf_cpu_map__cpu;
perf_cpu_map__empty;
perf_cpu_map__max;
+ perf_cpu_map__has;
+ perf_thread_map__new_array;
perf_thread_map__new_dummy;
perf_thread_map__set_pid;
perf_thread_map__comm;
@@ -50,6 +53,7 @@ LIBPERF_0.0.1 {
perf_mmap__read_init;
perf_mmap__read_done;
perf_mmap__read_event;
+ perf_counts_values__scale;
local:
*;
};
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index c89dfa5f67b3..0d1634cedf44 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -13,6 +13,7 @@
#include <internal/lib.h>
#include <linux/kernel.h>
#include <linux/math64.h>
+#include <linux/stringify.h>
#include "internal.h"
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
@@ -32,7 +33,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)
}
int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
- int fd, int cpu)
+ int fd, struct perf_cpu cpu)
{
map->prev = 0;
map->mask = mp->mask;
@@ -294,6 +295,103 @@ static u64 read_timestamp(void)
return low | ((u64)high) << 32;
}
+#elif defined(__aarch64__)
+#define read_sysreg(r) ({ \
+ u64 __val; \
+ asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
+ __val; \
+})
+
+static u64 read_pmccntr(void)
+{
+ return read_sysreg(pmccntr_el0);
+}
+
+#define PMEVCNTR_READ(idx) \
+ static u64 read_pmevcntr_##idx(void) { \
+ return read_sysreg(pmevcntr##idx##_el0); \
+ }
+
+PMEVCNTR_READ(0);
+PMEVCNTR_READ(1);
+PMEVCNTR_READ(2);
+PMEVCNTR_READ(3);
+PMEVCNTR_READ(4);
+PMEVCNTR_READ(5);
+PMEVCNTR_READ(6);
+PMEVCNTR_READ(7);
+PMEVCNTR_READ(8);
+PMEVCNTR_READ(9);
+PMEVCNTR_READ(10);
+PMEVCNTR_READ(11);
+PMEVCNTR_READ(12);
+PMEVCNTR_READ(13);
+PMEVCNTR_READ(14);
+PMEVCNTR_READ(15);
+PMEVCNTR_READ(16);
+PMEVCNTR_READ(17);
+PMEVCNTR_READ(18);
+PMEVCNTR_READ(19);
+PMEVCNTR_READ(20);
+PMEVCNTR_READ(21);
+PMEVCNTR_READ(22);
+PMEVCNTR_READ(23);
+PMEVCNTR_READ(24);
+PMEVCNTR_READ(25);
+PMEVCNTR_READ(26);
+PMEVCNTR_READ(27);
+PMEVCNTR_READ(28);
+PMEVCNTR_READ(29);
+PMEVCNTR_READ(30);
+
+/*
+ * Read a value direct from PMEVCNTR<idx>
+ */
+static u64 read_perf_counter(unsigned int counter)
+{
+ static u64 (* const read_f[])(void) = {
+ read_pmevcntr_0,
+ read_pmevcntr_1,
+ read_pmevcntr_2,
+ read_pmevcntr_3,
+ read_pmevcntr_4,
+ read_pmevcntr_5,
+ read_pmevcntr_6,
+ read_pmevcntr_7,
+ read_pmevcntr_8,
+ read_pmevcntr_9,
+ read_pmevcntr_10,
+ read_pmevcntr_11,
+ read_pmevcntr_13,
+ read_pmevcntr_12,
+ read_pmevcntr_14,
+ read_pmevcntr_15,
+ read_pmevcntr_16,
+ read_pmevcntr_17,
+ read_pmevcntr_18,
+ read_pmevcntr_19,
+ read_pmevcntr_20,
+ read_pmevcntr_21,
+ read_pmevcntr_22,
+ read_pmevcntr_23,
+ read_pmevcntr_24,
+ read_pmevcntr_25,
+ read_pmevcntr_26,
+ read_pmevcntr_27,
+ read_pmevcntr_28,
+ read_pmevcntr_29,
+ read_pmevcntr_30,
+ read_pmccntr
+ };
+
+ if (counter < ARRAY_SIZE(read_f))
+ return (read_f[counter])();
+
+ return 0;
+}
+
+static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); }
+
#else
static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
static u64 read_timestamp(void) { return 0; }
@@ -353,8 +451,6 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count
count->ena += delta;
if (idx)
count->run += delta;
-
- cnt = mul_u64_u64_div64(cnt, count->ena, count->run);
}
count->val = cnt;
diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c
index d39378eaf897..87b0510a556f 100644
--- a/tools/lib/perf/tests/test-cpumap.c
+++ b/tools/lib/perf/tests/test-cpumap.c
@@ -14,6 +14,8 @@ static int libperf_print(enum libperf_print_level level,
int test_cpumap(int argc, char **argv)
{
struct perf_cpu_map *cpus;
+ struct perf_cpu cpu;
+ int idx;
__T_START;
@@ -27,6 +29,15 @@ int test_cpumap(int argc, char **argv)
perf_cpu_map__put(cpus);
perf_cpu_map__put(cpus);
+ cpus = perf_cpu_map__default_new();
+ if (!cpus)
+ return -1;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus)
+ __T("wrong cpu number", cpu.cpu != -1);
+
+ perf_cpu_map__put(cpus);
+
__T_END;
return tests_failed == 0 ? 0 : -1;
}
diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
index ce91a582f0e4..ed616fc19b4f 100644
--- a/tools/lib/perf/tests/test-evlist.c
+++ b/tools/lib/perf/tests/test-evlist.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE // needed for sched.h to get sched_[gs]etaffinity and CPU_(ZERO,SET)
+#include <inttypes.h>
#include <sched.h>
#include <stdio.h>
#include <stdarg.h>
@@ -21,6 +22,9 @@
#include "tests.h"
#include <internal/evsel.h>
+#define EVENT_NUM 15
+#define WAIT_COUNT 100000000UL
+
static int libperf_print(enum libperf_print_level level,
const char *fmt, va_list ap)
{
@@ -65,7 +69,7 @@ static int test_stat_cpu(void)
perf_evlist__set_maps(evlist, cpus, NULL);
err = perf_evlist__open(evlist);
- __T("failed to open evsel", err == 0);
+ __T("failed to open evlist", err == 0);
perf_evlist__for_each_evsel(evlist, evsel) {
cpus = perf_evsel__cpus(evsel);
@@ -126,7 +130,7 @@ static int test_stat_thread(void)
perf_evlist__set_maps(evlist, NULL, threads);
err = perf_evlist__open(evlist);
- __T("failed to open evsel", err == 0);
+ __T("failed to open evlist", err == 0);
perf_evlist__for_each_evsel(evlist, evsel) {
perf_evsel__read(evsel, 0, 0, &counts);
@@ -183,7 +187,7 @@ static int test_stat_thread_enable(void)
perf_evlist__set_maps(evlist, NULL, threads);
err = perf_evlist__open(evlist);
- __T("failed to open evsel", err == 0);
+ __T("failed to open evlist", err == 0);
perf_evlist__for_each_evsel(evlist, evsel) {
perf_evsel__read(evsel, 0, 0, &counts);
@@ -331,7 +335,8 @@ static int test_mmap_cpus(void)
};
cpu_set_t saved_mask;
char path[PATH_MAX];
- int id, err, cpu, tmp;
+ int id, err, tmp;
+ struct perf_cpu cpu;
union perf_event *event;
int count = 0;
@@ -374,7 +379,7 @@ static int test_mmap_cpus(void)
cpu_set_t mask;
CPU_ZERO(&mask);
- CPU_SET(cpu, &mask);
+ CPU_SET(cpu.cpu, &mask);
err = sched_setaffinity(0, sizeof(mask), &mask);
__T("sched_setaffinity failed", err == 0);
@@ -413,6 +418,159 @@ static int test_mmap_cpus(void)
return 0;
}
+static double display_error(long long average,
+ long long high,
+ long long low,
+ long long expected)
+{
+ double error;
+
+ error = (((double)average - expected) / expected) * 100.0;
+
+ __T_VERBOSE(" Expected: %lld\n", expected);
+ __T_VERBOSE(" High: %lld Low: %lld Average: %lld\n",
+ high, low, average);
+
+ __T_VERBOSE(" Average Error = %.2f%%\n", error);
+
+ return error;
+}
+
+static int test_stat_multiplexing(void)
+{
+ struct perf_counts_values expected_counts = { .val = 0 };
+ struct perf_counts_values counts[EVENT_NUM] = {{ .val = 0 },};
+ struct perf_thread_map *threads;
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_INSTRUCTIONS,
+ .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING,
+ .disabled = 1,
+ };
+ int err, i, nonzero = 0;
+ unsigned long count;
+ long long max = 0, min = 0, avg = 0;
+ double error = 0.0;
+ s8 scaled = 0;
+
+ /* read for non-multiplexing event count */
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ evsel = perf_evsel__new(&attr);
+ __T("failed to create evsel", evsel);
+
+ err = perf_evsel__open(evsel, NULL, threads);
+ __T("failed to open evsel", err == 0);
+
+ err = perf_evsel__enable(evsel);
+ __T("failed to enable evsel", err == 0);
+
+ /* wait loop */
+ count = WAIT_COUNT;
+ while (count--)
+ ;
+
+ perf_evsel__read(evsel, 0, 0, &expected_counts);
+ __T("failed to read value for evsel", expected_counts.val != 0);
+ __T("failed to read non-multiplexing event count",
+ expected_counts.ena == expected_counts.run);
+
+ err = perf_evsel__disable(evsel);
+ __T("failed to enable evsel", err == 0);
+
+ perf_evsel__close(evsel);
+ perf_evsel__delete(evsel);
+
+ perf_thread_map__put(threads);
+
+ /* read for multiplexing event count */
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ evlist = perf_evlist__new();
+ __T("failed to create evlist", evlist);
+
+ for (i = 0; i < EVENT_NUM; i++) {
+ evsel = perf_evsel__new(&attr);
+ __T("failed to create evsel", evsel);
+
+ perf_evlist__add(evlist, evsel);
+ }
+ perf_evlist__set_maps(evlist, NULL, threads);
+
+ err = perf_evlist__open(evlist);
+ __T("failed to open evlist", err == 0);
+
+ perf_evlist__enable(evlist);
+
+ /* wait loop */
+ count = WAIT_COUNT;
+ while (count--)
+ ;
+
+ i = 0;
+ perf_evlist__for_each_evsel(evlist, evsel) {
+ perf_evsel__read(evsel, 0, 0, &counts[i]);
+ __T("failed to read value for evsel", counts[i].val != 0);
+ i++;
+ }
+
+ perf_evlist__disable(evlist);
+
+ min = counts[0].val;
+ for (i = 0; i < EVENT_NUM; i++) {
+ __T_VERBOSE("Event %2d -- Raw count = %" PRIu64 ", run = %" PRIu64 ", enable = %" PRIu64 "\n",
+ i, counts[i].val, counts[i].run, counts[i].ena);
+
+ perf_counts_values__scale(&counts[i], true, &scaled);
+ if (scaled == 1) {
+ __T_VERBOSE("\t Scaled count = %" PRIu64 " (%.2lf%%, %" PRIu64 "/%" PRIu64 ")\n",
+ counts[i].val,
+ (double)counts[i].run / (double)counts[i].ena * 100.0,
+ counts[i].run, counts[i].ena);
+ } else if (scaled == -1) {
+ __T_VERBOSE("\t Not Running\n");
+ } else {
+ __T_VERBOSE("\t Not Scaling\n");
+ }
+
+ if (counts[i].val > max)
+ max = counts[i].val;
+
+ if (counts[i].val < min)
+ min = counts[i].val;
+
+ avg += counts[i].val;
+
+ if (counts[i].val != 0)
+ nonzero++;
+ }
+
+ if (nonzero != 0)
+ avg = avg / nonzero;
+ else
+ avg = 0;
+
+ error = display_error(avg, max, min, expected_counts.val);
+
+ __T("Error out of range!", ((error <= 1.0) && (error >= -1.0)));
+
+ perf_evlist__close(evlist);
+ perf_evlist__delete(evlist);
+
+ perf_thread_map__put(threads);
+
+ return 0;
+}
+
int test_evlist(int argc, char **argv)
{
__T_START;
@@ -424,6 +582,7 @@ int test_evlist(int argc, char **argv)
test_stat_thread_enable();
test_mmap_thread();
test_mmap_cpus();
+ test_stat_multiplexing();
__T_END;
return tests_failed == 0 ? 0 : -1;
diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c
index 33ae9334861a..a11fc51bfb68 100644
--- a/tools/lib/perf/tests/test-evsel.c
+++ b/tools/lib/perf/tests/test-evsel.c
@@ -1,10 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdarg.h>
#include <stdio.h>
+#include <string.h>
#include <linux/perf_event.h>
+#include <linux/kernel.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <perf/evsel.h>
+#include <internal/evsel.h>
#include <internal/tests.h>
#include "tests.h"
@@ -130,6 +133,9 @@ static int test_stat_user_read(int event)
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
.config = event,
+#ifdef __aarch64__
+ .config1 = 0x2, /* Request user access */
+#endif
};
int err, i;
@@ -150,7 +156,7 @@ static int test_stat_user_read(int event)
pc = perf_evsel__mmap_base(evsel, 0, 0);
__T("failed to get mmapped address", pc);
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
__T("userspace counter access not supported", pc->cap_user_rdpmc);
__T("userspace counter access not enabled", pc->index);
__T("userspace counter width not set", pc->pmc_width >= 32);
@@ -186,6 +192,163 @@ static int test_stat_user_read(int event)
return 0;
}
+static int test_stat_read_format_single(struct perf_event_attr *attr, struct perf_thread_map *threads)
+{
+ struct perf_evsel *evsel;
+ struct perf_counts_values counts;
+ volatile int count = 0x100000;
+ int err;
+
+ evsel = perf_evsel__new(attr);
+ __T("failed to create evsel", evsel);
+
+ /* skip old kernels that don't support the format */
+ err = perf_evsel__open(evsel, NULL, threads);
+ if (err < 0)
+ return 0;
+
+ while (count--) ;
+
+ memset(&counts, -1, sizeof(counts));
+ perf_evsel__read(evsel, 0, 0, &counts);
+
+ __T("failed to read value", counts.val);
+ if (attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ __T("failed to read TOTAL_TIME_ENABLED", counts.ena);
+ if (attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ __T("failed to read TOTAL_TIME_RUNNING", counts.run);
+ if (attr->read_format & PERF_FORMAT_ID)
+ __T("failed to read ID", counts.id);
+ if (attr->read_format & PERF_FORMAT_LOST)
+ __T("failed to read LOST", counts.lost == 0);
+
+ perf_evsel__close(evsel);
+ perf_evsel__delete(evsel);
+ return 0;
+}
+
+static int test_stat_read_format_group(struct perf_event_attr *attr, struct perf_thread_map *threads)
+{
+ struct perf_evsel *leader, *member;
+ struct perf_counts_values counts;
+ volatile int count = 0x100000;
+ int err;
+
+ attr->read_format |= PERF_FORMAT_GROUP;
+ leader = perf_evsel__new(attr);
+ __T("failed to create leader", leader);
+
+ attr->read_format &= ~PERF_FORMAT_GROUP;
+ member = perf_evsel__new(attr);
+ __T("failed to create member", member);
+
+ member->leader = leader;
+ leader->nr_members = 2;
+
+ /* skip old kernels that don't support the format */
+ err = perf_evsel__open(leader, NULL, threads);
+ if (err < 0)
+ return 0;
+ err = perf_evsel__open(member, NULL, threads);
+ if (err < 0)
+ return 0;
+
+ while (count--) ;
+
+ memset(&counts, -1, sizeof(counts));
+ perf_evsel__read(leader, 0, 0, &counts);
+
+ __T("failed to read leader value", counts.val);
+ if (attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ __T("failed to read leader TOTAL_TIME_ENABLED", counts.ena);
+ if (attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ __T("failed to read leader TOTAL_TIME_RUNNING", counts.run);
+ if (attr->read_format & PERF_FORMAT_ID)
+ __T("failed to read leader ID", counts.id);
+ if (attr->read_format & PERF_FORMAT_LOST)
+ __T("failed to read leader LOST", counts.lost == 0);
+
+ memset(&counts, -1, sizeof(counts));
+ perf_evsel__read(member, 0, 0, &counts);
+
+ __T("failed to read member value", counts.val);
+ if (attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ __T("failed to read member TOTAL_TIME_ENABLED", counts.ena);
+ if (attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ __T("failed to read member TOTAL_TIME_RUNNING", counts.run);
+ if (attr->read_format & PERF_FORMAT_ID)
+ __T("failed to read member ID", counts.id);
+ if (attr->read_format & PERF_FORMAT_LOST)
+ __T("failed to read member LOST", counts.lost == 0);
+
+ perf_evsel__close(member);
+ perf_evsel__close(leader);
+ perf_evsel__delete(member);
+ perf_evsel__delete(leader);
+ return 0;
+}
+
+static int test_stat_read_format(void)
+{
+ struct perf_thread_map *threads;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_TASK_CLOCK,
+ };
+ int err, i;
+
+#define FMT(_fmt) PERF_FORMAT_ ## _fmt
+#define FMT_TIME (FMT(TOTAL_TIME_ENABLED) | FMT(TOTAL_TIME_RUNNING))
+
+ uint64_t test_formats [] = {
+ 0,
+ FMT_TIME,
+ FMT(ID),
+ FMT(LOST),
+ FMT_TIME | FMT(ID),
+ FMT_TIME | FMT(LOST),
+ FMT_TIME | FMT(ID) | FMT(LOST),
+ FMT(ID) | FMT(LOST),
+ };
+
+#undef FMT
+#undef FMT_TIME
+
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ for (i = 0; i < (int)ARRAY_SIZE(test_formats); i++) {
+ attr.read_format = test_formats[i];
+ __T_VERBOSE("testing single read with read_format: %lx\n",
+ (unsigned long)test_formats[i]);
+
+ err = test_stat_read_format_single(&attr, threads);
+ __T("failed to read single format", err == 0);
+ }
+
+ perf_thread_map__put(threads);
+
+ threads = perf_thread_map__new_array(2, NULL);
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+ perf_thread_map__set_pid(threads, 1, 0);
+
+ for (i = 0; i < (int)ARRAY_SIZE(test_formats); i++) {
+ attr.read_format = test_formats[i];
+ __T_VERBOSE("testing group read with read_format: %lx\n",
+ (unsigned long)test_formats[i]);
+
+ err = test_stat_read_format_group(&attr, threads);
+ __T("failed to read group format", err == 0);
+ }
+
+ perf_thread_map__put(threads);
+ return 0;
+}
+
int test_evsel(int argc, char **argv)
{
__T_START;
@@ -197,6 +360,7 @@ int test_evsel(int argc, char **argv)
test_stat_thread_enable();
test_stat_user_read(PERF_COUNT_HW_INSTRUCTIONS);
test_stat_user_read(PERF_COUNT_HW_CPU_CYCLES);
+ test_stat_read_format();
__T_END;
return tests_failed == 0 ? 0 : -1;
diff --git a/tools/lib/perf/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c
index 5e2a0291e94c..f728ad7002bb 100644
--- a/tools/lib/perf/tests/test-threadmap.c
+++ b/tools/lib/perf/tests/test-threadmap.c
@@ -11,9 +11,43 @@ static int libperf_print(enum libperf_print_level level,
return vfprintf(stderr, fmt, ap);
}
+static int test_threadmap_array(int nr, pid_t *array)
+{
+ struct perf_thread_map *threads;
+ int i;
+
+ threads = perf_thread_map__new_array(nr, array);
+ __T("Failed to allocate new thread map", threads);
+
+ __T("Unexpected number of threads", perf_thread_map__nr(threads) == nr);
+
+ for (i = 0; i < nr; i++) {
+ __T("Unexpected initial value of thread",
+ perf_thread_map__pid(threads, i) == (array ? array[i] : -1));
+ }
+
+ for (i = 1; i < nr; i++)
+ perf_thread_map__set_pid(threads, i, i * 100);
+
+ __T("Unexpected value of thread 0",
+ perf_thread_map__pid(threads, 0) == (array ? array[0] : -1));
+
+ for (i = 1; i < nr; i++) {
+ __T("Unexpected thread value",
+ perf_thread_map__pid(threads, i) == i * 100);
+ }
+
+ perf_thread_map__put(threads);
+
+ return 0;
+}
+
+#define THREADS_NR 10
int test_threadmap(int argc, char **argv)
{
struct perf_thread_map *threads;
+ pid_t thr_array[THREADS_NR];
+ int i;
__T_START;
@@ -27,6 +61,13 @@ int test_threadmap(int argc, char **argv)
perf_thread_map__put(threads);
perf_thread_map__put(threads);
+ test_threadmap_array(THREADS_NR, NULL);
+
+ for (i = 0; i < THREADS_NR; i++)
+ thr_array[i] = i + 100;
+
+ test_threadmap_array(THREADS_NR, thr_array);
+
__T_END;
return tests_failed == 0 ? 0 : -1;
}
diff --git a/tools/lib/perf/threadmap.c b/tools/lib/perf/threadmap.c
index e92c368b0a6c..07968f3ea093 100644
--- a/tools/lib/perf/threadmap.c
+++ b/tools/lib/perf/threadmap.c
@@ -32,28 +32,38 @@ struct perf_thread_map *perf_thread_map__realloc(struct perf_thread_map *map, in
#define thread_map__alloc(__nr) perf_thread_map__realloc(NULL, __nr)
-void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid)
+void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid)
{
- map->map[thread].pid = pid;
+ map->map[idx].pid = pid;
}
-char *perf_thread_map__comm(struct perf_thread_map *map, int thread)
+char *perf_thread_map__comm(struct perf_thread_map *map, int idx)
{
- return map->map[thread].comm;
+ return map->map[idx].comm;
}
-struct perf_thread_map *perf_thread_map__new_dummy(void)
+struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array)
{
- struct perf_thread_map *threads = thread_map__alloc(1);
+ struct perf_thread_map *threads = thread_map__alloc(nr_threads);
+ int i;
+
+ if (!threads)
+ return NULL;
+
+ for (i = 0; i < nr_threads; i++)
+ perf_thread_map__set_pid(threads, i, array ? array[i] : -1);
+
+ threads->nr = nr_threads;
+ refcount_set(&threads->refcnt, 1);
- if (threads != NULL) {
- perf_thread_map__set_pid(threads, 0, -1);
- threads->nr = 1;
- refcount_set(&threads->refcnt, 1);
- }
return threads;
}
+struct perf_thread_map *perf_thread_map__new_dummy(void)
+{
+ return perf_thread_map__new_array(1, NULL);
+}
+
static void perf_thread_map__delete(struct perf_thread_map *threads)
{
if (threads) {
@@ -85,7 +95,7 @@ int perf_thread_map__nr(struct perf_thread_map *threads)
return threads ? threads->nr : 1;
}
-pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread)
+pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx)
{
- return map->map[thread].pid;
+ return map->map[idx].pid;
}