Diffstat (limited to 'tools/lib/perf/include')
-rw-r--r-- | tools/lib/perf/include/internal/cpumap.h   |  27
-rw-r--r-- | tools/lib/perf/include/internal/evlist.h   |  25
-rw-r--r-- | tools/lib/perf/include/internal/evsel.h    |  97
-rw-r--r-- | tools/lib/perf/include/internal/lib.h      |   2
-rw-r--r-- | tools/lib/perf/include/internal/mmap.h     |  11
-rw-r--r-- | tools/lib/perf/include/internal/rc_check.h | 113
-rw-r--r-- | tools/lib/perf/include/internal/tests.h    |  36
-rw-r--r-- | tools/lib/perf/include/internal/xyarray.h  |   9
-rw-r--r-- | tools/lib/perf/include/perf/bpf_perf.h     |  31
-rw-r--r-- | tools/lib/perf/include/perf/cpumap.h       |  92
-rw-r--r-- | tools/lib/perf/include/perf/event.h        | 173
-rw-r--r-- | tools/lib/perf/include/perf/evlist.h       |   2
-rw-r--r-- | tools/lib/perf/include/perf/evsel.h        |  20
-rw-r--r-- | tools/lib/perf/include/perf/threadmap.h    |   8
14 files changed, 593 insertions, 53 deletions
diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h
index 840d4032587b..e2be2d17c32b 100644
--- a/tools/lib/perf/include/internal/cpumap.h
+++ b/tools/lib/perf/include/internal/cpumap.h
@@ -3,17 +3,32 @@
 #define __LIBPERF_INTERNAL_CPUMAP_H
 
 #include <linux/refcount.h>
+#include <perf/cpumap.h>
+#include <internal/rc_check.h>
 
-struct perf_cpu_map {
+/**
+ * A sized, reference counted, sorted array of integers representing CPU
+ * numbers. This is commonly used to capture which CPUs a PMU is associated
+ * with. The indices into the cpumap are frequently used as they avoid having
+ * gaps if CPU numbers were used. For events associated with a pid, rather than
+ * a CPU, a single dummy map with an entry of -1 is used.
+ */
+DECLARE_RC_STRUCT(perf_cpu_map) {
 	refcount_t	refcnt;
+	/** Length of the map array. */
 	int		nr;
-	int		map[];
+	/** The CPU values. */
+	struct perf_cpu	map[];
 };
 
-#ifndef MAX_NR_CPUS
-#define MAX_NR_CPUS	2048
-#endif
+struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus);
+int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
+bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b);
 
-int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu);
+void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus);
+
+static inline refcount_t *perf_cpu_map__refcnt(struct perf_cpu_map *map)
+{
+	return &RC_CHK_ACCESS(map)->refcnt;
+}
 
 #endif /* __LIBPERF_INTERNAL_CPUMAP_H */
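A minimal usage sketch (illustrative, not part of the patch) of the index-oriented design described in the comment above, assuming the public declarations from perf/cpumap.h later in this page:

    #include <perf/cpumap.h>
    #include <stdio.h>

    int main(void)
    {
        /* A NULL cpu_list would mean all online CPUs. */
        struct perf_cpu_map *map = perf_cpu_map__new("0-3");
        struct perf_cpu cpu;
        int idx;

        /* Indices are dense (0..nr-1) even when CPU numbers have gaps. */
        perf_cpu_map__for_each_cpu(cpu, idx, map)
            printf("idx %d -> cpu %d\n", idx, cpu.cpu);

        perf_cpu_map__put(map);
        return 0;
    }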
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
index 74dc8c3f0b66..f43bdb9b6227 100644
--- a/tools/lib/perf/include/internal/evlist.h
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -4,6 +4,7 @@
 
 #include <linux/list.h>
 #include <api/fd/array.h>
+#include <internal/cpumap.h>
 #include <internal/evsel.h>
 
 #define PERF_EVLIST__HLIST_BITS 8
@@ -17,7 +18,13 @@ struct perf_evlist {
 	struct list_head	 entries;
 	int			 nr_entries;
 	bool			 has_user_cpus;
-	struct perf_cpu_map	*cpus;
+	bool			 needs_map_propagation;
+	/**
+	 * The cpus passed from the command line or all online CPUs by
+	 * default.
+	 */
+	struct perf_cpu_map	*user_requested_cpus;
+	/** The union of all evsel cpu maps. */
 	struct perf_cpu_map	*all_cpus;
 	struct perf_thread_map	*threads;
 	int			 nr_mmaps;
@@ -31,11 +38,12 @@ struct perf_evlist {
 };
 
 typedef void
-(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool);
+(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_evsel*,
+			      struct perf_mmap_param*, int);
 typedef struct perf_mmap*
 (*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int);
 typedef int
-(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int);
+(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, struct perf_cpu);
 
 struct perf_evlist_mmap_ops {
 	perf_evlist_mmap__cb_idx_t	idx;
@@ -45,7 +53,7 @@ struct perf_evlist_mmap_ops {
 
 int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
 int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
-			    void *ptr, short revent);
+			    void *ptr, short revent, enum fdarray_flags flags);
 
 int perf_evlist__mmap_ops(struct perf_evlist *evlist,
 			  struct perf_evlist_mmap_ops *ops,
@@ -118,10 +126,15 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist);
 
 void perf_evlist__id_add(struct perf_evlist *evlist,
 			 struct perf_evsel *evsel,
-			 int cpu, int thread, u64 id);
+			 int cpu_map_idx, int thread, u64 id);
 
 int perf_evlist__id_add_fd(struct perf_evlist *evlist,
 			   struct perf_evsel *evsel,
-			   int cpu, int thread, int fd);
+			   int cpu_map_idx, int thread, int fd);
+
+void perf_evlist__reset_id_hash(struct perf_evlist *evlist);
+
+void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader);
+
+void perf_evlist__go_system_wide(struct perf_evlist *evlist, struct perf_evsel *evsel);
 
 #endif /* __LIBPERF_INTERNAL_EVLIST_H */
diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h
index 1ffd083b235e..ea78defa77d0 100644
--- a/tools/lib/perf/include/internal/evsel.h
+++ b/tools/lib/perf/include/internal/evsel.h
@@ -6,11 +6,37 @@
 #include <linux/perf_event.h>
 #include <stdbool.h>
 #include <sys/types.h>
+#include <internal/cpumap.h>
 
-struct perf_cpu_map;
 struct perf_thread_map;
 struct xyarray;
 
+/**
+ * The per-thread accumulated period storage node.
+ */
+struct perf_sample_id_period {
+	struct list_head	node;
+	struct hlist_node	hnode;
+	/* Holds total ID period value for PERF_SAMPLE_READ processing. */
+	u64			period;
+	/* The TID that the values belong to. */
+	u32			tid;
+};
+
+/**
+ * perf_evsel_for_each_per_thread_period_safe - safely iterate through all the
+ * per_stream_periods
+ * @evsel: perf_evsel instance to iterate
+ * @item: struct perf_sample_id_period iterator
+ * @tmp: struct perf_sample_id_period temp iterator
+ */
+#define perf_evsel_for_each_per_thread_period_safe(evsel, tmp, item) \
+	list_for_each_entry_safe(item, tmp, &(evsel)->per_stream_periods, node)
+
+#define PERF_SAMPLE_ID__HLIST_BITS 4
+#define PERF_SAMPLE_ID__HLIST_SIZE (1 << PERF_SAMPLE_ID__HLIST_BITS)
+
 /*
  * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are
  * more than one entry in the evlist.
@@ -27,30 +53,86 @@ struct perf_sample_id {
 	 * queue number.
 	 */
 	int			 idx;
-	int			 cpu;
+	struct perf_cpu		 cpu;
 	pid_t			 tid;
 
-	/* Holds total ID period value for PERF_SAMPLE_READ processing. */
-	u64			 period;
+	/* Guest machine pid and VCPU, valid only if machine_pid is non-zero */
+	pid_t			 machine_pid;
+	struct perf_cpu		 vcpu;
+
+	/*
+	 * Per-thread, and global event counts are mutually exclusive:
+	 * Whilst it is possible to combine events into a group with differing
+	 * values of PERF_SAMPLE_READ, it is not valid to have inconsistent
+	 * values for `inherit`. Therefore it is not possible to have a
+	 * situation where a per-thread event is sampled as a global event;
+	 * all !inherit groups are global, and all groups where the sampling
+	 * event is inherit + PERF_SAMPLE_READ will be per-thread. Any event
+	 * that is part of such a group that is inherit but not PERF_SAMPLE_READ
+	 * will be read as per-thread. If such an event can also trigger a
+	 * sample (such as with sample_period > 0) then it will not cause
+	 * `read_format` to be included in its PERF_RECORD_SAMPLE, and
+	 * therefore will not expose the per-thread group members as global.
+	 */
+	union {
+		/*
+		 * Holds total ID period value for PERF_SAMPLE_READ processing
+		 * (when period is not per-thread).
+		 */
+		u64		 period;
+		/*
+		 * Holds total ID period value for PERF_SAMPLE_READ processing
+		 * (when period is per-thread).
+		 */
+		struct hlist_head periods[PERF_SAMPLE_ID__HLIST_SIZE];
+	};
 };
 
 struct perf_evsel {
 	struct list_head	 node;
 	struct perf_event_attr	 attr;
+	/** The commonly used cpu map of CPUs the event should be opened upon, etc. */
 	struct perf_cpu_map	*cpus;
+	/**
+	 * The cpu map read from the PMU. For core PMUs this is the list of all
+	 * CPUs the event can be opened upon. For other PMUs this is the default
+	 * cpu map for opening the event on, for example, the first CPU on a
+	 * socket for an uncore event.
+	 */
 	struct perf_cpu_map	*own_cpus;
 	struct perf_thread_map	*threads;
 	struct xyarray		*fd;
+	struct xyarray		*mmap;
 	struct xyarray		*sample_id;
 	u64			*id;
 	u32			 ids;
+	struct perf_evsel	*leader;
+
+	/* For events where the read_format value is per-thread rather than
+	 * global, stores the per-thread cumulative period. */
+	struct list_head	 per_stream_periods;
 
 	/* parse modifier helper */
 	int			 nr_members;
+	/*
+	 * system_wide is for events that need to be on every CPU, irrespective
+	 * of user requested CPUs or threads. The main example of this is the
+	 * dummy event. Map propagation will set cpus for this event to all CPUs
+	 * as software PMU events like dummy, have a CPU map that is empty.
+	 */
 	bool			 system_wide;
+	/*
+	 * Some events, for example uncore events, require a CPU.
+	 * i.e. it cannot be the 'any CPU' value of -1.
+	 */
+	bool			 requires_cpu;
+	/** Is the PMU for the event a core one? Affects the handling of own_cpus. */
+	bool			 is_pmu_core;
+	int			 idx;
 };
 
-void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr);
+void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr,
+		      int idx);
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 void perf_evsel__close_fd(struct perf_evsel *evsel);
 void perf_evsel__free_fd(struct perf_evsel *evsel);
@@ -60,4 +142,9 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter);
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
 void perf_evsel__free_id(struct perf_evsel *evsel);
 
+bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel);
+
+u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid,
+					bool per_thread);
+
 #endif /* __LIBPERF_INTERNAL_EVSEL_H */
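The per-thread period union above pairs with the two helpers declared at the bottom of this header. A hedged sketch of the intended call pattern (the caller here is an assumption, not code from the patch):

    /* Hypothetical consumer of a PERF_RECORD_SAMPLE's read_format value. */
    static void store_sample_period(struct perf_evsel *evsel,
                                    struct perf_sample_id *sid,
                                    u32 tid, u64 period)
    {
        bool per_thread = perf_evsel__attr_has_per_thread_sample_period(evsel);
        /* Either &sid->period, or the per-TID node hashed into sid->periods[]. */
        u64 *storage = perf_sample_id__get_period_storage(sid, tid, per_thread);

        if (storage)
            *storage = period;
    }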
diff --git a/tools/lib/perf/include/internal/lib.h b/tools/lib/perf/include/internal/lib.h
index 5175d491b2d4..85471a4b900f 100644
--- a/tools/lib/perf/include/internal/lib.h
+++ b/tools/lib/perf/include/internal/lib.h
@@ -9,4 +9,6 @@ extern unsigned int page_size;
 ssize_t readn(int fd, void *buf, size_t n);
 ssize_t writen(int fd, const void *buf, size_t n);
 
+ssize_t preadn(int fd, void *buf, size_t n, off_t offs);
+
 #endif /* __LIBPERF_INTERNAL_CPUMAP_H */
diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h
index be7556e0a2b2..5f08cab61ece 100644
--- a/tools/lib/perf/include/internal/mmap.h
+++ b/tools/lib/perf/include/internal/mmap.h
@@ -6,11 +6,13 @@
 #include <linux/refcount.h>
 #include <linux/types.h>
 #include <stdbool.h>
+#include <internal/cpumap.h>
 
 /* perf sample has 16 bits size limit */
 #define PERF_SAMPLE_MAX_SIZE (1 << 16)
 
 struct perf_mmap;
+struct perf_counts_values;
 
 typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map);
 
@@ -23,7 +25,7 @@ struct perf_mmap {
 	void		*base;
 	int		 mask;
 	int		 fd;
-	int		 cpu;
+	struct perf_cpu	 cpu;
 	refcount_t	 refcnt;
 	u64		 prev;
 	u64		 start;
@@ -31,7 +33,8 @@ struct perf_mmap {
 	bool		 overwrite;
 	u64		 flush;
 	libperf_unmap_cb_t unmap_cb;
-	char		 event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+	void		*event_copy;
+	size_t		 event_copy_sz;
 	struct perf_mmap *next;
 };
 
@@ -45,11 +48,13 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map);
 void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
 		     bool overwrite, libperf_unmap_cb_t unmap_cb);
 int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
-		    int fd, int cpu);
+		    int fd, struct perf_cpu cpu);
 void perf_mmap__munmap(struct perf_mmap *map);
 void perf_mmap__get(struct perf_mmap *map);
 void perf_mmap__put(struct perf_mmap *map);
 
 u64 perf_mmap__read_head(struct perf_mmap *map);
 
+int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count);
+
 #endif /* __LIBPERF_INTERNAL_MMAP_H */
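A short sketch of the new self-read path (illustrative; how 'map' is obtained and the use() helper are assumptions): perf_mmap__read_self() fills a perf_counts_values from the event's mmaped user page, avoiding a read() syscall when the hardware supports user-space counter reads.

    struct perf_counts_values counts = { .val = 0 };

    /* 'map' is an already-mmaped per-CPU ring for the calling thread's event. */
    if (perf_mmap__read_self(map, &counts) == 0)
        use(counts.val, counts.ena, counts.run); /* hypothetical consumer */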
diff --git a/tools/lib/perf/include/internal/rc_check.h b/tools/lib/perf/include/internal/rc_check.h
new file mode 100644
index 000000000000..f80ddfc80129
--- /dev/null
+++ b/tools/lib/perf/include/internal/rc_check.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __LIBPERF_INTERNAL_RC_CHECK_H
+#define __LIBPERF_INTERNAL_RC_CHECK_H
+
+#include <stdlib.h>
+#include <linux/zalloc.h>
+
+/*
+ * Enable reference count checking implicitly with leak checking, which is
+ * integrated into address sanitizer.
+ */
+#if defined(__SANITIZE_ADDRESS__) || defined(LEAK_SANITIZER) || defined(ADDRESS_SANITIZER)
+#define REFCNT_CHECKING 1
+#elif defined(__has_feature)
+#if __has_feature(address_sanitizer) || __has_feature(leak_sanitizer)
+#define REFCNT_CHECKING 1
+#endif
+#endif
+
+/*
+ * Shared reference count checking macros.
+ *
+ * Reference count checking is an approach to sanitizing the use of reference
+ * counted structs. It leverages address and leak sanitizers to make sure gets
+ * are paired with a put. Reference count checking adds a malloc-ed layer of
+ * indirection on a get, and frees it on a put. A missed put will be reported as
+ * a memory leak. A double put will be reported as a double free. Accessing
+ * after a put will cause a use-after-free and/or a segfault.
+ */
+
+#ifndef REFCNT_CHECKING
+/* Replaces "struct foo" so that the pointer may be interposed. */
+#define DECLARE_RC_STRUCT(struct_name)		\
+	struct struct_name
+
+/* Declare a reference counted struct variable. */
+#define RC_STRUCT(struct_name) struct struct_name
+
+/*
+ * Interpose the indirection. Result will hold the indirection and object is
+ * the reference counted struct.
+ */
+#define ADD_RC_CHK(result, object) (result = object, object)
+
+/* Strip the indirection layer. */
+#define RC_CHK_ACCESS(object) object
+
+/* Frees the object and the indirection layer. */
+#define RC_CHK_FREE(object) free(object)
+
+/* A get operation adding the indirection layer. */
+#define RC_CHK_GET(result, object) ADD_RC_CHK(result, object)
+
+/* A put operation removing the indirection layer. */
+#define RC_CHK_PUT(object) {}
+
+/* Pointer equality when the indirection may or may not be there. */
+#define RC_CHK_EQUAL(object1, object2) (object1 == object2)
+
+#else
+
+/* Replaces "struct foo" so that the pointer may be interposed. */
+#define DECLARE_RC_STRUCT(struct_name)			\
+	struct original_##struct_name;			\
+	struct struct_name {				\
+		struct original_##struct_name *orig;	\
+	};						\
+	struct original_##struct_name
+
+/* Declare a reference counted struct variable. */
+#define RC_STRUCT(struct_name) struct original_##struct_name
+
+/*
+ * Interpose the indirection. Result will hold the indirection and object is
+ * the reference counted struct.
+ */
+#define ADD_RC_CHK(result, object)					\
+	(								\
+		object ? (result = malloc(sizeof(*result)),		\
+			result ? (result->orig = object, result)	\
+				: (result = NULL, NULL))		\
+			: (result = NULL, NULL)				\
+	)
+
+/* Strip the indirection layer. */
+#define RC_CHK_ACCESS(object) object->orig
+
+/* Frees the object and the indirection layer. */
+#define RC_CHK_FREE(object)		\
+	do {				\
+		zfree(&object->orig);	\
+		free(object);		\
+	} while(0)
+
+/* A get operation adding the indirection layer. */
+#define RC_CHK_GET(result, object) ADD_RC_CHK(result, (object ? object->orig : NULL))
+
+/* A put operation removing the indirection layer. */
+#define RC_CHK_PUT(object)			\
+	do {					\
+		if (object) {			\
+			object->orig = NULL;	\
+			free(object);		\
+		}				\
+	} while(0)
+
+/* Pointer equality when the indirection may or may not be there. */
+#define RC_CHK_EQUAL(object1, object2) (object1 == object2 || \
+	(object1 && object2 && object1->orig == object2->orig))
+
+#endif
+
+#endif /* __LIBPERF_INTERNAL_RC_CHECK_H */
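To make the mechanics concrete, an illustrative (non-normative) get/put pair for a hypothetical refcounted struct foo, mirroring the pattern the header comment describes. Under sanitizers, each get mallocs a small wrapper freed by put, so a leak report maps to a missed put and a double-free report to a doubled put:

    DECLARE_RC_STRUCT(foo) {
        refcount_t refcnt;
        int value;
    };

    struct foo *foo__get(struct foo *f)
    {
        struct foo *result;

        /* Adds the checking indirection (a no-op without sanitizers). */
        if (RC_CHK_GET(result, f))
            refcount_inc(&RC_CHK_ACCESS(f)->refcnt);
        return result;
    }

    void foo__put(struct foo *f)
    {
        if (f && refcount_dec_and_test(&RC_CHK_ACCESS(f)->refcnt))
            RC_CHK_FREE(f);   /* last reference: free object + wrapper */
        else
            RC_CHK_PUT(f);    /* drop only this wrapper */
    }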
diff --git a/tools/lib/perf/include/internal/tests.h b/tools/lib/perf/include/internal/tests.h
index 2093e8868a67..b130a6663ff8 100644
--- a/tools/lib/perf/include/internal/tests.h
+++ b/tools/lib/perf/include/internal/tests.h
@@ -3,11 +3,34 @@
 #define __LIBPERF_INTERNAL_TESTS_H
 
 #include <stdio.h>
+#include <unistd.h>
 
-int tests_failed;
+extern int tests_failed;
+extern int tests_verbose;
+
+static inline int get_verbose(char **argv, int argc)
+{
+	int c;
+	int verbose = 0;
+
+	while ((c = getopt(argc, argv, "v")) != -1) {
+		switch (c)
+		{
+		case 'v':
+			verbose = 1;
+			break;
+		default:
+			break;
+		}
+	}
+	optind = 1;
+
+	return verbose;
+}
 
 #define __T_START					\
 do {							\
+	tests_verbose = get_verbose(argv, argc);	\
 	fprintf(stdout, "- running %s...", __FILE__);	\
 	fflush(NULL);					\
 	tests_failed = 0;				\
@@ -30,4 +53,15 @@ do {
 	}						\
 } while (0)
 
+#define __T_VERBOSE(...)				\
+do {							\
+	if (tests_verbose) {				\
+		if (tests_verbose == 1) {		\
+			fputc('\n', stderr);		\
+			tests_verbose++;		\
+		}					\
+		fprintf(stderr, ##__VA_ARGS__);		\
+	}						\
+} while (0)
+
 #endif /* __LIBPERF_INTERNAL_TESTS_H */
diff --git a/tools/lib/perf/include/internal/xyarray.h b/tools/lib/perf/include/internal/xyarray.h
index 51e35d6c8ec4..f10af3da7b21 100644
--- a/tools/lib/perf/include/internal/xyarray.h
+++ b/tools/lib/perf/include/internal/xyarray.h
@@ -18,11 +18,18 @@ struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
 void xyarray__delete(struct xyarray *xy);
 void xyarray__reset(struct xyarray *xy);
 
-static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
+static inline void *__xyarray__entry(struct xyarray *xy, int x, int y)
 {
 	return &xy->contents[x * xy->row_size + y * xy->entry_size];
 }
 
+static inline void *xyarray__entry(struct xyarray *xy, size_t x, size_t y)
+{
+	if (x >= xy->max_x || y >= xy->max_y)
+		return NULL;
+	return __xyarray__entry(xy, x, y);
+}
+
 static inline int xyarray__max_y(struct xyarray *xy)
 {
 	return xy->max_y;
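For context, a small sketch of the behavioral change introduced above (the dimensions are arbitrary): xyarray__entry() now bounds-checks and returns NULL, while the double-underscore variant keeps the old unchecked arithmetic.

    struct xyarray *fds = xyarray__new(/*xlen=*/4, /*ylen=*/8, sizeof(int));

    int *entry = xyarray__entry(fds, 3, 7);  /* valid: pointer into contents */
    int *oob   = xyarray__entry(fds, 4, 0);  /* x == max_x: now returns NULL */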
diff --git a/tools/lib/perf/include/perf/bpf_perf.h b/tools/lib/perf/include/perf/bpf_perf.h
new file mode 100644
index 000000000000..e7cf6ba7b674
--- /dev/null
+++ b/tools/lib/perf/include/perf/bpf_perf.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __LIBPERF_BPF_PERF_H
+#define __LIBPERF_BPF_PERF_H
+
+#include <linux/types.h>	/* for __u32 */
+
+/*
+ * bpf_perf uses a hashmap, the attr_map, to track all the leader programs.
+ * The hashmap is pinned in bpffs. flock() on this file is used to ensure
+ * no concurrent access to the attr_map. The key of attr_map is struct
+ * perf_event_attr, and the value is struct perf_event_attr_map_entry.
+ *
+ * struct perf_event_attr_map_entry contains two __u32 IDs, bpf_link of the
+ * leader prog, and the diff_map. Each perf-stat session holds a reference
+ * to the bpf_link to make sure the leader prog is attached to sched_switch
+ * tracepoint.
+ *
+ * Since the hashmap only contains IDs of the bpf_link and diff_map, it
+ * does not hold any references to the leader program. Once all perf-stat
+ * sessions of these events exit, the leader prog, its maps, and the
+ * perf_events will be freed.
+ */
+struct perf_event_attr_map_entry {
+	__u32 link_id;
+	__u32 diff_map_id;
+};
+
+/* default attr_map name */
+#define BPF_PERF_DEFAULT_ATTR_MAP_PATH "perf_attr_map"
+
+#endif /* __LIBPERF_BPF_PERF_H */
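A rough sketch, not from this patch, of the flock() serialization the comment describes. The bpffs mount point shown is an assumption; bpf_obj_get() is libbpf's helper for opening pinned objects:

    #include <fcntl.h>
    #include <sys/file.h>
    #include <unistd.h>
    #include <bpf/bpf.h>
    #include <perf/bpf_perf.h>

    int lock_attr_map(void)
    {
        /* Assumed bpffs mount point; the map name comes from the header. */
        int fd = bpf_obj_get("/sys/fs/bpf/" BPF_PERF_DEFAULT_ATTR_MAP_PATH);

        if (fd < 0)
            return fd;
        /* Serialize attr -> perf_event_attr_map_entry lookups/updates. */
        if (flock(fd, LOCK_EX) < 0) {
            close(fd);
            return -1;
        }
        return fd; /* caller later does flock(fd, LOCK_UN) and close(fd) */
    }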
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 6a17ad730cbc..58cc5c5fa47c 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -3,26 +3,102 @@
 #define __LIBPERF_CPUMAP_H
 
 #include <perf/core.h>
-#include <stdio.h>
 #include <stdbool.h>
+#include <stdint.h>
+
+/** A wrapper around a CPU to avoid confusion with indices into the perf_cpu_map's map. */
+struct perf_cpu {
+	int16_t cpu;
+};
+
+struct perf_cache {
+	int cache_lvl;
+	int cache;
+};
 
 struct perf_cpu_map;
 
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
+/**
+ * perf_cpu_map__new_any_cpu - a map with a singular "any CPU"/dummy -1 value.
+ */
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
+/**
+ * perf_cpu_map__new_online_cpus - a map read from
+ *                                 /sys/devices/system/cpu/online if
+ *                                 available. If reading isn't possible, a map
+ *                                 is created from the number of online
+ *                                 processors, assuming the first 'n'
+ *                                 processors are all online.
+ */
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void);
+/**
+ * perf_cpu_map__new - create a map from the given cpu_list, such as "0-7". If
+ *                     no cpu_list argument is provided then
+ *                     perf_cpu_map__new_online_cpus is returned.
+ */
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
+/** perf_cpu_map__new_int - create a map with the one given CPU. */
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_int(int cpu);
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
-						     struct perf_cpu_map *other);
+LIBPERF_API int perf_cpu_map__merge(struct perf_cpu_map **orig,
+				    struct perf_cpu_map *other);
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
+							 struct perf_cpu_map *other);
 LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
-LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
+/**
+ * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if the
+ *                     index is invalid.
+ */
+LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
+/**
+ * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a
+ *                    cpu of -1 for an invalid index, this makes an empty map
+ *                    look like it contains the "any CPU"/dummy value. Otherwise
+ *                    the result is the number of CPUs in the map, plus one if
+ *                    the "any CPU"/dummy value is present.
+ */
 LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
-LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
-LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__has_any_cpu_or_is_empty - is the map either empty or does it
+ *                                         contain the "any CPU"/dummy value?
+ */
+LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__is_any_cpu_or_is_empty - is the map either empty or just the
+ *                                        "any CPU"/dummy value?
+ */
+LIBPERF_API bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__is_empty - does the map contain no values, not even the
+ *                          special "any CPU"/dummy value?
+ */
+LIBPERF_API bool perf_cpu_map__is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__min - the minimum CPU value or -1 if empty or just the "any CPU"/dummy value.
+ */
+LIBPERF_API struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__max - the maximum CPU value or -1 if empty or just the "any CPU"/dummy value.
+ */
+LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
+LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
+LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
+				     const struct perf_cpu_map *rhs);
+/**
+ * perf_cpu_map__has_any_cpu - does the map contain the "any CPU"/dummy -1 value?
+ */
+LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map);
 
 #define perf_cpu_map__for_each_cpu(cpu, idx, cpus)		\
	for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx);	\
	     (idx) < perf_cpu_map__nr(cpus);			\
	     (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx))
 
+#define perf_cpu_map__for_each_cpu_skip_any(_cpu, idx, cpus)	\
+	for ((idx) = 0, (_cpu) = perf_cpu_map__cpu(cpus, idx);	\
+	     (idx) < perf_cpu_map__nr(cpus);			\
+	     (idx)++, (_cpu) = perf_cpu_map__cpu(cpus, idx))	\
+		if ((_cpu).cpu != -1)
+
+#define perf_cpu_map__for_each_idx(idx, cpus)			\
+	for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++)
+
 #endif /* __LIBPERF_CPUMAP_H */
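A hedged usage fragment (illustrative, not from the patch) for the reworked public API: perf_cpu_map__merge() now takes a pointer to the destination map and returns an error code rather than a newly merged map.

    struct perf_cpu_map *cpus = perf_cpu_map__new("0-3");
    struct perf_cpu_map *more = perf_cpu_map__new("2-5");

    /* Assuming 0 means success, negative an error. */
    if (perf_cpu_map__merge(&cpus, more) == 0)
        printf("merged: %d CPUs, max cpu %d\n",
               perf_cpu_map__nr(cpus), perf_cpu_map__max(cpus).cpu);

    perf_cpu_map__put(more);
    perf_cpu_map__put(cpus);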
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 69b44d2cc0f5..09b7c643ddac 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -8,6 +8,8 @@
 #include <linux/bpf.h>
 #include <sys/types.h> /* pid_t */
 
+#define event_contains(obj, mem) ((obj).header.size > offsetof(typeof(obj), mem))
+
 struct perf_record_mmap {
 	struct perf_event_header header;
 	__u32			 pid, tid;
@@ -23,10 +25,20 @@ struct perf_record_mmap2 {
 	__u64			 start;
 	__u64			 len;
 	__u64			 pgoff;
-	__u32			 maj;
-	__u32			 min;
-	__u64			 ino;
-	__u64			 ino_generation;
+	union {
+		struct {
+			__u32	 maj;
+			__u32	 min;
+			__u64	 ino;
+			__u64	 ino_generation;
+		};
+		struct {
+			__u8	 build_id_size;
+			__u8	 __reserved_1;
+			__u16	 __reserved_2;
+			__u8	 build_id[20];
+		};
+	};
 	__u32			 prot;
 	__u32			 flags;
 	char			 filename[PATH_MAX];
@@ -58,13 +70,21 @@ struct perf_record_lost {
 	__u64			 lost;
 };
 
+#define PERF_RECORD_MISC_LOST_SAMPLES_BPF (1 << 15)
+
 struct perf_record_lost_samples {
 	struct perf_event_header header;
 	__u64			 lost;
 };
 
+#define MAX_ID_HDR_ENTRIES 6
+struct perf_record_lost_samples_and_ids {
+	struct perf_record_lost_samples lost;
+	__u64			 sample_ids[MAX_ID_HDR_ENTRIES];
+};
+
 /*
- * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID
+ * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_LOST
 */
 struct perf_record_read {
 	struct perf_event_header header;
@@ -73,6 +93,7 @@ struct perf_record_read {
 	__u32			 pid, tid;
 	__u64			 value;
 	__u64			 time_enabled;
 	__u64			 time_running;
 	__u64			 id;
+	__u64			 lost;
 };
 
 struct perf_record_throttle {
@@ -83,7 +104,7 @@ struct perf_record_throttle {
 	__u64			 id;
 	__u64			 stream_id;
 };
 
 #ifndef KSYM_NAME_LEN
-#define KSYM_NAME_LEN 256
+#define KSYM_NAME_LEN 512
 #endif
 
 struct perf_record_ksymbol {
@@ -111,6 +132,14 @@ struct perf_record_cgroup {
 	char			 path[PATH_MAX];
 };
 
+struct perf_record_text_poke_event {
+	struct perf_event_header header;
+	__u64			addr;
+	__u16			old_len;
+	__u16			new_len;
+	__u8			bytes[];
+};
+
 struct perf_record_sample {
 	struct perf_event_header header;
 	__u64			 array[];
@@ -125,29 +154,91 @@ struct perf_record_switch {
 
 struct perf_record_header_attr {
 	struct perf_event_header header;
 	struct perf_event_attr	 attr;
-	__u64			 id[];
+	/*
+	 * Array of u64 id follows here but we cannot use a flexible array
+	 * because the size of attr in the data can be different than the
+	 * current version. Please use perf_record_header_attr_id() below.
+	 *
+	 * __u64		 id[];	// do not use this
+	 */
 };
 
+/* Returns the pointer to id array based on the actual attr size. */
+#define perf_record_header_attr_id(evt)			\
+	((void *)&(evt)->attr.attr + (evt)->attr.attr.size)
+
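Why the helper matters, as a sketch (the reader-side function is an assumption): attr.size recorded in a file may be smaller or larger than the struct perf_event_attr the reader was compiled against, so the id array must be located at a runtime-dependent offset.

    #include <inttypes.h>
    #include <stdio.h>

    static void print_ids(struct perf_record_header_attr *evt)
    {
        /* Everything between the on-file attr and header.size is u64 ids. */
        void *end = (void *)evt + evt->header.size;
        __u64 *id = perf_record_header_attr_id(evt);

        while ((void *)id < end)
            printf("id: %" PRIu64 "\n", (uint64_t)*id++);
    }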
 enum {
 	PERF_CPU_MAP__CPUS = 0,
 	PERF_CPU_MAP__MASK = 1,
+	PERF_CPU_MAP__RANGE_CPUS = 2,
 };
 
+/*
+ * Array encoding of a perf_cpu_map where nr is the number of entries in cpu[]
+ * and each entry is a value for a CPU in the map.
+ */
 struct cpu_map_entries {
 	__u16			 nr;
 	__u16			 cpu[];
 };
 
-struct perf_record_record_cpu_map {
+/* Bitmap encoding of a perf_cpu_map where bitmap entries are 32-bit. */
+struct perf_record_mask_cpu_map32 {
+	/* Number of mask values. */
 	__u16			 nr;
+	/* Constant 4. */
 	__u16			 long_size;
-	unsigned long		 mask[];
+	/* Bitmap data. */
+	__u32			 mask[];
+};
+
+/* Bitmap encoding of a perf_cpu_map where bitmap entries are 64-bit. */
+struct perf_record_mask_cpu_map64 {
+	/* Number of mask values. */
+	__u16			 nr;
+	/* Constant 8. */
+	__u16			 long_size;
+	/* Legacy padding. */
+	char			 __pad[4];
+	/* Bitmap data. */
+	__u64			 mask[];
+};
+
+/*
+ * 'struct perf_record_cpu_map_data' is packed as unfortunately an earlier
+ * version had unaligned data and we wish to retain file format compatibility.
+ * -irogers
+ */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpacked"
+#pragma GCC diagnostic ignored "-Wattributes"
+
+/*
+ * An encoding of a CPU map for a range starting at start_cpu through to
+ * end_cpu. If any_cpu is 1, an any CPU (-1) value (aka dummy value) is present.
+ */
+struct perf_record_range_cpu_map {
+	__u8 any_cpu;
+	__u8 __pad;
+	__u16 start_cpu;
+	__u16 end_cpu;
 };
 
 struct perf_record_cpu_map_data {
 	__u16			 type;
-	char			 data[];
-};
+	union {
+		/* Used when type == PERF_CPU_MAP__CPUS. */
+		struct cpu_map_entries cpus_data;
+		/* Used when type == PERF_CPU_MAP__MASK and long_size == 4. */
+		struct perf_record_mask_cpu_map32 mask32_data;
+		/* Used when type == PERF_CPU_MAP__MASK and long_size == 8. */
+		struct perf_record_mask_cpu_map64 mask64_data;
+		/* Used when type == PERF_CPU_MAP__RANGE_CPUS. */
+		struct perf_record_range_cpu_map range_cpu_data;
+	};
+} __attribute__((packed));
+
+#pragma GCC diagnostic pop
 
 struct perf_record_cpu_map {
 	struct perf_event_header header;
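For context, a hedged sketch of consuming the encodings above by switching on type, as the field comments suggest (the helper name is hypothetical):

    static int cpu_map_data__first_cpu(const struct perf_record_cpu_map_data *data)
    {
        switch (data->type) {
        case PERF_CPU_MAP__CPUS:
            return data->cpus_data.nr ? data->cpus_data.cpu[0] : -1;
        case PERF_CPU_MAP__RANGE_CPUS:
            return data->range_cpu_data.any_cpu ? -1 : data->range_cpu_data.start_cpu;
        case PERF_CPU_MAP__MASK:
            /* Scan mask32_data or mask64_data depending on long_size. */
        default:
            return -1;
        }
    }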
@@ -173,7 +264,16 @@ struct perf_record_event_update {
 	struct perf_event_header header;
 	__u64			 type;
 	__u64			 id;
-	char			 data[];
+	union {
+		/* Used when type == PERF_EVENT_UPDATE__SCALE. */
+		struct perf_record_event_update_scale scale;
+		/* Used when type == PERF_EVENT_UPDATE__UNIT. */
+		char unit[0];
+		/* Used when type == PERF_EVENT_UPDATE__NAME. */
+		char name[0];
+		/* Used when type == PERF_EVENT_UPDATE__CPUS. */
+		struct perf_record_event_update_cpus cpus;
+	};
 };
 
 #define MAX_EVENT_NAME 64
@@ -193,10 +293,20 @@ struct perf_record_header_tracing_data {
 	__u32			 size;
 };
 
+#define PERF_RECORD_MISC_BUILD_ID_SIZE (1 << 15)
+
 struct perf_record_header_build_id {
 	struct perf_event_header header;
 	pid_t			 pid;
-	__u8			 build_id[24];
+	union {
+		__u8		 build_id[24];
+		struct {
+			__u8	 data[20];
+			__u8	 size;
+			__u8	 reserved1__;
+			__u16	 reserved2__;
+		};
+	};
 	char			 filename[];
 };
 
@@ -207,10 +317,15 @@ struct id_index_entry {
 	__u64			 tid;
 };
 
+struct id_index_entry_2 {
+	__u64			 machine_pid;
+	__u64			 vcpu;
+};
+
 struct perf_record_id_index {
 	struct perf_event_header header;
 	__u64			 nr;
-	struct id_index_entry	 entries[0];
+	struct id_index_entry	 entries[];
 };
 
 struct perf_record_auxtrace_info {
@@ -244,6 +359,8 @@ struct perf_record_auxtrace_error {
 	__u64			 ip;
 	__u64			 time;
 	char			 msg[MAX_AUXTRACE_ERROR_MSG];
+	__u32			 machine_pid;
+	__u32			 vcpu;
 };
 
 struct perf_record_aux {
@@ -259,6 +376,11 @@ struct perf_record_itrace_start {
 	__u32			 tid;
 };
 
+struct perf_record_aux_output_hw_id {
+	struct perf_event_header header;
+	__u64			 hw_id;
+};
+
 struct perf_record_thread_map_entry {
 	__u64			 pid;
 	char			 comm[16];
@@ -274,7 +396,8 @@ enum {
 	PERF_STAT_CONFIG_TERM__AGGR_MODE	= 0,
 	PERF_STAT_CONFIG_TERM__INTERVAL		= 1,
 	PERF_STAT_CONFIG_TERM__SCALE		= 2,
-	PERF_STAT_CONFIG_TERM__MAX		= 3,
+	PERF_STAT_CONFIG_TERM__AGGR_LEVEL	= 3,
+	PERF_STAT_CONFIG_TERM__MAX		= 4,
 };
 
 struct perf_record_stat_config_entry {
@@ -316,6 +439,11 @@ struct perf_record_time_conv {
 	__u64			 time_shift;
 	__u64			 time_mult;
 	__u64			 time_zero;
+	__u64			 time_cycles;
+	__u64			 time_mask;
+	__u8			 cap_user_time_zero;
+	__u8			 cap_user_time_short;
+	__u8			 reserved[6];	/* For alignment */
 };
 
 struct perf_record_header_feature {
@@ -329,6 +457,16 @@ struct perf_record_compressed {
 	char			 data[];
 };
 
+/*
+ * `header.size` includes the padding we are going to add while writing the record.
+ * `data_size` only includes the size of `data[]` itself.
+ */
+struct perf_record_compressed2 {
+	struct perf_event_header header;
+	__u64			 data_size;
+	char			 data[];
+};
+
 enum perf_user_event_type { /* above any possible kernel type */
 	PERF_RECORD_USER_TYPE_START		= 64,
 	PERF_RECORD_HEADER_ATTR			= 64,
@@ -349,6 +487,8 @@ enum perf_user_event_type { /* above any possible kernel type */
 	PERF_RECORD_TIME_CONV			= 79,
 	PERF_RECORD_HEADER_FEATURE		= 80,
 	PERF_RECORD_COMPRESSED			= 81,
+	PERF_RECORD_FINISHED_INIT		= 82,
+	PERF_RECORD_COMPRESSED2			= 83,
 	PERF_RECORD_HEADER_MAX
 };
 
@@ -367,6 +507,7 @@ union perf_event {
 	struct perf_record_sample		sample;
 	struct perf_record_bpf_event		bpf;
 	struct perf_record_ksymbol		ksymbol;
+	struct perf_record_text_poke_event	text_poke;
 	struct perf_record_header_attr		attr;
 	struct perf_record_event_update		event_update;
 	struct perf_record_header_event_type	event_type;
@@ -378,6 +519,7 @@ union perf_event {
 	struct perf_record_auxtrace_error	auxtrace_error;
 	struct perf_record_aux			aux;
 	struct perf_record_itrace_start		itrace_start;
+	struct perf_record_aux_output_hw_id	aux_output_hw_id;
 	struct perf_record_switch		context_switch;
 	struct perf_record_thread_map		thread_map;
 	struct perf_record_cpu_map		cpu_map;
@@ -387,6 +529,7 @@ union perf_event {
 	struct perf_record_time_conv		time_conv;
 	struct perf_record_header_feature	feat;
 	struct perf_record_compressed		pack;
+	struct perf_record_compressed2		pack2;
 };
 
 #endif /* __LIBPERF_EVENT_H */
diff --git a/tools/lib/perf/include/perf/evlist.h b/tools/lib/perf/include/perf/evlist.h
index 0a7479dc13bf..e894b770779e 100644
--- a/tools/lib/perf/include/perf/evlist.h
+++ b/tools/lib/perf/include/perf/evlist.h
@@ -46,4 +46,6 @@ LIBPERF_API struct perf_mmap *perf_evlist__next_mmap(struct perf_evlist *evlist,
 	     (pos) != NULL;						\
 	     (pos) = perf_evlist__next_mmap((evlist), (pos), overwrite))
 
+LIBPERF_API void perf_evlist__set_leader(struct perf_evlist *evlist);
+LIBPERF_API int perf_evlist__nr_groups(struct perf_evlist *evlist);
 #endif /* __LIBPERF_EVLIST_H */
diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h
index c82ec39a4ad0..6f92204075c2 100644
--- a/tools/lib/perf/include/perf/evsel.h
+++ b/tools/lib/perf/include/perf/evsel.h
@@ -4,6 +4,8 @@
 
 #include <stdint.h>
 #include <perf/core.h>
+#include <stdbool.h>
+#include <linux/types.h>
 
 struct perf_evsel;
 struct perf_event_attr;
@@ -16,8 +18,10 @@ struct perf_counts_values {
 			uint64_t val;
 			uint64_t ena;
 			uint64_t run;
+			uint64_t id;
+			uint64_t lost;
 		};
-		uint64_t values[3];
+		uint64_t values[5];
 	};
 };
 
@@ -26,15 +30,21 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel);
 LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
 				 struct perf_thread_map *threads);
 LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
-LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
-LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx);
+LIBPERF_API int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
+LIBPERF_API void perf_evsel__munmap(struct perf_evsel *evsel);
+LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread);
+LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
 				 struct perf_counts_values *count);
 LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
-LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
+LIBPERF_API int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread);
 LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel);
-LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
 LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
 LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
 LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
+LIBPERF_API void perf_counts_values__scale(struct perf_counts_values *count,
+					   bool scale, __s8 *pscaled);
 
 #endif /* __LIBPERF_EVSEL_H */
diff --git a/tools/lib/perf/include/perf/threadmap.h b/tools/lib/perf/include/perf/threadmap.h
index a7c50de8d010..44deb815b817 100644
--- a/tools/lib/perf/include/perf/threadmap.h
+++ b/tools/lib/perf/include/perf/threadmap.h
@@ -8,11 +8,13 @@ struct perf_thread_map;
 
 LIBPERF_API struct perf_thread_map *perf_thread_map__new_dummy(void);
+LIBPERF_API struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array);
 
-LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid);
-LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int thread);
+LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid);
+LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int idx);
 LIBPERF_API int perf_thread_map__nr(struct perf_thread_map *threads);
-LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread);
+LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx);
+LIBPERF_API int perf_thread_map__idx(struct perf_thread_map *map, pid_t pid);
 
 LIBPERF_API struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
 LIBPERF_API void perf_thread_map__put(struct perf_thread_map *map);
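Finally, an illustrative fragment (not from the patch) tying the evsel changes together: counters are now addressed by cpu_map_idx rather than CPU number, and perf_counts_values__scale() compensates val using the ena/run times when the event was multiplexed.

    struct perf_counts_values counts;

    if (perf_evsel__read(evsel, /*cpu_map_idx=*/0, /*thread=*/0, &counts) == 0) {
        __s8 scaled;

        /* pscaled reports whether the value had to be estimated
         * (exact encoding is an assumption here). */
        perf_counts_values__scale(&counts, /*scale=*/true, &scaled);
        printf("val=%llu ena=%llu run=%llu\n",
               (unsigned long long)counts.val,
               (unsigned long long)counts.ena,
               (unsigned long long)counts.run);
    }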