diff options
Diffstat (limited to 'tools')
143 files changed, 3743 insertions, 560 deletions
diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h index f83a70e07df8..ce2ee8f1e361 100644 --- a/tools/arch/arm64/include/uapi/asm/unistd.h +++ b/tools/arch/arm64/include/uapi/asm/unistd.h @@ -20,5 +20,6 @@ #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS #define __ARCH_WANT_SYS_CLONE3 +#define __ARCH_WANT_MEMFD_SECRET #include <asm-generic/unistd.h> diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 39bb322707b4..b11cfc86a3d0 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -97,7 +97,7 @@ clean: bpftool_clean runqslower_clean resolve_btfids_clean $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpf $(Q)$(RM) -r -- $(OUTPUT)feature -install: $(PROGS) bpftool_install runqslower_install +install: $(PROGS) bpftool_install $(call QUIET_INSTALL, bpf_jit_disasm) $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin $(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm @@ -118,9 +118,6 @@ bpftool_clean: runqslower: $(call descend,runqslower) -runqslower_install: - $(call descend,runqslower,install) - runqslower_clean: $(call descend,runqslower,clean) @@ -131,5 +128,5 @@ resolve_btfids_clean: $(call descend,resolve_btfids,clean) .PHONY: all install clean bpftool bpftool_install bpftool_clean \ - runqslower runqslower_install runqslower_clean \ + runqslower runqslower_clean \ resolve_btfids resolve_btfids_clean diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 1828bba19020..dc6daa193557 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -222,6 +222,11 @@ int mount_bpffs_for_pin(const char *name) int err = 0; file = malloc(strlen(name) + 1); + if (!file) { + p_err("mem alloc failed"); + return -1; + } + strcpy(file, name); dir = dirname(file); diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index e7e7eee9f172..24734f2249d6 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c 
@@ -43,11 +43,13 @@ static int fprintf_json(void *out, const char *fmt, ...) { va_list ap; char *s; + int err; va_start(ap, fmt); - if (vasprintf(&s, fmt, ap) < 0) - return -1; + err = vasprintf(&s, fmt, ap); va_end(ap); + if (err < 0) + return -1; if (!oper_count) { int i; diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c index 645530ca7e98..ab9353f2fd46 100644 --- a/tools/bpf/runqslower/runqslower.bpf.c +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -74,7 +74,7 @@ int handle__sched_switch(u64 *ctx) u32 pid; /* ivcsw: treat like an enqueue event and store timestamp */ - if (prev->state == TASK_RUNNING) + if (prev->__state == TASK_RUNNING) trace_enqueue(prev); pid = next->pid; diff --git a/tools/include/linux/kconfig.h b/tools/include/linux/kconfig.h index 1555a0c4f345..13b86bd3b746 100644 --- a/tools/include/linux/kconfig.h +++ b/tools/include/linux/kconfig.h @@ -4,12 +4,6 @@ /* CONFIG_CC_VERSION_TEXT (Do not delete this comment. See help in Kconfig) */ -#ifdef CONFIG_CPU_BIG_ENDIAN -#define __BIG_ENDIAN 4321 -#else -#define __LITTLE_ENDIAN 1234 -#endif - #define __ARG_PLACEHOLDER_1 0, #define __take_second_arg(__ignored, val, ...) 
val diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index f94f65d429be..1567a3294c3d 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -72,6 +72,9 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index d2a942086fcb..a9d6fcd95f42 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -863,7 +863,8 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) -/* 443 is reserved for quotactl_path */ +#define __NR_quotactl_fd 443 +__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd) #define __NR_landlock_create_ruleset 444 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) @@ -872,8 +873,13 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) #define __NR_landlock_restrict_self 446 __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) +#ifdef __ARCH_WANT_MEMFD_SECRET +#define __NR_memfd_secret 447 +__SYSCALL(__NR_memfd_secret, sys_memfd_secret) +#endif + #undef __NR_syscalls -#define __NR_syscalls 447 +#define __NR_syscalls 448 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 67b94bc3c885..d043752a74cf 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -777,9 +777,12 @@ struct drm_get_cap { /** * DRM_CLIENT_CAP_STEREO_3D * - * if set to 1, the DRM core will expose the stereo 3D 
capabilities of the + * If set to 1, the DRM core will expose the stereo 3D capabilities of the * monitor by advertising the supported 3D layouts in the flags of struct - * drm_mode_modeinfo. + * drm_mode_modeinfo. See ``DRM_MODE_FLAG_3D_*``. + * + * This capability is always supported for all drivers starting from kernel + * version 3.13. */ #define DRM_CLIENT_CAP_STEREO_3D 1 @@ -788,6 +791,9 @@ struct drm_get_cap { * * If set to 1, the DRM core will expose all planes (overlay, primary, and * cursor) to userspace. + * + * This capability has been introduced in kernel version 3.15. Starting from + * kernel version 3.17, this capability is always supported for all drivers. */ #define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 @@ -797,6 +803,13 @@ struct drm_get_cap { * If set to 1, the DRM core will expose atomic properties to userspace. This * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and * &DRM_CLIENT_CAP_ASPECT_RATIO. + * + * If the driver doesn't support atomic mode-setting, enabling this capability + * will fail with -EOPNOTSUPP. + * + * This capability has been introduced in kernel version 4.0. Starting from + * kernel version 4.2, this capability is always supported for atomic-capable + * drivers. */ #define DRM_CLIENT_CAP_ATOMIC 3 @@ -804,6 +817,10 @@ struct drm_get_cap { * DRM_CLIENT_CAP_ASPECT_RATIO * * If set to 1, the DRM core will provide aspect ratio information in modes. + * See ``DRM_MODE_FLAG_PIC_AR_*``. + * + * This capability is always supported for all drivers starting from kernel + * version 4.18. */ #define DRM_CLIENT_CAP_ASPECT_RATIO 4 @@ -811,8 +828,11 @@ struct drm_get_cap { * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS * * If set to 1, the DRM core will expose special connectors to be used for - * writing back to memory the scene setup in the commit. Depends on client - * also supporting DRM_CLIENT_CAP_ATOMIC + * writing back to memory the scene setup in the commit. The client must enable + * &DRM_CLIENT_CAP_ATOMIC first. 
+ * + * This capability is always supported for atomic-capable drivers starting from + * kernel version 4.19. */ #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index ddc47bbf48b6..c2c7759b7d2e 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -62,8 +62,8 @@ extern "C" { #define I915_ERROR_UEVENT "ERROR" #define I915_RESET_UEVENT "RESET" -/* - * i915_user_extension: Base class for defining a chain of extensions +/** + * struct i915_user_extension - Base class for defining a chain of extensions * * Many interfaces need to grow over time. In most cases we can simply * extend the struct and have userspace pass in more data. Another option, @@ -76,12 +76,58 @@ extern "C" { * increasing complexity, and for large parts of that interface to be * entirely optional. The downside is more pointer chasing; chasing across * the __user boundary with pointers encapsulated inside u64. + * + * Example chaining: + * + * .. code-block:: C + * + * struct i915_user_extension ext3 { + * .next_extension = 0, // end + * .name = ..., + * }; + * struct i915_user_extension ext2 { + * .next_extension = (uintptr_t)&ext3, + * .name = ..., + * }; + * struct i915_user_extension ext1 { + * .next_extension = (uintptr_t)&ext2, + * .name = ..., + * }; + * + * Typically the struct i915_user_extension would be embedded in some uAPI + * struct, and in this case we would feed it the head of the chain(i.e ext1), + * which would then apply all of the above extensions. + * */ struct i915_user_extension { + /** + * @next_extension: + * + * Pointer to the next struct i915_user_extension, or zero if the end. + */ __u64 next_extension; + /** + * @name: Name of the extension. + * + * Note that the name here is just some integer. 
+ * + * Also note that the name space for this is not global for the whole + * driver, but rather its scope/meaning is limited to the specific piece + * of uAPI which has embedded the struct i915_user_extension. + */ __u32 name; - __u32 flags; /* All undefined bits must be zero. */ - __u32 rsvd[4]; /* Reserved for future use; must be zero. */ + /** + * @flags: MBZ + * + * All undefined bits must be zero. + */ + __u32 flags; + /** + * @rsvd: MBZ + * + * Reserved for future use; must be zero. + */ + __u32 rsvd[4]; }; /* @@ -360,6 +406,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_QUERY 0x39 #define DRM_I915_GEM_VM_CREATE 0x3a #define DRM_I915_GEM_VM_DESTROY 0x3b +#define DRM_I915_GEM_CREATE_EXT 0x3c /* Must be kept compact -- no holes */ #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) @@ -392,6 +439,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_ENTERVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT) #define DRM_IOCTL_I915_GEM_LEAVEVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT) #define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create) +#define DRM_IOCTL_I915_GEM_CREATE_EXT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE_EXT, struct drm_i915_gem_create_ext) #define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread) #define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) #define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) @@ -1054,12 +1102,12 @@ struct drm_i915_gem_exec_fence { __u32 flags; }; -/** +/* * See drm_i915_gem_execbuffer_ext_timeline_fences. */ #define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0 -/** +/* * This structure describes an array of drm_syncobj and associated points for * timeline variants of drm_syncobj. 
It is invalid to append this structure to * the execbuf if I915_EXEC_FENCE_ARRAY is set. @@ -1700,7 +1748,7 @@ struct drm_i915_gem_context_param { __u64 value; }; -/** +/* * Context SSEU programming * * It may be necessary for either functional or performance reason to configure @@ -2067,7 +2115,7 @@ struct drm_i915_perf_open_param { __u64 properties_ptr; }; -/** +/* * Enable data capture for a stream that was either opened in a disabled state * via I915_PERF_FLAG_DISABLED or was later disabled via * I915_PERF_IOCTL_DISABLE. @@ -2081,7 +2129,7 @@ struct drm_i915_perf_open_param { */ #define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) -/** +/* * Disable data capture for a stream. * * It is an error to try and read a stream that is disabled. @@ -2090,7 +2138,7 @@ struct drm_i915_perf_open_param { */ #define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) -/** +/* * Change metrics_set captured by a stream. * * If the stream is bound to a specific context, the configuration change @@ -2103,7 +2151,7 @@ struct drm_i915_perf_open_param { */ #define I915_PERF_IOCTL_CONFIG _IO('i', 0x2) -/** +/* * Common to all i915 perf records */ struct drm_i915_perf_record_header { @@ -2151,7 +2199,7 @@ enum drm_i915_perf_record_type { DRM_I915_PERF_RECORD_MAX /* non-ABI */ }; -/** +/* * Structure to upload perf dynamic configuration into the kernel. */ struct drm_i915_perf_oa_config { @@ -2172,53 +2220,95 @@ struct drm_i915_perf_oa_config { __u64 flex_regs_ptr; }; +/** + * struct drm_i915_query_item - An individual query for the kernel to process. + * + * The behaviour is determined by the @query_id. Note that exactly what + * @data_ptr is also depends on the specific @query_id. 
+ */ struct drm_i915_query_item { + /** @query_id: The id for this query */ __u64 query_id; #define DRM_I915_QUERY_TOPOLOGY_INFO 1 #define DRM_I915_QUERY_ENGINE_INFO 2 #define DRM_I915_QUERY_PERF_CONFIG 3 +#define DRM_I915_QUERY_MEMORY_REGIONS 4 /* Must be kept compact -- no holes and well documented */ - /* + /** + * @length: + * * When set to zero by userspace, this is filled with the size of the - * data to be written at the data_ptr pointer. The kernel sets this + * data to be written at the @data_ptr pointer. The kernel sets this * value to a negative value to signal an error on a particular query * item. */ __s32 length; - /* + /** + * @flags: + * * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0. * * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the - * following : - * - DRM_I915_QUERY_PERF_CONFIG_LIST - * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID - * - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID + * following: + * + * - DRM_I915_QUERY_PERF_CONFIG_LIST + * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID + * - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID */ __u32 flags; #define DRM_I915_QUERY_PERF_CONFIG_LIST 1 #define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2 #define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID 3 - /* - * Data will be written at the location pointed by data_ptr when the - * value of length matches the length of the data to be written by the + /** + * @data_ptr: + * + * Data will be written at the location pointed by @data_ptr when the + * value of @length matches the length of the data to be written by the * kernel. */ __u64 data_ptr; }; +/** + * struct drm_i915_query - Supply an array of struct drm_i915_query_item for the + * kernel to fill out. + * + * Note that this is generally a two step process for each struct + * drm_i915_query_item in the array: + * + * 1. Call the DRM_IOCTL_I915_QUERY, giving it our array of struct + * drm_i915_query_item, with &drm_i915_query_item.length set to zero. 
The + * kernel will then fill in the size, in bytes, which tells userspace how + * memory it needs to allocate for the blob(say for an array of properties). + * + * 2. Next we call DRM_IOCTL_I915_QUERY again, this time with the + * &drm_i915_query_item.data_ptr equal to our newly allocated blob. Note that + * the &drm_i915_query_item.length should still be the same as what the + * kernel previously set. At this point the kernel can fill in the blob. + * + * Note that for some query items it can make sense for userspace to just pass + * in a buffer/blob equal to or larger than the required size. In this case only + * a single ioctl call is needed. For some smaller query items this can work + * quite well. + * + */ struct drm_i915_query { + /** @num_items: The number of elements in the @items_ptr array */ __u32 num_items; - /* - * Unused for now. Must be cleared to zero. + /** + * @flags: Unused for now. Must be cleared to zero. */ __u32 flags; - /* - * This points to an array of num_items drm_i915_query_item structures. + /** + * @items_ptr: + * + * Pointer to an array of struct drm_i915_query_item. The number of + * array elements is @num_items. */ __u64 items_ptr; }; @@ -2292,21 +2382,21 @@ struct drm_i915_query_topology_info { * Describes one engine and it's capabilities as known to the driver. */ struct drm_i915_engine_info { - /** Engine class and instance. */ + /** @engine: Engine class and instance. */ struct i915_engine_class_instance engine; - /** Reserved field. */ + /** @rsvd0: Reserved field. */ __u32 rsvd0; - /** Engine flags. */ + /** @flags: Engine flags. */ __u64 flags; - /** Capabilities of this engine. */ + /** @capabilities: Capabilities of this engine. */ __u64 capabilities; #define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0) #define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1) - /** Reserved fields. */ + /** @rsvd1: Reserved fields. 
*/ __u64 rsvd1[4]; }; @@ -2317,13 +2407,13 @@ struct drm_i915_engine_info { * an array of struct drm_i915_engine_info structures. */ struct drm_i915_query_engine_info { - /** Number of struct drm_i915_engine_info structs following. */ + /** @num_engines: Number of struct drm_i915_engine_info structs following. */ __u32 num_engines; - /** MBZ */ + /** @rsvd: MBZ */ __u32 rsvd[3]; - /** Marker for drm_i915_engine_info structures. */ + /** @engines: Marker for drm_i915_engine_info structures. */ struct drm_i915_engine_info engines[]; }; @@ -2377,6 +2467,241 @@ struct drm_i915_query_perf_config { __u8 data[]; }; +/** + * enum drm_i915_gem_memory_class - Supported memory classes + */ +enum drm_i915_gem_memory_class { + /** @I915_MEMORY_CLASS_SYSTEM: System memory */ + I915_MEMORY_CLASS_SYSTEM = 0, + /** @I915_MEMORY_CLASS_DEVICE: Device local-memory */ + I915_MEMORY_CLASS_DEVICE, +}; + +/** + * struct drm_i915_gem_memory_class_instance - Identify particular memory region + */ +struct drm_i915_gem_memory_class_instance { + /** @memory_class: See enum drm_i915_gem_memory_class */ + __u16 memory_class; + + /** @memory_instance: Which instance */ + __u16 memory_instance; +}; + +/** + * struct drm_i915_memory_region_info - Describes one region as known to the + * driver. + * + * Note that we reserve some stuff here for potential future work. As an example + * we might want expose the capabilities for a given region, which could include + * things like if the region is CPU mappable/accessible, what are the supported + * mapping types etc. + * + * Note that to extend struct drm_i915_memory_region_info and struct + * drm_i915_query_memory_regions in the future the plan is to do the following: + * + * .. code-block:: C + * + * struct drm_i915_memory_region_info { + * struct drm_i915_gem_memory_class_instance region; + * union { + * __u32 rsvd0; + * __u32 new_thing1; + * }; + * ... + * union { + * __u64 rsvd1[8]; + * struct { + * __u64 new_thing2; + * __u64 new_thing3; + * ... 
+ * }; + * }; + * }; + * + * With this things should remain source compatible between versions for + * userspace, even as we add new fields. + * + * Note this is using both struct drm_i915_query_item and struct drm_i915_query. + * For this new query we are adding the new query id DRM_I915_QUERY_MEMORY_REGIONS + * at &drm_i915_query_item.query_id. + */ +struct drm_i915_memory_region_info { + /** @region: The class:instance pair encoding */ + struct drm_i915_gem_memory_class_instance region; + + /** @rsvd0: MBZ */ + __u32 rsvd0; + + /** @probed_size: Memory probed by the driver (-1 = unknown) */ + __u64 probed_size; + + /** @unallocated_size: Estimate of memory remaining (-1 = unknown) */ + __u64 unallocated_size; + + /** @rsvd1: MBZ */ + __u64 rsvd1[8]; +}; + +/** + * struct drm_i915_query_memory_regions + * + * The region info query enumerates all regions known to the driver by filling + * in an array of struct drm_i915_memory_region_info structures. + * + * Example for getting the list of supported regions: + * + * .. code-block:: C + * + * struct drm_i915_query_memory_regions *info; + * struct drm_i915_query_item item = { + * .query_id = DRM_I915_QUERY_MEMORY_REGIONS; + * }; + * struct drm_i915_query query = { + * .num_items = 1, + * .items_ptr = (uintptr_t)&item, + * }; + * int err, i; + * + * // First query the size of the blob we need, this needs to be large + * // enough to hold our array of regions. The kernel will fill out the + * // item.length for us, which is the number of bytes we need. + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * info = calloc(1, item.length); + * // Now that we allocated the required number of bytes, we call the ioctl + * // again, this time with the data_ptr pointing to our newly allocated + * // blob, which the kernel can then populate with the all the region info. + * item.data_ptr = (uintptr_t)&info, + * + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... 
+ * + * // We can now access each region in the array + * for (i = 0; i < info->num_regions; i++) { + * struct drm_i915_memory_region_info mr = info->regions[i]; + * u16 class = mr.region.class; + * u16 instance = mr.region.instance; + * + * .... + * } + * + * free(info); + */ +struct drm_i915_query_memory_regions { + /** @num_regions: Number of supported regions */ + __u32 num_regions; + + /** @rsvd: MBZ */ + __u32 rsvd[3]; + + /** @regions: Info about each supported region */ + struct drm_i915_memory_region_info regions[]; +}; + +/** + * struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added + * extension support using struct i915_user_extension. + * + * Note that in the future we want to have our buffer flags here, at least for + * the stuff that is immutable. Previously we would have two ioctls, one to + * create the object with gem_create, and another to apply various parameters, + * however this creates some ambiguity for the params which are considered + * immutable. Also in general we're phasing out the various SET/GET ioctls. + */ +struct drm_i915_gem_create_ext { + /** + * @size: Requested size for the object. + * + * The (page-aligned) allocated size for the object will be returned. + * + * Note that for some devices we have might have further minimum + * page-size restrictions(larger than 4K), like for device local-memory. + * However in general the final size here should always reflect any + * rounding up, if for example using the I915_GEM_CREATE_EXT_MEMORY_REGIONS + * extension to place the object in device local-memory. + */ + __u64 size; + /** + * @handle: Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; + /** @flags: MBZ */ + __u32 flags; + /** + * @extensions: The chain of extensions to apply to this object. + * + * This will be useful in the future when we need to support several + * different extensions, and we need to apply more than one when + * creating the object. 
See struct i915_user_extension. + * + * If we don't supply any extensions then we get the same old gem_create + * behaviour. + * + * For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see + * struct drm_i915_gem_create_ext_memory_regions. + */ +#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0 + __u64 extensions; +}; + +/** + * struct drm_i915_gem_create_ext_memory_regions - The + * I915_GEM_CREATE_EXT_MEMORY_REGIONS extension. + * + * Set the object with the desired set of placements/regions in priority + * order. Each entry must be unique and supported by the device. + * + * This is provided as an array of struct drm_i915_gem_memory_class_instance, or + * an equivalent layout of class:instance pair encodings. See struct + * drm_i915_query_memory_regions and DRM_I915_QUERY_MEMORY_REGIONS for how to + * query the supported regions for a device. + * + * As an example, on discrete devices, if we wish to set the placement as + * device local-memory we can do something like: + * + * .. code-block:: C + * + * struct drm_i915_gem_memory_class_instance region_lmem = { + * .memory_class = I915_MEMORY_CLASS_DEVICE, + * .memory_instance = 0, + * }; + * struct drm_i915_gem_create_ext_memory_regions regions = { + * .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS }, + * .regions = (uintptr_t)&region_lmem, + * .num_regions = 1, + * }; + * struct drm_i915_gem_create_ext create_ext = { + * .size = 16 * PAGE_SIZE, + * .extensions = (uintptr_t)&regions, + * }; + * + * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); + * if (err) ... + * + * At which point we get the object handle in &drm_i915_gem_create_ext.handle, + * along with the final object size in &drm_i915_gem_create_ext.size, which + * should account for any rounding up, if required. + */ +struct drm_i915_gem_create_ext_memory_regions { + /** @base: Extension link. See struct i915_user_extension. 
*/ + struct i915_user_extension base; + + /** @pad: MBZ */ + __u32 pad; + /** @num_regions: Number of elements in the @regions array. */ + __u32 num_regions; + /** + * @regions: The regions/placements array. + * + * An array of struct drm_i915_gem_memory_class_instance. + */ + __u64 regions; +}; + #if defined(__cplusplus) } #endif diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index 535a7229e1d9..d17c061950df 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -710,7 +710,7 @@ enum { * Raw MIDI section - /dev/snd/midi?? */ -#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 1) +#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 2) enum { SNDRV_RAWMIDI_STREAM_OUTPUT = 0, @@ -736,12 +736,38 @@ struct snd_rawmidi_info { unsigned char reserved[64]; /* reserved for future use */ }; +#define SNDRV_RAWMIDI_MODE_FRAMING_MASK (7<<0) +#define SNDRV_RAWMIDI_MODE_FRAMING_SHIFT 0 +#define SNDRV_RAWMIDI_MODE_FRAMING_NONE (0<<0) +#define SNDRV_RAWMIDI_MODE_FRAMING_TSTAMP (1<<0) +#define SNDRV_RAWMIDI_MODE_CLOCK_MASK (7<<3) +#define SNDRV_RAWMIDI_MODE_CLOCK_SHIFT 3 +#define SNDRV_RAWMIDI_MODE_CLOCK_NONE (0<<3) +#define SNDRV_RAWMIDI_MODE_CLOCK_REALTIME (1<<3) +#define SNDRV_RAWMIDI_MODE_CLOCK_MONOTONIC (2<<3) +#define SNDRV_RAWMIDI_MODE_CLOCK_MONOTONIC_RAW (3<<3) + +#define SNDRV_RAWMIDI_FRAMING_DATA_LENGTH 16 + +struct snd_rawmidi_framing_tstamp { + /* For now, frame_type is always 0. Midi 2.0 is expected to add new + * types here. Applications are expected to skip unknown frame types. 
+ */ + __u8 frame_type; + __u8 length; /* number of valid bytes in data field */ + __u8 reserved[2]; + __u32 tv_nsec; /* nanoseconds */ + __u64 tv_sec; /* seconds */ + __u8 data[SNDRV_RAWMIDI_FRAMING_DATA_LENGTH]; +} __packed; + struct snd_rawmidi_params { int stream; size_t buffer_size; /* queue size in bytes */ size_t avail_min; /* minimum avail bytes for wakeup */ unsigned int no_active_sensing: 1; /* do not send active sensing byte in close() */ - unsigned char reserved[16]; /* reserved for future use */ + unsigned int mode; /* For input data only, frame incoming data */ + unsigned char reserved[12]; /* reserved for future use */ }; #ifndef __KERNEL__ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1e04ce724240..6f5e2757bb3c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10136,7 +10136,7 @@ int bpf_link__unpin(struct bpf_link *link) err = unlink(link->pin_path); if (err != 0) - return libbpf_err_errno(err); + return -errno; pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path); zfree(&link->pin_path); @@ -11197,7 +11197,7 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms); if (cnt < 0) - return libbpf_err_errno(cnt); + return -errno; for (i = 0; i < cnt; i++) { struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr; diff --git a/tools/lib/perf/Build b/tools/lib/perf/Build index 2ef9a4ec6d99..e8f5b7fb9973 100644 --- a/tools/lib/perf/Build +++ b/tools/lib/perf/Build @@ -11,3 +11,5 @@ libperf-y += lib.o $(OUTPUT)zalloc.o: ../../lib/zalloc.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) + +tests-y += tests/ diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 3718d65cffac..08fe6e3c4089 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -52,6 +52,8 @@ else Q = @ endif +TEST_ARGS := $(if $(V),-v) + # Set compile option CFLAGS ifdef EXTRA_CFLAGS CFLAGS := $(EXTRA_CFLAGS) @@ -136,12 +138,30 
@@ all: fixdep clean: $(LIBAPI)-clean $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \ - *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd LIBPERF-CFLAGS $(LIBPERF_PC) - $(Q)$(MAKE) -C tests clean + *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd tests/*.o LIBPERF-CFLAGS $(LIBPERF_PC) \ + $(TESTS_STATIC) $(TESTS_SHARED) + +TESTS_IN = tests-in.o + +TESTS_STATIC = $(OUTPUT)tests-static +TESTS_SHARED = $(OUTPUT)tests-shared + +$(TESTS_IN): FORCE + $(Q)$(MAKE) $(build)=tests + +$(TESTS_STATIC): $(TESTS_IN) $(LIBPERF_A) $(LIBAPI) + $(QUIET_LINK)$(CC) -o $@ $^ + +$(TESTS_SHARED): $(TESTS_IN) $(LIBAPI) + $(QUIET_LINK)$(CC) -o $@ -L$(if $(OUTPUT),$(OUTPUT),.) $^ -lperf + +make-tests: libs $(TESTS_SHARED) $(TESTS_STATIC) -tests: libs - $(Q)$(MAKE) -C tests - $(Q)$(MAKE) -C tests run +tests: make-tests + @echo "running static:" + @./$(TESTS_STATIC) $(TEST_ARGS) + @echo "running dynamic:" + @LD_LIBRARY_PATH=. ./$(TESTS_SHARED) $(TEST_ARGS) $(LIBPERF_PC): $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index a0aaf385cbb5..e37dfad31383 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -66,6 +66,7 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist) void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *evsel) { + evsel->idx = evlist->nr_entries; list_add_tail(&evsel->node, &evlist->entries); evlist->nr_entries += 1; __perf_evlist__propagate_maps(evlist, evsel); @@ -641,3 +642,24 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map, return overwrite ? 
evlist->mmap_ovw_first : evlist->mmap_first; } + +void __perf_evlist__set_leader(struct list_head *list) +{ + struct perf_evsel *evsel, *leader; + + leader = list_entry(list->next, struct perf_evsel, node); + evsel = list_entry(list->prev, struct perf_evsel, node); + + leader->nr_members = evsel->idx - leader->idx + 1; + + __perf_evlist__for_each_entry(list, evsel) + evsel->leader = leader; +} + +void perf_evlist__set_leader(struct perf_evlist *evlist) +{ + if (evlist->nr_entries) { + evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0; + __perf_evlist__set_leader(&evlist->entries); + } +} diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index bd8c2f19ef74..d8886720e83d 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -17,11 +17,15 @@ #include <linux/string.h> #include <sys/ioctl.h> #include <sys/mman.h> +#include <asm/bug.h> -void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr) +void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, + int idx) { INIT_LIST_HEAD(&evsel->node); evsel->attr = *attr; + evsel->idx = idx; + evsel->leader = evsel; } struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr) @@ -29,7 +33,7 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr) struct perf_evsel *evsel = zalloc(sizeof(*evsel)); if (evsel != NULL) - perf_evsel__init(evsel, attr); + perf_evsel__init(evsel, attr, 0); return evsel; } @@ -73,6 +77,32 @@ sys_perf_event_open(struct perf_event_attr *attr, return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); } +static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd) +{ + struct perf_evsel *leader = evsel->leader; + int fd; + + if (evsel == leader) { + *group_fd = -1; + return 0; + } + + /* + * Leader must be already processed/open, + * if not it's a bug. 
+ */ + if (!leader->fd) + return -ENOTCONN; + + fd = FD(leader, cpu, thread); + if (fd == -1) + return -EBADF; + + *group_fd = fd; + + return 0; +} + int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { @@ -108,11 +138,15 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, for (cpu = 0; cpu < cpus->nr; cpu++) { for (thread = 0; thread < threads->nr; thread++) { - int fd; + int fd, group_fd; + + err = get_group_fd(evsel, cpu, thread, &group_fd); + if (err < 0) + return err; fd = sys_perf_event_open(&evsel->attr, threads->map[thread].pid, - cpus->map[cpu], -1, 0); + cpus->map[cpu], group_fd, 0); if (fd < 0) return -errno; diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 212c29063ad4..f366dbad6a88 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -16,6 +16,7 @@ struct perf_mmap_param; struct perf_evlist { struct list_head entries; int nr_entries; + int nr_groups; bool has_user_cpus; struct perf_cpu_map *cpus; struct perf_cpu_map *all_cpus; @@ -126,4 +127,5 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, void perf_evlist__reset_id_hash(struct perf_evlist *evlist); +void __perf_evlist__set_leader(struct list_head *list); #endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index 1c067d088bc6..1f3eacbad2e8 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -45,13 +45,16 @@ struct perf_evsel { struct xyarray *sample_id; u64 *id; u32 ids; + struct perf_evsel *leader; /* parse modifier helper */ int nr_members; bool system_wide; + int idx; }; -void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr); +void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, + int idx); int perf_evsel__alloc_fd(struct 
perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__close_fd(struct perf_evsel *evsel); void perf_evsel__free_fd(struct perf_evsel *evsel); diff --git a/tools/lib/perf/include/internal/tests.h b/tools/lib/perf/include/internal/tests.h index 29425c2dabe1..61052099225b 100644 --- a/tools/lib/perf/include/internal/tests.h +++ b/tools/lib/perf/include/internal/tests.h @@ -5,8 +5,8 @@ #include <stdio.h> #include <unistd.h> -int tests_failed; -int tests_verbose; +extern int tests_failed; +extern int tests_verbose; static inline int get_verbose(char **argv, int argc) { diff --git a/tools/lib/perf/include/perf/evlist.h b/tools/lib/perf/include/perf/evlist.h index 0a7479dc13bf..9ca399d49bb4 100644 --- a/tools/lib/perf/include/perf/evlist.h +++ b/tools/lib/perf/include/perf/evlist.h @@ -46,4 +46,5 @@ LIBPERF_API struct perf_mmap *perf_evlist__next_mmap(struct perf_evlist *evlist, (pos) != NULL; \ (pos) = perf_evlist__next_mmap((evlist), (pos), overwrite)) +LIBPERF_API void perf_evlist__set_leader(struct perf_evlist *evlist); #endif /* __LIBPERF_EVLIST_H */ diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index c0c7ceb11060..71468606e8a7 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -45,6 +45,7 @@ LIBPERF_0.0.1 { perf_evlist__munmap; perf_evlist__filter_pollfd; perf_evlist__next_mmap; + perf_evlist__set_leader; perf_mmap__consume; perf_mmap__read_init; perf_mmap__read_done; diff --git a/tools/lib/perf/tests/Build b/tools/lib/perf/tests/Build new file mode 100644 index 000000000000..56e81378d443 --- /dev/null +++ b/tools/lib/perf/tests/Build @@ -0,0 +1,5 @@ +tests-y += main.o +tests-y += test-evsel.o +tests-y += test-evlist.o +tests-y += test-cpumap.o +tests-y += test-threadmap.o diff --git a/tools/lib/perf/tests/Makefile b/tools/lib/perf/tests/Makefile deleted file mode 100644 index b536cc9a26dd..000000000000 --- a/tools/lib/perf/tests/Makefile +++ /dev/null @@ -1,40 +0,0 @@ -# SPDX-License-Identifier: (LGPL-2.1 OR 
BSD-2-Clause) - -TESTS = test-cpumap test-threadmap test-evlist test-evsel - -TESTS_SO := $(addsuffix -so,$(TESTS)) -TESTS_A := $(addsuffix -a,$(TESTS)) - -TEST_ARGS := $(if $(V),-v) - -# Set compile option CFLAGS -ifdef EXTRA_CFLAGS - CFLAGS := $(EXTRA_CFLAGS) -else - CFLAGS := -g -Wall -endif - -all: - -include $(srctree)/tools/scripts/Makefile.include - -INCLUDE = -I$(srctree)/tools/lib/perf/include -I$(srctree)/tools/include -I$(srctree)/tools/lib - -$(TESTS_A): FORCE - $(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -o $@ $(subst -a,.c,$@) ../libperf.a $(LIBAPI) - -$(TESTS_SO): FORCE - $(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -L.. -o $@ $(subst -so,.c,$@) $(LIBAPI) -lperf - -all: $(TESTS_A) $(TESTS_SO) - -run: - @echo "running static:" - @for i in $(TESTS_A); do ./$$i $(TEST_ARGS); done - @echo "running dynamic:" - @for i in $(TESTS_SO); do LD_LIBRARY_PATH=../ ./$$i $(TEST_ARGS); done - -clean: - $(call QUIET_CLEAN, tests)$(RM) $(TESTS_A) $(TESTS_SO) - -.PHONY: all clean FORCE diff --git a/tools/lib/perf/tests/main.c b/tools/lib/perf/tests/main.c new file mode 100644 index 000000000000..56423fd4db19 --- /dev/null +++ b/tools/lib/perf/tests/main.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <internal/tests.h> +#include "tests.h" + +int tests_failed; +int tests_verbose; + +int main(int argc, char **argv) +{ + __T("test cpumap", !test_cpumap(argc, argv)); + __T("test threadmap", !test_threadmap(argc, argv)); + __T("test evlist", !test_evlist(argc, argv)); + __T("test evsel", !test_evsel(argc, argv)); + return 0; +} diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index c70e9e03af3e..d39378eaf897 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -3,6 +3,7 @@ #include <stdio.h> #include <perf/cpumap.h> #include <internal/tests.h> +#include "tests.h" static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) @@ -10,7 +11,7 @@ static int 
libperf_print(enum libperf_print_level level, return vfprintf(stderr, fmt, ap); } -int main(int argc, char **argv) +int test_cpumap(int argc, char **argv) { struct perf_cpu_map *cpus; diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index e2ac0b7f432e..c67c83399170 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -18,6 +18,8 @@ #include <perf/event.h> #include <internal/tests.h> #include <api/fs/fs.h> +#include "tests.h" +#include <internal/evsel.h> static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) @@ -29,7 +31,7 @@ static int test_stat_cpu(void) { struct perf_cpu_map *cpus; struct perf_evlist *evlist; - struct perf_evsel *evsel; + struct perf_evsel *evsel, *leader; struct perf_event_attr attr1 = { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_CLOCK, @@ -46,7 +48,7 @@ static int test_stat_cpu(void) evlist = perf_evlist__new(); __T("failed to create evlist", evlist); - evsel = perf_evsel__new(&attr1); + evsel = leader = perf_evsel__new(&attr1); __T("failed to create evsel1", evsel); perf_evlist__add(evlist, evsel); @@ -56,6 +58,10 @@ static int test_stat_cpu(void) perf_evlist__add(evlist, evsel); + perf_evlist__set_leader(evlist); + __T("failed to set leader", leader->leader == leader); + __T("failed to set leader", evsel->leader == leader); + perf_evlist__set_maps(evlist, cpus, NULL); err = perf_evlist__open(evlist); @@ -84,7 +90,7 @@ static int test_stat_thread(void) struct perf_counts_values counts = { .val = 0 }; struct perf_thread_map *threads; struct perf_evlist *evlist; - struct perf_evsel *evsel; + struct perf_evsel *evsel, *leader; struct perf_event_attr attr1 = { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_CLOCK, @@ -103,7 +109,7 @@ static int test_stat_thread(void) evlist = perf_evlist__new(); __T("failed to create evlist", evlist); - evsel = perf_evsel__new(&attr1); + evsel = leader = perf_evsel__new(&attr1); 
__T("failed to create evsel1", evsel); perf_evlist__add(evlist, evsel); @@ -113,6 +119,10 @@ static int test_stat_thread(void) perf_evlist__add(evlist, evsel); + perf_evlist__set_leader(evlist); + __T("failed to set leader", leader->leader == leader); + __T("failed to set leader", evsel->leader == leader); + perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); @@ -135,7 +145,7 @@ static int test_stat_thread_enable(void) struct perf_counts_values counts = { .val = 0 }; struct perf_thread_map *threads; struct perf_evlist *evlist; - struct perf_evsel *evsel; + struct perf_evsel *evsel, *leader; struct perf_event_attr attr1 = { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_CLOCK, @@ -156,7 +166,7 @@ static int test_stat_thread_enable(void) evlist = perf_evlist__new(); __T("failed to create evlist", evlist); - evsel = perf_evsel__new(&attr1); + evsel = leader = perf_evsel__new(&attr1); __T("failed to create evsel1", evsel); perf_evlist__add(evlist, evsel); @@ -166,6 +176,10 @@ static int test_stat_thread_enable(void) perf_evlist__add(evlist, evsel); + perf_evlist__set_leader(evlist); + __T("failed to set leader", leader->leader == leader); + __T("failed to set leader", evsel->leader == leader); + perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); @@ -253,6 +267,7 @@ static int test_mmap_thread(void) evsel = perf_evsel__new(&attr); __T("failed to create evsel1", evsel); + __T("failed to set leader", evsel->leader == evsel); perf_evlist__add(evlist, evsel); @@ -338,6 +353,7 @@ static int test_mmap_cpus(void) evsel = perf_evsel__new(&attr); __T("failed to create evsel1", evsel); + __T("failed to set leader", evsel->leader == evsel); perf_evlist__add(evlist, evsel); @@ -397,7 +413,7 @@ static int test_mmap_cpus(void) return 0; } -int main(int argc, char **argv) +int test_evlist(int argc, char **argv) { __T_START; diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 
288b5feaefe2..a184e4861627 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -6,6 +6,7 @@ #include <perf/threadmap.h> #include <perf/evsel.h> #include <internal/tests.h> +#include "tests.h" static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) @@ -184,7 +185,7 @@ static int test_stat_user_read(int event) return 0; } -int main(int argc, char **argv) +int test_evsel(int argc, char **argv) { __T_START; diff --git a/tools/lib/perf/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c index 384471441b48..5e2a0291e94c 100644 --- a/tools/lib/perf/tests/test-threadmap.c +++ b/tools/lib/perf/tests/test-threadmap.c @@ -3,6 +3,7 @@ #include <stdio.h> #include <perf/threadmap.h> #include <internal/tests.h> +#include "tests.h" static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) @@ -10,7 +11,7 @@ static int libperf_print(enum libperf_print_level level, return vfprintf(stderr, fmt, ap); } -int main(int argc, char **argv) +int test_threadmap(int argc, char **argv) { struct perf_thread_map *threads; diff --git a/tools/lib/perf/tests/tests.h b/tools/lib/perf/tests/tests.h new file mode 100644 index 000000000000..604838f21b2b --- /dev/null +++ b/tools/lib/perf/tests/tests.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef TESTS_H +#define TESTS_H + +int test_cpumap(int argc, char **argv); +int test_threadmap(int argc, char **argv); +int test_evlist(int argc, char **argv); +int test_evsel(int argc, char **argv); + +#endif /* TESTS_H */ diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index b0872c801866..3bb75c1f25e8 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -706,6 +706,12 @@ intel-pt.*:: If set, Intel PT decoder will set the mispred flag on all branches. 
+ intel-pt.max-loops:: + If set and non-zero, the maximum number of unconditional + branches decoded without consuming any trace packets. If + the maximum is exceeded there will be a "Never-ending loop" + error. The default is 100000. + auxtrace.*:: auxtrace.dumpdir:: diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index c9e0de5b00c1..77e7f18c0bd0 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -923,7 +923,7 @@ install-tools: all install-gtk $(call QUIET_INSTALL, binaries) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \ $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \ - $(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(dir_SQ)/trace'; \ + $(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(includedir_SQ)/perf'; \ $(INSTALL) util/perf_dlfilter.h -t '$(DESTDIR_SQ)$(includedir_SQ)/perf' ifndef NO_PERF_READ_VDSO32 @@ -1017,6 +1017,7 @@ SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel) SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h +SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h ifdef BUILD_BPF_SKEL BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool @@ -1030,7 +1031,21 @@ $(BPFTOOL): | $(SKEL_TMP_OUT) CFLAGS= $(MAKE) -C ../bpf/bpftool \ OUTPUT=$(SKEL_TMP_OUT)/ bootstrap -$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT) +VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ + ../../vmlinux \ + /sys/kernel/btf/vmlinux \ + /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) + +$(SKEL_OUT)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) +ifeq ($(VMLINUX_H),) + $(QUIET_GEN)$(BPFTOOL) btf dump file $< format c > $@ +else + $(Q)cp "$(VMLINUX_H)" $@ +endif + +$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) $(SKEL_OUT)/vmlinux.h 
| $(SKEL_TMP_OUT) $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) \ -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ && $(LLVM_STRIP) -g $@ diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 9cd1c34f31b5..ac653d08b1ea 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -357,7 +357,7 @@ 440 n64 process_madvise sys_process_madvise 441 n64 epoll_pwait2 sys_epoll_pwait2 442 n64 mount_setattr sys_mount_setattr -# 443 reserved for quotactl_path +443 n64 quotactl_fd sys_quotactl_fd 444 n64 landlock_create_ruleset sys_landlock_create_ruleset 445 n64 landlock_add_rule sys_landlock_add_rule 446 n64 landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 8f052ff4058c..aef2a290e71a 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -522,7 +522,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -# 443 reserved for quotactl_path +443 common quotactl_fd sys_quotactl_fd 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 0690263df1dd..64d51ab5a8b4 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -445,7 +445,7 @@ 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr sys_mount_setattr -# 443 reserved for 
quotactl_path +443 common quotactl_fd sys_quotactl_fd sys_quotactl_fd 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index ce18119ea0d0..f6b57799c1ea 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,10 +364,11 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -# 443 reserved for quotactl_path +443 common quotactl_fd sys_quotactl_fd 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +447 common memfd_secret sys_memfd_secret # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index 8c6732cc7794..0b0951030a2f 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -5,11 +5,15 @@ #include "util/parse-events.h" #define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}" +#define TOPDOWN_L2_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound,topdown-heavy-ops,topdown-br-mispredict,topdown-fetch-lat,topdown-mem-bound}" int arch_evlist__add_default_attrs(struct evlist *evlist) { if (!pmu_have_event("cpu", "slots")) return 0; - return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL); + if (pmu_have_event("cpu", "topdown-heavy-ops")) + return parse_events(evlist, TOPDOWN_L2_EVENTS, NULL); + else + return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL); } diff --git 
a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c index d63acb782b63..eeafe97b8105 100644 --- a/tools/perf/arch/x86/util/iostat.c +++ b/tools/perf/arch/x86/util/iostat.c @@ -322,7 +322,7 @@ static int iostat_event_group(struct evlist *evl, } evlist__for_each_entry(evl, evsel) { - evsel->priv = list->rps[evsel->idx / metrics_count]; + evsel->priv = list->rps[evsel->core.idx / metrics_count]; } list->nr_entries = 0; err: @@ -428,7 +428,7 @@ void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel, { double iostat_value = 0; u64 prev_count_val = 0; - const char *iostat_metric = iostat_metric_by_idx(evsel->idx); + const char *iostat_metric = iostat_metric_by_idx(evsel->core.idx); u8 die = ((struct iio_root_port *)evsel->priv)->die; struct perf_counts_values *count = perf_counts(evsel->counts, die, 0); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index f52b3a799e76..80450c0e8f36 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1031,12 +1031,12 @@ static int process_base_stream(struct data__file *data_base, continue; es_base = evsel_streams__entry(data_base->evlist_streams, - evsel_base->idx); + evsel_base->core.idx); if (!es_base) return -1; es_pair = evsel_streams__entry(data_pair->evlist_streams, - evsel_pair->idx); + evsel_pair->core.idx); if (!es_pair) return -1; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 5d6f583e2cd3..c88c61e7f8cc 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -361,9 +361,10 @@ static struct dso *findnew_dso(int pid, int tid, const char *filename, dso = machine__findnew_dso_id(machine, filename, id); } - if (dso) + if (dso) { + nsinfo__put(dso->nsinfo); dso->nsinfo = nsi; - else + } else nsinfo__put(nsi); thread__put(thread); @@ -992,8 +993,10 @@ int cmd_inject(int argc, const char **argv) data.path = inject.input_name; inject.session = perf_session__new(&data, inject.output.is_pipe, 
&inject.tool); - if (IS_ERR(inject.session)) - return PTR_ERR(inject.session); + if (IS_ERR(inject.session)) { + ret = PTR_ERR(inject.session); + goto out_close_output; + } if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); @@ -1035,6 +1038,8 @@ int cmd_inject(int argc, const char **argv) out_delete: zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); +out_close_output: + perf_data__close(&inject.output); free(inject.itrace_synth_opts.vm_tm_corr_args); return ret; } diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 2bfd41df621c..e1dd51f2874b 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -31,7 +31,7 @@ #include <linux/zalloc.h> #define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*" -#define DEFAULT_FUNC_FILTER "!_*" +#define DEFAULT_FUNC_FILTER "!_* & !*@plt" #define DEFAULT_LIST_FILTER "*" /* Session management structure */ diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 71efe6573ee7..671a21c9ee4d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -891,11 +891,12 @@ static int record__open(struct record *rec) int rc = 0; /* - * For initial_delay or system wide, we need to add a dummy event so - * that we can track PERF_RECORD_MMAP to cover the delay of waiting or - * event synthesis. + * For initial_delay, system wide or a hybrid system, we need to add a + * dummy event so that we can track PERF_RECORD_MMAP to cover the delay + * of waiting or event synthesis. */ - if (opts->initial_delay || target__has_cpu(&opts->target)) { + if (opts->initial_delay || target__has_cpu(&opts->target) || + perf_pmu__has_hybrid()) { pos = evlist__get_tracking_event(evlist); if (!evsel__is_dummy_event(pos)) { /* Set up dummy event. 
*/ @@ -926,7 +927,7 @@ try_again: goto try_again; } if ((errno == EINVAL || errno == EBADF) && - pos->leader != pos && + pos->core.leader != &pos->core && pos->weak_group) { pos = evlist__reset_weak_group(evlist, pos, true); goto try_again; @@ -1776,7 +1777,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) rec->tool.ordered_events = false; } - if (!rec->evlist->nr_groups) + if (!rec->evlist->core.nr_groups) perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); if (data->is_pipe) { diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index bc5c393021dc..dc0364f671b9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -332,7 +332,7 @@ static int process_read_event(struct perf_tool *tool, const char *name = evsel__name(evsel); int err = perf_read_values_add_value(&rep->show_threads_values, event->read.pid, event->read.tid, - evsel->idx, + evsel->core.idx, name, event->read.value); @@ -666,7 +666,7 @@ static int report__collapse_hists(struct report *rep) evlist__for_each_entry(rep->session->evlist, pos) { struct hists *hists = evsel__hists(pos); - if (pos->idx == 0) + if (pos->core.idx == 0) hists->symbol_filter_str = rep->symbol_filter_str; hists->socket_filter = rep->socket_filter; @@ -677,7 +677,7 @@ static int report__collapse_hists(struct report *rep) /* Non-group events are considered as leader */ if (symbol_conf.event_group && !evsel__is_group_leader(pos)) { - struct hists *leader_hists = evsel__hists(pos->leader); + struct hists *leader_hists = evsel__hists(evsel__leader(pos)); hists__match(leader_hists, hists); hists__link(leader_hists, hists); @@ -729,9 +729,14 @@ static int count_sample_event(struct perf_tool *tool __maybe_unused, return 0; } +static int process_attr(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct evlist **pevlist); + static void stats_setup(struct report *rep) { memset(&rep->tool, 0, sizeof(rep->tool)); + rep->tool.attr = process_attr; 
rep->tool.sample = count_sample_event; rep->tool.no_warn = true; } @@ -753,6 +758,7 @@ static void tasks_setup(struct report *rep) rep->tool.mmap = perf_event__process_mmap; rep->tool.mmap2 = perf_event__process_mmap2; } + rep->tool.attr = process_attr; rep->tool.comm = perf_event__process_comm; rep->tool.exit = perf_event__process_exit; rep->tool.fork = perf_event__process_fork; @@ -1169,6 +1175,8 @@ int cmd_report(int argc, const char **argv) .annotation_opts = annotation__default_options, .skip_empty = true, }; + char *sort_order_help = sort_help("sort by key(s):"); + char *field_order_help = sort_help("output field(s): overhead period sample "); const struct option options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), @@ -1203,9 +1211,9 @@ int cmd_report(int argc, const char **argv) OPT_BOOLEAN(0, "header-only", &report.header_only, "Show only data header."), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - sort_help("sort by key(s):")), + sort_order_help), OPT_STRING('F', "fields", &field_order, "key[,keys...]", - sort_help("output field(s): overhead period sample ")), + field_order_help), OPT_BOOLEAN(0, "show-cpu-utilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), OPT_BOOLEAN_FLAG(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, @@ -1338,11 +1346,11 @@ int cmd_report(int argc, const char **argv) char sort_tmp[128]; if (ret < 0) - return ret; + goto exit; ret = perf_config(report__config, &report); if (ret) - return ret; + goto exit; argc = parse_options(argc, argv, options, report_usage, 0); if (argc) { @@ -1356,8 +1364,10 @@ int cmd_report(int argc, const char **argv) report.symbol_filter_str = argv[0]; } - if (annotate_check_args(&report.annotation_opts) < 0) - return -EINVAL; + if (annotate_check_args(&report.annotation_opts) < 0) { + ret = -EINVAL; + goto exit; + } if (report.mmaps_mode) report.tasks_mode = true; @@ -1371,12 +1381,14 @@ int cmd_report(int argc, 
const char **argv) if (symbol_conf.vmlinux_name && access(symbol_conf.vmlinux_name, R_OK)) { pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name); - return -EINVAL; + ret = -EINVAL; + goto exit; } if (symbol_conf.kallsyms_name && access(symbol_conf.kallsyms_name, R_OK)) { pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name); - return -EINVAL; + ret = -EINVAL; + goto exit; } if (report.inverted_callchain) @@ -1400,12 +1412,14 @@ int cmd_report(int argc, const char **argv) repeat: session = perf_session__new(&data, false, &report.tool); - if (IS_ERR(session)) - return PTR_ERR(session); + if (IS_ERR(session)) { + ret = PTR_ERR(session); + goto exit; + } ret = evswitch__init(&report.evswitch, session->evlist, stderr); if (ret) - return ret; + goto exit; if (zstd_init(&(session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed. Reported data may be incomplete.\n"); @@ -1426,7 +1440,7 @@ repeat: setup_forced_leader(&report, session->evlist); - if (symbol_conf.group_sort_idx && !session->evlist->nr_groups) { + if (symbol_conf.group_sort_idx && !session->evlist->core.nr_groups) { parse_options_usage(NULL, options, "group-sort-idx", 0); ret = -EINVAL; goto error; @@ -1640,5 +1654,8 @@ error: zstd_fini(&(session->zstd_data)); perf_session__delete(session); +exit: + free(sort_order_help); + free(field_order_help); return ret; } diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 954ce2f594e9..1ff10d4bccf3 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -670,7 +670,7 @@ static void create_tasks(struct perf_sched *sched) err = pthread_attr_init(&attr); BUG_ON(err); err = pthread_attr_setstacksize(&attr, - (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); + (size_t) max(16 * 1024, (int)PTHREAD_STACK_MIN)); BUG_ON(err); err = pthread_mutex_lock(&sched->start_work_mutex); BUG_ON(err); @@ -3335,6 +3335,16 @@ static void setup_sorting(struct perf_sched *sched, const struct option *options 
sort_dimension__add("pid", &sched->cmp_pid); } +static bool schedstat_events_exposed(void) +{ + /* + * Select "sched:sched_stat_wait" event to check + * whether schedstat tracepoints are exposed. + */ + return IS_ERR(trace_event__tp_format("sched", "sched_stat_wait")) ? + false : true; +} + static int __cmd_record(int argc, const char **argv) { unsigned int rec_argc, i, j; @@ -3346,21 +3356,33 @@ static int __cmd_record(int argc, const char **argv) "-m", "1024", "-c", "1", "-e", "sched:sched_switch", - "-e", "sched:sched_stat_wait", - "-e", "sched:sched_stat_sleep", - "-e", "sched:sched_stat_iowait", "-e", "sched:sched_stat_runtime", "-e", "sched:sched_process_fork", "-e", "sched:sched_wakeup_new", "-e", "sched:sched_migrate_task", }; + + /* + * The tracepoints trace_sched_stat_{wait, sleep, iowait} + * are not exposed to user if CONFIG_SCHEDSTATS is not set, + * to prevent "perf sched record" execution failure, determine + * whether to record schedstat events according to actual situation. + */ + const char * const schedstat_args[] = { + "-e", "sched:sched_stat_wait", + "-e", "sched:sched_stat_sleep", + "-e", "sched:sched_stat_iowait", + }; + unsigned int schedstat_argc = schedstat_events_exposed() ? 
+ ARRAY_SIZE(schedstat_args) : 0; + struct tep_event *waking_event; /* * +2 for either "-e", "sched:sched_wakeup" or * "-e", "sched:sched_waking" */ - rec_argc = ARRAY_SIZE(record_args) + 2 + argc - 1; + rec_argc = ARRAY_SIZE(record_args) + 2 + schedstat_argc + argc - 1; rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (rec_argv == NULL) @@ -3376,6 +3398,9 @@ static int __cmd_record(int argc, const char **argv) else rec_argv[i++] = strdup("sched:sched_wakeup"); + for (j = 0; j < schedstat_argc; j++) + rec_argv[i++] = strdup(schedstat_args[j]); + for (j = 1; j < (unsigned int)argc; j++, i++) rec_argv[i] = argv[j]; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 2030936cc891..064da7f3618d 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1899,6 +1899,7 @@ static void perf_sample__fprint_metric(struct perf_script *script, struct perf_sample *sample, FILE *fp) { + struct evsel *leader = evsel__leader(evsel); struct perf_stat_output_ctx ctx = { .print_metric = script_print_metric, .new_line = script_new_line, @@ -1915,7 +1916,7 @@ static void perf_sample__fprint_metric(struct perf_script *script, if (!evsel->stats) evlist__alloc_stats(script->session->evlist, false); - if (evsel_script(evsel->leader)->gnum++ == 0) + if (evsel_script(leader)->gnum++ == 0) perf_stat__reset_shadow_stats(); val = sample->period * evsel->scale; perf_stat__update_shadow_stats(evsel, @@ -1923,8 +1924,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, sample->cpu, &rt_stat); evsel_script(evsel)->val = val; - if (evsel_script(evsel->leader)->gnum == evsel->leader->core.nr_members) { - for_each_group_member (ev2, evsel->leader) { + if (evsel_script(leader)->gnum == leader->core.nr_members) { + for_each_group_member (ev2, leader) { perf_stat__print_shadow_stats(&stat_config, ev2, evsel_script(ev2)->val, sample->cpu, @@ -1932,7 +1933,7 @@ static void perf_sample__fprint_metric(struct perf_script *script, NULL, 
&rt_stat); } - evsel_script(evsel->leader)->gnum = 0; + evsel_script(leader)->gnum = 0; } } @@ -2600,6 +2601,12 @@ static void perf_script__exit_per_event_dump_stats(struct perf_script *script) } } +static void perf_script__exit(struct perf_script *script) +{ + perf_thread_map__put(script->threads); + perf_cpu_map__put(script->cpus); +} + static int __cmd_script(struct perf_script *script) { int ret; @@ -4142,8 +4149,10 @@ out_delete: zfree(&script.ptime_range); } + zstd_fini(&(session->zstd_data)); evlist__free_stats(session->evlist); perf_session__delete(session); + perf_script__exit(&script); if (script_started) cleanup_scripting(); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f9f74a514315..634375937db9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -154,6 +154,8 @@ static const char *topdown_metric_L2_attrs[] = { NULL, }; +#define TOPDOWN_MAX_LEVEL 2 + static const char *smi_cost_attrs = { "{" "msr/aperf/," @@ -248,7 +250,7 @@ static void evlist__check_cpu_maps(struct evlist *evlist) evlist__warn_hybrid_group(evlist); evlist__for_each_entry(evlist, evsel) { - leader = evsel->leader; + leader = evsel__leader(evsel); /* Check that leader matches cpus with each member. 
*/ if (leader == evsel) @@ -269,10 +271,10 @@ static void evlist__check_cpu_maps(struct evlist *evlist) } for_each_group_evsel(pos, leader) { - pos->leader = pos; + evsel__set_leader(pos, pos); pos->core.nr_members = 0; } - evsel->leader->core.nr_members = 0; + evsel->core.leader->nr_members = 0; } } @@ -745,8 +747,8 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) */ counter->errored = true; - if ((counter->leader != counter) || - !(counter->leader->core.nr_members > 1)) + if ((evsel__leader(counter) != counter) || + !(counter->core.leader->nr_members > 1)) return COUNTER_SKIP; } else if (evsel__fallback(counter, errno, msg, sizeof(msg))) { if (verbose > 0) @@ -839,7 +841,7 @@ try_again: * Don't close here because we're in the wrong affinity. */ if ((errno == EINVAL || errno == EBADF) && - counter->leader != counter && + evsel__leader(counter) != counter && counter->weak_group) { evlist__reset_weak_group(evsel_list, counter, false); assert(counter->reset_group); @@ -1931,6 +1933,7 @@ setup_metrics: if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0) return -1; + stat_config.topdown_level = TOPDOWN_MAX_LEVEL; if (arch_evlist__add_default_attrs(evsel_list) < 0) return -1; } @@ -2442,9 +2445,6 @@ int cmd_stat(int argc, const char **argv) evlist__check_cpu_maps(evsel_list); - if (perf_pmu__has_hybrid()) - stat_config.no_merge = true; - /* * Initialize thread_map with comm names, * so we could print it out on output. 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 2d570bfe7a56..02f8bb5dbc0f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -264,9 +264,9 @@ static void perf_top__show_details(struct perf_top *top) if (top->evlist->enabled) { if (top->zero) - symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx); + symbol__annotate_zero_histogram(symbol, top->sym_evsel->core.idx); else - symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx); + symbol__annotate_decay_histogram(symbol, top->sym_evsel->core.idx); } if (more != 0) printf("%d lines not displayed, maybe increase display entries [e]\n", more); @@ -301,7 +301,7 @@ static void perf_top__resort_hists(struct perf_top *t) /* Non-group events are considered as leader */ if (symbol_conf.event_group && !evsel__is_group_leader(pos)) { - struct hists *leader_hists = evsel__hists(pos->leader); + struct hists *leader_hists = evsel__hists(evsel__leader(pos)); hists__match(leader_hists, hists); hists__link(leader_hists, hists); @@ -530,7 +530,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) fprintf(stderr, "\nAvailable events:"); evlist__for_each_entry(top->evlist, top->sym_evsel) - fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, evsel__name(top->sym_evsel)); + fprintf(stderr, "\n\t%d %s", top->sym_evsel->core.idx, evsel__name(top->sym_evsel)); prompt_integer(&counter, "Enter details event counter"); @@ -541,7 +541,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) break; } evlist__for_each_entry(top->evlist, top->sym_evsel) - if (top->sym_evsel->idx == counter) + if (top->sym_evsel->core.idx == counter) break; } else top->sym_evsel = evlist__first(top->evlist); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7ec18ff57fc4..9c265fa96011 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2266,6 +2266,14 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample 
*sam return augmented_args; } +static void syscall__exit(struct syscall *sc) +{ + if (!sc) + return; + + free(sc->arg_fmt); +} + static int trace__sys_enter(struct trace *trace, struct evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -3095,6 +3103,21 @@ static struct evsel *evsel__new_pgfault(u64 config) return evsel; } +static void evlist__free_syscall_tp_fields(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + struct evsel_trace *et = evsel->priv; + + if (!et || !evsel->tp_format || strcmp(evsel->tp_format->system, "syscalls")) + continue; + + free(et->fmt); + free(et); + } +} + static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) { const u32 type = event->header.type; @@ -4130,7 +4153,7 @@ out_disable: out_delete_evlist: trace__symbols__exit(trace); - + evlist__free_syscall_tp_fields(evlist); evlist__delete(evlist); cgroup__put(trace->cgroup); trace->evlist = NULL; @@ -4636,6 +4659,9 @@ do_concat: err = parse_events_option(&o, lists[0], 0); } out: + free(strace_groups_dir); + free(lists[0]); + free(lists[1]); if (sep) *sep = ','; @@ -4701,6 +4727,21 @@ out: return err; } +static void trace__exit(struct trace *trace) +{ + int i; + + strlist__delete(trace->ev_qualifier); + free(trace->ev_qualifier_ids.entries); + if (trace->syscalls.table) { + for (i = 0; i <= trace->sctbl->syscalls.max_id; i++) + syscall__exit(&trace->syscalls.table[i]); + free(trace->syscalls.table); + } + syscalltbl__delete(trace->sctbl); + zfree(&trace->perfconfig_events); +} + int cmd_trace(int argc, const char **argv) { const char *trace_usage[] = { @@ -5135,6 +5176,6 @@ out_close: if (output_name != NULL) fclose(trace.output); out: - zfree(&trace.perfconfig_events); + trace__exit(&trace); return err; } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power10/nest_metrics.json new file mode 100644 
index 000000000000..8ba3e81c9808 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power10/nest_metrics.json @@ -0,0 +1,424 @@ +[ + { + "MetricName": "VEC_GROUP_PUMP_RETRY_RATIO_P01", + "MetricExpr": "(hv_24x7@PM_PB_RTY_VG_PUMP01\\,chip\\=?@ / hv_24x7@PM_PB_VG_PUMP01\\,chip\\=?@) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "VEC_GROUP_PUMP_RETRY_RATIO_P23", + "MetricExpr": "(hv_24x7@PM_PB_RTY_VG_PUMP23\\,chip\\=?@ / hv_24x7@PM_PB_VG_PUMP23\\,chip\\=?@) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "LOCAL_NODE_PUMP_RETRY_RATIO_P01", + "MetricExpr": "(hv_24x7@PM_PB_RTY_LNS_PUMP01\\,chip\\=?@ / hv_24x7@PM_PB_LNS_PUMP01\\,chip\\=?@) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "LOCAL_NODE_PUMP_RETRY_RATIO_P23", + "MetricExpr": "(hv_24x7@PM_PB_RTY_LNS_PUMP23\\,chip\\=?@ / hv_24x7@PM_PB_LNS_PUMP23\\,chip\\=?@) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "GROUP_PUMP_RETRY_RATIO_P01", + "MetricExpr": "(hv_24x7@PM_PB_RTY_GROUP_PUMP01\\,chip\\=?@ / hv_24x7@PM_PB_GROUP_PUMP01\\,chip\\=?@) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "GROUP_PUMP_RETRY_RATIO_P23", + "MetricExpr": "(hv_24x7@PM_PB_RTY_GROUP_PUMP23\\,chip\\=?@ / hv_24x7@PM_PB_GROUP_PUMP23\\,chip\\=?@) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_GROUP_PUMPS_P01", + "MetricExpr": "(hv_24x7@PM_PB_GROUP_PUMP01\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_GROUP_PUMPS_P23", + "MetricExpr": "(hv_24x7@PM_PB_GROUP_PUMP23\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_GROUP_PUMPS_RETRIES_P01", + "MetricExpr": "(hv_24x7@PM_PB_RTY_GROUP_PUMP01\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", 
+ "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_GROUP_PUMPS_RETRIES_P23", + "MetricExpr": "(hv_24x7@PM_PB_RTY_GROUP_PUMP23\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", + "AggregationMode": "PerChip" + }, + { + "MetricName": "REMOTE_NODE_PUMPS_RETRIES_RATIO_P01", + "MetricExpr": "(hv_24x7@PM_PB_RTY_RNS_PUMP01\\,chip\\=?@ / hv_24x7@PM_PB_RNS_PUMP01\\,chip\\=?@) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "REMOTE_NODE_PUMPS_RETRIES_RATIO_P23", + "MetricExpr": "(hv_24x7@PM_PB_RTY_RNS_PUMP23\\,chip\\=?@ / hv_24x7@PM_PB_RNS_PUMP23\\,chip\\=?@) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_VECTOR_GROUP_PUMPS_P01", + "MetricExpr": "(hv_24x7@PM_PB_VG_PUMP01\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_VECTOR_GROUP_PUMPS_P23", + "MetricExpr": "(hv_24x7@PM_PB_VG_PUMP23\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_LOCAL_NODE_PUMPS_P01", + "MetricExpr": "(hv_24x7@PM_PB_LNS_PUMP01\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_LOCAL_NODE_PUMPS_P23", + "MetricExpr": "(hv_24x7@PM_PB_LNS_PUMP23\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_VECTOR_GROUP_PUMPS_RETRIES_P01", + "MetricExpr": "(hv_24x7@PM_PB_RTY_VG_PUMP01\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_VECTOR_GROUP_PUMPS_RETRIES_P23", + "MetricExpr": "(hv_24x7@PM_PB_RTY_VG_PUMP23\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_LOCAL_NODE_PUMPS_RETRIES_P01", + "MetricExpr": 
"(hv_24x7@PM_PB_RTY_LNS_PUMP01\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_LOCAL_NODE_PUMPS_RETRIES_P23", + "MetricExpr": "(hv_24x7@PM_PB_RTY_LNS_PUMP23\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_REMOTE_NODE_PUMPS_P01", + "MetricExpr": "(hv_24x7@PM_PB_RNS_PUMP01\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_REMOTE_NODE_PUMPS_P23", + "MetricExpr": "(hv_24x7@PM_PB_RNS_PUMP23\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_NEAR_NODE_PUMPS_P01", + "MetricExpr": "(hv_24x7@PM_PB_NNS_PUMP01\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_NEAR_NODE_PUMPS_P23", + "MetricExpr": "(hv_24x7@PM_PB_NNS_PUMP23\\,chip\\=?@ / hv_24x7@PM_PAU_CYC\\,chip\\=?@)", + "ScaleUnit": "4", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_INT_PB_BW", + "MetricExpr": "(hv_24x7@PM_PB_INT_DATA_XFER\\,chip\\=?@)", + "ScaleUnit": "2.09MB", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK0_OUT_TOTAL_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK0_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK1_OUT_TOTAL_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK1_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + 
"MetricName": "XLINK2_OUT_TOTAL_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK2_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK3_OUT_TOTAL_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK3_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK4_OUT_TOTAL_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK4_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK5_OUT_TOTAL_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK5_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK6_OUT_TOTAL_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK6_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK7_OUT_TOTAL_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK7_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + 
hv_24x7@PM_XLINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK0_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK0_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK1_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK1_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK2_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK2_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK3_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK3_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK4_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK4_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK5_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK5_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_DATA\\,chip\\=?@) / 
(hv_24x7@PM_XLINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK6_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK6_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK7_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_XLINK7_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK0_OUT_TOTAL_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_ALINK0_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK1_OUT_TOTAL_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_ALINK1_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK2_OUT_TOTAL_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_ALINK2_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK3_OUT_TOTAL_UTILIZATION", + "MetricExpr": 
"((hv_24x7@PM_ALINK3_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK4_OUT_TOTAL_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_ALINK4_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK5_OUT_TOTAL_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_ALINK5_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK6_OUT_TOTAL_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_ALINK6_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK7_OUT_TOTAL_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_ALINK7_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK0_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_ALINK0_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": 
"PerChip" + }, + { + "MetricName": "ALINK1_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_ALINK1_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK2_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_ALINK2_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK3_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_ALINK3_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK4_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_ALINK4_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK5_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_ALINK5_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK6_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_ALINK6_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + 
"ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK7_OUT_DATA_UTILIZATION", + "MetricExpr": "((hv_24x7@PM_ALINK7_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_DATA_BANDWIDTH_TRANSFERRED_OVER_PB_PCI1", + "MetricExpr": "(hv_24x7@PM_PCI1_32B_INOUT\\,chip\\=?@)", + "ScaleUnit": "3.28e-2MB", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_DATA_BANDWIDTH_TRANSFERRED_OVER_PB_PCI0", + "MetricExpr": "(hv_24x7@PM_PCI0_32B_INOUT\\,chip\\=?@)", + "ScaleUnit": "3.28e-2MB", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_MCS_READ_BW_MC0_CHAN01", + "MetricExpr": "(hv_24x7@PM_MCS_128B_RD_DATA_BLOCKS_MC0_CHAN01\\,chip\\=?@)", + "ScaleUnit": "5.24e-1MB", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_MCS_READ_BW_MC1_CHAN01", + "MetricExpr": "(hv_24x7@PM_MCS_128B_RD_DATA_BLOCKS_MC1_CHAN01\\,chip\\=?@)", + "ScaleUnit": "5.24e-1MB", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_MCS_READ_BW_MC2_CHAN01", + "MetricExpr": "(hv_24x7@PM_MCS_128B_RD_DATA_BLOCKS_MC2_CHAN01\\,chip\\=?@)", + "ScaleUnit": "5.24e-1MB", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_MCS_READ_BW_MC3_CHAN01", + "MetricExpr": "(hv_24x7@PM_MCS_128B_RD_DATA_BLOCKS_MC3_CHAN01\\,chip\\=?@)", + "ScaleUnit": "5.24e-1MB", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_MCS_WRITE_BW_MC0_CHAN01", + "MetricExpr": "(hv_24x7@PM_MCS_64B_WR_DATA_BLOCKS_MC0_CHAN01\\,chip\\=?@)", + "ScaleUnit": "2.6e-1MB", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_MCS_WRITE_BW_MC1_CHAN01", + "MetricExpr": "(hv_24x7@PM_MCS_64B_WR_DATA_BLOCKS_MC1_CHAN01\\,chip\\=?@)", + "ScaleUnit": "2.6e-1MB", + "AggregationMode": "PerChip" + }, + { + "MetricName": 
"TOTAL_MCS_WRITE_BW_MC2_CHAN01", + "MetricExpr": "(hv_24x7@PM_MCS_64B_WR_DATA_BLOCKS_MC2_CHAN01\\,chip\\=?@)", + "ScaleUnit": "2.6e-1MB", + "AggregationMode": "PerChip" + }, + { + "MetricName": "TOTAL_MCS_WRITE_BW_MC3_CHAN01", + "MetricExpr": "(hv_24x7@PM_MCS_64B_WR_DATA_BLOCKS_MC3_CHAN01\\,chip\\=?@)", + "ScaleUnit": "2.6e-1MB", + "AggregationMode": "PerChip" + }, + { + "MetricName": "Memory_RD_BW_Chip", + "MetricExpr": "(hv_24x7@PM_MCS_128B_RD_DATA_BLOCKS_MC0_CHAN01\\,chip\\=?@ + hv_24x7@PM_MCS_128B_RD_DATA_BLOCKS_MC1_CHAN01\\,chip\\=?@ + hv_24x7@PM_MCS_128B_RD_DATA_BLOCKS_MC2_CHAN01\\,chip\\=?@ + hv_24x7@PM_MCS_128B_RD_DATA_BLOCKS_MC3_CHAN01\\,chip\\=?@)", + "MetricGroup": "Memory_BW", + "ScaleUnit": "5.24e-1MB", + "AggregationMode": "PerChip" + }, + { + "MetricName": "Memory_WR_BW_Chip", + "MetricExpr": "(hv_24x7@PM_MCS_64B_WR_DATA_BLOCKS_MC0_CHAN01\\,chip\\=?@ + hv_24x7@PM_MCS_64B_WR_DATA_BLOCKS_MC1_CHAN01\\,chip\\=?@ + hv_24x7@PM_MCS_64B_WR_DATA_BLOCKS_MC2_CHAN01\\,chip\\=?@ + hv_24x7@PM_MCS_64B_WR_DATA_BLOCKS_MC3_CHAN01\\,chip\\=?@ )", + "MetricGroup": "Memory_BW", + "ScaleUnit": "2.6e-1MB", + "AggregationMode": "PerChip" + }, + { + "MetricName": "PowerBUS_Frequency", + "MetricExpr": "(hv_24x7@PM_PAU_CYC\\,chip\\=?@ )", + "ScaleUnit": "2.56e-7GHz", + "AggregationMode": "PerChip" + } +] diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index c72adbd67386..dbf5f5215abe 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include <stdio.h> +#include <stdlib.h> #include <sys/epoll.h> #include <sys/types.h> #include <sys/stat.h> @@ -151,7 +152,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), } evlist__splice_list_tail(evlist, &parse_state.list); - evlist->nr_groups = parse_state.nr_groups; + evlist->core.nr_groups = parse_state.nr_groups; evlist__config(evlist, &opts, NULL); @@ -276,6 +277,7 @@ static int __test__bpf(int idx) } out: + free(obj_buf); 
bpf__clear(); return ret; } diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 41e3cf6bb66c..5e6242576236 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -26,6 +26,7 @@ #include <linux/kernel.h> #include <linux/string.h> #include <subcmd/exec-cmd.h> +#include <linux/zalloc.h> static bool dont_fork; @@ -540,7 +541,7 @@ static int shell_tests__max_desc_width(void) { struct dirent **entlist; struct dirent *ent; - int n_dirs; + int n_dirs, e; char path_dir[PATH_MAX]; const char *path = shell_tests__dir(path_dir, sizeof(path_dir)); int width = 0; @@ -564,8 +565,9 @@ static int shell_tests__max_desc_width(void) } } + for (e = 0; e < n_dirs; e++) + zfree(&entlist[e]); free(entlist); - return width; } @@ -596,7 +598,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width) { struct dirent **entlist; struct dirent *ent; - int n_dirs; + int n_dirs, e; char path_dir[PATH_MAX]; struct shell_test st = { .dir = shell_tests__dir(path_dir, sizeof(path_dir)), @@ -629,6 +631,8 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width) test_and_print(&test, false, -1); } + for (e = 0; e < n_dirs; e++) + zfree(&entlist[e]); free(entlist); return 0; } @@ -730,7 +734,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i) { struct dirent **entlist; struct dirent *ent; - int n_dirs; + int n_dirs, e; char path_dir[PATH_MAX]; const char *path = shell_tests__dir(path_dir, sizeof(path_dir)); @@ -752,8 +756,11 @@ static int perf_test__list_shell(int argc, const char **argv, int i) continue; pr_info("%2d: %s\n", i, t.desc); + } + for (e = 0; e < n_dirs; e++) + zfree(&entlist[e]); free(entlist); return 0; } diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index 656218179222..44a50527f9d9 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -88,6 +88,7 @@ int test__event_update(struct test *test 
__maybe_unused, int subtest __maybe_unu struct evsel *evsel; struct event_name tmp; struct evlist *evlist = evlist__new_default(); + char *unit = strdup("KRAVA"); TEST_ASSERT_VAL("failed to get evlist", evlist); @@ -98,7 +99,7 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu perf_evlist__id_add(&evlist->core, &evsel->core, 0, 0, 123); - evsel->unit = strdup("KRAVA"); + evsel->unit = unit; TEST_ASSERT_VAL("failed to synthesize attr update unit", !perf_event__synthesize_event_update_unit(NULL, evsel, process_event_unit)); @@ -118,6 +119,7 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu TEST_ASSERT_VAL("failed to synthesize attr update cpus", !perf_event__synthesize_event_update_cpus(&tmp.tool, evsel, process_event_cpus)); - perf_cpu_map__put(evsel->core.own_cpus); + free(unit); + evlist__delete(evlist); return 0; } diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index b74cf80d1f10..4e09f0a312af 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -5,6 +5,7 @@ #include "tests.h" #include "debug.h" #include "pmu.h" +#include "pmu-hybrid.h" #include <errno.h> #include <linux/kernel.h> @@ -44,7 +45,7 @@ static int perf_evsel__roundtrip_cache_name_test(void) for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); - if (evsel->idx != idx) + if (evsel->core.idx != idx) continue; ++idx; @@ -84,9 +85,9 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names, err = 0; evlist__for_each_entry(evlist, evsel) { - if (strcmp(evsel__name(evsel), names[evsel->idx / distance])) { + if (strcmp(evsel__name(evsel), names[evsel->core.idx / distance])) { --err; - pr_debug("%s != %s\n", evsel__name(evsel), names[evsel->idx / distance]); + pr_debug("%s != %s\n", evsel__name(evsel), names[evsel->core.idx / distance]); } } @@ -102,7 +103,7 
@@ int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int { int err = 0, ret = 0; - if (perf_pmu__has_hybrid()) + if (perf_pmu__has_hybrid() && perf_pmu__hybrid_mounted("cpu_atom")) return perf_evsel__name_array_test(evsel__hw_names, 2); err = perf_evsel__name_array_test(evsel__hw_names, 1); diff --git a/tools/perf/tests/maps.c b/tools/perf/tests/maps.c index edcbc70ff9d6..1ac72919fa35 100644 --- a/tools/perf/tests/maps.c +++ b/tools/perf/tests/maps.c @@ -116,5 +116,7 @@ int test__maps__merge_in(struct test *t __maybe_unused, int subtest __maybe_unus ret = check_maps(merged3, ARRAY_SIZE(merged3), &maps); TEST_ASSERT_VAL("merge check failed", !ret); + + maps__exit(&maps); return TEST_OK; } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 73ae8f7aa066..d38757db2dc2 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -139,7 +139,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse " doesn't map to an evsel\n", sample.id); goto out_delete_evlist; } - nr_events[evsel->idx]++; + nr_events[evsel->core.idx]++; perf_mmap__consume(&md->core); } perf_mmap__read_done(&md->core); @@ -147,10 +147,10 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse out_init: err = 0; evlist__for_each_entry(evlist, evsel) { - if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) { + if (nr_events[evsel->core.idx] != expected_nr_events[evsel->core.idx]) { pr_debug("expected %d %s events, got %d\n", - expected_nr_events[evsel->idx], - evsel__name(evsel), nr_events[evsel->idx]); + expected_nr_events[evsel->core.idx], + evsel__name(evsel), nr_events[evsel->core.idx]); err = -1; goto out_delete_evlist; } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 0f113b2b36a3..8d4866739255 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -6,6 +6,7 @@ #include "tests.h" #include 
"debug.h" #include "pmu.h" +#include "pmu-hybrid.h" #include <dirent.h> #include <errno.h> #include <sys/types.h> @@ -49,7 +50,7 @@ static int test__checkevent_tracepoint(struct evlist *evlist) struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups); + TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->core.nr_groups); TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type); TEST_ASSERT_VAL("wrong sample_type", PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type); @@ -62,7 +63,7 @@ static int test__checkevent_tracepoint_multi(struct evlist *evlist) struct evsel *evsel; TEST_ASSERT_VAL("wrong number of entries", evlist->core.nr_entries > 1); - TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups); + TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->core.nr_groups); evlist__for_each_entry(evlist, evsel) { TEST_ASSERT_VAL("wrong type", @@ -668,7 +669,7 @@ static int test__group1(struct evlist *evlist) struct evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); + TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->core.nr_groups); /* instructions:k */ evsel = leader = evlist__first(evlist); @@ -698,7 +699,7 @@ static int test__group1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); @@ -710,7 +711,7 @@ static int test__group2(struct evlist *evlist) 
struct evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); + TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->core.nr_groups); /* faults + :ku modifier */ evsel = leader = evlist__first(evlist); @@ -739,7 +740,7 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); @@ -765,7 +766,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) struct evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups); + TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->core.nr_groups); /* group1 syscalls:sys_enter_openat:H */ evsel = leader = evlist__first(evlist); @@ -798,7 +799,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 3); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); @@ -831,7 +832,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", 
!evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); @@ -857,7 +858,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) struct evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); + TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->core.nr_groups); /* cycles:u + p */ evsel = leader = evlist__first(evlist); @@ -889,7 +890,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); @@ -901,7 +902,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) struct evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups); + TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->core.nr_groups); /* cycles + G */ evsel = leader = evlist__first(evlist); @@ -931,7 +932,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong 
precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); @@ -963,7 +964,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); /* cycles */ @@ -987,7 +988,7 @@ static int test__group_gh1(struct evlist *evlist) struct evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); + TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->core.nr_groups); /* cycles + :H group modifier */ evsel = leader = evlist__first(evlist); @@ -1016,7 +1017,7 @@ static int test__group_gh1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); return 0; @@ -1027,7 +1028,7 @@ static int test__group_gh2(struct evlist *evlist) struct evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); + TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->core.nr_groups); /* 
cycles + :G group modifier */ evsel = leader = evlist__first(evlist); @@ -1056,7 +1057,7 @@ static int test__group_gh2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); return 0; @@ -1067,7 +1068,7 @@ static int test__group_gh3(struct evlist *evlist) struct evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); + TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->core.nr_groups); /* cycles:G + :u group modifier */ evsel = leader = evlist__first(evlist); @@ -1096,7 +1097,7 @@ static int test__group_gh3(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); return 0; @@ -1107,7 +1108,7 @@ static int test__group_gh4(struct evlist *evlist) struct evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); - TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); + TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->core.nr_groups); /* cycles:G + :uG group modifier */ evsel = leader = evlist__first(evlist); @@ -1136,7 +1137,7 @@ static int test__group_gh4(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); 
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); return 0; @@ -1160,7 +1161,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); /* cache-misses - not sampling */ @@ -1174,7 +1175,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); /* branch-misses - not sampling */ @@ -1189,7 +1190,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); return 0; @@ -1213,7 +1214,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong 
precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); /* branch-misses - not sampling */ @@ -1228,7 +1229,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); return 0; @@ -1259,7 +1260,7 @@ static int test__pinned_group(struct evlist *evlist) TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong pinned", evsel->core.attr.pinned); /* cache-misses - can not be pinned, but will go on with the leader */ @@ -1303,7 +1304,7 @@ static int test__exclusive_group(struct evlist *evlist) TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong exclusive", evsel->core.attr.exclusive); /* cache-misses - can not be pinned, but will go on with the leader */ @@ -1530,12 +1531,12 @@ static int test__hybrid_hw_group_event(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); 
TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 0xc0 == evsel->core.attr.config); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); return 0; } @@ -1546,12 +1547,12 @@ static int test__hybrid_sw_hw_group_event(struct evlist *evlist) evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); return 0; } @@ -1563,11 +1564,11 @@ static int test__hybrid_hw_sw_group_event(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); return 0; } @@ -1579,14 +1580,14 @@ static int test__hybrid_group_modifier1(struct evlist *evlist) TEST_ASSERT_VAL("wrong 
number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 0xc0 == evsel->core.attr.config); - TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); return 0; @@ -1596,6 +1597,13 @@ static int test__hybrid_raw1(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + if (!perf_pmu__hybrid_mounted("cpu_atom")) { + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); + return 0; + } + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); @@ -1620,13 +1628,9 @@ static int test__hybrid_cache_event(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); - TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 0x2 == (evsel->core.attr.config & 0xffffffff)); - - evsel = evsel__next(evsel); - 
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", 0x10002 == (evsel->core.attr.config & 0xffffffff)); return 0; } @@ -2028,7 +2032,7 @@ static struct evlist_test test__hybrid_events[] = { .id = 7, }, { - .name = "cpu_core/LLC-loads/,cpu_atom/LLC-load-misses/", + .name = "cpu_core/LLC-loads/", .check = test__hybrid_cache_event, .id = 8, }, diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index 85d75b9b25a1..7c56bc1f4cff 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -21,6 +21,7 @@ #include "mmap.h" #include "tests.h" #include "pmu.h" +#include "pmu-hybrid.h" #define CHECK__(x) { \ while ((x) < 0) { \ @@ -93,7 +94,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe * For hybrid "cycles:u", it creates two events. * Init the second evsel here. */ - if (perf_pmu__has_hybrid()) { + if (perf_pmu__has_hybrid() && perf_pmu__hybrid_mounted("cpu_atom")) { evsel = evsel__next(evsel); evsel->core.attr.comm = 1; evsel->core.attr.disabled = 1; diff --git a/tools/perf/tests/pfm.c b/tools/perf/tests/pfm.c index acd50944f6af..e8fd0da0762b 100644 --- a/tools/perf/tests/pfm.c +++ b/tools/perf/tests/pfm.c @@ -96,7 +96,7 @@ static int test__pfm_events(void) count_pfm_events(&evlist->core), table[i].nr_events); TEST_ASSERT_EQUAL(table[i].events, - evlist->nr_groups, + evlist->core.nr_groups, 0); evlist__delete(evlist); @@ -180,7 +180,7 @@ static int test__pfm_group(void) count_pfm_events(&evlist->core), table[i].nr_events); TEST_ASSERT_EQUAL(table[i].events, - evlist->nr_groups, + evlist->core.nr_groups, table[i].nr_groups); evlist__delete(evlist); diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index ec4e3b21b831..b5efe675b321 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -61,6 +61,7 @@ static int session_write_header(char *path) TEST_ASSERT_VAL("failed to 
write header", !perf_session__write_header(session, session->evlist, data.file.fd, true)); + evlist__delete(session->evlist); perf_session__delete(session); return 0; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index f5509a958e38..701130ad43a2 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -350,14 +350,14 @@ static struct annotation_line *annotate_browser__find_next_asm_line( struct annotation_line *it = al; /* find next asm line */ - list_for_each_entry_continue(it, browser->b.top, node) { + list_for_each_entry_continue(it, browser->b.entries, node) { if (it->idx_asm >= 0) return it; } /* no asm line found forwards, try backwards */ it = al; - list_for_each_entry_continue_reverse(it, browser->b.top, node) { + list_for_each_entry_continue_reverse(it, browser->b.entries, node) { if (it->idx_asm >= 0) return it; } @@ -749,7 +749,7 @@ static int annotate_browser__run(struct annotate_browser *browser, hbt->timer(hbt->arg); if (delay_secs != 0) { - symbol__annotate_decay_histogram(sym, evsel->idx); + symbol__annotate_decay_histogram(sym, evsel->core.idx); hists__scnprintf_title(hists, title, sizeof(title)); annotate_browser__show(&browser->b, title, help); } diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index a7dff77f2018..94167bfed722 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -135,12 +135,12 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct map_symbol *ms, ret += perf_gtk__get_percent(s + ret, sizeof(s) - ret, sym, pos, - evsel->idx + i); + evsel->core.idx + i); ret += scnprintf(s + ret, sizeof(s) - ret, " "); } } else { ret = perf_gtk__get_percent(s, sizeof(s), sym, pos, - evsel->idx); + evsel->core.idx); } if (ret) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 1a909b53dc15..2d4fa1304178 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -141,6 +141,7 @@ perf-y += clockid.o 
perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o +perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index abe1499a9164..aa04a3655236 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -961,7 +961,7 @@ static int symbol__inc_addr_samples(struct map_symbol *ms, if (sym == NULL) return 0; src = symbol__hists(sym, evsel->evlist->core.nr_entries); - return src ? __symbol__inc_addr_samples(ms, src, evsel->idx, addr, sample) : 0; + return src ? __symbol__inc_addr_samples(ms, src, evsel->core.idx, addr, sample) : 0; } static int symbol__account_cycles(u64 addr, u64 start, @@ -2159,7 +2159,7 @@ static void annotation__calc_percent(struct annotation *notes, BUG_ON(i >= al->data_nr); - sym_hist = annotation__histogram(notes, evsel->idx); + sym_hist = annotation__histogram(notes, evsel->core.idx); data = &al->data[i++]; calc_percent(sym_hist, hists, data, al->offset, end); @@ -2340,7 +2340,7 @@ static void print_summary(struct rb_root *root, const char *filename) static void symbol__annotate_hits(struct symbol *sym, struct evsel *evsel) { struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evsel->idx); + struct sym_hist *h = annotation__histogram(notes, evsel->core.idx); u64 len = symbol__size(sym), offset; for (offset = 0; offset < len; ++offset) @@ -2373,7 +2373,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, const char *d_filename; const char *evsel_name = evsel__name(evsel); struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evsel->idx); + struct sym_hist *h = annotation__histogram(notes, evsel->core.idx); struct annotation_line *pos, *queue = NULL; u64 
start = map__rip_2objdump(map, sym->start); int printed = 2, queue_len = 0, addr_fmt_width; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 9350eeb3a3fc..cb19669d2a5b 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -73,8 +73,8 @@ static int evlist__regroup(struct evlist *evlist, struct evsel *leader, struct e grp = false; evlist__for_each_entry(evlist, evsel) { if (grp) { - if (!(evsel->leader == leader || - (evsel->leader == evsel && + if (!(evsel__leader(evsel) == leader || + (evsel__leader(evsel) == evsel && evsel->core.nr_members <= 1))) return -EINVAL; } else if (evsel == leader) { @@ -87,8 +87,8 @@ static int evlist__regroup(struct evlist *evlist, struct evsel *leader, struct e grp = false; evlist__for_each_entry(evlist, evsel) { if (grp) { - if (evsel->leader != leader) { - evsel->leader = leader; + if (!evsel__has_leader(evsel, leader)) { + evsel__set_leader(evsel, leader); if (leader->core.nr_members < 1) leader->core.nr_members = 1; leader->core.nr_members += 1; @@ -1231,11 +1231,11 @@ static void unleader_evsel(struct evlist *evlist, struct evsel *leader) /* Find new leader for the group */ evlist__for_each_entry(evlist, evsel) { - if (evsel->leader != leader || evsel == leader) + if (!evsel__has_leader(evsel, leader) || evsel == leader) continue; if (!new_leader) new_leader = evsel; - evsel->leader = new_leader; + evsel__set_leader(evsel, new_leader); } /* Update group information */ diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 21c8e71162b1..8150e03367bb 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -18,6 +18,7 @@ #include "evsel.h" #include "evlist.h" #include "target.h" +#include "cgroup.h" #include "cpumap.h" #include "thread_map.h" @@ -352,7 +353,7 @@ static int bperf_check_target(struct evsel *evsel, enum bperf_filter_type *filter_type, __u32 *filter_entry_cnt) { - if (evsel->leader->core.nr_members > 1) { + if 
(evsel->core.leader->nr_members > 1) { pr_err("bpf managed perf events do not yet support groups.\n"); return -1; } @@ -742,6 +743,8 @@ struct bpf_counter_ops bperf_ops = { .destroy = bperf__destroy, }; +extern struct bpf_counter_ops bperf_cgrp_ops; + static inline bool bpf_counter_skip(struct evsel *evsel) { return list_empty(&evsel->bpf_counter_list) && @@ -759,6 +762,8 @@ int bpf_counter__load(struct evsel *evsel, struct target *target) { if (target->bpf_str) evsel->bpf_counter_ops = &bpf_program_profiler_ops; + else if (cgrp_event_expanded && target->use_bpf) + evsel->bpf_counter_ops = &bperf_cgrp_ops; else if (target->use_bpf || evsel->bpf_counter || evsel__match_bpf_counter_events(evsel->name)) evsel->bpf_counter_ops = &bperf_ops; diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c new file mode 100644 index 000000000000..89aa5e71db1a --- /dev/null +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Copyright (c) 2021 Facebook */ +/* Copyright (c) 2021 Google */ + +#include <assert.h> +#include <limits.h> +#include <unistd.h> +#include <sys/file.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <linux/err.h> +#include <linux/zalloc.h> +#include <linux/perf_event.h> +#include <api/fs/fs.h> +#include <perf/bpf_perf.h> + +#include "affinity.h" +#include "bpf_counter.h" +#include "cgroup.h" +#include "counts.h" +#include "debug.h" +#include "evsel.h" +#include "evlist.h" +#include "target.h" +#include "cpumap.h" +#include "thread_map.h" + +#include "bpf_skel/bperf_cgroup.skel.h" + +static struct perf_event_attr cgrp_switch_attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CGROUP_SWITCHES, + .size = sizeof(cgrp_switch_attr), + .sample_period = 1, + .disabled = 1, +}; + +static struct evsel *cgrp_switch; +static struct bperf_cgroup_bpf *skel; + +#define FD(evt, cpu) (*(int *)xyarray__entry(evt->core.fd, cpu, 0)) + +static int 
bperf_load_program(struct evlist *evlist) +{ + struct bpf_link *link; + struct evsel *evsel; + struct cgroup *cgrp, *leader_cgrp; + __u32 i, cpu; + __u32 nr_cpus = evlist->core.all_cpus->nr; + int total_cpus = cpu__max_cpu(); + int map_size, map_fd; + int prog_fd, err; + + skel = bperf_cgroup_bpf__open(); + if (!skel) { + pr_err("Failed to open cgroup skeleton\n"); + return -1; + } + + skel->rodata->num_cpus = total_cpus; + skel->rodata->num_events = evlist->core.nr_entries / nr_cgroups; + + BUG_ON(evlist->core.nr_entries % nr_cgroups != 0); + + /* we need one copy of events per cpu for reading */ + map_size = total_cpus * evlist->core.nr_entries / nr_cgroups; + bpf_map__resize(skel->maps.events, map_size); + bpf_map__resize(skel->maps.cgrp_idx, nr_cgroups); + /* previous result is saved in a per-cpu array */ + map_size = evlist->core.nr_entries / nr_cgroups; + bpf_map__resize(skel->maps.prev_readings, map_size); + /* cgroup result needs all events (per-cpu) */ + map_size = evlist->core.nr_entries; + bpf_map__resize(skel->maps.cgrp_readings, map_size); + + set_max_rlimit(); + + err = bperf_cgroup_bpf__load(skel); + if (err) { + pr_err("Failed to load cgroup skeleton\n"); + goto out; + } + + if (cgroup_is_v2("perf_event") > 0) + skel->bss->use_cgroup_v2 = 1; + + err = -1; + + cgrp_switch = evsel__new(&cgrp_switch_attr); + if (evsel__open_per_cpu(cgrp_switch, evlist->core.all_cpus, -1) < 0) { + pr_err("Failed to open cgroup switches event\n"); + goto out; + } + + for (i = 0; i < nr_cpus; i++) { + link = bpf_program__attach_perf_event(skel->progs.on_cgrp_switch, + FD(cgrp_switch, i)); + if (IS_ERR(link)) { + pr_err("Failed to attach cgroup program\n"); + err = PTR_ERR(link); + goto out; + } + } + + /* + * Update cgrp_idx map from cgroup-id to event index. 
+ */ + cgrp = NULL; + i = 0; + + evlist__for_each_entry(evlist, evsel) { + if (cgrp == NULL || evsel->cgrp == leader_cgrp) { + leader_cgrp = evsel->cgrp; + evsel->cgrp = NULL; + + /* open single copy of the events w/o cgroup */ + err = evsel__open_per_cpu(evsel, evlist->core.all_cpus, -1); + if (err) { + pr_err("Failed to open first cgroup events\n"); + goto out; + } + + map_fd = bpf_map__fd(skel->maps.events); + for (cpu = 0; cpu < nr_cpus; cpu++) { + int fd = FD(evsel, cpu); + __u32 idx = evsel->core.idx * total_cpus + + evlist->core.all_cpus->map[cpu]; + + err = bpf_map_update_elem(map_fd, &idx, &fd, + BPF_ANY); + if (err < 0) { + pr_err("Failed to update perf_event fd\n"); + goto out; + } + } + + evsel->cgrp = leader_cgrp; + } + evsel->supported = true; + + if (evsel->cgrp == cgrp) + continue; + + cgrp = evsel->cgrp; + + if (read_cgroup_id(cgrp) < 0) { + pr_err("Failed to get cgroup id\n"); + err = -1; + goto out; + } + + map_fd = bpf_map__fd(skel->maps.cgrp_idx); + err = bpf_map_update_elem(map_fd, &cgrp->id, &i, BPF_ANY); + if (err < 0) { + pr_err("Failed to update cgroup index map\n"); + goto out; + } + + i++; + } + + /* + * bperf uses BPF_PROG_TEST_RUN to get accurate reading. 
Check + * whether the kernel support it + */ + prog_fd = bpf_program__fd(skel->progs.trigger_read); + err = bperf_trigger_reading(prog_fd, 0); + if (err) { + pr_warning("The kernel does not support test_run for raw_tp BPF programs.\n" + "Therefore, --for-each-cgroup might show inaccurate readings\n"); + err = 0; + } + +out: + return err; +} + +static int bperf_cgrp__load(struct evsel *evsel, + struct target *target __maybe_unused) +{ + static bool bperf_loaded = false; + + evsel->bperf_leader_prog_fd = -1; + evsel->bperf_leader_link_fd = -1; + + if (!bperf_loaded && bperf_load_program(evsel->evlist)) + return -1; + + bperf_loaded = true; + /* just to bypass bpf_counter_skip() */ + evsel->follower_skel = (struct bperf_follower_bpf *)skel; + + return 0; +} + +static int bperf_cgrp__install_pe(struct evsel *evsel __maybe_unused, + int cpu __maybe_unused, int fd __maybe_unused) +{ + /* nothing to do */ + return 0; +} + +/* + * trigger the leader prog on each cpu, so the cgrp_reading map could get + * the latest results. 
+ */ +static int bperf_cgrp__sync_counters(struct evlist *evlist) +{ + int i, cpu; + int nr_cpus = evlist->core.all_cpus->nr; + int prog_fd = bpf_program__fd(skel->progs.trigger_read); + + for (i = 0; i < nr_cpus; i++) { + cpu = evlist->core.all_cpus->map[i]; + bperf_trigger_reading(prog_fd, cpu); + } + + return 0; +} + +static int bperf_cgrp__enable(struct evsel *evsel) +{ + if (evsel->core.idx) + return 0; + + bperf_cgrp__sync_counters(evsel->evlist); + + skel->bss->enabled = 1; + return 0; +} + +static int bperf_cgrp__disable(struct evsel *evsel) +{ + if (evsel->core.idx) + return 0; + + bperf_cgrp__sync_counters(evsel->evlist); + + skel->bss->enabled = 0; + return 0; +} + +static int bperf_cgrp__read(struct evsel *evsel) +{ + struct evlist *evlist = evsel->evlist; + int i, cpu, nr_cpus = evlist->core.all_cpus->nr; + int total_cpus = cpu__max_cpu(); + struct perf_counts_values *counts; + struct bpf_perf_event_value *values; + int reading_map_fd, err = 0; + __u32 idx; + + if (evsel->core.idx) + return 0; + + bperf_cgrp__sync_counters(evsel->evlist); + + values = calloc(total_cpus, sizeof(*values)); + if (values == NULL) + return -ENOMEM; + + reading_map_fd = bpf_map__fd(skel->maps.cgrp_readings); + + evlist__for_each_entry(evlist, evsel) { + idx = evsel->core.idx; + err = bpf_map_lookup_elem(reading_map_fd, &idx, values); + if (err) { + pr_err("bpf map lookup falied: idx=%u, event=%s, cgrp=%s\n", + idx, evsel__name(evsel), evsel->cgrp->name); + goto out; + } + + for (i = 0; i < nr_cpus; i++) { + cpu = evlist->core.all_cpus->map[i]; + + counts = perf_counts(evsel->counts, i, 0); + counts->val = values[cpu].counter; + counts->ena = values[cpu].enabled; + counts->run = values[cpu].running; + } + } + +out: + free(values); + return err; +} + +static int bperf_cgrp__destroy(struct evsel *evsel) +{ + if (evsel->core.idx) + return 0; + + bperf_cgroup_bpf__destroy(skel); + evsel__delete(cgrp_switch); // it'll destroy on_switch progs too + + return 0; +} + +struct 
bpf_counter_ops bperf_cgrp_ops = { + .load = bperf_cgrp__load, + .enable = bperf_cgrp__enable, + .disable = bperf_cgrp__disable, + .read = bperf_cgrp__read, + .install_pe = bperf_cgrp__install_pe, + .destroy = bperf_cgrp__destroy, +}; diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c new file mode 100644 index 000000000000..292c430768b5 --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Facebook +// Copyright (c) 2021 Google +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +#define MAX_LEVELS 10 // max cgroup hierarchy level: arbitrary +#define MAX_EVENTS 32 // max events per cgroup: arbitrary + +// NOTE: many of map and global data will be modified before loading +// from the userspace (perf tool) using the skeleton helpers. + +// single set of global perf events to measure +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(int)); + __uint(max_entries, 1); +} events SEC(".maps"); + +// from cgroup id to event index +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 1); +} cgrp_idx SEC(".maps"); + +// per-cpu event snapshots to calculate delta +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); +} prev_readings SEC(".maps"); + +// aggregated event values for each cgroup (per-cpu) +// will be read from the user-space +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); +} cgrp_readings SEC(".maps"); + +const volatile __u32 num_events = 1; +const volatile __u32 num_cpus = 1; + +int enabled = 0; 
+int use_cgroup_v2 = 0; + +static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) +{ + struct task_struct *p = (void *)bpf_get_current_task(); + struct cgroup *cgrp; + register int i = 0; + __u32 *elem; + int level; + int cnt; + + cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_event_cgrp_id], cgroup); + level = BPF_CORE_READ(cgrp, level); + + for (cnt = 0; i < MAX_LEVELS; i++) { + __u64 cgrp_id; + + if (i > level) + break; + + // convert cgroup-id to a map index + cgrp_id = BPF_CORE_READ(cgrp, ancestor_ids[i]); + elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id); + if (!elem) + continue; + + cgrps[cnt++] = *elem; + if (cnt == size) + break; + } + + return cnt; +} + +static inline int get_cgroup_v2_idx(__u32 *cgrps, int size) +{ + register int i = 0; + __u32 *elem; + int cnt; + + for (cnt = 0; i < MAX_LEVELS; i++) { + __u64 cgrp_id = bpf_get_current_ancestor_cgroup_id(i); + + if (cgrp_id == 0) + break; + + // convert cgroup-id to a map index + elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id); + if (!elem) + continue; + + cgrps[cnt++] = *elem; + if (cnt == size) + break; + } + + return cnt; +} + +static int bperf_cgroup_count(void) +{ + register __u32 idx = 0; // to have it in a register to pass BPF verifier + register int c = 0; + struct bpf_perf_event_value val, delta, *prev_val, *cgrp_val; + __u32 cpu = bpf_get_smp_processor_id(); + __u32 cgrp_idx[MAX_LEVELS]; + int cgrp_cnt; + __u32 key, cgrp; + long err; + + if (use_cgroup_v2) + cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, MAX_LEVELS); + else + cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, MAX_LEVELS); + + for ( ; idx < MAX_EVENTS; idx++) { + if (idx == num_events) + break; + + // XXX: do not pass idx directly (for verifier) + key = idx; + // this is per-cpu array for diff + prev_val = bpf_map_lookup_elem(&prev_readings, &key); + if (!prev_val) { + val.counter = val.enabled = val.running = 0; + bpf_map_update_elem(&prev_readings, &key, &val, BPF_ANY); + + prev_val = bpf_map_lookup_elem(&prev_readings, &key); + if (!prev_val) 
+ continue; + } + + // read from global perf_event array + key = idx * num_cpus + cpu; + err = bpf_perf_event_read_value(&events, key, &val, sizeof(val)); + if (err) + continue; + + if (enabled) { + delta.counter = val.counter - prev_val->counter; + delta.enabled = val.enabled - prev_val->enabled; + delta.running = val.running - prev_val->running; + + for (c = 0; c < MAX_LEVELS; c++) { + if (c == cgrp_cnt) + break; + + cgrp = cgrp_idx[c]; + + // aggregate the result by cgroup + key = cgrp * num_events + idx; + cgrp_val = bpf_map_lookup_elem(&cgrp_readings, &key); + if (cgrp_val) { + cgrp_val->counter += delta.counter; + cgrp_val->enabled += delta.enabled; + cgrp_val->running += delta.running; + } else { + bpf_map_update_elem(&cgrp_readings, &key, + &delta, BPF_ANY); + } + } + } + + *prev_val = val; + } + return 0; +} + +// This will be attached to cgroup-switches event for each cpu +SEC("perf_events") +int BPF_PROG(on_cgrp_switch) +{ + return bperf_cgroup_count(); +} + +SEC("raw_tp/sched_switch") +int BPF_PROG(trigger_read) +{ + return bperf_cgroup_count(); +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index e819a4f30fc2..e99b41f9be45 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -18,6 +18,7 @@ #include <regex.h> int nr_cgroups; +bool cgrp_event_expanded; /* used to match cgroup name with patterns */ struct cgroup_name { @@ -458,7 +459,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, if (evsel__is_group_leader(pos)) leader = evsel; - evsel->leader = leader; + evsel__set_leader(evsel, leader); evlist__add(tmp_list, evsel); } @@ -484,6 +485,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, } ret = 0; + cgrp_event_expanded = true; out_err: evlist__delete(orig_list); diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index de5b272560ab..12256b78608c 100644 --- a/tools/perf/util/cgroup.h +++ 
b/tools/perf/util/cgroup.h @@ -18,6 +18,7 @@ struct cgroup { }; extern int nr_cgroups; /* number of explicit cgroups defined */ +extern bool cgrp_event_expanded; struct cgroup *cgroup__get(struct cgroup *cgroup); void cgroup__put(struct cgroup *cgroup); diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 32ad92d3e454..22f8326547eb 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2683,6 +2683,172 @@ static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, return metadata; } +/** + * Puts a fragment of an auxtrace buffer into the auxtrace queues based + * on the bounds of aux_event, if it matches with the buffer that's at + * file_offset. + * + * Normally, whole auxtrace buffers would be added to the queue. But we + * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder + * is reset across each buffer, so splitting the buffers up in advance has + * the same effect. + */ +static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz, + struct perf_record_aux *aux_event, struct perf_sample *sample) +{ + int err; + char buf[PERF_SAMPLE_MAX_SIZE]; + union perf_event *auxtrace_event_union; + struct perf_record_auxtrace *auxtrace_event; + union perf_event auxtrace_fragment; + __u64 aux_offset, aux_size; + + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + + /* + * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got + * from looping through the auxtrace index. 
+ */ + err = perf_session__peek_event(session, file_offset, buf, + PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL); + if (err) + return err; + auxtrace_event = &auxtrace_event_union->auxtrace; + if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE) + return -EINVAL; + + if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) || + auxtrace_event->header.size != sz) { + return -EINVAL; + } + + /* + * In per-thread mode, CPU is set to -1, but TID will be set instead. See + * auxtrace_mmap_params__set_idx(). Return 'not found' if neither CPU nor TID match. + */ + if ((auxtrace_event->cpu == (__u32) -1 && auxtrace_event->tid != sample->tid) || + auxtrace_event->cpu != sample->cpu) + return 1; + + if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) { + /* + * Clamp size in snapshot mode. The buffer size is clamped in + * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect + * the buffer size. + */ + aux_size = min(aux_event->aux_size, auxtrace_event->size); + + /* + * In this mode, the head also points to the end of the buffer so aux_offset + * needs to have the size subtracted so it points to the beginning as in normal mode + */ + aux_offset = aux_event->aux_offset - aux_size; + } else { + aux_size = aux_event->aux_size; + aux_offset = aux_event->aux_offset; + } + + if (aux_offset >= auxtrace_event->offset && + aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) { + /* + * If this AUX event was inside this buffer somewhere, create a new auxtrace event + * based on the sizes of the aux event, and queue that fragment. 
+ */ + auxtrace_fragment.auxtrace = *auxtrace_event; + auxtrace_fragment.auxtrace.size = aux_size; + auxtrace_fragment.auxtrace.offset = aux_offset; + file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size; + + pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64 + " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu); + return auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, + file_offset, NULL); + } + + /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ + return 1; +} + +static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event, + u64 offset __maybe_unused, void *data __maybe_unused) +{ + struct perf_sample sample; + int ret; + struct auxtrace_index_entry *ent; + struct auxtrace_index *auxtrace_index; + struct evsel *evsel; + size_t i; + + /* Don't care about any other events, we're only queuing buffers for AUX events */ + if (event->header.type != PERF_RECORD_AUX) + return 0; + + if (event->header.size < sizeof(struct perf_record_aux)) + return -EINVAL; + + /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */ + if (!event->aux.aux_size) + return 0; + + /* + * Parse the sample, we need the sample_id_all data that comes after the event so that the + * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID. + */ + evsel = evlist__event2evsel(session->evlist, event); + if (!evsel) + return -EINVAL; + ret = evsel__parse_sample(evsel, event, &sample); + if (ret) + return ret; + + /* + * Loop through the auxtrace index to find the buffer that matches up with this aux event. + */ + list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) { + for (i = 0; i < auxtrace_index->nr; i++) { + ent = &auxtrace_index->entries[i]; + ret = cs_etm__queue_aux_fragment(session, ent->file_offset, + ent->sz, &event->aux, &sample); + /* + * Stop search on error or successful values. 
Continue search on + * 1 ('not found') + */ + if (ret != 1) + return ret; + } + } + + /* + * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but + * don't exit with an error because it will still be possible to decode other aux records. + */ + pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64 + " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu); + return 0; +} + +static int cs_etm__queue_aux_records(struct perf_session *session) +{ + struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index, + struct auxtrace_index, list); + if (index && index->nr > 0) + return perf_session__peek_events(session, session->header.data_offset, + session->header.data_size, + cs_etm__queue_aux_records_cb, NULL); + + /* + * We would get here if there are no entries in the index (either no auxtrace + * buffers or no index at all). Fail silently as there is the possibility of + * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still + * false. + * + * In that scenario, buffers will not be split by AUX records. 
+ */ + return 0; +} + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session) { @@ -2883,7 +3049,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (err) goto err_delete_thread; - err = auxtrace_queues__process_index(&etm->queues, session); + err = cs_etm__queue_aux_records(session); if (err) goto err_delete_thread; diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index a9c102e8e3c0..f5d260b1df4d 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -20,7 +20,7 @@ static void close_dir(struct perf_data_file *files, int nr) { - while (--nr >= 1) { + while (--nr >= 0) { close(files[nr].fd); zfree(&files[nr].path); } diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index d786cf6b0cfa..ee15db2be2f4 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1154,8 +1154,10 @@ struct map *dso__new_map(const char *name) struct map *map = NULL; struct dso *dso = dso__new(name); - if (dso) + if (dso) { map = map__new2(0, dso); + dso__put(dso); + } return map; } diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 7d2ba8419b0c..609ca1671501 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -113,14 +113,14 @@ static Dwarf_Line *cu_getsrc_die(Dwarf_Die *cu_die, Dwarf_Addr addr) * * Find a line number and file name for @addr in @cu_die. 
*/ -int cu_find_lineinfo(Dwarf_Die *cu_die, unsigned long addr, - const char **fname, int *lineno) +int cu_find_lineinfo(Dwarf_Die *cu_die, Dwarf_Addr addr, + const char **fname, int *lineno) { Dwarf_Line *line; Dwarf_Die die_mem; Dwarf_Addr faddr; - if (die_find_realfunc(cu_die, (Dwarf_Addr)addr, &die_mem) + if (die_find_realfunc(cu_die, addr, &die_mem) && die_entrypc(&die_mem, &faddr) == 0 && faddr == addr) { *fname = dwarf_decl_file(&die_mem); @@ -128,7 +128,7 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, unsigned long addr, goto out; } - line = cu_getsrc_die(cu_die, (Dwarf_Addr)addr); + line = cu_getsrc_die(cu_die, addr); if (line && dwarf_lineno(line, lineno) == 0) { *fname = dwarf_linesrc(line, NULL, NULL); if (!*fname) diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index cb99646843a9..7ee0fa19b5c4 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -19,7 +19,7 @@ const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname); const char *cu_get_comp_dir(Dwarf_Die *cu_die); /* Get a line number and file name for given address */ -int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, +int cu_find_lineinfo(Dwarf_Die *cudie, Dwarf_Addr addr, const char **fname, int *lineno); /* Walk on functions at given address */ diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index ebc5e9ad35db..cec2e6cad8aa 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -186,10 +186,12 @@ void perf_env__exit(struct perf_env *env) zfree(&env->cpuid); zfree(&env->cmdline); zfree(&env->cmdline_argv); + zfree(&env->sibling_dies); zfree(&env->sibling_cores); zfree(&env->sibling_threads); zfree(&env->pmu_mappings); zfree(&env->cpu); + zfree(&env->cpu_pmu_caps); zfree(&env->numa_map); for (i = 0; i < env->nr_numa_nodes; i++) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6ba9664089bd..47581a237c7a 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -165,11 +165,9 @@ void 
evlist__delete(struct evlist *evlist) void evlist__add(struct evlist *evlist, struct evsel *entry) { - entry->evlist = evlist; - entry->idx = evlist->core.nr_entries; - entry->tracking = !entry->idx; - perf_evlist__add(&evlist->core, &entry->core); + entry->evlist = evlist; + entry->tracking = !entry->core.idx; if (evlist->core.nr_entries == 1) evlist__set_id_pos(evlist); @@ -194,7 +192,7 @@ void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list) } __evlist__for_each_entry_safe(list, temp, evsel) { - if (evsel->leader == leader) { + if (evsel__has_leader(evsel, leader)) { list_del_init(&evsel->core.node); evlist__add(evlist, evsel); } @@ -225,26 +223,9 @@ out: return err; } -void __evlist__set_leader(struct list_head *list) -{ - struct evsel *evsel, *leader; - - leader = list_entry(list->next, struct evsel, core.node); - evsel = list_entry(list->prev, struct evsel, core.node); - - leader->core.nr_members = evsel->idx - leader->idx + 1; - - __evlist__for_each_entry(list, evsel) { - evsel->leader = leader; - } -} - void evlist__set_leader(struct evlist *evlist) { - if (evlist->core.nr_entries) { - evlist->nr_groups = evlist->core.nr_entries > 1 ? 
1 : 0; - __evlist__set_leader(&evlist->core.entries); - } + perf_evlist__set_leader(&evlist->core); } int __evlist__add_default(struct evlist *evlist, bool precise) @@ -1626,7 +1607,7 @@ void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel) return; evlist__for_each_entry_safe(evlist, n, evsel) { - if (evsel->leader == move_evsel->leader) + if (evsel__leader(evsel) == evsel__leader(move_evsel)) list_move_tail(&evsel->core.node, &move); } @@ -1750,7 +1731,7 @@ bool evlist__exclude_kernel(struct evlist *evlist) */ void evlist__force_leader(struct evlist *evlist) { - if (!evlist->nr_groups) { + if (!evlist->core.nr_groups) { struct evsel *leader = evlist__first(evlist); evlist__set_leader(evlist); @@ -1763,7 +1744,8 @@ struct evsel *evlist__reset_weak_group(struct evlist *evsel_list, struct evsel * struct evsel *c2, *leader; bool is_open = true; - leader = evsel->leader; + leader = evsel__leader(evsel); + pr_debug("Weak group for %s/%d failed\n", leader->name, leader->core.nr_members); @@ -1774,10 +1756,10 @@ struct evsel *evlist__reset_weak_group(struct evlist *evsel_list, struct evsel * evlist__for_each_entry(evsel_list, c2) { if (c2 == evsel) is_open = false; - if (c2->leader == leader) { + if (evsel__has_leader(c2, leader)) { if (is_open && close) perf_evsel__close(&c2->core); - c2->leader = c2; + evsel__set_leader(c2, c2); c2->core.nr_members = 0; /* * Set this for all former members of the group @@ -2137,7 +2119,7 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel->idx == idx) + if (evsel->core.idx == idx) return evsel; } return NULL; @@ -2174,13 +2156,13 @@ void evlist__check_mem_load_aux(struct evlist *evlist) * any valid memory load information. 
*/ evlist__for_each_entry(evlist, evsel) { - leader = evsel->leader; + leader = evsel__leader(evsel); if (leader == evsel) continue; if (leader->name && strstr(leader->name, "mem-loads-aux")) { for_each_group_evsel(pos, leader) { - pos->leader = pos; + evsel__set_leader(pos, pos); pos->core.nr_members = 0; } } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 2073cfa79f79..5c22383489ae 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -50,7 +50,6 @@ enum bkw_mmap_state { struct evlist { struct perf_evlist core; - int nr_groups; bool enabled; int id_pos; int is_pos; @@ -202,7 +201,6 @@ void evlist__set_selected(struct evlist *evlist, struct evsel *evsel); int evlist__create_maps(struct evlist *evlist, struct target *target); int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); -void __evlist__set_leader(struct list_head *list); void evlist__set_leader(struct evlist *evlist); u64 __evlist__combined_sample_type(struct evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index b1c930eca40f..f61e5dd53f5d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -239,10 +239,8 @@ bool evsel__is_function_event(struct evsel *evsel) void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx) { - perf_evsel__init(&evsel->core, attr); - evsel->idx = idx; + perf_evsel__init(&evsel->core, attr, idx); evsel->tracking = !idx; - evsel->leader = evsel; evsel->unit = ""; evsel->scale = 1.0; evsel->max_events = ULONG_MAX; @@ -410,7 +408,7 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->cgrp = cgroup__get(orig->cgrp); evsel->tp_format = orig->tp_format; evsel->handler = orig->handler; - evsel->leader = orig->leader; + evsel->core.leader = orig->core.leader; evsel->max_events = orig->max_events; evsel->tool_event = orig->tool_event; @@ -1075,7 +1073,7 @@ void __weak arch_evsel__set_sample_weight(struct evsel *evsel) void evsel__config(struct evsel *evsel, 
struct record_opts *opts, struct callchain_param *callchain) { - struct evsel *leader = evsel->leader; + struct evsel *leader = evsel__leader(evsel); struct perf_event_attr *attr = &evsel->core.attr; int track = evsel->tracking; bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread; @@ -1593,7 +1591,7 @@ static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other, static int evsel__hybrid_group_cpu(struct evsel *evsel, int cpu) { - struct evsel *leader = evsel->leader; + struct evsel *leader = evsel__leader(evsel); if ((evsel__is_hybrid(evsel) && !evsel__is_hybrid(leader)) || (!evsel__is_hybrid(evsel) && evsel__is_hybrid(leader))) { @@ -1605,7 +1603,7 @@ static int evsel__hybrid_group_cpu(struct evsel *evsel, int cpu) static int get_group_fd(struct evsel *evsel, int cpu, int thread) { - struct evsel *leader = evsel->leader; + struct evsel *leader = evsel__leader(evsel); int fd; if (evsel__is_group_leader(evsel)) @@ -2851,3 +2849,23 @@ bool evsel__is_hybrid(struct evsel *evsel) { return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name); } + +struct evsel *evsel__leader(struct evsel *evsel) +{ + return container_of(evsel->core.leader, struct evsel, core); +} + +bool evsel__has_leader(struct evsel *evsel, struct evsel *leader) +{ + return evsel->core.leader == &leader->core; +} + +bool evsel__is_leader(struct evsel *evsel) +{ + return evsel__has_leader(evsel, evsel); +} + +void evsel__set_leader(struct evsel *evsel, struct evsel *leader) +{ + evsel->core.leader = &leader->core; +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index bdad52a06438..80383096d51c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -49,7 +49,6 @@ struct evsel { struct perf_evsel core; struct evlist *evlist; off_t id_offset; - int idx; int id_pos; int is_pos; unsigned int sample_size; @@ -119,7 +118,6 @@ struct evsel { bool reset_group; bool errored; struct hashmap *per_pkg_mask; - struct evsel *leader; int err; int 
cpu_iter; struct { @@ -368,7 +366,7 @@ static inline struct evsel *evsel__prev(struct evsel *evsel) */ static inline bool evsel__is_group_leader(const struct evsel *evsel) { - return evsel->leader == evsel; + return evsel->core.leader == &evsel->core; } /** @@ -406,19 +404,19 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, static inline int evsel__group_idx(struct evsel *evsel) { - return evsel->idx - evsel->leader->idx; + return evsel->core.idx - evsel->core.leader->idx; } /* Iterates group WITHOUT the leader. */ #define for_each_group_member(_evsel, _leader) \ for ((_evsel) = list_entry((_leader)->core.node.next, struct evsel, core.node); \ - (_evsel) && (_evsel)->leader == (_leader); \ + (_evsel) && (_evsel)->core.leader == (&_leader->core); \ (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node)) /* Iterates group WITH the leader. */ #define for_each_group_evsel(_evsel, _leader) \ for ((_evsel) = _leader; \ - (_evsel) && (_evsel)->leader == (_leader); \ + (_evsel) && (_evsel)->core.leader == (&_leader->core); \ (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node)) static inline bool evsel__has_branch_callstack(const struct evsel *evsel) @@ -463,4 +461,8 @@ int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); void evsel__zero_per_pkg(struct evsel *evsel); bool evsel__is_hybrid(struct evsel *evsel); +struct evsel *evsel__leader(struct evsel *evsel); +bool evsel__has_leader(struct evsel *evsel, struct evsel *leader); +bool evsel__is_leader(struct evsel *evsel); +void evsel__set_leader(struct evsel *evsel, struct evsel *leader); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 0158d2945bab..44249027507a 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -778,7 +778,7 @@ static int write_pmu_mappings(struct feat_fd *ff, static int write_group_desc(struct feat_fd *ff, struct evlist *evlist) { - u32 nr_groups = 
evlist->nr_groups; + u32 nr_groups = evlist->core.nr_groups; struct evsel *evsel; int ret; @@ -789,7 +789,7 @@ static int write_group_desc(struct feat_fd *ff, evlist__for_each_entry(evlist, evsel) { if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) { const char *name = evsel->group_name ?: "{anon_group}"; - u32 leader_idx = evsel->idx; + u32 leader_idx = evsel->core.idx; u32 nr_members = evsel->core.nr_members; ret = do_write_string(ff, name); @@ -1844,7 +1844,7 @@ static struct evsel *read_event_desc(struct feat_fd *ff) msz = sz; for (i = 0, evsel = events; i < nre; evsel++, i++) { - evsel->idx = i; + evsel->core.idx = i; /* * must read entire on-file attr struct to @@ -2379,7 +2379,7 @@ static struct evsel *evlist__find_by_index(struct evlist *evlist, int idx) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel->idx == idx) + if (evsel->core.idx == idx) return evsel; } @@ -2393,7 +2393,7 @@ static void evlist__set_event_name(struct evlist *evlist, struct evsel *event) if (!event->name) return; - evsel = evlist__find_by_index(evlist, event->idx); + evsel = evlist__find_by_index(evlist, event->core.idx); if (!evsel) return; @@ -2735,12 +2735,12 @@ static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused) * Rebuild group relationship based on the group_desc */ session = container_of(ff->ph, struct perf_session, header); - session->evlist->nr_groups = nr_groups; + session->evlist->core.nr_groups = nr_groups; i = nr = 0; evlist__for_each_entry(session->evlist, evsel) { - if (evsel->idx == (int) desc[i].leader_idx) { - evsel->leader = evsel; + if (evsel->core.idx == (int) desc[i].leader_idx) { + evsel__set_leader(evsel, evsel); /* {anon_group} is a dummy name */ if (strcmp(desc[i].name, "{anon_group}")) { evsel->group_name = desc[i].name; @@ -2758,7 +2758,7 @@ static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused) i++; } else if (nr) { /* This is a group member */ - evsel->leader = leader; + 
evsel__set_leader(evsel, leader); nr--; } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index cb2520abf261..5ab631702769 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -41,8 +41,11 @@ #define INTEL_PT_RETURN 1 -/* Maximum number of loops with no packets consumed i.e. stuck in a loop */ -#define INTEL_PT_MAX_LOOPS 10000 +/* + * Default maximum number of loops with no packets consumed i.e. stuck in a + * loop. + */ +#define INTEL_PT_MAX_LOOPS 100000 struct intel_pt_blk { struct intel_pt_blk *prev; @@ -220,6 +223,7 @@ struct intel_pt_decoder { uint64_t timestamp_insn_cnt; uint64_t sample_insn_cnt; uint64_t stuck_ip; + int max_loops; int no_progress; int stuck_ip_prd; int stuck_ip_cnt; @@ -315,6 +319,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->vm_tm_corr_dry_run = params->vm_tm_corr_dry_run; decoder->first_timestamp = params->first_timestamp; decoder->last_reliable_timestamp = params->first_timestamp; + decoder->max_loops = params->max_loops ? 
params->max_loops : INTEL_PT_MAX_LOOPS; decoder->flags = params->flags; @@ -483,7 +488,7 @@ static const char *intel_pt_err_msgs[] = { [INTEL_PT_ERR_OVR] = "Overflow packet", [INTEL_PT_ERR_LOST] = "Lost trace data", [INTEL_PT_ERR_UNK] = "Unknown error!", - [INTEL_PT_ERR_NELOOP] = "Never-ending loop", + [INTEL_PT_ERR_NELOOP] = "Never-ending loop (refer perf config intel-pt.max-loops)", }; int intel_pt__strerror(int code, char *buf, size_t buflen) @@ -1168,7 +1173,7 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder, decoder->stuck_ip = decoder->state.to_ip; decoder->stuck_ip_prd = 1; decoder->stuck_ip_cnt = 1; - } else if (cnt > INTEL_PT_MAX_LOOPS || + } else if (cnt > decoder->max_loops || decoder->state.to_ip == decoder->stuck_ip) { intel_pt_log_at("ERROR: Never-ending loop", decoder->state.to_ip); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 714c475808c0..4b5e79fcf557 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -270,6 +270,7 @@ struct intel_pt_params { uint32_t tsc_ctc_ratio_d; enum intel_pt_param_flags flags; unsigned int quick; + int max_loops; }; struct intel_pt_decoder; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 154a1077f22e..6f852b305e92 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -123,6 +123,7 @@ struct intel_pt { u64 noretcomp_bit; unsigned max_non_turbo_ratio; unsigned cbr2khz; + int max_loops; unsigned long num_events; @@ -1200,6 +1201,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.vm_time_correlation = pt->synth_opts.vm_time_correlation; params.vm_tm_corr_dry_run = pt->synth_opts.vm_tm_corr_dry_run; params.first_timestamp = pt->first_timestamp; + params.max_loops = pt->max_loops; if (pt->filts.cnt > 0) params.pgd_ip = intel_pt_pgd_ip; @@ -3431,6 +3433,9 @@ static int 
intel_pt_perf_config(const char *var, const char *value, void *data) if (!strcmp(var, "intel-pt.mispred-all")) pt->mispred_all = perf_config_bool(var, value); + if (!strcmp(var, "intel-pt.max-loops")) + perf_config_int(&pt->max_loops, var, value); + return 0; } diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c index 39062df02629..51424cdc3b68 100644 --- a/tools/perf/util/lzma.c +++ b/tools/perf/util/lzma.c @@ -69,7 +69,7 @@ int lzma_decompress_to_file(const char *input, int output_fd) if (ferror(infile)) { pr_err("lzma: read error: %s\n", strerror(errno)); - goto err_fclose; + goto err_lzma_end; } if (feof(infile)) @@ -83,7 +83,7 @@ int lzma_decompress_to_file(const char *input, int output_fd) if (writen(output_fd, buf_out, write_size) != write_size) { pr_err("lzma: write error: %s\n", strerror(errno)); - goto err_fclose; + goto err_lzma_end; } strm.next_out = buf_out; @@ -95,11 +95,13 @@ int lzma_decompress_to_file(const char *input, int output_fd) break; pr_err("lzma: failed %s\n", lzma_strerror(ret)); - goto err_fclose; + goto err_lzma_end; } } err = 0; +err_lzma_end: + lzma_end(&strm); err_fclose: fclose(infile); return err; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 8af693d9678c..72e7f3616157 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -192,6 +192,8 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (!(prot & PROT_EXEC)) dso__set_loaded(dso); } + + nsinfo__put(dso->nsinfo); dso->nsinfo = nsi; if (build_id__is_defined(bid)) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index d3cf2dee36c8..99d047c5ead0 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -219,9 +219,9 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, if (has_constraint && ev->weak_group) continue; /* Ignore event if already used and merging is disabled. 
*/ - if (metric_no_merge && test_bit(ev->idx, evlist_used)) + if (metric_no_merge && test_bit(ev->core.idx, evlist_used)) continue; - if (!has_constraint && ev->leader != current_leader) { + if (!has_constraint && !evsel__has_leader(ev, current_leader)) { /* * Start of a new group, discard the whole match and * start again. @@ -229,7 +229,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, matched_events = 0; memset(metric_events, 0, sizeof(struct evsel *) * idnum); - current_leader = ev->leader; + current_leader = evsel__leader(ev); } /* * Check for duplicate events with the same name. For example, @@ -269,7 +269,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, for (i = 0; i < idnum; i++) { ev = metric_events[i]; /* Don't free the used events. */ - set_bit(ev->idx, evlist_used); + set_bit(ev->core.idx, evlist_used); /* * The metric leader points to the identically named event in * metric_events. @@ -287,11 +287,11 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, * when then group is left. 
*/ if (!has_constraint && - ev->leader != metric_events[i]->leader && - evsel_same_pmu_or_none(ev->leader, metric_events[i]->leader)) + ev->core.leader != metric_events[i]->core.leader && + evsel_same_pmu_or_none(evsel__leader(ev), evsel__leader(metric_events[i]))) break; if (!strcmp(metric_events[i]->name, ev->name)) { - set_bit(ev->idx, evlist_used); + set_bit(ev->core.idx, evlist_used); ev->metric_leader = metric_events[i]; } } @@ -391,7 +391,7 @@ static int metricgroup__setup_events(struct list_head *groups, } evlist__for_each_entry_safe(perf_evlist, tmp, evsel) { - if (!test_bit(evsel->idx, evlist_used)) { + if (!test_bit(evsel->core.idx, evlist_used)) { evlist__remove(perf_evlist, evsel); evsel__delete(evsel); } @@ -1312,7 +1312,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, nd = rblist__entry(old_metric_events, i); old_me = container_of(nd, struct metric_event, nd); - evsel = evlist__find_evsel(evlist, old_me->evsel->idx); + evsel = evlist__find_evsel(evlist, old_me->evsel->core.idx); if (!evsel) return -EINVAL; new_me = metricgroup__lookup(new_metric_events, evsel, true); @@ -1320,7 +1320,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, return -ENOMEM; pr_debug("copying metric event for cgroup '%s': %s (idx=%d)\n", - cgrp ? cgrp->name : "root", evsel->name, evsel->idx); + cgrp ? 
cgrp->name : "root", evsel->name, evsel->core.idx); list_for_each_entry(old_expr, &old_me->head, nd) { new_expr = malloc(sizeof(*new_expr)); @@ -1363,7 +1363,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, /* copy evsel in the same position */ for (idx = 0; idx < nr; idx++) { evsel = old_expr->metric_events[idx]; - evsel = evlist__find_evsel(evlist, evsel->idx); + evsel = evlist__find_evsel(evlist, evsel->core.idx); if (evsel == NULL) { free(new_expr->metric_events); free(new_expr->metric_refs); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 84108c17f48d..e5eae23cfceb 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1740,7 +1740,7 @@ parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list, leader = list_first_entry(list, struct evsel, core.node); evsel = list_last_entry(list, struct evsel, core.node); - total_members = evsel->idx - leader->idx + 1; + total_members = evsel->core.idx - leader->core.idx + 1; leaders = calloc(total_members, sizeof(uintptr_t)); if (WARN_ON(!leaders)) @@ -1800,7 +1800,7 @@ parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list, __evlist__for_each_entry(list, evsel) { if (i >= nr_pmu) i = 0; - evsel->leader = (struct evsel *) leaders[i++]; + evsel__set_leader(evsel, (struct evsel *) leaders[i++]); } /* The number of members and group name are same for each group */ @@ -1833,7 +1833,7 @@ void parse_events__set_leader(char *name, struct list_head *list, if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state)) return; - __evlist__set_leader(list); + __perf_evlist__set_leader(list); leader = list_entry(list->next, struct evsel, core.node); leader->group_name = name ? 
strdup(name) : NULL; } @@ -2285,7 +2285,7 @@ int __parse_events(struct evlist *evlist, const char *str, if (!ret) { struct evsel *last; - evlist->nr_groups += parse_state.nr_groups; + evlist->core.nr_groups += parse_state.nr_groups; last = evlist__last(evlist); last->cmdline_group_boundary = true; diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index aba12a4d488e..9321bd0e2f76 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -316,7 +316,7 @@ event_pmu_name opt_pmu_config if (!strncmp(name, "uncore_", 7) && strncmp($1, "uncore_", 7)) name += 7; - if (!fnmatch(pattern, name, 0)) { + if (!perf_pmu__match(pattern, name, $1)) { if (parse_events_copy_term_list(orig_terms, &terms)) CLEANUP_YYABORT; if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true, false)) diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index 6eef6dfeaa57..756295dedccc 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -99,7 +99,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, grp_leader = evsel; if (grp_evt > -1) { - evsel->leader = grp_leader; + evsel__set_leader(evsel, grp_leader); grp_leader->core.nr_members++; grp_evt++; } @@ -110,7 +110,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, "cannot close a non-existing event group\n"); goto error; } - evlist->nr_groups++; + evlist->core.nr_groups++; grp_leader = NULL; grp_evt = -1; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 88c8ecdc60b0..a1bd7007a8b4 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -3,6 +3,7 @@ #include <linux/compiler.h> #include <linux/string.h> #include <linux/zalloc.h> +#include <linux/ctype.h> #include <subcmd/pager.h> #include <sys/types.h> #include <errno.h> @@ -17,6 +18,7 @@ #include <locale.h> #include <regex.h> #include <perf/cpumap.h> +#include <fnmatch.h> #include "debug.h" #include "evsel.h" #include "pmu.h" @@ -740,6 
+742,27 @@ struct pmu_events_map *__weak pmu_events_map__find(void) return perf_pmu__find_map(NULL); } +static bool perf_pmu__valid_suffix(char *pmu_name, char *tok) +{ + char *p; + + if (strncmp(pmu_name, tok, strlen(tok))) + return false; + + p = pmu_name + strlen(tok); + if (*p == 0) + return true; + + if (*p != '_') + return false; + + ++p; + if (*p == 0 || !isdigit(*p)) + return false; + + return true; +} + bool pmu_uncore_alias_match(const char *pmu_name, const char *name) { char *tmp = NULL, *tok, *str; @@ -768,7 +791,7 @@ bool pmu_uncore_alias_match(const char *pmu_name, const char *name) */ for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) { name = strstr(name, tok); - if (!name) { + if (!name || !perf_pmu__valid_suffix((char *)name, tok)) { res = false; goto out; } @@ -927,6 +950,13 @@ static struct perf_pmu *pmu_lookup(const char *name) LIST_HEAD(format); LIST_HEAD(aliases); __u32 type; + bool is_hybrid = perf_pmu__hybrid_mounted(name); + + /* + * Check pmu name for hybrid and the pmu may be invalid in sysfs + */ + if (!strncmp(name, "cpu_", 4) && !is_hybrid) + return NULL; /* * The pmu data we store & need consists of the pmu @@ -955,7 +985,7 @@ static struct perf_pmu *pmu_lookup(const char *name) pmu->is_uncore = pmu_is_uncore(name); if (pmu->is_uncore) pmu->id = pmu_id(name); - pmu->is_hybrid = perf_pmu__hybrid_mounted(name); + pmu->is_hybrid = is_hybrid; pmu->max_precise = pmu_max_precise(name); pmu_add_cpu_aliases(&aliases, pmu); pmu_add_sys_aliases(&aliases, pmu); @@ -1872,3 +1902,14 @@ bool perf_pmu__has_hybrid(void) return !list_empty(&perf_pmu__hybrid_pmus); } + +int perf_pmu__match(char *pattern, char *name, char *tok) +{ + if (fnmatch(pattern, name, 0)) + return -1; + + if (tok && !perf_pmu__valid_suffix(name, tok)) + return -1; + + return 0; +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index a790ef758171..926da483a141 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -133,5 +133,6 @@ void 
perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, char *name); bool perf_pmu__has_hybrid(void); +int perf_pmu__match(char *pattern, char *name, char *tok); #endif /* __PMU_H */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index c14e1d228e56..b2a02c9ab8ea 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -179,8 +179,10 @@ struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user) struct map *map; map = dso__new_map(target); - if (map && map->dso) + if (map && map->dso) { + nsinfo__put(map->dso->nsinfo); map->dso->nsinfo = nsinfo__get(nsi); + } return map; } else { return kernel_get_module_map(target); @@ -237,8 +239,8 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) clear_probe_trace_event(tevs + i); } -static bool kprobe_blacklist__listed(unsigned long address); -static bool kprobe_warn_out_range(const char *symbol, unsigned long address) +static bool kprobe_blacklist__listed(u64 address); +static bool kprobe_warn_out_range(const char *symbol, u64 address) { struct map *map; bool ret = false; @@ -398,8 +400,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, pr_debug("Symbol %s address found : %" PRIx64 "\n", pp->function, address); - ret = debuginfo__find_probe_point(dinfo, (unsigned long)address, - result); + ret = debuginfo__find_probe_point(dinfo, address, result); if (ret <= 0) ret = (!ret) ? 
-ENOENT : ret; else { @@ -587,7 +588,7 @@ static void debuginfo_cache__exit(void) } -static int get_text_start_address(const char *exec, unsigned long *address, +static int get_text_start_address(const char *exec, u64 *address, struct nsinfo *nsi) { Elf *elf; @@ -632,7 +633,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, bool is_kprobe) { struct debuginfo *dinfo = NULL; - unsigned long stext = 0; + u64 stext = 0; u64 addr = tp->address; int ret = -ENOENT; @@ -660,8 +661,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, dinfo = debuginfo_cache__open(tp->module, verbose <= 0); if (dinfo) - ret = debuginfo__find_probe_point(dinfo, - (unsigned long)addr, pp); + ret = debuginfo__find_probe_point(dinfo, addr, pp); else ret = -ENOENT; @@ -676,7 +676,7 @@ error: /* Adjust symbol name and address */ static int post_process_probe_trace_point(struct probe_trace_point *tp, - struct map *map, unsigned long offs) + struct map *map, u64 offs) { struct symbol *sym; u64 addr = tp->address - offs; @@ -719,7 +719,7 @@ post_process_offline_probe_trace_events(struct probe_trace_event *tevs, int ntevs, const char *pathname) { struct map *map; - unsigned long stext = 0; + u64 stext = 0; int i, ret = 0; /* Prepare a map for offline binary */ @@ -745,7 +745,7 @@ static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs, struct nsinfo *nsi) { int i, ret = 0; - unsigned long stext = 0; + u64 stext = 0; if (!exec) return 0; @@ -790,7 +790,7 @@ post_process_module_probe_trace_events(struct probe_trace_event *tevs, mod_name = find_module_name(module); for (i = 0; i < ntevs; i++) { ret = post_process_probe_trace_point(&tevs[i].point, - map, (unsigned long)text_offs); + map, text_offs); if (ret < 0) break; tevs[i].point.module = @@ -1534,7 +1534,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) * so tmp[1] should always valid (but could be '\0'). 
*/ if (tmp && !strncmp(tmp, "0x", 2)) { - pp->abs_address = strtoul(pp->function, &tmp, 0); + pp->abs_address = strtoull(pp->function, &tmp, 0); if (*tmp != '\0') { semantic_error("Invalid absolute address.\n"); return -EINVAL; @@ -1909,7 +1909,7 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev) argv[i] = NULL; argc -= 1; } else - tp->address = strtoul(fmt1_str, NULL, 0); + tp->address = strtoull(fmt1_str, NULL, 0); } else { /* Only the symbol-based probe has offset */ tp->symbol = strdup(fmt1_str); @@ -2155,7 +2155,7 @@ synthesize_uprobe_trace_def(struct probe_trace_point *tp, struct strbuf *buf) return -EINVAL; /* Use the tp->address for uprobes */ - err = strbuf_addf(buf, "%s:0x%lx", tp->module, tp->address); + err = strbuf_addf(buf, "%s:0x%" PRIx64, tp->module, tp->address); if (err >= 0 && tp->ref_ctr_offset) { if (!uprobe_ref_ctr_is_supported()) @@ -2170,7 +2170,7 @@ synthesize_kprobe_trace_def(struct probe_trace_point *tp, struct strbuf *buf) { if (!strncmp(tp->symbol, "0x", 2)) { /* Absolute address. See try_to_find_absolute_address() */ - return strbuf_addf(buf, "%s%s0x%lx", tp->module ?: "", + return strbuf_addf(buf, "%s%s0x%" PRIx64, tp->module ?: "", tp->module ? 
":" : "", tp->address); } else { return strbuf_addf(buf, "%s%s%s+%lu", tp->module ?: "", @@ -2269,7 +2269,7 @@ static int convert_to_perf_probe_point(struct probe_trace_point *tp, pp->function = strdup(tp->symbol); pp->offset = tp->offset; } else { - ret = e_snprintf(buf, 128, "0x%" PRIx64, (u64)tp->address); + ret = e_snprintf(buf, 128, "0x%" PRIx64, tp->address); if (ret < 0) return ret; pp->function = strdup(buf); @@ -2450,8 +2450,8 @@ void clear_probe_trace_event(struct probe_trace_event *tev) struct kprobe_blacklist_node { struct list_head list; - unsigned long start; - unsigned long end; + u64 start; + u64 end; char *symbol; }; @@ -2496,7 +2496,7 @@ static int kprobe_blacklist__load(struct list_head *blacklist) } INIT_LIST_HEAD(&node->list); list_add_tail(&node->list, blacklist); - if (sscanf(buf, "0x%lx-0x%lx", &node->start, &node->end) != 2) { + if (sscanf(buf, "0x%" PRIx64 "-0x%" PRIx64, &node->start, &node->end) != 2) { ret = -EINVAL; break; } @@ -2512,7 +2512,7 @@ static int kprobe_blacklist__load(struct list_head *blacklist) ret = -ENOMEM; break; } - pr_debug2("Blacklist: 0x%lx-0x%lx, %s\n", + pr_debug2("Blacklist: 0x%" PRIx64 "-0x%" PRIx64 ", %s\n", node->start, node->end, node->symbol); ret++; } @@ -2524,8 +2524,7 @@ static int kprobe_blacklist__load(struct list_head *blacklist) } static struct kprobe_blacklist_node * -kprobe_blacklist__find_by_address(struct list_head *blacklist, - unsigned long address) +kprobe_blacklist__find_by_address(struct list_head *blacklist, u64 address) { struct kprobe_blacklist_node *node; @@ -2553,7 +2552,7 @@ static void kprobe_blacklist__release(void) kprobe_blacklist__delete(&kprobe_blacklist); } -static bool kprobe_blacklist__listed(unsigned long address) +static bool kprobe_blacklist__listed(u64 address) { return !!kprobe_blacklist__find_by_address(&kprobe_blacklist, address); } @@ -3221,7 +3220,7 @@ static int try_to_find_absolute_address(struct perf_probe_event *pev, * In __add_probe_trace_events, a NULL symbol is 
interpreted as * invalid. */ - if (asprintf(&tp->symbol, "0x%lx", tp->address) < 0) + if (asprintf(&tp->symbol, "0x%" PRIx64, tp->address) < 0) goto errout; /* For kprobe, check range */ @@ -3232,7 +3231,7 @@ static int try_to_find_absolute_address(struct perf_probe_event *pev, goto errout; } - if (asprintf(&tp->realname, "abs_%lx", tp->address) < 0) + if (asprintf(&tp->realname, "abs_%" PRIx64, tp->address) < 0) goto errout; if (pev->target) { diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 65769d7949a3..8ad5b1579f1d 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -33,7 +33,7 @@ struct probe_trace_point { char *module; /* Module name */ unsigned long offset; /* Offset from symbol */ unsigned long ref_ctr_offset; /* SDT reference counter offset */ - unsigned long address; /* Actual address of the trace point */ + u64 address; /* Actual address of the trace point */ bool retprobe; /* Return probe flag */ }; @@ -70,7 +70,7 @@ struct perf_probe_point { bool retprobe; /* Return probe flag */ char *lazy_line; /* Lazy matching pattern */ unsigned long offset; /* Offset from function entry */ - unsigned long abs_address; /* Absolute address of the point */ + u64 abs_address; /* Absolute address of the point */ }; /* Perf probe probing argument field chain */ diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index f9a6cbcd6415..3d50de3217d5 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -377,11 +377,11 @@ int probe_file__del_events(int fd, struct strfilter *filter) ret = probe_file__get_events(fd, filter, namelist); if (ret < 0) - return ret; + goto out; ret = probe_file__del_strlist(fd, namelist); +out: strlist__delete(namelist); - return ret; } diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index b029c29ce227..50d861a80f57 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -118,12 +118,17 
@@ struct debuginfo *debuginfo__new(const char *path) char buf[PATH_MAX], nil = '\0'; struct dso *dso; struct debuginfo *dinfo = NULL; + struct build_id bid; /* Try to open distro debuginfo files */ dso = dso__new(path); if (!dso) goto out; + /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ + if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) + dso__set_build_id(dso, &bid); + for (type = distro_dwarf_types; !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND; type++) { @@ -663,7 +668,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod, } tp->offset = (unsigned long)(paddr - eaddr); - tp->address = (unsigned long)paddr; + tp->address = paddr; tp->symbol = strdup(symbol); if (!tp->symbol) return -ENOMEM; @@ -1702,7 +1707,7 @@ int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, } /* Reverse search */ -int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, +int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, struct perf_probe_point *ppt) { Dwarf_Die cudie, spdie, indie; @@ -1715,14 +1720,14 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, addr += baseaddr; /* Find cu die */ if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) { - pr_warning("Failed to find debug information for address %lx\n", + pr_warning("Failed to find debug information for address %" PRIx64 "\n", addr); ret = -EINVAL; goto end; } /* Find a corresponding line (filename and lineno) */ - cu_find_lineinfo(&cudie, addr, &fname, &lineno); + cu_find_lineinfo(&cudie, (Dwarf_Addr)addr, &fname, &lineno); /* Don't care whether it failed or not */ /* Find a corresponding function (name, baseline and baseaddr) */ @@ -1737,7 +1742,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, } fname = dwarf_decl_file(&spdie); - if (addr == (unsigned long)baseaddr) { + if (addr == baseaddr) { /* Function entry - Relative line number is 0 */ lineno = baseline; goto 
post; @@ -1783,7 +1788,7 @@ post: if (lineno) ppt->line = lineno - baseline; else if (basefunc) { - ppt->offset = addr - (unsigned long)baseaddr; + ppt->offset = addr - baseaddr; func = basefunc; } @@ -1823,8 +1828,7 @@ static int line_range_add_line(const char *src, unsigned int lineno, } static int line_range_walk_cb(const char *fname, int lineno, - Dwarf_Addr addr __maybe_unused, - void *data) + Dwarf_Addr addr, void *data) { struct line_finder *lf = data; const char *__fname; diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 2febb5875678..8bc1c80d3c1c 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -46,7 +46,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, struct probe_trace_event **tevs); /* Find a perf_probe_point from debuginfo */ -int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, +int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, struct perf_probe_point *ppt); int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 412f8e79e409..8feef3a05af7 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -1032,7 +1032,7 @@ static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist, Py_INCREF(pevsel); evsel = &((struct pyrf_evsel *)pevsel)->evsel; - evsel->idx = evlist->core.nr_entries; + evsel->core.idx = evlist->core.nr_entries; evlist__add(evlist, evsel); return Py_BuildValue("i", evlist->core.nr_entries); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 43e5b563dee8..bff669b615ee 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -25,12 +25,12 @@ */ static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evlist) { - struct evsel *leader = evsel->leader; + struct evsel *leader = evsel__leader(evsel); if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader) || 
is_mem_loads_aux_event(leader)) { evlist__for_each_entry(evlist, evsel) { - if (evsel->leader == leader && evsel != evsel->leader) + if (evsel__leader(evsel) == leader && evsel != evsel__leader(evsel)) return evsel; } } @@ -53,7 +53,7 @@ static u64 evsel__config_term_mask(struct evsel *evsel) static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist) { struct perf_event_attr *attr = &evsel->core.attr; - struct evsel *leader = evsel->leader; + struct evsel *leader = evsel__leader(evsel); struct evsel *read_sampler; u64 term_types, freq_mask; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 164d2f45028c..69129e2aa7a1 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -687,7 +687,7 @@ static void set_sample_datasrc_in_dict(PyObject *dict, _PyUnicode_FromString(decode)); } -static int regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size) +static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size) { unsigned int i = 0, r; int printed = 0; @@ -695,7 +695,7 @@ static int regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size) bf[0] = 0; if (!regs || !regs->regs) - return 0; + return; for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { u64 val = regs->regs[i++]; @@ -704,8 +704,6 @@ static int regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size) "%5s:0x%" PRIx64 " ", perf_reg_name(r), val); } - - return printed; } static void set_regs_in_dict(PyObject *dict, @@ -713,7 +711,16 @@ static void set_regs_in_dict(PyObject *dict, struct evsel *evsel) { struct perf_event_attr *attr = &evsel->core.attr; - char bf[512]; + + /* + * Here value 28 is a constant size which can be used to print + * one register value and its corresponds to: + * 16 chars is to specify 64 bit register in hexadecimal. 
+ * 2 chars is for appending "0x" to the hexadecimal value and + * 10 chars is for register name. + */ + int size = __sw_hweight64(attr->sample_regs_intr) * 28; + char bf[size]; regs_map(&sample->intr_regs, attr->sample_regs_intr, bf, sizeof(bf)); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e9c929a39973..51f727402912 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -306,6 +306,7 @@ void perf_session__delete(struct perf_session *session) evlist__delete(session->evlist); perf_data__close(session->data); } + trace_event__cleanup(&session->tevent); free(session); } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 88ce47f2547e..568a88c001c6 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -3370,7 +3370,7 @@ static void add_hpp_sort_string(struct strbuf *sb, struct hpp_dimension *s, int add_key(sb, s[i].name, llen); } -const char *sort_help(const char *prefix) +char *sort_help(const char *prefix) { struct strbuf sb; char *s; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 87a092645aa7..b67c469aba79 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -302,7 +302,7 @@ void reset_output_field(void); void sort__setup_elide(FILE *fp); void perf_hpp__set_elide(int idx, bool elide); -const char *sort_help(const char *prefix); +char *sort_help(const char *prefix); int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index c588a6b7a8db..588601000f3f 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -19,6 +19,7 @@ #include "util.h" #include "iostat.h" #include "pmu-hybrid.h" +#include "evlist-hybrid.h" #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" @@ -465,9 +466,11 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int config->csv_sep); if 
(counter->supported) { - config->print_free_counters_hint = 1; - if (is_mixed_hw_group(counter)) - config->print_mixed_hw_group_error = 1; + if (!evlist__has_hybrid(counter->evlist)) { + config->print_free_counters_hint = 1; + if (is_mixed_hw_group(counter)) + config->print_mixed_hw_group_error = 1; + } } fprintf(config->output, "%-*s%s", @@ -593,6 +596,18 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c } } +static bool is_uncore(struct evsel *evsel) +{ + struct perf_pmu *pmu = evsel__find_pmu(evsel); + + return pmu && pmu->is_uncore; +} + +static bool hybrid_uniquify(struct evsel *evsel) +{ + return perf_pmu__has_hybrid() && !is_uncore(evsel); +} + static bool collect_data(struct perf_stat_config *config, struct evsel *counter, void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data, bool first), @@ -601,7 +616,7 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter, if (counter->merged_stat) return false; cb(config, counter, data, true); - if (config->no_merge) + if (config->no_merge || hybrid_uniquify(counter)) uniquify_event_name(counter); else if (counter->auto_merge_stats) collect_all_aliases(config, counter, cb, data); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 39967a45f55b..34a7f5c1fff7 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -379,7 +379,7 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) evlist__for_each_entry(evsel_list, counter) { bool invalid = false; - leader = counter->leader; + leader = evsel__leader(counter); if (!counter->metric_expr) continue; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index d3ec2624e036..09ea334586f2 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -534,7 +534,7 @@ int create_perf_stat_counter(struct evsel *evsel, int cpu) { struct perf_event_attr *attr = &evsel->core.attr; - struct evsel *leader = evsel->leader; 
+ struct evsel *leader = evsel__leader(evsel); attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; diff --git a/tools/perf/util/stream.c b/tools/perf/util/stream.c index 4bd5e5a00aa5..545e44981a27 100644 --- a/tools/perf/util/stream.c +++ b/tools/perf/util/stream.c @@ -139,7 +139,7 @@ static int evlist__init_callchain_streams(struct evlist *evlist, hists__output_resort(hists, NULL); init_hot_callchain(hists, &es[i]); - es[i].evsel_idx = pos->idx; + es[i].evsel_idx = pos->core.idx; i++; } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a73345730ba9..31cd59a2b66e 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1074,14 +1074,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, return 0; } -int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, - struct symsrc *runtime_ss, int kmodule) +static int +dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, + struct symsrc *runtime_ss, int kmodule, int dynsym) { struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL; struct maps *kmaps = kmap ? map__kmaps(map) : NULL; struct map *curr_map = map; struct dso *curr_dso = dso; - Elf_Data *symstrs, *secstrs; + Elf_Data *symstrs, *secstrs, *secstrs_run, *secstrs_sym; uint32_t nr_syms; int err = -1; uint32_t idx; @@ -1098,34 +1099,15 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, if (kmap && !kmaps) return -1; - dso->symtab_type = syms_ss->type; - dso->is_64_bit = syms_ss->is_64_bit; - dso->rel = syms_ss->ehdr.e_type == ET_REL; - - /* - * Modules may already have symbols from kallsyms, but those symbols - * have the wrong values for the dso maps, so remove them. - */ - if (kmodule && syms_ss->symtab) - symbols__delete(&dso->symbols); - - if (!syms_ss->symtab) { - /* - * If the vmlinux is stripped, fail so we will fall back - * to using kallsyms. 
The vmlinux runtime symbols aren't - * of much use. - */ - if (dso->kernel) - goto out_elf_end; - - syms_ss->symtab = syms_ss->dynsym; - syms_ss->symshdr = syms_ss->dynshdr; - } - elf = syms_ss->elf; ehdr = syms_ss->ehdr; - sec = syms_ss->symtab; - shdr = syms_ss->symshdr; + if (dynsym) { + sec = syms_ss->dynsym; + shdr = syms_ss->dynshdr; + } else { + sec = syms_ss->symtab; + shdr = syms_ss->symshdr; + } if (elf_section_by_name(runtime_ss->elf, &runtime_ss->ehdr, &tshdr, ".text", NULL)) @@ -1150,8 +1132,16 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, if (sec_strndx == NULL) goto out_elf_end; - secstrs = elf_getdata(sec_strndx, NULL); - if (secstrs == NULL) + secstrs_run = elf_getdata(sec_strndx, NULL); + if (secstrs_run == NULL) + goto out_elf_end; + + sec_strndx = elf_getscn(elf, ehdr.e_shstrndx); + if (sec_strndx == NULL) + goto out_elf_end; + + secstrs_sym = elf_getdata(sec_strndx, NULL); + if (secstrs_sym == NULL) goto out_elf_end; nr_syms = shdr.sh_size / shdr.sh_entsize; @@ -1237,6 +1227,8 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, gelf_getshdr(sec, &shdr); + secstrs = secstrs_sym; + /* * We have to fallback to runtime when syms' section header has * NOBITS set. NOBITS results in file offset (sh_offset) not @@ -1249,6 +1241,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, goto out_elf_end; gelf_getshdr(sec, &shdr); + secstrs = secstrs_run; } if (is_label && !elf_sec__filter(&shdr, secstrs)) @@ -1312,6 +1305,50 @@ out_elf_end: return err; } +int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, + struct symsrc *runtime_ss, int kmodule) +{ + int nr = 0; + int err = -1; + + dso->symtab_type = syms_ss->type; + dso->is_64_bit = syms_ss->is_64_bit; + dso->rel = syms_ss->ehdr.e_type == ET_REL; + + /* + * Modules may already have symbols from kallsyms, but those symbols + * have the wrong values for the dso maps, so remove them. 
+ */ + if (kmodule && syms_ss->symtab) + symbols__delete(&dso->symbols); + + if (!syms_ss->symtab) { + /* + * If the vmlinux is stripped, fail so we will fall back + * to using kallsyms. The vmlinux runtime symbols aren't + * of much use. + */ + if (dso->kernel) + return err; + } else { + err = dso__load_sym_internal(dso, map, syms_ss, runtime_ss, + kmodule, 0); + if (err < 0) + return err; + nr = err; + } + + if (syms_ss->dynsym) { + err = dso__load_sym_internal(dso, map, syms_ss, runtime_ss, + kmodule, 1); + if (err < 0) + return err; + err += nr; + } + + return err; +} + static int elf_read_maps(Elf *elf, bool exe, mapfn_t mapfn, void *data) { GElf_Phdr phdr; diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index be8d8d4a4e08..6276ce0c0196 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -12,6 +12,8 @@ import sys import os import time +assert sys.version_info >= (3, 7), "Python version is too old" + from collections import namedtuple from enum import Enum, auto diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 90bc007f1f93..2c6f916ccbaf 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -6,15 +6,13 @@ # Author: Felix Guo <felixguoxiuping@gmail.com> # Author: Brendan Higgins <brendanhiggins@google.com> -from __future__ import annotations import importlib.util import logging import subprocess import os import shutil import signal -from typing import Iterator -from typing import Optional +from typing import Iterator, Optional, Tuple from contextlib import ExitStack @@ -208,7 +206,7 @@ def get_source_tree_ops(arch: str, cross_compile: Optional[str]) -> LinuxSourceT raise ConfigError(arch + ' is not a valid arch') def get_source_tree_ops_from_qemu_config(config_path: str, - cross_compile: Optional[str]) -> tuple[ + cross_compile: Optional[str]) -> Tuple[ str, LinuxSourceTreeOperations]: # The module name/path has very little to 
do with where the actual file # exists (I learned this through experimentation and could not find it diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index c3c524b79db8..b88db3f51dc5 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -338,9 +338,11 @@ def bubble_up_suite_errors(test_suites: Iterable[TestSuite]) -> TestStatus: def parse_test_result(lines: LineStream) -> TestResult: consume_non_diagnostic(lines) if not lines or not parse_tap_header(lines): - return TestResult(TestStatus.NO_TESTS, [], lines) + return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines) expected_test_suite_num = parse_test_plan(lines) - if not expected_test_suite_num: + if expected_test_suite_num == 0: + return TestResult(TestStatus.NO_TESTS, [], lines) + elif expected_test_suite_num is None: return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines) test_suites = [] for i in range(1, expected_test_suite_num + 1): diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index bdae0e5f6197..75045aa0f8a1 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -157,8 +157,18 @@ class KUnitParserTest(unittest.TestCase): kunit_parser.TestStatus.FAILURE, result.status) + def test_no_header(self): + empty_log = test_data_path('test_is_test_passed-no_tests_run_no_header.log') + with open(empty_log) as file: + result = kunit_parser.parse_run_tests( + kunit_parser.extract_tap_lines(file.readlines())) + self.assertEqual(0, len(result.suites)) + self.assertEqual( + kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS, + result.status) + def test_no_tests(self): - empty_log = test_data_path('test_is_test_passed-no_tests_run.log') + empty_log = test_data_path('test_is_test_passed-no_tests_run_with_header.log') with open(empty_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) @@ -173,7 
+183,7 @@ class KUnitParserTest(unittest.TestCase): with open(crash_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - print_mock.assert_any_call(StrContains('no tests run!')) + print_mock.assert_any_call(StrContains('could not parse test results!')) print_mock.stop() file.close() @@ -309,7 +319,7 @@ class KUnitJsonTest(unittest.TestCase): result["sub_groups"][1]["test_cases"][0]) def test_no_tests_json(self): - result = self._json_for('test_is_test_passed-no_tests_run.log') + result = self._json_for('test_is_test_passed-no_tests_run_with_header.log') self.assertEqual(0, len(result['sub_groups'])) class StrContains(str): diff --git a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run.log b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_no_header.log index ba69f5c94b75..ba69f5c94b75 100644 --- a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run.log +++ b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_no_header.log diff --git a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_with_header.log b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_with_header.log new file mode 100644 index 000000000000..5f48ee659d40 --- /dev/null +++ b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_with_header.log @@ -0,0 +1,2 @@ +TAP version 14 +1..0 diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index ee27d68d2a1c..b5940e6ca67c 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -715,6 +715,8 @@ out: bpf_object__close(obj); } +#include "tailcall_bpf2bpf4.skel.h" + /* test_tailcall_bpf2bpf_4 checks that tailcall counter is correctly preserved * across tailcalls combined with bpf2bpf calls. 
for making sure that tailcall * counter behaves correctly, bpf program will go through following flow: @@ -727,10 +729,15 @@ out: * the loop begins. At the end of the test make sure that the global counter is * equal to 31, because tailcall counter includes the first two tailcalls * whereas global counter is incremented only on loop presented on flow above. + * + * The noise parameter is used to insert bpf_map_update calls into the logic + * to force verifier to patch instructions. This allows us to ensure jump + * logic remains correct with instruction movement. */ -static void test_tailcall_bpf2bpf_4(void) +static void test_tailcall_bpf2bpf_4(bool noise) { - int err, map_fd, prog_fd, main_fd, data_fd, i, val; + int err, map_fd, prog_fd, main_fd, data_fd, i; + struct tailcall_bpf2bpf4__bss val; struct bpf_map *prog_array, *data_map; struct bpf_program *prog; struct bpf_object *obj; @@ -774,11 +781,6 @@ static void test_tailcall_bpf2bpf_4(void) goto out; } - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - &duration, &retval, NULL); - CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); - data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) return; @@ -788,9 +790,21 @@ static void test_tailcall_bpf2bpf_4(void) return; i = 0; + val.noise = noise; + val.count = 0; + err = bpf_map_update_elem(data_fd, &i, &val, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + &duration, &retval, NULL); + CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + i = 0; err = bpf_map_lookup_elem(data_fd, &i, &val); - CHECK(err || val != 31, "tailcall count", "err %d errno %d count %d\n", - err, errno, val); + CHECK(err || val.count != 31, "tailcall count", "err %d errno %d count %d\n", + err, errno, val.count); out: 
bpf_object__close(obj); @@ -815,5 +829,7 @@ void test_tailcalls(void) if (test__start_subtest("tailcall_bpf2bpf_3")) test_tailcall_bpf2bpf_3(); if (test__start_subtest("tailcall_bpf2bpf_4")) - test_tailcall_bpf2bpf_4(); + test_tailcall_bpf2bpf_4(false); + if (test__start_subtest("tailcall_bpf2bpf_5")) + test_tailcall_bpf2bpf_4(true); } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c index 77df6d4db895..e89368a50b97 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c @@ -3,6 +3,13 @@ #include <bpf/bpf_helpers.h> struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} nop_table SEC(".maps"); + +struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); __uint(max_entries, 3); __uint(key_size, sizeof(__u32)); @@ -10,10 +17,21 @@ struct { } jmp_table SEC(".maps"); int count = 0; +int noise = 0; + +__always_inline int subprog_noise(void) +{ + __u32 key = 0; + + bpf_map_lookup_elem(&nop_table, &key); + return 0; +} __noinline int subprog_tail_2(struct __sk_buff *skb) { + if (noise) + subprog_noise(); bpf_tail_call_static(skb, &jmp_table, 2); return skb->len * 3; } diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc index 2950bfbc6fce..adae72665500 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc @@ -39,6 +39,24 @@ grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \ reset_trigger +echo "Test histogram with sym modifier" + +echo 'hist:keys=call_site.sym' > events/kmem/kmalloc/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep '{ call_site: \[[0-9a-f][0-9a-f]*\] [_a-zA-Z][_a-zA-Z]* *}' events/kmem/kmalloc/hist > 
/dev/null || \ + fail "sym modifier on kmalloc call_site did not work" + +reset_trigger + +echo "Test histogram with sym-offset modifier" + +echo 'hist:keys=call_site.sym-offset' > events/kmem/kmalloc/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep '{ call_site: \[[0-9a-f][0-9a-f]*\] [_a-zA-Z][_a-zA-Z]*+0x[0-9a-f][0-9a-f]*' events/kmem/kmalloc/hist > /dev/null || \ + fail "sym-offset modifier on kmalloc call_site did not work" + +reset_trigger + echo "Test histogram with sort key" echo 'hist:keys=parent_pid,child_pid:sort=child_pid.ascending' > events/sched/sched_process_fork/trigger diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 06a351b4f93b..0709af0144c8 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -38,6 +38,7 @@ /x86_64/xen_vmcall_test /x86_64/xss_msr_test /x86_64/vmx_pmu_msrs_test +/access_tracking_perf_test /demand_paging_test /dirty_log_test /dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index b853be2ae3c6..5832f510a16c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -71,6 +71,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test +TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index a16c8f05366c..cc898181faab 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -1019,7 +1019,8 @@ static __u64 sve_rejects_set[] = { #define VREGS_SUBLIST \ { "vregs", .regs = vregs, 
.regs_n = ARRAY_SIZE(vregs), } #define PMU_SUBLIST \ - { "pmu", .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), } + { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \ + .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), } #define SVE_SUBLIST \ { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \ .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \ diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c new file mode 100644 index 000000000000..e2baa187a21e --- /dev/null +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * access_tracking_perf_test + * + * Copyright (C) 2021, Google, Inc. + * + * This test measures the performance effects of KVM's access tracking. + * Access tracking is driven by the MMU notifiers test_young, clear_young, and + * clear_flush_young. These notifiers do not have a direct userspace API, + * however the clear_young notifier can be triggered by marking a pages as idle + * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to + * enable access tracking on guest memory. + * + * To measure performance this test runs a VM with a configurable number of + * vCPUs that each touch every page in disjoint regions of memory. Performance + * is measured in the time it takes all vCPUs to finish touching their + * predefined region. + * + * Note that a deterministic correctness test of access tracking is not possible + * by using page_idle as it exists today. This is for a few reasons: + * + * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This + * means subsequent guest accesses are not guaranteed to see page table + * updates made by KVM until some time in the future. + * + * 2. page_idle only operates on LRU pages. Newly allocated pages are not + * immediately allocated to LRU lists. 
Instead they are held in a "pagevec", + * which is drained to LRU lists some time in the future. There is no + * userspace API to force this drain to occur. + * + * These limitations are worked around in this test by using a large enough + * region of memory for each vCPU such that the number of translations cached in + * the TLB and the number of pages held in pagevecs are a small fraction of the + * overall workload. And if either of those conditions are not true this test + * will fail rather than silently passing. + */ +#include <inttypes.h> +#include <limits.h> +#include <pthread.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "kvm_util.h" +#include "test_util.h" +#include "perf_test_util.h" +#include "guest_modes.h" + +/* Global variable used to synchronize all of the vCPU threads. */ +static int iteration = -1; + +/* Defines what vCPU threads should do during a given iteration. */ +static enum { + /* Run the vCPU to access all its memory. */ + ITERATION_ACCESS_MEMORY, + /* Mark the vCPU's memory idle in page_idle. */ + ITERATION_MARK_IDLE, +} iteration_work; + +/* Set to true when vCPU threads should exit. */ +static bool done; + +/* The iteration that was last completed by each vCPU. */ +static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; + +/* Whether to overlap the regions of memory vCPUs access. */ +static bool overlap_memory_access; + +struct test_params { + /* The backing source for the region of memory. */ + enum vm_mem_backing_src_type backing_src; + + /* The amount of memory to allocate for each vCPU. */ + uint64_t vcpu_memory_bytes; + + /* The number of vCPUs to create in the VM. 
*/ + int vcpus; +}; + +static uint64_t pread_uint64(int fd, const char *filename, uint64_t index) +{ + uint64_t value; + off_t offset = index * sizeof(value); + + TEST_ASSERT(pread(fd, &value, sizeof(value), offset) == sizeof(value), + "pread from %s offset 0x%" PRIx64 " failed!", + filename, offset); + + return value; + +} + +#define PAGEMAP_PRESENT (1ULL << 63) +#define PAGEMAP_PFN_MASK ((1ULL << 55) - 1) + +static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva) +{ + uint64_t hva = (uint64_t) addr_gva2hva(vm, gva); + uint64_t entry; + uint64_t pfn; + + entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize()); + if (!(entry & PAGEMAP_PRESENT)) + return 0; + + pfn = entry & PAGEMAP_PFN_MASK; + if (!pfn) { + print_skip("Looking up PFNs requires CAP_SYS_ADMIN"); + exit(KSFT_SKIP); + } + + return pfn; +} + +static bool is_page_idle(int page_idle_fd, uint64_t pfn) +{ + uint64_t bits = pread_uint64(page_idle_fd, "page_idle", pfn / 64); + + return !!((bits >> (pfn % 64)) & 1); +} + +static void mark_page_idle(int page_idle_fd, uint64_t pfn) +{ + uint64_t bits = 1ULL << (pfn % 64); + + TEST_ASSERT(pwrite(page_idle_fd, &bits, 8, 8 * (pfn / 64)) == 8, + "Set page_idle bits for PFN 0x%" PRIx64, pfn); +} + +static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id) +{ + uint64_t base_gva = perf_test_args.vcpu_args[vcpu_id].gva; + uint64_t pages = perf_test_args.vcpu_args[vcpu_id].pages; + uint64_t page; + uint64_t still_idle = 0; + uint64_t no_pfn = 0; + int page_idle_fd; + int pagemap_fd; + + /* If vCPUs are using an overlapping region, let vCPU 0 mark it idle. 
*/ + if (overlap_memory_access && vcpu_id) + return; + + page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); + TEST_ASSERT(page_idle_fd > 0, "Failed to open page_idle."); + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap."); + + for (page = 0; page < pages; page++) { + uint64_t gva = base_gva + page * perf_test_args.guest_page_size; + uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva); + + if (!pfn) { + no_pfn++; + continue; + } + + if (is_page_idle(page_idle_fd, pfn)) { + still_idle++; + continue; + } + + mark_page_idle(page_idle_fd, pfn); + } + + /* + * Assumption: Less than 1% of pages are going to be swapped out from + * under us during this test. + */ + TEST_ASSERT(no_pfn < pages / 100, + "vCPU %d: No PFN for %" PRIu64 " out of %" PRIu64 " pages.", + vcpu_id, no_pfn, pages); + + /* + * Test that at least 90% of memory has been marked idle (the rest might + * not be marked idle because the pages have not yet made it to an LRU + * list or the translations are still cached in the TLB). 90% is + * arbitrary; high enough that we ensure most memory access went through + * access tracking but low enough as to not make the test too brittle + * over time and across architectures. 
+ */ + TEST_ASSERT(still_idle < pages / 10, + "vCPU%d: Too many pages still idle (%"PRIu64 " out of %" + PRIu64 ").\n", + vcpu_id, still_idle, pages); + + close(page_idle_fd); + close(pagemap_fd); +} + +static void assert_ucall(struct kvm_vm *vm, uint32_t vcpu_id, + uint64_t expected_ucall) +{ + struct ucall uc; + uint64_t actual_ucall = get_ucall(vm, vcpu_id, &uc); + + TEST_ASSERT(expected_ucall == actual_ucall, + "Guest exited unexpectedly (expected ucall %" PRIu64 + ", got %" PRIu64 ")", + expected_ucall, actual_ucall); +} + +static bool spin_wait_for_next_iteration(int *current_iteration) +{ + int last_iteration = *current_iteration; + + do { + if (READ_ONCE(done)) + return false; + + *current_iteration = READ_ONCE(iteration); + } while (last_iteration == *current_iteration); + + return true; +} + +static void *vcpu_thread_main(void *arg) +{ + struct perf_test_vcpu_args *vcpu_args = arg; + struct kvm_vm *vm = perf_test_args.vm; + int vcpu_id = vcpu_args->vcpu_id; + int current_iteration = -1; + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + + while (spin_wait_for_next_iteration(&current_iteration)) { + switch (READ_ONCE(iteration_work)) { + case ITERATION_ACCESS_MEMORY: + vcpu_run(vm, vcpu_id); + assert_ucall(vm, vcpu_id, UCALL_SYNC); + break; + case ITERATION_MARK_IDLE: + mark_vcpu_memory_idle(vm, vcpu_id); + break; + }; + + vcpu_last_completed_iteration[vcpu_id] = current_iteration; + } + + return NULL; +} + +static void spin_wait_for_vcpu(int vcpu_id, int target_iteration) +{ + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != + target_iteration) { + continue; + } +} + +/* The type of memory accesses to perform in the VM. */ +enum access_type { + ACCESS_READ, + ACCESS_WRITE, +}; + +static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description) +{ + struct timespec ts_start; + struct timespec ts_elapsed; + int next_iteration; + int vcpu_id; + + /* Kick off the vCPUs by incrementing iteration.
*/ + next_iteration = ++iteration; + + clock_gettime(CLOCK_MONOTONIC, &ts_start); + + /* Wait for all vCPUs to finish the iteration. */ + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) + spin_wait_for_vcpu(vcpu_id, next_iteration); + + ts_elapsed = timespec_elapsed(ts_start); + pr_info("%-30s: %ld.%09lds\n", + description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec); +} + +static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access, + const char *description) +{ + perf_test_args.wr_fract = (access == ACCESS_READ) ? INT_MAX : 1; + sync_global_to_guest(vm, perf_test_args); + iteration_work = ITERATION_ACCESS_MEMORY; + run_iteration(vm, vcpus, description); +} + +static void mark_memory_idle(struct kvm_vm *vm, int vcpus) +{ + /* + * Even though this parallelizes the work across vCPUs, this is still a + * very slow operation because page_idle forces the test to mark one pfn + * at a time and the clear_young notifier serializes on the KVM MMU + * lock. + */ + pr_debug("Marking VM memory idle (slow)...\n"); + iteration_work = ITERATION_MARK_IDLE; + run_iteration(vm, vcpus, "Mark memory idle"); +} + +static pthread_t *create_vcpu_threads(int vcpus) +{ + pthread_t *vcpu_threads; + int i; + + vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0])); + TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads."); + + for (i = 0; i < vcpus; i++) { + vcpu_last_completed_iteration[i] = iteration; + pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main, + &perf_test_args.vcpu_args[i]); + } + + return vcpu_threads; +} + +static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus) +{ + int i; + + /* Set done to signal the vCPU threads to exit */ + done = true; + + for (i = 0; i < vcpus; i++) + pthread_join(vcpu_threads[i], NULL); +} + +static void run_test(enum vm_guest_mode mode, void *arg) +{ + struct test_params *params = arg; + struct kvm_vm *vm; + pthread_t *vcpu_threads; + int vcpus = params->vcpus; + + vm = perf_test_create_vm(mode, vcpus, 
params->vcpu_memory_bytes, + params->backing_src); + + perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes, + !overlap_memory_access); + + vcpu_threads = create_vcpu_threads(vcpus); + + pr_info("\n"); + access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory"); + + /* As a control, read and write to the populated memory first. */ + access_memory(vm, vcpus, ACCESS_WRITE, "Writing to populated memory"); + access_memory(vm, vcpus, ACCESS_READ, "Reading from populated memory"); + + /* Repeat on memory that has been marked as idle. */ + mark_memory_idle(vm, vcpus); + access_memory(vm, vcpus, ACCESS_WRITE, "Writing to idle memory"); + mark_memory_idle(vm, vcpus); + access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory"); + + terminate_vcpu_threads(vcpu_threads, vcpus); + free(vcpu_threads); + perf_test_destroy_vm(vm); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-m mode] [-b vcpu_bytes] [-v vcpus] [-o] [-s mem_type]\n", + name); + puts(""); + printf(" -h: Display this help message."); + guest_modes_help(); + printf(" -b: specify the size of the memory region which should be\n" + " dirtied by each vCPU. e.g. 
10M or 3G.\n" + " (default: 1G)\n"); + printf(" -v: specify the number of vCPUs to run.\n"); + printf(" -o: Overlap guest memory accesses instead of partitioning\n" + " them into a separate region of memory for each vCPU.\n"); + printf(" -s: specify the type of memory that should be used to\n" + " back the guest data region.\n\n"); + backing_src_help(); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + struct test_params params = { + .backing_src = VM_MEM_SRC_ANONYMOUS, + .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE, + .vcpus = 1, + }; + int page_idle_fd; + int opt; + + guest_modes_append_default(); + + while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) { + switch (opt) { + case 'm': + guest_modes_cmdline(optarg); + break; + case 'b': + params.vcpu_memory_bytes = parse_size(optarg); + break; + case 'v': + params.vcpus = atoi(optarg); + break; + case 'o': + overlap_memory_access = true; + break; + case 's': + params.backing_src = parse_backing_src_type(optarg); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); + if (page_idle_fd < 0) { + print_skip("CONFIG_IDLE_PAGE_TRACKING is not enabled"); + exit(KSFT_SKIP); + } + close(page_idle_fd); + + for_each_guest_mode(run_test, &params); + + return 0; +} diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 04a2641261be..80cbd3a748c0 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -312,6 +312,7 @@ int main(int argc, char *argv[]) break; case 'o': p.partition_vcpu_memory_access = false; + break; case 's': p.backing_src = parse_backing_src_type(optarg); break; diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 615ab254899d..010b59b13917 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++
b/tools/testing/selftests/kvm/include/kvm_util.h @@ -45,6 +45,7 @@ enum vm_guest_mode { VM_MODE_P40V48_64K, VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ VM_MODE_P47V64_4K, + VM_MODE_P44V64_4K, NUM_VM_MODES, }; @@ -62,7 +63,7 @@ enum vm_guest_mode { #elif defined(__s390x__) -#define VM_MODE_DEFAULT VM_MODE_P47V64_4K +#define VM_MODE_DEFAULT VM_MODE_P44V64_4K #define MIN_PAGE_SHIFT 12U #define ptes_per_page(page_size) ((page_size) / 16) diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 9f49f6caafe5..632b74d6b3ca 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -401,7 +401,7 @@ unexpected_exception: void vm_init_descriptor_tables(struct kvm_vm *vm) { vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers), - vm->page_size, 0, 0); + vm->page_size); *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; } diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index 25bff307c71f..c330f414ef96 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -22,6 +22,22 @@ void guest_modes_append_default(void) } } #endif +#ifdef __s390x__ + { + int kvm_fd, vm_fd; + struct kvm_s390_vm_cpu_processor info; + + kvm_fd = open_kvm_dev_path_or_exit(); + vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0); + kvm_device_access(vm_fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_PROCESSOR, &info, false); + close(vm_fd); + close(kvm_fd); + /* Starting with z13 we have 47bits of physical address */ + if (info.ibc >= 0x30) + guest_mode_append(VM_MODE_P47V64_4K, true, true); + } +#endif } void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 5b56b57b3c20..10a8ed691c66 100644 --- 
a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -176,6 +176,7 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", + [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", }; _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); @@ -194,6 +195,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { { 40, 48, 0x10000, 16 }, { 0, 0, 0x1000, 12 }, { 47, 64, 0x1000, 12 }, + { 44, 64, 0x1000, 12 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); @@ -282,6 +284,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) case VM_MODE_P47V64_4K: vm->pgtable_levels = 5; break; + case VM_MODE_P44V64_4K: + vm->pgtable_levels = 5; + break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", mode); } diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 85b18bb8f762..72a1c9b4882c 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -377,7 +377,8 @@ static void test_add_max_memory_regions(void) (max_mem_slots - 1), MEM_REGION_SIZE >> 10); mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment, - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index b0031f2d38fd..ecec30865a74 100644 --- a/tools/testing/selftests/kvm/steal_time.c 
+++ b/tools/testing/selftests/kvm/steal_time.c @@ -320,7 +320,7 @@ int main(int ac, char **av) run_delay = get_run_delay(); pthread_create(&thread, &attr, do_steal_time, NULL); do - pthread_yield(); + sched_yield(); while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS); pthread_join(thread, NULL); run_delay = get_run_delay() - run_delay; diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 42bd658f52a8..af27c7e829c1 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -615,7 +615,7 @@ int main(void) vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); pr_info("Testing access to Hyper-V specific MSRs\n"); guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva), diff --git a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c index 523371cf8e8f..da2325fcad87 100644 --- a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c +++ b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c @@ -71,7 +71,7 @@ static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val) /* Set up a #PF handler to eat the RSVD #PF and signal all done! 
*/ vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_handle_exception(vm, PF_VECTOR, guest_pf_handler); + vm_install_exception_handler(vm, PF_VECTOR, guest_pf_handler); r = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r); diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index c1f831803ad2..d0fe2fdce58c 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -53,15 +53,28 @@ static inline void sync_with_host(uint64_t phase) : "+a" (phase)); } -void self_smi(void) +static void self_smi(void) { x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI); } -void guest_code(void *arg) +static void l2_guest_code(void) { + sync_with_host(8); + + sync_with_host(10); + + vmcall(); +} + +static void guest_code(void *arg) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; uint64_t apicbase = rdmsr(MSR_IA32_APICBASE); + struct svm_test_data *svm = arg; + struct vmx_pages *vmx_pages = arg; sync_with_host(1); @@ -74,21 +87,50 @@ void guest_code(void *arg) sync_with_host(4); if (arg) { - if (cpu_has_svm()) - generic_svm_setup(arg, NULL, NULL); - else - GUEST_ASSERT(prepare_for_vmx_operation(arg)); + if (cpu_has_svm()) { + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + } else { + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + } sync_with_host(5); self_smi(); sync_with_host(7); + + if (cpu_has_svm()) { + run_guest(svm->vmcb, svm->vmcb_gpa); + svm->vmcb->save.rip += 3; + run_guest(svm->vmcb, svm->vmcb_gpa); + } else { + vmlaunch(); + vmresume(); + } + + /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */ + sync_with_host(12); } sync_with_host(DONE); } +void inject_smi(struct kvm_vm *vm) +{ + 
struct kvm_vcpu_events events; + + vcpu_events_get(vm, VCPU_ID, &events); + + events.smi.pending = 1; + events.flags |= KVM_VCPUEVENT_VALID_SMM; + + vcpu_events_set(vm, VCPU_ID, &events); +} + int main(int argc, char *argv[]) { vm_vaddr_t nested_gva = 0; @@ -147,6 +189,22 @@ int main(int argc, char *argv[]) "Unexpected stage: #%x, got %x", stage, stage_reported); + /* + * Enter SMM during L2 execution and check that we correctly + * return from it. Do not perform save/restore while in SMM yet. + */ + if (stage == 8) { + inject_smi(vm); + continue; + } + + /* + * Perform save/restore while the guest is in SMM triggered + * during L2 execution. + */ + if (stage == 10) + inject_smi(vm); + state = vcpu_save_state(vm, VCPU_ID); kvm_vm_release(vm); kvm_vm_restart(vm, O_RDWR); diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh index b37585e6aa38..46a97f318f58 100755 --- a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh +++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh @@ -282,7 +282,9 @@ done # echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error for memory in `hotpluggable_online_memory`; do - offline_memory_expect_fail $memory + if [ $((RANDOM % 100)) -lt $ratio ]; then + offline_memory_expect_fail $memory + fi done echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index c19ecc6a8614..ecbf57f264ed 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -313,9 +313,10 @@ check_exception() fi log_test $? 
0 "IPv4: ${desc}" - if [ "$with_redirect" = "yes" ]; then + # No PMTU info for test "redirect" and "mtu exception plus redirect" + if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ - grep -q "${H2_N2_IP6} from :: via ${R2_LLADDR} dev br0.*${mtu}" + grep -v "mtu" | grep -q "${H2_N2_IP6} .*via ${R2_LLADDR} dev br0" elif [ -n "${mtu}" ]; then ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -q "${mtu}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 9a191c1a5de8..f02f4de2f3a0 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1409,7 +1409,7 @@ syncookies_tests() ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr "subflows limited by server w cookies" 2 2 1 + chk_join_nr "subflows limited by server w cookies" 2 1 1 # test signal address with cookies reset_with_cookies diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 6365c7fd1262..bd6288302094 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -11,9 +11,11 @@ #include <sys/socket.h> #include <sys/wait.h> #include <linux/tcp.h> +#include <linux/udp.h> #include <arpa/inet.h> #include <net/if.h> #include <netinet/in.h> +#include <netinet/ip.h> #include <netdb.h> #include <fcntl.h> #include <libgen.h> @@ -27,6 +29,10 @@ #include <time.h> #include <errno.h> +#include <linux/xfrm.h> +#include <linux/ipsec.h> +#include <linux/pfkeyv2.h> + #ifndef IPV6_UNICAST_IF #define IPV6_UNICAST_IF 76 #endif @@ -114,6 +120,9 @@ struct sock_args { struct in_addr in; struct in6_addr in6; } expected_raddr; + + /* ESP in UDP encap test */ + int use_xfrm; }; static int server_mode; @@ -1346,6 +1355,41 @@ static 
int bind_socket(int sd, struct sock_args *args) return 0; } +static int config_xfrm_policy(int sd, struct sock_args *args) +{ + struct xfrm_userpolicy_info policy = {}; + int type = UDP_ENCAP_ESPINUDP; + int xfrm_af = IP_XFRM_POLICY; + int level = SOL_IP; + + if (args->type != SOCK_DGRAM) { + log_error("Invalid socket type. Only DGRAM could be used for XFRM\n"); + return 1; + } + + policy.action = XFRM_POLICY_ALLOW; + policy.sel.family = args->version; + if (args->version == AF_INET6) { + xfrm_af = IPV6_XFRM_POLICY; + level = SOL_IPV6; + } + + policy.dir = XFRM_POLICY_OUT; + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) + return 1; + + policy.dir = XFRM_POLICY_IN; + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) + return 1; + + if (setsockopt(sd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type)) < 0) { + log_err_errno("Failed to set xfrm encap"); + return 1; + } + + return 0; +} + static int lsock_init(struct sock_args *args) { long flags; @@ -1389,6 +1433,11 @@ static int lsock_init(struct sock_args *args) if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0) log_err_errno("Failed to set close-on-exec flag"); + if (args->use_xfrm && config_xfrm_policy(sd, args)) { + log_err_errno("Failed to set xfrm policy"); + goto err; + } + out: return sd; @@ -1772,7 +1821,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) return client_status; } -#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6L:0:1:2:3:Fbq" +#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq" static void print_usage(char *prog) { @@ -1795,6 +1844,7 @@ static void print_usage(char *prog) " -D|R datagram (D) / raw (R) socket (default stream)\n" " -l addr local address to bind to in server mode\n" " -c addr local address to bind to in client mode\n" + " -x configure XFRM policy on socket\n" "\n" " -d dev bind socket to given device name\n" " -I dev bind socket to given device name - server mode\n" @@ -1966,6 +2016,9 @@ int main(int argc, char 
*argv[]) case 'q': quiet = 1; break; + case 'x': + args.use_xfrm = 1; + break; default: print_usage(argv[0]); return 1; diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 64cd2e23c568..543ad7513a8e 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -118,6 +118,16 @@ # below for IPv6 doesn't apply here, because, on IPv4, administrative MTU # changes alone won't affect PMTU # +# - pmtu_vti4_udp_exception +# Same as pmtu_vti4_exception, but using ESP-in-UDP +# +# - pmtu_vti4_udp_routed_exception +# Set up vti tunnel on top of veth connected through routing namespace and +# add xfrm states and policies with ESP-in-UDP encapsulation. Check that +# route exception is not created if link layer MTU is not exceeded, then +# lower MTU on second part of routed environment and check that exception +# is created with the expected PMTU. +# # - pmtu_vti6_exception # Set up vti6 tunnel on top of veth, with xfrm states and policies, in two # namespaces with matching endpoints. Check that route exception is @@ -125,6 +135,13 @@ # decrease and increase MTU of tunnel, checking that route exception PMTU # changes accordingly # +# - pmtu_vti6_udp_exception +# Same as pmtu_vti6_exception, but using ESP-in-UDP +# +# - pmtu_vti6_udp_routed_exception +# Same as pmtu_vti6_udp_routed_exception but with routing between vti +# endpoints +# # - pmtu_vti4_default_mtu # Set up vti4 tunnel on top of veth, in two namespaces with matching # endpoints. 
Check that MTU assigned to vti interface is the MTU of the @@ -224,6 +241,10 @@ tests=" pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1 pmtu_vti6_exception vti6: PMTU exceptions 0 pmtu_vti4_exception vti4: PMTU exceptions 0 + pmtu_vti6_udp_exception vti6: PMTU exceptions (ESP-in-UDP) 0 + pmtu_vti4_udp_exception vti4: PMTU exceptions (ESP-in-UDP) 0 + pmtu_vti6_udp_routed_exception vti6: PMTU exceptions, routed (ESP-in-UDP) 0 + pmtu_vti4_udp_routed_exception vti4: PMTU exceptions, routed (ESP-in-UDP) 0 pmtu_vti4_default_mtu vti4: default MTU assignment 0 pmtu_vti6_default_mtu vti6: default MTU assignment 0 pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0 @@ -246,7 +267,6 @@ ns_b="ip netns exec ${NS_B}" ns_c="ip netns exec ${NS_C}" ns_r1="ip netns exec ${NS_R1}" ns_r2="ip netns exec ${NS_R2}" - # Addressing and routing for tests with routers: four network segments, with # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an # identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2). 
@@ -279,7 +299,6 @@ routes=" A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2 B default ${prefix6}:${b_r1}::2 " - USE_NH="no" # ns family nh id destination gateway nexthops=" @@ -326,6 +345,7 @@ dummy6_mask="64" err_buf= tcpdump_pids= +nettest_pids= err() { err_buf="${err_buf}${1} @@ -548,6 +568,14 @@ setup_vti6() { setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} } +setup_vti4routed() { + setup_vti 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask} +} + +setup_vti6routed() { + setup_vti 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} +} + setup_vxlan_or_geneve() { type="${1}" a_addr="${2}" @@ -619,18 +647,36 @@ setup_xfrm() { proto=${1} veth_a_addr="${2}" veth_b_addr="${3}" + encap=${4} - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1 - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} || return 1 + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 
'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel } +setup_nettest_xfrm() { + which nettest >/dev/null + if [ $? -ne 0 ]; then + echo "'nettest' command not found; skipping tests" + return 1 + fi + + [ ${1} -eq 6 ] && proto="-6" || proto="" + port=${2} + + run_cmd ${ns_a} nettest ${proto} -q -D -s -x -p ${port} -t 5 & + nettest_pids="${nettest_pids} $!" + + run_cmd ${ns_b} nettest ${proto} -q -D -s -x -p ${port} -t 5 & + nettest_pids="${nettest_pids} $!" 
+} + setup_xfrm4() { setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} } @@ -639,6 +685,26 @@ setup_xfrm6() { setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} } +setup_xfrm4udp() { + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 4 4500 +} + +setup_xfrm6udp() { + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 6 4500 +} + +setup_xfrm4udprouted() { + setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 4 4500 +} + +setup_xfrm6udprouted() { + setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 6 4500 +} + setup_routing_old() { for i in ${routes}; do [ "${ns}" = "" ] && ns="${i}" && continue @@ -823,6 +889,11 @@ cleanup() { done tcpdump_pids= + for pid in ${nettest_pids}; do + kill ${pid} + done + nettest_pids= + for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do ip netns del ${n} 2> /dev/null done @@ -1432,6 +1503,135 @@ test_pmtu_vti6_exception() { return ${fail} } +test_pmtu_vti4_udp_exception() { + setup namespaces veth vti4 xfrm4udp || return $ksft_skip + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti4_a "${ns_b}" vti4_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 20)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 28)) + + mtu "${ns_a}" veth_a ${veth_mtu} + mtu "${ns_b}" veth_b ${veth_mtu} + mtu "${ns_a}" vti4_a ${vti_mtu} + mtu "${ns_b}" vti4_b ${vti_mtu} + + # Send DF packet without exceeding link layer MTU, check that no + # exception is created + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now 
exceed link layer MTU by one byte, check that exception is created + # with the right PMTU value + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" +} + +test_pmtu_vti6_udp_exception() { + setup namespaces veth vti6 xfrm6udp || return $ksft_skip + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti6_a "${ns_b}" vti6_b + fail=0 + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_a 4000 + mtu "${ns_b}" veth_b 4000 + mtu "${ns_a}" vti6_a 5000 + mtu "${ns_b}" vti6_b 5000 + run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} + + # Check that exception was created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1 + + # Decrease tunnel MTU, check for PMTU decrease in route exception + mtu "${ns_a}" vti6_a 3000 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1 + + # Increase tunnel MTU, check for PMTU increase in route exception + mtu "${ns_a}" vti6_a 9000 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1 + + return ${fail} +} + +test_pmtu_vti4_udp_routed_exception() { + setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ + "${ns_a}" vti4_a "${ns_b}" vti4_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 20)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 28)) + + mtu "${ns_a}" veth_A-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-A ${veth_mtu} + 
mtu "${ns_b}" veth_B-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-B ${veth_mtu} + + mtu "${ns_a}" vti4_a ${vti_mtu} + mtu "${ns_b}" vti4_b ${vti_mtu} + + # Send DF packet without exceeding link layer MTU, check that no + # exception is created + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created + # with the right PMTU value + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" +} + +test_pmtu_vti6_udp_routed_exception() { + setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ + "${ns_a}" vti6_a "${ns_b}" vti6_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 40)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 48)) + + mtu "${ns_a}" veth_A-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-A ${veth_mtu} + mtu "${ns_b}" veth_B-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-B ${veth_mtu} + + # mtu "${ns_a}" vti6_a ${vti_mtu} + # mtu "${ns_b}" vti6_b ${vti_mtu} + + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr} + + # Check that exception was not created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now decrease link layer MTU by 8 bytes on R1, check that exception 
is created + # with the right PMTU value + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" + +} + test_pmtu_vti4_default_mtu() { setup namespaces veth vti4 || return $ksft_skip diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c index 21091be70688..aee631c5284e 100644 --- a/tools/testing/selftests/net/timestamping.c +++ b/tools/testing/selftests/net/timestamping.c @@ -47,7 +47,7 @@ static void usage(const char *error) { if (error) printf("invalid option: %s\n", error); - printf("timestamping interface option*\n\n" + printf("timestamping <interface> [bind_phc_index] [option]*\n\n" "Options:\n" " IP_MULTICAST_LOOP - looping outgoing multicasts\n" " SO_TIMESTAMP - normal software time stamping, ms resolution\n" @@ -58,6 +58,7 @@ static void usage(const char *error) " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n" " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" + " SOF_TIMESTAMPING_BIND_PHC - request to bind a PHC of PTP vclock\n" " SIOCGSTAMP - check last socket time stamp\n" " SIOCGSTAMPNS - more accurate socket time stamp\n" " PTPV2 - use PTPv2 messages\n"); @@ -311,7 +312,6 @@ static void recvpacket(int sock, int recvmsg_flags, int main(int argc, char **argv) { - int so_timestamping_flags = 0; int so_timestamp = 0; int so_timestampns = 0; int siocgstamp = 0; @@ -325,6 +325,8 @@ int main(int argc, char **argv) struct ifreq device; struct ifreq hwtstamp; struct hwtstamp_config hwconfig, hwconfig_requested; + struct so_timestamping so_timestamping_get = { 0, -1 }; + struct so_timestamping so_timestamping = { 0, -1 }; 
struct sockaddr_in addr; struct ip_mreq imr; struct in_addr iaddr; @@ -342,7 +344,12 @@ int main(int argc, char **argv) exit(1); } - for (i = 2; i < argc; i++) { + if (argc >= 3 && sscanf(argv[2], "%d", &so_timestamping.bind_phc) == 1) + val = 3; + else + val = 2; + + for (i = val; i < argc; i++) { if (!strcasecmp(argv[i], "SO_TIMESTAMP")) so_timestamp = 1; else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS")) @@ -356,17 +363,19 @@ int main(int argc, char **argv) else if (!strcasecmp(argv[i], "PTPV2")) ptpv2 = 1; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_TX_HARDWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_TX_SOFTWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_RX_HARDWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_RX_SOFTWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_SOFTWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_RAW_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_BIND_PHC")) + so_timestamping.flags |= SOF_TIMESTAMPING_BIND_PHC; else usage(argv[i]); } @@ -385,10 +394,10 @@ int main(int argc, char **argv) hwtstamp.ifr_data = (void *)&hwconfig; memset(&hwconfig, 0, sizeof(hwconfig)); hwconfig.tx_type = - (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ? + (so_timestamping.flags & SOF_TIMESTAMPING_TX_HARDWARE) ? 
HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; hwconfig.rx_filter = - (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ? + (so_timestamping.flags & SOF_TIMESTAMPING_RX_HARDWARE) ? ptpv2 ? HWTSTAMP_FILTER_PTP_V2_L4_SYNC : HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE; hwconfig_requested = hwconfig; @@ -413,6 +422,9 @@ int main(int argc, char **argv) sizeof(struct sockaddr_in)) < 0) bail("bind"); + if (setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, interface, if_len)) + bail("bind device"); + /* set multicast group for outgoing packets */ inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */ addr.sin_addr = iaddr; @@ -444,10 +456,9 @@ int main(int argc, char **argv) &enabled, sizeof(enabled)) < 0) bail("setsockopt SO_TIMESTAMPNS"); - if (so_timestamping_flags && - setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, - &so_timestamping_flags, - sizeof(so_timestamping_flags)) < 0) + if (so_timestamping.flags && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &so_timestamping, + sizeof(so_timestamping)) < 0) bail("setsockopt SO_TIMESTAMPING"); /* request IP_PKTINFO for debugging purposes */ @@ -468,14 +479,18 @@ int main(int argc, char **argv) else printf("SO_TIMESTAMPNS %d\n", val); - if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) { + len = sizeof(so_timestamping_get); + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &so_timestamping_get, + &len) < 0) { printf("%s: %s\n", "getsockopt SO_TIMESTAMPING", strerror(errno)); } else { - printf("SO_TIMESTAMPING %d\n", val); - if (val != so_timestamping_flags) - printf(" not the expected value %d\n", - so_timestamping_flags); + printf("SO_TIMESTAMPING flags %d, bind phc %d\n", + so_timestamping_get.flags, so_timestamping_get.bind_phc); + if (so_timestamping_get.flags != so_timestamping.flags || + so_timestamping_get.bind_phc != so_timestamping.bind_phc) + printf(" not expected, flags %d, bind phc %d\n", + so_timestamping.flags, so_timestamping.bind_phc); } /* send packets forever every five seconds */ 
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index cd6430b39982..8748199ac109 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -5,7 +5,7 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ - ipip-conntrack-mtu.sh + ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh LDLIBS = -lmnl TEST_GEN_FILES = nf-queue diff --git a/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh new file mode 100755 index 000000000000..e7d7bf13cff5 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh @@ -0,0 +1,167 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check that UNREPLIED tcp conntrack will eventually timeout. +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +waittime=20 +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup() { + ip netns pids $ns1 | xargs kill 2>/dev/null + ip netns pids $ns2 | xargs kill 2>/dev/null + + ip netns del $ns1 + ip netns del $ns2 +} + +ipv4() { + echo -n 192.168.$1.2 +} + +check_counter() +{ + ns=$1 + name=$2 + expect=$3 + local lret=0 + + cnt=$(ip netns exec $ns2 nft list counter inet filter "$name" | grep -q "$expect") + if [ $? 
-ne 0 ]; then + echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns2 nft list counter inet filter "$name" 1>&2 + lret=1 + fi + + return $lret +} + +# Create test namespaces +ip netns add $ns1 || exit 1 + +trap cleanup EXIT + +ip netns add $ns2 || exit 1 + +# Connect the namespace to the host using a veth pair +ip -net $ns1 link add name veth1 type veth peer name veth2 +ip -net $ns1 link set netns $ns2 dev veth2 + +ip -net $ns1 link set up dev lo +ip -net $ns2 link set up dev lo +ip -net $ns1 link set up dev veth1 +ip -net $ns2 link set up dev veth2 + +ip -net $ns2 addr add 10.11.11.2/24 dev veth2 +ip -net $ns2 route add default via 10.11.11.1 + +ip netns exec $ns2 sysctl -q net.ipv4.conf.veth2.forwarding=1 + +# add a rule inside NS so we enable conntrack +ip netns exec $ns1 iptables -A INPUT -m state --state established,related -j ACCEPT + +ip -net $ns1 addr add 10.11.11.1/24 dev veth1 +ip -net $ns1 route add 10.99.99.99 via 10.11.11.2 + +# Check connectivity works +ip netns exec $ns1 ping -q -c 2 10.11.11.2 >/dev/null || exit 1 + +ip netns exec $ns2 nc -l -p 8080 < /dev/null & + +# however, conntrack entries are there + +ip netns exec $ns2 nft -f - <<EOF +table inet filter { + counter connreq { } + counter redir { } + chain input { + type filter hook input priority 0; policy accept; + ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept + ct state new ct status dnat tcp dport 8080 counter name "redir" accept + } +} +EOF +if [ $? 
-ne 0 ]; then + echo "ERROR: Could not load nft rules" + exit 1 +fi + +ip netns exec $ns2 sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10 + +echo "INFO: connect $ns1 -> $ns2 to the virtual ip" +ip netns exec $ns1 bash -c 'while true ; do + nc -p 60000 10.99.99.99 80 + sleep 1 + done' & + +sleep 1 + +ip netns exec $ns2 nft -f - <<EOF +table inet nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + ip daddr 10.99.99.99 tcp dport 80 redirect to :8080 + } +} +EOF +if [ $? -ne 0 ]; then + echo "ERROR: Could not load nat redirect" + exit 1 +fi + +count=$(ip netns exec $ns2 conntrack -L -p tcp --dport 80 2>/dev/null | wc -l) +if [ $count -eq 0 ]; then + echo "ERROR: $ns2 did not pick up tcp connection from peer" + exit 1 +fi + +echo "INFO: NAT redirect added in ns $ns2, waiting for $waittime seconds for nat to take effect" +for i in $(seq 1 $waittime); do + echo -n "." + + sleep 1 + + count=$(ip netns exec $ns2 conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l) + if [ $count -gt 0 ]; then + echo + echo "PASS: redirection took effect after $i seconds" + break + fi + + m=$((i%20)) + if [ $m -eq 0 ]; then + echo " waited for $i seconds" + fi +done + +expect="packets 1 bytes 60" +check_counter "$ns2" "redir" "$expect" +if [ $? 
-ne 0 ]; then + ret=1 +fi + +if [ $ret -eq 0 ];then + echo "PASS: redirection counter has expected values" +else + echo "ERROR: no tcp connection was redirected" +fi + +exit $ret diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index d683a49d07d5..f0fd80ef17df 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -24,5 +24,6 @@ va_128TBswitch map_fixed_noreplace write_to_hugetlbfs hmm-tests +memfd_secret local_config.* split_huge_page_test diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 812bc03e3142..521243770f26 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -35,6 +35,7 @@ TEST_GEN_FILES += madv_populate TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_populate +TEST_GEN_FILES += memfd_secret TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += mlock2-tests TEST_GEN_FILES += mremap_dontunmap @@ -135,7 +136,7 @@ warn_32bit_failure: endif endif -$(OUTPUT)/mlock-random-test: LDLIBS += -lcap +$(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap $(OUTPUT)/gup_test: ../../../../mm/gup_test.h diff --git a/tools/testing/selftests/vm/memfd_secret.c b/tools/testing/selftests/vm/memfd_secret.c new file mode 100644 index 000000000000..93e7e7ffed33 --- /dev/null +++ b/tools/testing/selftests/vm/memfd_secret.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corporation, 2021 + * + * Author: Mike Rapoport <rppt@linux.ibm.com> + */ + +#define _GNU_SOURCE +#include <sys/uio.h> +#include <sys/mman.h> +#include <sys/wait.h> +#include <sys/types.h> +#include <sys/ptrace.h> +#include <sys/syscall.h> +#include <sys/resource.h> +#include <sys/capability.h> + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <stdio.h> + +#include "../kselftest.h" + +#define fail(fmt, ...) 
ksft_test_result_fail(fmt, ##__VA_ARGS__) +#define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__) +#define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__) + +#ifdef __NR_memfd_secret + +#define PATTERN 0x55 + +static const int prot = PROT_READ | PROT_WRITE; +static const int mode = MAP_SHARED; + +static unsigned long page_size; +static unsigned long mlock_limit_cur; +static unsigned long mlock_limit_max; + +static int memfd_secret(unsigned int flags) +{ + return syscall(__NR_memfd_secret, flags); +} + +static void test_file_apis(int fd) +{ + char buf[64]; + + if ((read(fd, buf, sizeof(buf)) >= 0) || + (write(fd, buf, sizeof(buf)) >= 0) || + (pread(fd, buf, sizeof(buf), 0) >= 0) || + (pwrite(fd, buf, sizeof(buf), 0) >= 0)) + fail("unexpected file IO\n"); + else + pass("file IO is blocked as expected\n"); +} + +static void test_mlock_limit(int fd) +{ + size_t len; + char *mem; + + len = mlock_limit_cur; + mem = mmap(NULL, len, prot, mode, fd, 0); + if (mem == MAP_FAILED) { + fail("unable to mmap secret memory\n"); + return; + } + munmap(mem, len); + + len = mlock_limit_max * 2; + mem = mmap(NULL, len, prot, mode, fd, 0); + if (mem != MAP_FAILED) { + fail("unexpected mlock limit violation\n"); + munmap(mem, len); + return; + } + + pass("mlock limit is respected\n"); +} + +static void try_process_vm_read(int fd, int pipefd[2]) +{ + struct iovec liov, riov; + char buf[64]; + char *mem; + + if (read(pipefd[0], &mem, sizeof(mem)) < 0) { + fail("pipe write: %s\n", strerror(errno)); + exit(KSFT_FAIL); + } + + liov.iov_len = riov.iov_len = sizeof(buf); + liov.iov_base = buf; + riov.iov_base = mem; + + if (process_vm_readv(getppid(), &liov, 1, &riov, 1, 0) < 0) { + if (errno == ENOSYS) + exit(KSFT_SKIP); + exit(KSFT_PASS); + } + + exit(KSFT_FAIL); +} + +static void try_ptrace(int fd, int pipefd[2]) +{ + pid_t ppid = getppid(); + int status; + char *mem; + long ret; + + if (read(pipefd[0], &mem, sizeof(mem)) < 0) { + perror("pipe write"); + exit(KSFT_FAIL); 
+ } + + ret = ptrace(PTRACE_ATTACH, ppid, 0, 0); + if (ret) { + perror("ptrace_attach"); + exit(KSFT_FAIL); + } + + ret = waitpid(ppid, &status, WUNTRACED); + if ((ret != ppid) || !(WIFSTOPPED(status))) { + fprintf(stderr, "weird waitppid result %ld stat %x\n", + ret, status); + exit(KSFT_FAIL); + } + + if (ptrace(PTRACE_PEEKDATA, ppid, mem, 0)) + exit(KSFT_PASS); + + exit(KSFT_FAIL); +} + +static void check_child_status(pid_t pid, const char *name) +{ + int status; + + waitpid(pid, &status, 0); + + if (WIFEXITED(status) && WEXITSTATUS(status) == KSFT_SKIP) { + skip("%s is not supported\n", name); + return; + } + + if ((WIFEXITED(status) && WEXITSTATUS(status) == KSFT_PASS) || + WIFSIGNALED(status)) { + pass("%s is blocked as expected\n", name); + return; + } + + fail("%s: unexpected memory access\n", name); +} + +static void test_remote_access(int fd, const char *name, + void (*func)(int fd, int pipefd[2])) +{ + int pipefd[2]; + pid_t pid; + char *mem; + + if (pipe(pipefd)) { + fail("pipe failed: %s\n", strerror(errno)); + return; + } + + pid = fork(); + if (pid < 0) { + fail("fork failed: %s\n", strerror(errno)); + return; + } + + if (pid == 0) { + func(fd, pipefd); + return; + } + + mem = mmap(NULL, page_size, prot, mode, fd, 0); + if (mem == MAP_FAILED) { + fail("Unable to mmap secret memory\n"); + return; + } + + ftruncate(fd, page_size); + memset(mem, PATTERN, page_size); + + if (write(pipefd[1], &mem, sizeof(mem)) < 0) { + fail("pipe write: %s\n", strerror(errno)); + return; + } + + check_child_status(pid, name); +} + +static void test_process_vm_read(int fd) +{ + test_remote_access(fd, "process_vm_read", try_process_vm_read); +} + +static void test_ptrace(int fd) +{ + test_remote_access(fd, "ptrace", try_ptrace); +} + +static int set_cap_limits(rlim_t max) +{ + struct rlimit new; + cap_t cap = cap_init(); + + new.rlim_cur = max; + new.rlim_max = max; + if (setrlimit(RLIMIT_MEMLOCK, &new)) { + perror("setrlimit() returns error"); + return -1; + } + + /* drop 
capabilities including CAP_IPC_LOCK */ + if (cap_set_proc(cap)) { + perror("cap_set_proc() returns error"); + return -2; + } + + return 0; +} + +static void prepare(void) +{ + struct rlimit rlim; + + page_size = sysconf(_SC_PAGE_SIZE); + if (!page_size) + ksft_exit_fail_msg("Failed to get page size %s\n", + strerror(errno)); + + if (getrlimit(RLIMIT_MEMLOCK, &rlim)) + ksft_exit_fail_msg("Unable to detect mlock limit: %s\n", + strerror(errno)); + + mlock_limit_cur = rlim.rlim_cur; + mlock_limit_max = rlim.rlim_max; + + printf("page_size: %ld, mlock.soft: %ld, mlock.hard: %ld\n", + page_size, mlock_limit_cur, mlock_limit_max); + + if (page_size > mlock_limit_cur) + mlock_limit_cur = page_size; + if (page_size > mlock_limit_max) + mlock_limit_max = page_size; + + if (set_cap_limits(mlock_limit_max)) + ksft_exit_fail_msg("Unable to set mlock limit: %s\n", + strerror(errno)); +} + +#define NUM_TESTS 4 + +int main(int argc, char *argv[]) +{ + int fd; + + prepare(); + + ksft_print_header(); + ksft_set_plan(NUM_TESTS); + + fd = memfd_secret(0); + if (fd < 0) { + if (errno == ENOSYS) + ksft_exit_skip("memfd_secret is not supported\n"); + else + ksft_exit_fail_msg("memfd_secret failed: %s\n", + strerror(errno)); + } + + test_mlock_limit(fd); + test_file_apis(fd); + test_process_vm_read(fd); + test_ptrace(fd); + + close(fd); + + ksft_exit(!ksft_get_fail_cnt()); +} + +#else /* __NR_memfd_secret */ + +int main(int argc, char *argv[]) +{ + printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n"); + return KSFT_SKIP; +} + +#endif /* __NR_memfd_secret */ diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c index 9c391d016922..0624d1bd71b5 100644 --- a/tools/testing/selftests/vm/mremap_test.c +++ b/tools/testing/selftests/vm/mremap_test.c @@ -45,14 +45,15 @@ enum { _4MB = 4ULL << 20, _1GB = 1ULL << 30, _2GB = 2ULL << 30, - PTE = _4KB, PMD = _2MB, PUD = _1GB, }; +#define PTE page_size + #define MAKE_TEST(source_align, 
destination_align, size, \ overlaps, should_fail, test_name) \ -{ \ +(struct test){ \ .name = test_name, \ .config = { \ .src_alignment = source_align, \ @@ -74,9 +75,10 @@ static void *get_source_mapping(struct config c) retry: addr += c.src_alignment; src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANONYMOUS | MAP_SHARED, -1, 0); + MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED, + -1, 0); if (src_addr == MAP_FAILED) { - if (errno == EPERM) + if (errno == EPERM || errno == EEXIST) goto retry; goto error; } @@ -252,12 +254,17 @@ static int parse_args(int argc, char **argv, unsigned int *threshold_mb, return 0; } +#define MAX_TEST 13 +#define MAX_PERF_TEST 3 int main(int argc, char **argv) { int failures = 0; int i, run_perf_tests; unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD; unsigned int pattern_seed; + struct test test_cases[MAX_TEST]; + struct test perf_test_cases[MAX_PERF_TEST]; + int page_size; time_t t; pattern_seed = (unsigned int) time(&t); @@ -268,56 +275,59 @@ int main(int argc, char **argv) ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n", threshold_mb, pattern_seed); - struct test test_cases[] = { - /* Expected mremap failures */ - MAKE_TEST(_4KB, _4KB, _4KB, OVERLAPPING, EXPECT_FAILURE, - "mremap - Source and Destination Regions Overlapping"), - MAKE_TEST(_4KB, _1KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE, - "mremap - Destination Address Misaligned (1KB-aligned)"), - MAKE_TEST(_1KB, _4KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE, - "mremap - Source Address Misaligned (1KB-aligned)"), - - /* Src addr PTE aligned */ - MAKE_TEST(PTE, PTE, _8KB, NON_OVERLAPPING, EXPECT_SUCCESS, - "8KB mremap - Source PTE-aligned, Destination PTE-aligned"), - - /* Src addr 1MB aligned */ - MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS, - "2MB mremap - Source 1MB-aligned, Destination PTE-aligned"), - MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS, - "2MB mremap - Source 
1MB-aligned, Destination 1MB-aligned"), - - /* Src addr PMD aligned */ - MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS, - "4MB mremap - Source PMD-aligned, Destination PTE-aligned"), - MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS, - "4MB mremap - Source PMD-aligned, Destination 1MB-aligned"), - MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS, - "4MB mremap - Source PMD-aligned, Destination PMD-aligned"), - - /* Src addr PUD aligned */ - MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS, - "2GB mremap - Source PUD-aligned, Destination PTE-aligned"), - MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS, - "2GB mremap - Source PUD-aligned, Destination 1MB-aligned"), - MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS, - "2GB mremap - Source PUD-aligned, Destination PMD-aligned"), - MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS, - "2GB mremap - Source PUD-aligned, Destination PUD-aligned"), - }; - - struct test perf_test_cases[] = { - /* - * mremap 1GB region - Page table level aligned time - * comparison. 
- */ - MAKE_TEST(PTE, PTE, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS, - "1GB mremap - Source PTE-aligned, Destination PTE-aligned"), - MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS, - "1GB mremap - Source PMD-aligned, Destination PMD-aligned"), - MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS, - "1GB mremap - Source PUD-aligned, Destination PUD-aligned"), - }; + page_size = sysconf(_SC_PAGESIZE); + + /* Expected mremap failures */ + test_cases[0] = MAKE_TEST(page_size, page_size, page_size, + OVERLAPPING, EXPECT_FAILURE, + "mremap - Source and Destination Regions Overlapping"); + + test_cases[1] = MAKE_TEST(page_size, page_size/4, page_size, + NON_OVERLAPPING, EXPECT_FAILURE, + "mremap - Destination Address Misaligned (1KB-aligned)"); + test_cases[2] = MAKE_TEST(page_size/4, page_size, page_size, + NON_OVERLAPPING, EXPECT_FAILURE, + "mremap - Source Address Misaligned (1KB-aligned)"); + + /* Src addr PTE aligned */ + test_cases[3] = MAKE_TEST(PTE, PTE, PTE * 2, + NON_OVERLAPPING, EXPECT_SUCCESS, + "8KB mremap - Source PTE-aligned, Destination PTE-aligned"); + + /* Src addr 1MB aligned */ + test_cases[4] = MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS, + "2MB mremap - Source 1MB-aligned, Destination PTE-aligned"); + test_cases[5] = MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS, + "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned"); + + /* Src addr PMD aligned */ + test_cases[6] = MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS, + "4MB mremap - Source PMD-aligned, Destination PTE-aligned"); + test_cases[7] = MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS, + "4MB mremap - Source PMD-aligned, Destination 1MB-aligned"); + test_cases[8] = MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS, + "4MB mremap - Source PMD-aligned, Destination PMD-aligned"); + + /* Src addr PUD aligned */ + test_cases[9] = MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS, + "2GB mremap - Source 
PUD-aligned, Destination PTE-aligned"); + test_cases[10] = MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS, + "2GB mremap - Source PUD-aligned, Destination 1MB-aligned"); + test_cases[11] = MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS, + "2GB mremap - Source PUD-aligned, Destination PMD-aligned"); + test_cases[12] = MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS, + "2GB mremap - Source PUD-aligned, Destination PUD-aligned"); + + perf_test_cases[0] = MAKE_TEST(page_size, page_size, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS, + "1GB mremap - Source PTE-aligned, Destination PTE-aligned"); + /* + * mremap 1GB region - Page table level aligned time + * comparison. + */ + perf_test_cases[1] = MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS, + "1GB mremap - Source PMD-aligned, Destination PMD-aligned"); + perf_test_cases[2] = MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS, + "1GB mremap - Source PUD-aligned, Destination PUD-aligned"); run_perf_tests = (threshold_mb == VALIDATION_NO_THRESHOLD) || (threshold_mb * _1MB >= _1GB); diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 955782d138ab..d09a6b71f1e9 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -362,4 +362,21 @@ else exitcode=1 fi +echo "running memfd_secret test" +echo "------------------------------------" +./memfd_secret +ret_val=$? 
+ +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + +exit $exitcode + exit $exitcode diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index e363bdaff59d..2ea438e6b8b1 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -210,8 +210,10 @@ static void anon_release_pages(char *rel_area) static void anon_allocate_area(void **alloc_area) { - if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) - err("posix_memalign() failed"); + *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (*alloc_area == MAP_FAILED) + err("mmap of anonymous memory failed"); } static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) |